Exemple #1
0
// FullOriginal returns the location of the post's original file.
//
// When HasOriginal reports true, the result is FullFile() and Original
// joined with "/" and then path-escaped via url.URL.EscapedPath. Otherwise
// the plain FullFile() value is returned as is.
//
// NOTE(review): only the first branch is escaped; confirm callers expect the
// unescaped FullFile() in the fallback case.
func (p *postInfo) FullOriginal() string {
	if !p.HasOriginal() {
		return p.FullFile()
	}
	joined := &url.URL{Path: p.FullFile() + "/" + p.Original}
	return joined.EscapedPath()
}
Exemple #2
0
// match reports whether the escaped path of URL matches c.pattern, comparing
// the two "/"-separated segment lists position by position.
//
// Segment rules:
//   - an empty pattern segment only matches an empty path segment;
//   - a pattern segment starting with ':' matches anything and records the
//     path segment as a query parameter named after the rest of the segment;
//   - a pattern segment starting with '*' matches anything;
//   - any other pattern segment must equal the path segment exactly.
//
// The path may not be shorter than the pattern, and may only be longer when
// the last pattern segment is exactly "*".
//
// On success URL.RawQuery is overwritten with the re-encoded query, which
// includes the captured parameters. On failure URL is left untouched because
// captures are collected in a copy of the query values.
func (c *chain) match(URL *url.URL) bool {
	segments := strings.Split(URL.EscapedPath(), "/")
	count := len(segments)
	params := URL.Query()

	switch {
	case c.lenPattern > count:
		return false
	case c.pattern[c.lenPattern-1] != "*" && c.lenPattern < count:
		return false
	}

	for i, want := range c.pattern {
		switch {
		case want == "":
			if segments[i] != "" {
				return false
			}
		case want[0] == ':':
			params.Add(want[1:], segments[i])
		case want[0] != '*' && want != segments[i]:
			return false
		}
	}

	URL.RawQuery = params.Encode()
	return true
}
Exemple #3
0
// Complete flags the url row identified by u (scheme, host, escaped path and
// encoded query) as done and increments finish_count in the count table.
//
// Both statements run in one transaction. It is committed when both succeed,
// in which case the commit error, if any, is returned. It is rolled back
// when either statement fails, in which case that statement's error is
// returned.
func (s *SQLStore) Complete(u *url.URL) (err error) {
	tx, err := s.DB.Beginx()
	if err != nil {
		return err
	}
	defer func() {
		if err == nil {
			err = tx.Commit()
			return
		}
		tx.Rollback() // TODO: handle error
	}()

	_, err = tx.Exec(`
	UPDATE url SET done = TRUE
	WHERE scheme = $1 AND host = $2 AND path = $3 AND query = $4`,
		u.Scheme,
		u.Host,
		u.EscapedPath(),
		u.Query().Encode(),
	)
	if err != nil {
		return err
	}

	_, err = tx.Exec(`UPDATE count SET finish_count = finish_count + 1`)
	return err
}
Exemple #4
0
// UpdateFunc loads the stored row for u, lets f modify it, and writes the
// result back, all inside a single transaction.
//
// The row is looked up by scheme, host, escaped path and encoded query and
// scanned into a wrapper. The wrapper is converted with ToURL, passed to f,
// and copied back with fromURL before the UPDATE is issued. The transaction
// is committed when every step succeeds and rolled back otherwise. If the
// lookup fails, its error is returned and f is not called.
func (s *SQLStore) UpdateFunc(u *url.URL, f func(*crawler.URL)) (err error) {
	tx, err := s.DB.Beginx()
	if err != nil {
		return err
	}
	defer func() {
		if err != nil {
			// The caller needs the original failure; a rollback error
			// would only mask it, so it is deliberately dropped.
			_ = tx.Rollback()
			return
		}
		err = tx.Commit()
	}()

	var w wrapper
	if err = tx.QueryRowx(
		`SELECT * FROM url
	    WHERE scheme = $1 AND host = $2 AND path = $3 AND query = $4`,
		u.Scheme, u.Host, u.EscapedPath(), u.Query().Encode(),
	).StructScan(&w); err != nil {
		return err
	}

	uu := w.ToURL()
	f(uu)
	w.fromURL(uu)

	// The write must go through tx, not s.DB. Issuing it on the pool would
	// run it outside the transaction that read the row: it would not be
	// undone by the rollback above and could block against the open tx.
	_, err = tx.NamedExec(`
	UPDATE url SET num_error = :num_error, num_visit = :num_visit, last = :last, status = :status
	WHERE scheme = :scheme AND host = :host AND path = :path AND query = :query`, w)
	return err
}
Exemple #5
0
// GetDepth reads the depth column of the url row identified by u's scheme,
// host, escaped path and encoded query. The returned error is whatever
// Scan reports for that single-row query.
func (s *SQLStore) GetDepth(u *url.URL) (depth int, err error) {
	row := s.DB.QueryRow(
		`SELECT depth FROM url
    	WHERE scheme = $1 AND host = $2 AND path = $3 AND query = $4`,
		u.Scheme, u.Host, u.EscapedPath(), u.Query().Encode(),
	)
	err = row.Scan(&depth)
	return depth, err
}
Exemple #6
0
// Match reports whether u passes every filter of the pattern.
//
// Four parts of u are checked with matchString, in this order, stopping at
// the first failure: the full URL string against Reject/Accept, the host
// against ExcludeHost/Host, the directory part of the escaped path against
// ExcludeDir/Dir, and the file part against ExcludeFile/File. Directory and
// file are obtained with path.Split.
func (p *pattern) Match(u *url.URL) bool {
	full, host := u.String(), u.Host
	dir, file := path.Split(u.EscapedPath())

	if !matchString(full, p.Reject, p.Accept) {
		return false
	}
	if !matchString(host, p.ExcludeHost, p.Host) {
		return false
	}
	if !matchString(dir, p.ExcludeDir, p.Dir) {
		return false
	}
	return matchString(file, p.ExcludeFile, p.File)
}
// deisRemoteAppRe captures the application name from a remote path of the
// form "/<app>.git". The dot before "git" is escaped so that only a literal
// ".git" suffix is accepted. It is compiled once instead of on every call.
var deisRemoteAppRe = regexp.MustCompile(`^/([a-zA-Z0-9-_.]+)\.git`)

// appFromDeisRemote extracts the application name from the escaped path of
// a git remote URL. The path must start with "/<app>.git", where <app>
// consists of letters, digits, '-', '_' and '.'. ErrRemoteNotApp is returned
// when the path does not have that shape.
func appFromDeisRemote(remote *url.URL) (string, error) {
	matches := deisRemoteAppRe.FindStringSubmatch(remote.EscapedPath())
	if len(matches) == 0 {
		return "", ErrRemoteNotApp
	}

	return matches[1], nil
}
// TestExpand runs Expand over every entry of expandTests and compares the
// escaped path of the resulting URL with the expected value. Failures are
// reported with the 1-based index of the table entry.
func TestExpand(t *testing.T) {
	for idx, tc := range expandTests {
		target := url.URL{Path: tc.in}
		Expand(&target, tc.expansions)
		if got := target.EscapedPath(); got != tc.want {
			t.Errorf("got %q expected %q in test %d", got, tc.want, idx+1)
		}
	}
}
Exemple #9
0
// Has reports whether u, or enough URLs similar to it, are already stored
// in the trie.
//
// The escaped path is walked segment by segment, followed by the sorted
// query parameters; depth is incremented for each of them. Has returns true
// when the whole URL is found. It also returns true when the walk reaches a
// path segment or query value that is missing, threshold is non-nil, and the
// number of entries already stored at that level is at least
// threshold(depth). A missing query key, or a missing level altogether,
// yields false without consulting threshold.
func (t *Trie) Has(u *url.URL, threshold func(depth int) int) bool {
	// saturated tells whether a level holding n entries has reached the
	// limit for the given depth; a nil threshold never saturates.
	saturated := func(n, depth int) bool {
		return threshold != nil && n >= threshold(depth)
	}

	depth := 0
	node := &t.root
	// The limit is only checked when the segment is absent. With
	// github.com/{user} at the limit, an already stored user must still be
	// followed so github.com/someone-stored/{repo} can be judged on its own.
	for _, seg := range strings.Split(u.EscapedPath(), "/")[1:] {
		depth++
		if node == nil || node.child == nil {
			return false
		}
		next, found := node.child[seg]
		if !found {
			return saturated(len(node.child), depth)
		}
		node = next
	}

	params := sorted(u.Query())
	if len(params) == 0 {
		return true
	}
	if node == nil {
		return false
	}

	// Wrap the path node's query map in a temporary QueryNode so the first
	// pair is handled exactly like the following ones.
	cur := &QueryNode{next: node.query}
	for _, kv := range params {
		depth++
		if cur == nil || cur.next == nil {
			return false
		}
		values := cur.next[kv.k]
		if values == nil {
			return false
		}
		next, found := values[kv.v]
		if !found {
			return saturated(len(values), depth)
		}
		cur = next
	}

	// Every segment and every query pair was found.
	return true
}
Exemple #10
0
// GetFunc looks up the url row identified by u's scheme, host, escaped path
// and encoded query, scans it into a wrapper and calls f with the value
// produced by the wrapper's ToURL method. If the scan fails, its error is
// returned and f is not called.
func (s *SQLStore) GetFunc(u *url.URL, f func(*crawler.URL)) error {
	row := s.DB.QueryRowx(
		`SELECT * FROM url
	    WHERE scheme = $1 AND host = $2 AND path = $3 AND query = $4`,
		u.Scheme, u.Host, u.EscapedPath(), u.Query().Encode(),
	)

	var w wrapper
	if err := row.StructScan(&w); err != nil {
		return err
	}
	f(w.ToURL())
	return nil
}
Exemple #11
0
// getURIPath returns the path component of u as it should appear in a
// request line, never empty.
//
// When u.Opaque is set it is treated as "//host/path": everything after the
// third "/"-separated field is returned with a leading "/". An Opaque value
// with fewer fields has no path part and yields "/" instead of panicking on
// an out-of-range slice. Without Opaque the escaped path is used. An empty
// result is replaced by "/".
func getURIPath(u *url.URL) string {
	var uri string

	if len(u.Opaque) > 0 {
		// SplitN keeps the remainder after the third separator intact, so
		// parts[3] equals the original "join everything from index 3 on".
		if parts := strings.SplitN(u.Opaque, "/", 4); len(parts) == 4 {
			uri = "/" + parts[3]
		}
	} else {
		uri = u.EscapedPath()
	}

	if len(uri) == 0 {
		uri = "/"
	}

	return uri
}
Exemple #12
0
// genPath maps u to a local file path below d.Dir and u.Host.
//
// Starting from the escaped URL path: a path ending in "/" gets "index.html"
// appended, and a path whose last element has no extension gets
// "/index.html" appended. If u.RawQuery is non-empty, "?" plus the
// re-encoded query is added. The result is cleaned with path.Clean,
// converted to the OS separator and joined onto d.Dir and u.Host.
func (d *Downloader) genPath(u *url.URL) string {
	rel := u.EscapedPath()
	switch {
	case strings.HasSuffix(rel, "/"):
		rel += "index.html"
	case path.Ext(rel) == "":
		rel += "/index.html"
	}

	if u.RawQuery != "" {
		rel = rel + "?" + u.Query().Encode()
	}

	local := filepath.FromSlash(path.Clean(rel))
	return filepath.Join(d.Dir, u.Host, local)
}
Exemple #13
0
// MatchPart checks a single part of u against the corresponding filters of
// the pattern and reports the result of matchString for that part:
//
//   - PartURL:  u.String() against Reject/Accept
//   - PartHost: u.Host against ExcludeHost/Host
//   - PartDir:  directory of the escaped path against ExcludeDir/Dir
//   - PartFile: file of the escaped path against ExcludeFile/File
//
// Any other value of part yields false.
func (p *pattern) MatchPart(u *url.URL, part int) bool {
	full, host := u.String(), u.Host
	dir, file := path.Split(u.EscapedPath())

	switch part {
	case PartURL:
		return matchString(full, p.Reject, p.Accept)
	case PartHost:
		return matchString(host, p.ExcludeHost, p.Host)
	case PartDir:
		return matchString(dir, p.ExcludeDir, p.Dir)
	case PartFile:
		return matchString(file, p.ExcludeFile, p.File)
	default:
		return false
	}
}
Exemple #14
0
// SBSgetServersByDatacenter returns the list of servers associated with the
// data center dc. It issues a GET through getSBSResponse for the path-escaped
// form of "datacenters/<dc>/servers" and has the response decoded into res.
func (c *Client) SBSgetServersByDatacenter(dc string) (res []string, err error) {
	endpoint := &url.URL{Path: fmt.Sprintf("datacenters/%s/servers", dc)}
	err = c.getSBSResponse("GET", endpoint.EscapedPath(), nil, &res)
	return res, err
}
Exemple #15
0
// escapeForUrl path-escapes q using url.URL.EscapedPath and additionally
// replaces every "/" with "%2f", so the result can be used as a single path
// segment.
func escapeForUrl(q string) string {
	escaped := (&url.URL{Path: q}).EscapedPath()
	return strings.ReplaceAll(escaped, "/", "%2f")
}
Exemple #16
0
// Add inserts u into the trie and reports whether it was accepted.
//
// The escaped path is split on "/" and walked one segment at a time (the
// element before the first "/" is skipped). The sorted query parameters are
// then walked as key/value pairs below the final path node. depth starts at
// 0 for the root and is incremented for every path segment and every query
// pair.
//
// If threshold is non-nil, a path segment, query key or query value that is
// not yet present is only inserted while the number of entries already
// stored at that level is below threshold(depth). Otherwise Add stops and
// returns false. Entries created earlier in the same call are not removed
// when that happens. A nil threshold disables the limit.
func (t *Trie) Add(u *url.URL, threshold func(depth int) int) bool {
	// m is the child map that pnode was looked up in, and prev the key used
	// for that lookup. Together they allow a nil slot to be filled in later.
	var (
		depth    = 0
		pnode    = &t.root
		segments = strings.Split(u.EscapedPath(), "/")
		m        map[string]*PathNode
		prev     string
		ok       bool
	)
	for _, seg := range segments[1:] {
		depth++
		if pnode == nil {
			// The previous step only reserved a nil slot for this node.
			// Allocate it now that a deeper level has to hang off it.
			pnode = newPathNode()
			m[prev] = pnode
		}
		// Child maps are created on first use.
		if m = pnode.child; m == nil {
			m = make(map[string]*PathNode, 1)
			pnode.child = m
		}
		if pnode, ok = m[seg]; !ok {
			// New segment: refuse it once this level is full.
			if threshold != nil && len(m) >= threshold(depth) {
				return false
			}
			// Reserve the key; the PathNode itself is allocated lazily.
			m[seg] = nil
		}
		prev = seg
	}

	// sorted is defined elsewhere; its elements expose the parameter name as
	// k and the value as v.
	query := sorted(u.Query())
	if len(query) == 0 {
		// No query parameters: the path alone is the entry.
		return true
	} else if pnode == nil {
		// The last segment is still a reserved nil slot; materialize it so
		// the query map can be attached.
		pnode = newPathNode()
		m[prev] = pnode
	} // DON'T use 'else if'!
	// The check below must also run for a node that was just created above,
	// which is why it is a separate if statement.
	if pnode.query == nil {
		pnode.query = make(map[string]map[string]*QueryNode, 1)
	}

	// qnode starts as a temporary node whose next map is the path node's
	// query map, so the first pair is handled like all following ones.
	var (
		primary   = pnode.query
		qnode     = &QueryNode{next: primary}
		secondary map[string]*QueryNode
	)
	for _, kv := range query {
		depth++
		if qnode == nil {
			// Same lazy allocation as for path nodes: prev holds the value
			// of the previous pair and secondary the map it was stored in.
			qnode = newQueryNode()
			secondary[prev] = qnode
		}
		if primary = qnode.next; primary == nil {
			primary = make(map[string]map[string]*QueryNode, 1)
			qnode.next = primary
		}
		if secondary = primary[kv.k]; secondary == nil {
			// New parameter name: limited by the number of names at this
			// level.
			if threshold != nil && len(primary) >= threshold(depth) {
				return false
			}
			secondary = make(map[string]*QueryNode, 1)
			primary[kv.k] = secondary
		}
		if qnode, ok = secondary[kv.v]; !ok {
			// New value for this name: limited by the number of values
			// already stored for it.
			if threshold != nil && len(secondary) >= threshold(depth) {
				return false
			}
			// Reserve the value; its QueryNode is allocated lazily.
			secondary[kv.v] = nil
		}
		prev = kv.v
	}
	return true
}