func (p *postInfo) FullOriginal() string { if p.HasOriginal() { var u = url.URL{Path: p.FullFile() + "/" + p.Original} return u.EscapedPath() } return p.FullFile() }
func (c *chain) match(URL *url.URL) bool { path := strings.Split(URL.EscapedPath(), "/") lenPath := len(path) query := URL.Query() if c.lenPattern > lenPath { return false } if c.pattern[c.lenPattern-1] != "*" && c.lenPattern < lenPath { return false } for key, value := range c.pattern { if len(value) == 0 { if len(path[key]) == 0 { continue } return false } if value[0] == ':' { query.Add(value[1:], path[key]) continue } if value[0] != '*' && value != path[key] { return false } } URL.RawQuery = query.Encode() return true }
func (s *SQLStore) Complete(u *url.URL) (err error) { tx, err := s.DB.Beginx() if err != nil { return } defer func() { if err != nil { tx.Rollback() // TODO: handle error } else { err = tx.Commit() } }() if _, err = tx.Exec(` UPDATE url SET done = TRUE WHERE scheme = $1 AND host = $2 AND path = $3 AND query = $4`, u.Scheme, u.Host, u.EscapedPath(), u.Query().Encode(), ); err != nil { return } _, err = tx.Exec(`UPDATE count SET finish_count = finish_count + 1`) return }
func (s *SQLStore) UpdateFunc(u *url.URL, f func(*crawler.URL)) (err error) { tx, err := s.DB.Beginx() if err != nil { return err } defer func() { if err != nil { tx.Rollback() } else { err = tx.Commit() } }() var w wrapper if err = tx.QueryRowx( `SELECT * FROM url WHERE scheme = $1 AND host = $2 AND path = $3 AND query = $4`, u.Scheme, u.Host, u.EscapedPath(), u.Query().Encode(), ).StructScan(&w); err != nil { return } uu := w.ToURL() f(uu) w.fromURL(uu) _, err = s.DB.NamedExec(` UPDATE url SET num_error = :num_error, num_visit = :num_visit, last = :last, status = :status WHERE scheme = :scheme AND host = :host AND path = :path AND query = :query`, w) return }
func (s *SQLStore) GetDepth(u *url.URL) (depth int, err error) { err = s.DB.QueryRow( `SELECT depth FROM url WHERE scheme = $1 AND host = $2 AND path = $3 AND query = $4`, u.Scheme, u.Host, u.EscapedPath(), u.Query().Encode(), ).Scan(&depth) return }
func (p *pattern) Match(u *url.URL) bool { us, uh, up := u.String(), u.Host, u.EscapedPath() dir, file := path.Split(up) f := matchString return f(us, p.Reject, p.Accept) && f(uh, p.ExcludeHost, p.Host) && f(dir, p.ExcludeDir, p.Dir) && f(file, p.ExcludeFile, p.File) }
func appFromDeisRemote(remote *url.URL) (string, error) { re := regexp.MustCompile("^/([a-zA-Z0-9-_.]+).git") matches := re.FindStringSubmatch(remote.EscapedPath()) if len(matches) == 0 { return "", ErrRemoteNotApp } return string(matches[1]), nil }
func TestExpand(t *testing.T) { for i, test := range expandTests { u := url.URL{ Path: test.in, } Expand(&u, test.expansions) got := u.EscapedPath() if got != test.want { t.Errorf("got %q expected %q in test %d", got, test.want, i+1) } } }
// Has searches the trie to check whether there are similar URLs. It will // return true either the number of children of some node on the lookup // path is greater than or equal to the threshold, or an exact match is // found. func (t *Trie) Has(u *url.URL, threshold func(depth int) int) bool { depth := 0 pnode := &t.root segments := strings.Split(u.EscapedPath(), "/") // Consider github.com/{user}. If the number of users is equal to // threshold, github.com/someone-stored/{repo} should still be enabled. for _, seg := range segments[1:] { depth++ if pnode == nil || pnode.child == nil { return false } child, ok := pnode.child[seg] if !ok { if threshold != nil && len(pnode.child) >= threshold(depth) { return true } return false } pnode = child } query := sorted(u.Query()) if len(query) == 0 { return true } else if pnode == nil { return false } primary := pnode.query qnode := &QueryNode{next: primary} for _, kv := range query { depth++ if qnode == nil { return false } else if primary = qnode.next; primary == nil { return false } secondary := primary[kv.k] if secondary == nil { return false } var ok bool qnode, ok = secondary[kv.v] if !ok { if threshold != nil && len(secondary) >= threshold(depth) { return true } return false } } // Totally match return true }
func (s *SQLStore) GetFunc(u *url.URL, f func(*crawler.URL)) error { var w wrapper if err := s.DB.QueryRowx( `SELECT * FROM url WHERE scheme = $1 AND host = $2 AND path = $3 AND query = $4`, u.Scheme, u.Host, u.EscapedPath(), u.Query().Encode(), ).StructScan(&w); err != nil { return err } f(w.ToURL()) return nil }
// getURIPath returns the path component of u, never empty.
//
// When u.Opaque is set, the path is everything after the third "/" of the
// opaque value, prefixed with "/" (for an opaque value shaped like
// "//host/a/b" that is "/a/b"). An opaque value with fewer than three "/"
// separators has no such remainder and yields "/". Otherwise the path is
// u.EscapedPath(). An empty result is replaced by "/".
func getURIPath(u *url.URL) string {
	var uri string
	if len(u.Opaque) > 0 {
		// SplitN with a limit of 4 leaves the unsplit remainder after the
		// third "/" in parts[3]; checking the length avoids the out-of-range
		// slice that a short opaque value would otherwise cause.
		if parts := strings.SplitN(u.Opaque, "/", 4); len(parts) == 4 {
			uri = "/" + parts[3]
		}
	} else {
		uri = u.EscapedPath()
	}

	if len(uri) == 0 {
		uri = "/"
	}
	return uri
}
func (d *Downloader) genPath(u *url.URL) string { pth := u.EscapedPath() if strings.HasSuffix(pth, "/") { pth += "index.html" } else if path.Ext(pth) == "" { pth += "/index.html" } if u.RawQuery != "" { pth += "?" + u.Query().Encode() } return filepath.Join( d.Dir, u.Host, filepath.FromSlash(path.Clean(pth)), ) }
func (p *pattern) MatchPart(u *url.URL, part int) bool { us, uh, up := u.String(), u.Host, u.EscapedPath() dir, file := path.Split(up) f := matchString switch part { case PartURL: return f(us, p.Reject, p.Accept) case PartHost: return f(uh, p.ExcludeHost, p.Host) case PartDir: return f(dir, p.ExcludeDir, p.Dir) case PartFile: return f(file, p.ExcludeFile, p.File) } return false }
// SBSgetServersByDatacenter returns a list of servers associated with data center @dc. func (c *Client) SBSgetServersByDatacenter(dc string) (res []string, err error) { var u = url.URL{Path: fmt.Sprintf("datacenters/%s/servers", dc)} err = c.getSBSResponse("GET", u.EscapedPath(), nil, &res) return }
// escapeForUrl percent-encodes q so it can be used as a single URL path
// segment: q is escaped with the rules for URL paths, and every "/" that
// path escaping leaves intact is then replaced by "%2f".
func escapeForUrl(q string) string {
	escaped := (&url.URL{Path: q}).EscapedPath()
	return strings.ReplaceAll(escaped, "/", "%2f")
}
// Add adds a URL to the trie. It will cancel and return false if the // number of children of some node on the path exceeds the threshold // computed using the depth of the node. The depth of root node is 0. func (t *Trie) Add(u *url.URL, threshold func(depth int) int) bool { var ( depth = 0 pnode = &t.root segments = strings.Split(u.EscapedPath(), "/") m map[string]*PathNode prev string ok bool ) for _, seg := range segments[1:] { depth++ if pnode == nil { pnode = newPathNode() m[prev] = pnode } if m = pnode.child; m == nil { m = make(map[string]*PathNode, 1) pnode.child = m } if pnode, ok = m[seg]; !ok { if threshold != nil && len(m) >= threshold(depth) { return false } m[seg] = nil } prev = seg } query := sorted(u.Query()) if len(query) == 0 { return true } else if pnode == nil { pnode = newPathNode() m[prev] = pnode } // DON'T use 'else if'! if pnode.query == nil { pnode.query = make(map[string]map[string]*QueryNode, 1) } var ( primary = pnode.query qnode = &QueryNode{next: primary} secondary map[string]*QueryNode ) for _, kv := range query { depth++ if qnode == nil { qnode = newQueryNode() secondary[prev] = qnode } if primary = qnode.next; primary == nil { primary = make(map[string]map[string]*QueryNode, 1) qnode.next = primary } if secondary = primary[kv.k]; secondary == nil { if threshold != nil && len(primary) >= threshold(depth) { return false } secondary = make(map[string]*QueryNode, 1) primary[kv.k] = secondary } if qnode, ok = secondary[kv.v]; !ok { if threshold != nil && len(secondary) >= threshold(depth) { return false } secondary[kv.v] = nil } prev = kv.v } return true }