func validateQuery(query string) error { // Parse the query and see whether the resulting trigram query is // non-empty. This is to catch queries like “package:debian”. fakeUrl, err := url.Parse(query) if err != nil { return err } rewritten := search.RewriteQuery(*fakeUrl) log.Printf("rewritten query = %q\n", rewritten.String()) re, err := dcsregexp.Compile(rewritten.Query().Get("q")) if err != nil { return err } indexQuery := index.RegexpQuery(re.Syntax) log.Printf("trigram = %v, sub = %v", indexQuery.Trigram, indexQuery.Sub) if len(indexQuery.Trigram) == 0 && len(indexQuery.Sub) == 0 { return fmt.Errorf("Empty index query") } return nil }
func maybeStartQuery(queryid, src, query string) bool { stateMu.Lock() defer stateMu.Unlock() querystate, running := state[queryid] // XXX: Starting a new query while there may still be clients reading that // query is not a great idea. Best fix may be to make getEvent() use a // querystate instead of the string identifier. if !running || time.Since(querystate.started) > 30*time.Minute { // See if we can garbage-collect old queries. if !running && len(state) >= 10 { log.Printf("Trying to garbage collect queries (currently %d)\n", len(state)) for queryid, s := range state { if len(state) < 10 { break } if !s.done { continue } for _, state := range s.perBackend { state.tempFile.Close() } delete(state, queryid) } log.Printf("Garbage collection done. %d queries remaining", len(state)) } backends := strings.Split(*common.SourceBackends, ",") state[queryid] = queryState{ started: time.Now(), query: query, newEvent: sync.NewCond(&sync.Mutex{}), filesTotal: make([]int, len(backends)), filesProcessed: make([]int, len(backends)), filesMu: &sync.Mutex{}, perBackend: make([]*perBackendState, len(backends)), tempFilesMu: &sync.Mutex{}, } activeQueries.Add(1) var err error dir := filepath.Join(*queryResultsPath, queryid) if err := os.MkdirAll(dir, os.FileMode(0755)); err != nil { log.Printf("[%s] could not create %q: %v\n", queryid, dir, err) failQuery(queryid) return false } // TODO: it’d be so much better if we would correctly handle ESPACE errors // in the code below (and above), but for that we need to carefully test it. ensureEnoughSpaceAvailable() for i := 0; i < len(backends); i++ { state[queryid].filesTotal[i] = -1 path := filepath.Join(dir, fmt.Sprintf("unsorted_%d.capnproto", i)) f, err := os.Create(path) if err != nil { log.Printf("[%s] could not create %q: %v\n", queryid, path, err) failQuery(queryid) return false } state[queryid].perBackend[i] = &perBackendState{ packagePool: stringpool.NewStringPool(), tempFile: f, tempFileWriter: bufio.NewWriterSize(f, 65536), allPackages: make(map[string]bool), } } log.Printf("initial results = %v\n", state[queryid]) // Rewrite the query into a query for source backends. fakeUrl, err := url.Parse("?" + query) if err != nil { log.Fatal(err) } rewritten := search.RewriteQuery(*fakeUrl) type streamingRequest struct { Query string URL string } request := streamingRequest{ Query: rewritten.Query().Get("q"), URL: rewritten.String(), } log.Printf("[%s] querying for %q\n", queryid, request.Query) sourceQuery, err := json.Marshal(&request) if err != nil { log.Fatal(err) } for idx, backend := range backends { go queryBackend(queryid, backend, idx, sourceQuery) } return false } return true }
// maybeStartQuery starts a specified query if that query does not already // exist. Returns whether the query existed and any errors during query // creation. func maybeStartQuery(queryid, src, query string) (bool, error) { if queryExists(queryid) { return true, nil } querystate := queryState{ started: time.Now(), query: query, newEvent: sync.NewCond(&sync.Mutex{}), filesTotal: make([]int, len(common.SourceBackendStubs)), filesProcessed: make([]int, len(common.SourceBackendStubs)), filesMu: &sync.Mutex{}, perBackend: make([]*perBackendState, len(common.SourceBackendStubs)), tempFilesMu: &sync.Mutex{}, } dir := filepath.Join(*queryResultsPath, queryid) if err := os.MkdirAll(dir, os.FileMode(0755)); err != nil { return false, fmt.Errorf("could not create %q: %v", dir, err) } // TODO: it’d be so much better if we would correctly handle ESPACE errors // in the code below (and above), but for that we need to carefully test it. ensureEnoughSpaceAvailable() for i := 0; i < len(common.SourceBackendStubs); i++ { querystate.filesTotal[i] = -1 path := filepath.Join(dir, fmt.Sprintf("unsorted_%d.pb", i)) f, err := os.Create(path) if err != nil { return false, fmt.Errorf("could not create %q: %v", path, err) } querystate.perBackend[i] = &perBackendState{ packagePool: stringpool.NewStringPool(), tempFile: f, tempFileWriter: bufio.NewWriterSize(f, 65536), allPackages: make(map[string]bool), } } log.Printf("querystate = %v\n", querystate) // Rewrite the query into a query for source backends. fakeUrl, err := url.Parse("?" + query) if err != nil { log.Fatal(err) } rewritten := search.RewriteQuery(*fakeUrl) searchRequest := &pb.SearchRequest{ Query: rewritten.Query().Get("q"), RewrittenUrl: rewritten.String(), } log.Printf("[%s] querying for %+v\n", queryid, searchRequest) if err := startQuery(queryid, querystate); err != nil { // Another goroutine must have raced us since we called queryExists(). return true, nil } for idx, backend := range common.SourceBackendStubs { go queryBackend(queryid, src, backend, idx, searchRequest) } return false, nil }