Esempio n. 1
0
// validateQuery checks that query yields a usable index query. It parses
// query as a URL, rewrites it via search.RewriteQuery, compiles the "q"
// parameter of the rewritten URL as a regular expression and derives the
// index query from it.
//
// It returns the parse or compile error if either step fails, and an error
// if the derived index query has neither trigrams nor sub-queries (this
// catches queries like “package:debian”). It returns nil otherwise.
func validateQuery(query string) error {
	parsed, parseErr := url.Parse(query)
	if parseErr != nil {
		return parseErr
	}

	rewrittenURL := search.RewriteQuery(*parsed)
	log.Printf("rewritten query = %q\n", rewrittenURL.String())

	pattern := rewrittenURL.Query().Get("q")
	compiled, compileErr := dcsregexp.Compile(pattern)
	if compileErr != nil {
		return compileErr
	}

	iq := index.RegexpQuery(compiled.Syntax)
	log.Printf("trigram = %v, sub = %v", iq.Trigram, iq.Sub)

	// A query is acceptable as soon as the index can narrow it down at all.
	if len(iq.Trigram) > 0 || len(iq.Sub) > 0 {
		return nil
	}
	return fmt.Errorf("Empty index query")
}
Esempio n. 2
0
// maybeStartQuery starts the query identified by queryid unless state already
// holds an entry for it that was started within the last 30 minutes.
//
// It returns true if such an existing query was found (nothing is started),
// and false if a new query was set up, or if setting it up failed (in which
// case failQuery is called for queryid). The src parameter is not used by
// this function.
//
// stateMu is held for the whole call.
func maybeStartQuery(queryid, src, query string) bool {
	stateMu.Lock()
	defer stateMu.Unlock()
	querystate, running := state[queryid]
	// XXX: Starting a new query while there may still be clients reading that
	// query is not a great idea. Best fix may be to make getEvent() use a
	// querystate instead of the string identifier.
	if running && time.Since(querystate.started) <= 30*time.Minute {
		return true
	}

	// See if we can garbage-collect old queries.
	if !running && len(state) >= 10 {
		log.Printf("Trying to garbage collect queries (currently %d)\n", len(state))
		for oldID, old := range state {
			if len(state) < 10 {
				break
			}
			if !old.done {
				continue
			}
			for _, pbs := range old.perBackend {
				// Entries are still nil when setting up the query failed
				// before the corresponding temp file was created.
				if pbs == nil {
					continue
				}
				pbs.tempFile.Close()
			}
			delete(state, oldID)
		}
		log.Printf("Garbage collection done. %d queries remaining", len(state))
	}

	backends := strings.Split(*common.SourceBackends, ",")
	state[queryid] = queryState{
		started:        time.Now(),
		query:          query,
		newEvent:       sync.NewCond(&sync.Mutex{}),
		filesTotal:     make([]int, len(backends)),
		filesProcessed: make([]int, len(backends)),
		filesMu:        &sync.Mutex{},
		perBackend:     make([]*perBackendState, len(backends)),
		tempFilesMu:    &sync.Mutex{},
	}

	activeQueries.Add(1)

	dir := filepath.Join(*queryResultsPath, queryid)
	if err := os.MkdirAll(dir, os.FileMode(0755)); err != nil {
		log.Printf("[%s] could not create %q: %v\n", queryid, dir, err)
		failQuery(queryid)
		return false
	}

	// TODO: it’d be so much better if we would correctly handle ESPACE errors
	// in the code below (and above), but for that we need to carefully test it.
	ensureEnoughSpaceAvailable()

	// Create one unsorted result file per backend.
	for i := 0; i < len(backends); i++ {
		state[queryid].filesTotal[i] = -1
		path := filepath.Join(dir, fmt.Sprintf("unsorted_%d.capnproto", i))
		f, err := os.Create(path)
		if err != nil {
			log.Printf("[%s] could not create %q: %v\n", queryid, path, err)
			failQuery(queryid)
			return false
		}
		state[queryid].perBackend[i] = &perBackendState{
			packagePool:    stringpool.NewStringPool(),
			tempFile:       f,
			tempFileWriter: bufio.NewWriterSize(f, 65536),
			allPackages:    make(map[string]bool),
		}
	}
	log.Printf("initial results = %v\n", state[queryid])

	// Rewrite the query into a query for source backends. A query that
	// cannot be parsed fails only this query instead of terminating the
	// whole process.
	fakeURL, err := url.Parse("?" + query)
	if err != nil {
		log.Printf("[%s] could not parse query %q: %v\n", queryid, query, err)
		failQuery(queryid)
		return false
	}
	rewritten := search.RewriteQuery(*fakeURL)
	type streamingRequest struct {
		Query string
		URL   string
	}
	request := streamingRequest{
		Query: rewritten.Query().Get("q"),
		URL:   rewritten.String(),
	}
	log.Printf("[%s] querying for %q\n", queryid, request.Query)
	sourceQuery, err := json.Marshal(&request)
	if err != nil {
		log.Printf("[%s] could not marshal request: %v\n", queryid, err)
		failQuery(queryid)
		return false
	}

	// Each backend is queried in its own goroutine.
	for idx, backend := range backends {
		go queryBackend(queryid, backend, idx, sourceQuery)
	}
	return false
}
Esempio n. 3
0
// maybeStartQuery starts a specified query if that query does not already
// exist. Returns whether the query existed and any errors during query
// creation.
//
// One temp file per entry in common.SourceBackendStubs is created below
// *queryResultsPath/queryid. If creation fails part-way, or the query turns
// out to have been started concurrently, the temp files opened by this call
// are closed again before returning.
func maybeStartQuery(queryid, src, query string) (bool, error) {
	if queryExists(queryid) {
		return true, nil
	}

	querystate := queryState{
		started:        time.Now(),
		query:          query,
		newEvent:       sync.NewCond(&sync.Mutex{}),
		filesTotal:     make([]int, len(common.SourceBackendStubs)),
		filesProcessed: make([]int, len(common.SourceBackendStubs)),
		filesMu:        &sync.Mutex{},
		perBackend:     make([]*perBackendState, len(common.SourceBackendStubs)),
		tempFilesMu:    &sync.Mutex{},
	}

	// closeTempFiles closes the temp files opened so far. It is used on the
	// paths where querystate is discarded, so the handles would otherwise
	// never be closed.
	closeTempFiles := func() {
		for _, pbs := range querystate.perBackend {
			if pbs != nil {
				pbs.tempFile.Close()
			}
		}
	}

	dir := filepath.Join(*queryResultsPath, queryid)
	if err := os.MkdirAll(dir, os.FileMode(0755)); err != nil {
		return false, fmt.Errorf("could not create %q: %v", dir, err)
	}

	// TODO: it’d be so much better if we would correctly handle ESPACE errors
	// in the code below (and above), but for that we need to carefully test it.
	ensureEnoughSpaceAvailable()

	for i := 0; i < len(common.SourceBackendStubs); i++ {
		querystate.filesTotal[i] = -1
		path := filepath.Join(dir, fmt.Sprintf("unsorted_%d.pb", i))
		f, err := os.Create(path)
		if err != nil {
			closeTempFiles()
			return false, fmt.Errorf("could not create %q: %v", path, err)
		}
		querystate.perBackend[i] = &perBackendState{
			packagePool:    stringpool.NewStringPool(),
			tempFile:       f,
			tempFileWriter: bufio.NewWriterSize(f, 65536),
			allPackages:    make(map[string]bool),
		}
	}
	log.Printf("querystate = %v\n", querystate)

	// Rewrite the query into a query for source backends. A parse failure is
	// reported to the caller instead of terminating the process.
	fakeURL, err := url.Parse("?" + query)
	if err != nil {
		closeTempFiles()
		return false, fmt.Errorf("could not parse query %q: %v", query, err)
	}
	rewritten := search.RewriteQuery(*fakeURL)
	searchRequest := &pb.SearchRequest{
		Query:        rewritten.Query().Get("q"),
		RewrittenUrl: rewritten.String(),
	}
	log.Printf("[%s] querying for %+v\n", queryid, searchRequest)
	if err := startQuery(queryid, querystate); err != nil {
		// Another goroutine must have raced us since we called queryExists().
		// NOTE(review): assumes startQuery does not retain querystate when it
		// returns an error, so our file handles are safe to close — confirm.
		closeTempFiles()
		return true, nil
	}
	// Each backend stub is queried in its own goroutine.
	for idx, backend := range common.SourceBackendStubs {
		go queryBackend(queryid, src, backend, idx, searchRequest)
	}
	return false, nil
}