Example #1
0
// sendProgressUpdate sends a PROGRESS_UPDATE reply on stream, reporting
// filesProcessed and filesTotal. connMu is held for the duration of the Send
// call, so sends guarded by the same mutex do not overlap. The error returned
// by stream.Send is passed through unchanged.
func sendProgressUpdate(stream proto.SourceBackend_SearchServer, connMu *sync.Mutex, filesProcessed, filesTotal int) error {
	// Build the message up front; only the Send itself needs the lock.
	update := &proto.ProgressUpdate{
		FilesProcessed: uint64(filesProcessed),
		FilesTotal:     uint64(filesTotal),
	}
	reply := &proto.SearchReply{
		Type:           proto.SearchReply_PROGRESS_UPDATE,
		ProgressUpdate: update,
	}

	connMu.Lock()
	defer connMu.Unlock()
	return stream.Send(reply)
}
Example #2
0
// Search handles a single search request: it asks the index backend for the
// candidate files for in.Query, ranks, filters and sorts them, greps every
// remaining file with the query regexp and sends each match (interleaved with
// periodic progress updates) to the client via stream.
//
// An error is returned if the index backend query fails, if in.RewrittenUrl
// cannot be parsed, if in.Query does not compile, or if the initial progress
// update cannot be sent. Send errors that happen later are logged; the
// remaining files are then skipped (but still accounted for, so that all
// goroutines terminate) and Search returns nil.
func (s *server) Search(in *proto.SearchRequest, stream proto.SourceBackend_SearchServer) error {
	// connMu serializes all Send calls on stream made by this request.
	connMu := new(sync.Mutex)
	logprefix := fmt.Sprintf("[%q]", in.Query)

	// Ask the local index backend for all the filenames.
	// NOTE(review): context.Background() means this call is not tied to the
	// lifetime of the client's request; confirm whether the stream's context
	// should be used instead.
	resp, err := indexBackend.Files(context.Background(), &proto.FilesRequest{Query: in.Query})
	if err != nil {
		return fmt.Errorf("%s Error querying index backend for query %q: %v\n", logprefix, in.Query, err)
	}

	// Parse the (rewritten) URL to extract all ranking options/keywords.
	rewritten, err := url.Parse(in.RewrittenUrl)
	if err != nil {
		return err
	}
	rankingopts := ranking.RankingOptsFromQuery(rewritten.Query())

	// Rank all the paths, dropping those with a ranking of -1 or below.
	files := make(ranking.ResultPaths, 0, len(resp.Path))
	for _, filename := range resp.Path {
		result := ranking.ResultPath{Path: filename}
		result.Rank(&rankingopts)
		if result.Ranking > -1 {
			files = append(files, result)
		}
	}

	// Filter all files that should be excluded.
	files = filterByKeywords(rewritten, files)

	// While not strictly necessary, this will lead to better results being
	// discovered (and returned!) earlier, so let's spend a few cycles on
	// sorting the list of potential files first.
	sort.Sort(files)

	// Compile once up front so that an invalid query is reported to the
	// caller before any goroutine is started.
	re, err := regexp.Compile(in.Query)
	if err != nil {
		return fmt.Errorf("%s Could not compile regexp: %v\n", logprefix, err)
	}

	log.Printf("%s regexp = %q, %d possible files\n", logprefix, re, len(files))

	// Send the first progress update so that clients know how many files are
	// going to be searched.
	if err := sendProgressUpdate(stream, connMu, 0, len(files)); err != nil {
		return fmt.Errorf("%s %v\n", logprefix, err)
	}

	// The tricky part here is "flow control": if we just start grepping like
	// crazy, we will eventually run out of memory because all our writes are
	// blocked on the connection (and the goroutines need to keep the write
	// buffer in memory until the write is done).
	//
	// So instead, we start up to 1000 worker goroutines and feed them work
	// through a single channel. Due to these goroutines being blocked on
	// writing, the grepping will naturally become slower.
	work := make(chan ranking.ResultPath)
	progress := make(chan int)

	var wg sync.WaitGroup
	// One count per file, plus 1 for the progress updater goroutine. The
	// updater also needs to be done before we can return, otherwise it would
	// try to use the stream after the handler has returned.
	wg.Add(len(files) + 1)

	// Feeder: hands out every file exactly once, then closes work.
	go func() {
		for _, file := range files {
			work <- file
		}
		close(work)
	}()

	// Progress updater: counts finished files and sends a progress update at
	// most once per progressInterval (2s plus up to 500ms of random jitter).
	go func() {
		cnt := 0
		errorShown := false
		var lastProgressUpdate time.Time
		progressInterval := 2*time.Second + time.Duration(rand.Int63n(int64(500*time.Millisecond)))
		for cnt < len(files) {
			add := <-progress
			cnt += add

			if time.Since(lastProgressUpdate) > progressInterval {
				if err := sendProgressUpdate(stream, connMu, cnt, len(files)); err != nil {
					if !errorShown {
						log.Printf("%s %v\n", logprefix, err)
						// We need to keep reading the 'progress' channel,
						// so we cannot just exit the loop here. Instead, we
						// suppress all error messages after the first one.
						errorShown = true
					}
				}
				lastProgressUpdate = time.Now()
			}
		}

		if err := sendProgressUpdate(stream, connMu, len(files), len(files)); err != nil {
			log.Printf("%s %v\n", logprefix, err)
		}
		close(progress)

		wg.Done()
	}()

	querystr := ranking.NewQueryStr(in.Query)

	// fileDone accounts for exactly one file taken from work: it notifies the
	// progress updater and releases one WaitGroup count. Every file received
	// from work must result in exactly one fileDone call, otherwise wg.Wait
	// and the progress updater block forever.
	fileDone := func() {
		progress <- 1
		wg.Done()
	}

	// skipRemaining drains the work channel without grepping, but still
	// accounts for every drained file so that Search can return.
	skipRemaining := func() {
		for range work {
			fileDone()
		}
	}

	// sendReply sends one reply on stream while holding connMu.
	sendReply := func(reply *proto.SearchReply) error {
		connMu.Lock()
		defer connMu.Unlock()
		return stream.Send(reply)
	}

	numWorkers := 1000
	if len(files) < 1000 {
		numWorkers = len(files)
	}
	for i := 0; i < numWorkers; i++ {
		go func() {
			// Each worker compiles its own regexp.
			// TODO: figure out how to safely clone a dcs/regexp
			re, err := regexp.Compile(in.Query)
			if err != nil {
				log.Printf("%s\n", err)
				// Do not just return: files this worker would have taken
				// must still be accounted for.
				skipRemaining()
				return
			}

			grep := regexp.Grep{
				Regexp: re,
				Stdout: os.Stdout,
				Stderr: os.Stderr,
			}

			for file := range work {
				sourcePkgName := file.Path[file.SourcePkgIdx[0]:file.SourcePkgIdx[1]]
				if rankingopts.Pathmatch {
					file.Ranking += querystr.Match(&file.Path)
				}
				if rankingopts.Sourcepkgmatch {
					file.Ranking += querystr.Match(&sourcePkgName)
				}
				if rankingopts.Weighted {
					file.Ranking += 0.1460 * querystr.Match(&file.Path)
					file.Ranking += 0.0008 * querystr.Match(&sourcePkgName)
				}

				matches := grep.File(path.Join(*unpackedPath, file.Path))
				sendFailed := false
				for _, match := range matches {
					match.Ranking = ranking.PostRank(rankingopts, &match, &querystr)
					match.PathRank = file.Ranking
					// NOTE(review): the original comment here stated that
					// match.Ranking is populated in cmd/dcs-web/querymanager
					// because it depends on at least one other result;
					// confirm how that relates to the PostRank value above.

					// TODO: ideally, we'd get proto.Match structs from grep.File(), let's do that after profiling the decoding performance

					// Strip the unpacked-directory prefix; the package is
					// the first path component of what remains.
					relPath := match.Path[len(*unpackedPath):]
					pkg := relPath
					if idx := strings.Index(relPath, "/"); idx >= 0 {
						pkg = relPath[:idx]
					}

					if err := sendReply(&proto.SearchReply{
						Type: proto.SearchReply_MATCH,
						Match: &proto.Match{
							Path:     relPath,
							Line:     uint32(match.Line),
							Package:  pkg,
							Ctxp2:    match.Ctxp2,
							Ctxp1:    match.Ctxp1,
							Context:  match.Context,
							Ctxn1:    match.Ctxn1,
							Ctxn2:    match.Ctxn2,
							Pathrank: match.PathRank,
							Ranking:  match.Ranking,
						},
					}); err != nil {
						log.Printf("%s %v\n", logprefix, err)
						sendFailed = true
						break
					}
				}

				fileDone()

				if sendFailed {
					// Sending failed: skip the rest of the work without
					// grepping. This lets all worker goroutines exit
					// cleanly once the work channel is closed.
					skipRemaining()
					return
				}
			}
		}()
	}

	wg.Wait()

	log.Printf("%s Sent all results.\n", logprefix)
	return nil
}