func (r *Router) HandleInsert(w http.ResponseWriter, req *http.Request) { var rows []gumshoe.RowMap if err := json.NewDecoder(req.Body).Decode(&rows); err != nil { WriteError(w, err, http.StatusBadRequest) return } Log.Printf("Inserting %d rows", len(rows)) shardedRows := make([][]gumshoe.RowMap, len(r.Shards)) for _, row := range rows { // Check that the columns match the schema we have for col := range row { if !r.validColumnName(col) { writeInvalidColumnError(w, col) return } } shardIdx := r.Hash(row) shardedRows[shardIdx] = append(shardedRows[shardIdx], row) } var wg wait.Group for i := range shardedRows { i := i wg.Go(func(_ <-chan struct{}) error { shard := r.Shards[i] b, err := json.Marshal(shardedRows[i]) if err != nil { panic("unexpected marshal error") } shardReq, err := http.NewRequest("PUT", "http://"+shard+"/insert", bytes.NewReader(b)) if err != nil { panic("could not make http request") } shardReq.Header.Set("Content-Type", "application/json") resp, err := r.Client.Do(shardReq) if err != nil { return err } defer resp.Body.Close() if resp.StatusCode != 200 { return NewHTTPError(resp, shard) } return nil }) } if err := wg.Wait(); err != nil { WriteError(w, err, http.StatusInternalServerError) } }
func (r *Router) HandleSingleDimension(w http.ResponseWriter, req *http.Request) { name := req.URL.Query().Get(":name") if name == "" { http.Error(w, "must provide dimension name", http.StatusBadRequest) return } var wg wait.Group dimValues := make(map[string]struct{}) var mu sync.Mutex for i := range r.Shards { i := i wg.Go(func(_ <-chan struct{}) error { shard := r.Shards[i] resp, err := r.Client.Get("http://" + shard + "/dimension_tables/" + name) if err != nil { return err } defer resp.Body.Close() if resp.StatusCode != 200 { return NewHTTPError(resp, shard) } var result []string if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { return err } mu.Lock() for _, s := range result { dimValues[s] = struct{}{} } mu.Unlock() return nil }) } if err := wg.Wait(); err != nil { WriteError(w, err, http.StatusInternalServerError) return } var results []string for s := range dimValues { results = append(results, s) } sort.Strings(results) WriteJSONResponse(w, results) }
// mergeDB merges every segment of db into newDB using `parallelism`
// concurrent worker goroutines, flushing newDB after every
// `flushSegments` segments have been handed out. Returns the first error
// from any worker, the producer, or the final flush.
//
// NOTE(review): workers select on `quit` as well as the segments channel;
// presumably wait.Group closes quit when any goroutine errors, which is
// what unblocks the others — verify against the wait package.
func mergeDB(newDB, db *gumshoe.DB, parallelism, flushSegments int) error {
	// Hold a read request on db so its static table stays stable while we copy.
	resp := db.MakeRequest()
	defer resp.Done()
	allSegments := findSegments(resp.StaticTable)
	progress := NewProgress("segments processed", len(allSegments))
	progress.Print()
	segments := make(chan *timestampSegment)
	var wg wait.Group
	// Consumers: drain segments until the channel is closed (normal
	// completion) or quit fires (another goroutine failed).
	for i := 0; i < parallelism; i++ {
		wg.Go(func(quit <-chan struct{}) error {
			for {
				select {
				case <-quit:
					return nil
				case segment, ok := <-segments:
					if !ok {
						// Channel closed: producer sent everything.
						return nil
					}
					if err := mergeSegment(newDB, db, segment); err != nil {
						return err
					}
					progress.Add(1)
				}
			}
		})
	}
	// Producer: feed all segments to the workers, flushing newDB after each
	// batch of flushSegments sends to bound memory growth.
	wg.Go(func(quit <-chan struct{}) error {
		flushSegmentCount := 0
		for _, segment := range allSegments {
			// Outer select gives quit priority before attempting a send; the
			// inner select then blocks on either the send or quit.
			select {
			case <-quit:
				return nil
			default:
				select {
				case <-quit:
					return nil
				case segments <- segment:
					flushSegmentCount++
					if flushSegmentCount == flushSegments {
						flushSegmentCount = 0
						if err := newDB.Flush(); err != nil {
							return err
						}
					}
				}
			}
		}
		// Only the producer closes the channel, and only after all sends.
		close(segments)
		return nil
	})
	err := wg.Wait()
	if err != nil {
		return err
	}
	// Final flush picks up any tail batch smaller than flushSegments.
	return newDB.Flush()
}
// HandleQuery parses a JSON query from the request body, validates its
// column names, fans it out to every shard in streaming format, and merges
// the per-shard results into a single response.
//
// Non-grouping queries must yield exactly one row per shard; the rows are
// merged pairwise into result[0]. Grouping queries stream many rows per
// shard, which are merged into resultMap keyed by the group-by value.
// Average aggregates and non-standard formats are rejected up front.
func (r *Router) HandleQuery(w http.ResponseWriter, req *http.Request) {
	start := time.Now()
	queryID := randomID() // used to make tracking a single query through the logs easier
	if req.URL.Query().Get("format") != "" {
		WriteError(w, errors.New("non-standard query formats not supported"), 500)
		return
	}
	query, err := gumshoe.ParseJSONQuery(req.Body)
	if err != nil {
		WriteError(w, err, http.StatusBadRequest)
		return
	}
	Log.Printf("[%s] got query: %s", queryID, query)
	// Reject unsupported aggregates and any column not in the schema.
	for _, agg := range query.Aggregates {
		if agg.Type == gumshoe.AggregateAvg {
			// TODO(caleb): Handle as described in the doc.
			WriteError(w, errors.New("average aggregates not handled by the router"), 500)
			return
		}
		if !r.validColumnName(agg.Column) {
			writeInvalidColumnError(w, agg.Column)
			return
		}
	}
	for _, grouping := range query.Groupings {
		if !r.validColumnName(grouping.Column) {
			writeInvalidColumnError(w, grouping.Column)
			return
		}
	}
	for _, filter := range query.Filters {
		if !r.validColumnName(filter.Column) {
			writeInvalidColumnError(w, filter.Column)
			return
		}
	}
	// Marshal once; the same body is POSTed to every shard.
	b, err := json.Marshal(query)
	if err != nil {
		panic("unexpected marshal error")
	}
	var (
		wg     wait.Group
		mu     sync.Mutex         // protects result, resultMap
		result []gumshoe.RowMap   // rest only for grouping case
		groupingCol        string
		groupingColIntConv bool
		resultMap          = make(map[interface{}]*lockedRowMap)
	)
	if len(query.Groupings) > 0 {
		groupingCol = query.Groupings[0].Name
		// Whether float64-decoded group-by values must be converted to int64
		// so that equal keys from different shards compare equal in resultMap.
		groupingColIntConv = r.convertColumnToIntegral(query.Groupings[0].Column)
	}
	for i := range r.Shards {
		i := i // capture per-iteration value for the closure (pre-Go-1.22 semantics)
		wg.Go(func(_ <-chan struct{}) error {
			shard := r.Shards[i]
			url := "http://" + shard + "/query?format=stream"
			resp, err := r.Client.Post(url, "application/json", bytes.NewReader(b))
			if err != nil {
				return err
			}
			defer resp.Body.Close()
			if resp.StatusCode != 200 {
				return NewHTTPError(resp, shard)
			}
			decoder := json.NewDecoder(resp.Body)
			// First JSON value in the stream is a map[string]int; presumably
			// shard-level metadata preceding the row stream — its contents are
			// not used here. TODO(review): confirm against the shard server.
			var m map[string]int
			if err := decoder.Decode(&m); err != nil {
				return err
			}
			if len(query.Groupings) == 0 {
				// non-grouping case: the shard must emit exactly one row.
				row := make(gumshoe.RowMap)
				if err := decoder.Decode(&row); err != nil {
					return err
				}
				mu.Lock()
				if len(result) == 0 {
					// First shard to finish seeds the result.
					result = []gumshoe.RowMap{row}
				} else {
					r.mergeRows(result[0], row, query)
				}
				mu.Unlock()
				// A second decode must hit io.EOF; anything else means the
				// shard sent extra rows (nil error) or the stream broke.
				if err := decoder.Decode(&row); err != io.EOF {
					if err == nil {
						return errors.New("got multiple results for a non-group-by query")
					}
					return err
				}
				return nil
			}
			// grouping case: stream rows until EOF, merging by group-by value.
			var rowSize int // size hint carried from the previous row
			for {
				row := make(gumshoe.RowMap, rowSize)
				if err := decoder.Decode(&row); err != nil {
					if err == io.EOF {
						break
					}
					return err
				}
				rowSize = len(row)
				groupByValue := row[groupingCol]
				if groupingColIntConv && groupByValue != nil {
					// JSON numbers decode as float64; normalize to int64 keys.
					groupByValue = int64(groupByValue.(float64))
				}
				mu.Lock()
				cur := resultMap[groupByValue]
				if cur == nil {
					// First row for this group: insert and move on.
					resultMap[groupByValue] = &lockedRowMap{row: row}
					mu.Unlock()
					continue
				}
				// downgrade lock: take the per-row lock before releasing the
				// map lock so the merge doesn't serialize all goroutines.
				cur.mu.Lock()
				mu.Unlock()
				r.mergeRows(cur.row, row, query)
				cur.mu.Unlock()
			}
			return nil
		})
	}
	if err := wg.Wait(); err != nil {
		WriteError(w, err, http.StatusInternalServerError)
		return
	}
	// For the grouping case, we need to flatten the results from resultMap.
	if len(query.Groupings) > 0 {
		for _, lr := range resultMap {
			result = append(result, lr.row)
		}
	}
	Log.Printf("[%s] fetched and merged query results from %d shards in %s (%d combined rows)", queryID, len(r.Shards), time.Since(start), len(result))
	WriteJSONResponse(w, Result{
		Results:    result,
		DurationMS: int(time.Since(start).Seconds() * 1000),
	})
}