Example #1
0
// mergeDB calls mergeSegment(newDB, db, segment) for every segment returned by
// findSegments on db's static table, fanning the work out to parallelism
// workers started through a wait.Group.
//
// A separate feeder hands segments to the workers one at a time and calls
// newDB.Flush each time flushSegments segments have been handed out since the
// previous flush. If flushSegments is zero or negative the counter never
// matches and only the final flush runs. The first error reported by
// wait.Group's Wait is returned; otherwise the result of a final newDB.Flush
// is returned.
func mergeDB(newDB, db *gumshoe.DB, parallelism, flushSegments int) error {
	resp := db.MakeRequest()
	defer resp.Done()

	allSegments := findSegments(resp.StaticTable)
	progress := NewProgress("segments processed", len(allSegments))
	progress.Print()

	work := make(chan *timestampSegment)
	var group wait.Group

	// worker drains the work channel until it is closed or quit fires.
	worker := func(quit <-chan struct{}) error {
		for {
			select {
			case <-quit:
				return nil
			case seg, open := <-work:
				if !open {
					return nil
				}
				if err := mergeSegment(newDB, db, seg); err != nil {
					return err
				}
				progress.Add(1)
			}
		}
	}
	for n := 0; n < parallelism; n++ {
		group.Go(worker)
	}

	// Feeder: pushes every segment to the workers and flushes periodically.
	group.Go(func(quit <-chan struct{}) error {
		sinceFlush := 0
		for _, seg := range allSegments {
			// Give quit priority: check it without blocking before trying
			// to hand out the next segment.
			select {
			case <-quit:
				return nil
			default:
			}

			select {
			case <-quit:
				return nil
			case work <- seg:
			}

			sinceFlush++
			if sinceFlush != flushSegments {
				continue
			}
			sinceFlush = 0
			if err := newDB.Flush(); err != nil {
				return err
			}
		}
		// Only reached when every segment was handed out; lets workers exit.
		close(work)
		return nil
	})

	if err := group.Wait(); err != nil {
		return err
	}
	return newDB.Flush()
}
Example #2
0
// HandleInsert decodes a JSON array of rows from the request body, validates
// every column name with r.validColumnName, assigns each row to a shard using
// r.Hash, and forwards each shard's batch as a PUT to that shard's /insert
// endpoint via wait.Group.
//
// A body that fails to decode yields http.StatusBadRequest. An unknown column
// is reported through writeInvalidColumnError before any shard request is
// made. If building, sending, or the status of any shard request fails, the
// error returned by wg.Wait is reported with http.StatusInternalServerError.
// On success nothing is written to w explicitly.
func (r *Router) HandleInsert(w http.ResponseWriter, req *http.Request) {
	var rows []gumshoe.RowMap
	if err := json.NewDecoder(req.Body).Decode(&rows); err != nil {
		WriteError(w, err, http.StatusBadRequest)
		return
	}
	Log.Printf("Inserting %d rows", len(rows))

	// shardedRows[i] holds the rows destined for r.Shards[i].
	shardedRows := make([][]gumshoe.RowMap, len(r.Shards))
	for _, row := range rows {
		// Check that the columns match the schema we have
		for col := range row {
			if !r.validColumnName(col) {
				writeInvalidColumnError(w, col)
				return
			}
		}
		shardIdx := r.Hash(row)
		shardedRows[shardIdx] = append(shardedRows[shardIdx], row)
	}
	var wg wait.Group
	for i := range shardedRows {
		i := i
		wg.Go(func(_ <-chan struct{}) error {
			shard := r.Shards[i]
			// Errors are returned rather than raised with panic: these
			// callbacks presumably run on goroutines other than the handler's
			// (wait.Group is not visible here — confirm), where net/http would
			// not recover a panic and the whole process would exit.
			b, err := json.Marshal(shardedRows[i])
			if err != nil {
				return err
			}
			shardReq, err := http.NewRequest("PUT", "http://"+shard+"/insert", bytes.NewReader(b))
			if err != nil {
				return err
			}
			shardReq.Header.Set("Content-Type", "application/json")
			resp, err := r.Client.Do(shardReq)
			if err != nil {
				return err
			}
			defer resp.Body.Close()
			if resp.StatusCode != http.StatusOK {
				return NewHTTPError(resp, shard)
			}
			return nil
		})
	}
	if err := wg.Wait(); err != nil {
		WriteError(w, err, http.StatusInternalServerError)
	}
}
Example #3
0
// HandleSingleDimension responds with the sorted, de-duplicated union of the
// string lists returned by each shard's /dimension_tables/<name> endpoint,
// where <name> is taken from the ":name" value of the request's URL query.
//
// An empty name yields http.StatusBadRequest. Any error returned by the
// wait.Group (request failure, non-200 shard status, or undecodable shard
// body) yields http.StatusInternalServerError.
func (r *Router) HandleSingleDimension(w http.ResponseWriter, req *http.Request) {
	name := req.URL.Query().Get(":name")
	if len(name) == 0 {
		http.Error(w, "must provide dimension name", http.StatusBadRequest)
		return
	}

	var (
		group  wait.Group
		mu     sync.Mutex // guards unique
		unique = make(map[string]struct{})
	)

	// fetch retrieves and decodes the value list of a single shard.
	fetch := func(shard string) ([]string, error) {
		resp, err := r.Client.Get("http://" + shard + "/dimension_tables/" + name)
		if err != nil {
			return nil, err
		}
		defer resp.Body.Close()
		if resp.StatusCode != 200 {
			return nil, NewHTTPError(resp, shard)
		}
		var values []string
		if err := json.NewDecoder(resp.Body).Decode(&values); err != nil {
			return nil, err
		}
		return values, nil
	}

	for idx := range r.Shards {
		idx := idx
		group.Go(func(_ <-chan struct{}) error {
			values, err := fetch(r.Shards[idx])
			if err != nil {
				return err
			}
			mu.Lock()
			defer mu.Unlock()
			for _, v := range values {
				unique[v] = struct{}{}
			}
			return nil
		})
	}
	if err := group.Wait(); err != nil {
		WriteError(w, err, http.StatusInternalServerError)
		return
	}

	// Flatten the set and sort it so the response order is deterministic.
	var sorted []string
	for v := range unique {
		sorted = append(sorted, v)
	}
	sort.Strings(sorted)
	WriteJSONResponse(w, sorted)
}
Example #4
0
// HandleQuery parses a JSON query from the request body, forwards it to every
// shard's /query?format=stream endpoint, merges the streamed rows with
// r.mergeRows, and writes the combined rows plus the elapsed time as a Result.
//
// Requests are rejected before contacting any shard when:
//   - the "format" URL query value is non-empty (status 500),
//   - the body fails gumshoe.ParseJSONQuery (http.StatusBadRequest),
//   - an aggregate has type gumshoe.AggregateAvg (status 500),
//   - an aggregate, grouping, or filter column fails r.validColumnName
//     (reported through writeInvalidColumnError).
//
// Without groupings each shard must stream exactly one row; those rows are
// merged into a single result row. With groupings, rows are keyed by the value
// stored under the first grouping's Name and rows sharing a key are merged;
// only query.Groupings[0] is consulted for the key. Any error returned by
// wg.Wait is reported with http.StatusInternalServerError.
func (r *Router) HandleQuery(w http.ResponseWriter, req *http.Request) {
	start := time.Now()
	queryID := randomID() // used to make tracking a single query through the logs easier
	if req.URL.Query().Get("format") != "" {
		WriteError(w, errors.New("non-standard query formats not supported"), 500)
		return
	}
	query, err := gumshoe.ParseJSONQuery(req.Body)
	if err != nil {
		WriteError(w, err, http.StatusBadRequest)
		return
	}
	Log.Printf("[%s] got query: %s", queryID, query)
	for _, agg := range query.Aggregates {
		if agg.Type == gumshoe.AggregateAvg {
			// TODO(caleb): Handle as described in the doc.
			WriteError(w, errors.New("average aggregates not handled by the router"), 500)
			return
		}
		if !r.validColumnName(agg.Column) {
			writeInvalidColumnError(w, agg.Column)
			return
		}
	}
	for _, grouping := range query.Groupings {
		if !r.validColumnName(grouping.Column) {
			writeInvalidColumnError(w, grouping.Column)
			return
		}
	}
	for _, filter := range query.Filters {
		if !r.validColumnName(filter.Column) {
			writeInvalidColumnError(w, filter.Column)
			return
		}
	}
	// The query is re-serialized once and the same bytes are sent to every shard.
	b, err := json.Marshal(query)
	if err != nil {
		panic("unexpected marshal error")
	}
	var (
		wg     wait.Group
		mu     sync.Mutex // protects result, resultMap
		result []gumshoe.RowMap
		// rest only for grouping case
		groupingCol        string
		groupingColIntConv bool
		resultMap          = make(map[interface{}]*lockedRowMap)
	)
	if len(query.Groupings) > 0 {
		groupingCol = query.Groupings[0].Name
		groupingColIntConv = r.convertColumnToIntegral(query.Groupings[0].Column)
	}
	for i := range r.Shards {
		i := i
		wg.Go(func(_ <-chan struct{}) error {
			shard := r.Shards[i]
			url := "http://" + shard + "/query?format=stream"
			resp, err := r.Client.Post(url, "application/json", bytes.NewReader(b))
			if err != nil {
				return err
			}
			defer resp.Body.Close()
			if resp.StatusCode != 200 {
				return NewHTTPError(resp, shard)
			}

			decoder := json.NewDecoder(resp.Body)
			// The stream begins with one JSON object that is decoded and then
			// ignored; the rows follow it.
			var m map[string]int
			if err := decoder.Decode(&m); err != nil {
				return err
			}

			if len(query.Groupings) == 0 {
				// non-grouping case
				row := make(gumshoe.RowMap)
				if err := decoder.Decode(&row); err != nil {
					return err
				}
				mu.Lock()
				if len(result) == 0 {
					result = []gumshoe.RowMap{row}
				} else {
					r.mergeRows(result[0], row, query)
				}
				mu.Unlock()
				// Anything other than EOF after the single row is an error.
				if err := decoder.Decode(&row); err != io.EOF {
					if err == nil {
						return errors.New("got multiple results for a non-group-by query")
					}
					return err
				}
				return nil
			}

			// grouping case
			var rowSize int
			for {
				row := make(gumshoe.RowMap, rowSize)
				if err := decoder.Decode(&row); err != nil {
					if err == io.EOF {
						break
					}
					return err
				}
				// Size later rows' maps like the most recent one decoded.
				rowSize = len(row)
				groupByValue := row[groupingCol]
				// Validate the key before using it. An unchecked float64
				// assertion, or a slice/object used as a map key, would panic
				// here instead of surfacing as an error through wg.Wait.
				switch v := groupByValue.(type) {
				case nil:
					// A nil group value is kept as its own key.
				case float64:
					if groupingColIntConv {
						groupByValue = int64(v)
					}
				case []interface{}, map[string]interface{}:
					return errors.New("shard returned a non-scalar grouping value")
				default:
					if groupingColIntConv {
						return errors.New("shard returned a non-numeric value for an integral grouping column")
					}
				}
				mu.Lock()
				cur := resultMap[groupByValue]
				if cur == nil {
					resultMap[groupByValue] = &lockedRowMap{row: row}
					mu.Unlock()
					continue
				}
				// downgrade lock: take the per-row lock before releasing the
				// map lock so the merge runs without holding mu.
				cur.mu.Lock()
				mu.Unlock()
				r.mergeRows(cur.row, row, query)
				cur.mu.Unlock()
			}
			return nil
		})
	}
	if err := wg.Wait(); err != nil {
		WriteError(w, err, http.StatusInternalServerError)
		return
	}

	// For the grouping case, we need to flatten the results from resultMap.
	if len(query.Groupings) > 0 {
		for _, lr := range resultMap {
			result = append(result, lr.row)
		}
	}

	Log.Printf("[%s] fetched and merged query results from %d shards in %s (%d combined rows)",
		queryID, len(r.Shards), time.Since(start), len(result))

	WriteJSONResponse(w, Result{
		Results:    result,
		DurationMS: int(time.Since(start).Seconds() * 1000),
	})
}