func (ds *Datastore) JoinBlock(ctx context.Context, block *Block) (*Block, error) { ds.blocksLock.Lock() defer ds.blocksLock.Unlock() var lastB *Block for _, b := range ds.blocks { if b.EndKey() < block.EndKey() && (lastB == nil || b.EndKey() > lastB.EndKey()) { lastB = b continue } } if lastB == nil { return nil, fmt.Errorf("Unable to find block before %s", block.EndKey()) } openinstrument.Logf(ctx, "Found previous block %s: %s", lastB.ID(), lastB.EndKey()) openinstrument.Logf(ctx, "Copying %d streams from %s to %s", lastB.NumStreams(), lastB.ID(), block.ID()) r, err := lastB.GetAllStreams(ctx) if err != nil { return nil, fmt.Errorf("Unable to read prior block: %s", err) } block.AddStreams(r) openinstrument.Logf(ctx, "Deleting old block %s", lastB.ID()) if err := lastB.Delete(); err != nil { openinstrument.Logf(ctx, "Unable to delete old block file: %s", err) } delete(ds.blocks, lastB.EndKey()) defer block.Flush() return block, nil }
func (ds *Datastore) readBlockHeader(ctx context.Context, filename string) { block := NewBlock(ctx, "", BlockIDFromFilename(filename), ds.Path) file, err := protofile.Read(block.Filename()) if err != nil { openinstrument.Logf(ctx, "Error opening proto log file %s: %s", block.Filename(), err) return } defer file.Close() if n, err := file.Read(block.Block.Header); n < 1 || err != nil { openinstrument.Logf(ctx, "Block file %s has a corrupted header: %s\n", block.Filename(), err) return } if block.Block.Header.Version != 2 { openinstrument.Logf(ctx, "Block file %s has incorrect version identifier '%v'\n", block.Filename(), block.Block.Header.Version) return } block.Block.EndKey = block.Block.Header.EndKey if block.EndKey() == "" { openinstrument.Logf(ctx, "Block %s does not have an end key, ignoring", block.Filename()) return } // Update cached number of streams and values block.UpdateIndexedCount() ds.insertBlock(ctx, block) openinstrument.Logf(ctx, "Read block %s containing %d streams\n", block.ID(), len(block.Block.Header.Index)) }
func (block *Block) CompactRequired(ctx context.Context) bool { if block.Block.LoggedStreams > 10000 { openinstrument.Logf(ctx, "Block %s has %d log streams, compacting", block, block.Block.LoggedStreams) return true } if block.Block.LoggedValues > maxLogValues { openinstrument.Logf(ctx, "Block %s has %d log values, compacting", block, block.Block.LoggedValues) return true } return false }
func (block *Block) SplitRequired(ctx context.Context) bool { if block.Block.IndexedStreams <= 1 { return false } if block.Block.IndexedStreams > splitPointStreams { openinstrument.Logf(ctx, "Block %s has %d indexed streams, splitting", block, block.Block.IndexedStreams) return true } if block.Block.IndexedValues >= splitPointValues { openinstrument.Logf(ctx, "Block %s has %d indexed values, splitting", block, block.Block.IndexedValues) return true } return false }
func (block *Block) getIndexedStream(ctx context.Context, index *oproto.BlockHeaderIndex) *oproto.ValueStream { file, err := protofile.Read(block.Filename()) if err != nil { if !os.IsNotExist(err) { openinstrument.Logf(ctx, "Can't read block file %s: %s\n", block, err) } return nil } defer file.Close() stream := &oproto.ValueStream{} if n, err := file.ReadAt(int64(index.Offset), stream); n < 1 || err != nil { openinstrument.Logf(ctx, "Couldn't read ValueStream at %s:%d: %s", block, index.Offset, err) return nil } return stream }
func NewBlock(ctx context.Context, endKey, id, dsPath string) *Block { if id == "" { u, err := uuid.NewV4() if err != nil { openinstrument.Logf(ctx, "Error generating UUID for new datastore block filename: %s", err) return nil } id = u.String() } return &Block{ LogStreams: make(map[string]*oproto.ValueStream, 0), NewStreams: make([]*oproto.ValueStream, 0), dsPath: dsPath, Block: &oproto.Block{ Header: &oproto.BlockHeader{ Version: uint32(2), Index: make([]*oproto.BlockHeaderIndex, 0), }, Id: id, EndKey: endKey, State: oproto.Block_UNKNOWN, Node: store_config.Get().GetTaskName(), }, } }
func (block *Block) RunLengthEncodeStreams(ctx context.Context, streams map[string]*oproto.ValueStream) map[string]*oproto.ValueStream { // Run-length encode all streams in parallel var sl sync.Mutex var outputValues int wg := &sync.WaitGroup{} newStreams := make(map[string]*oproto.ValueStream, 0) for _, stream := range streams { wg.Add(1) go func(stream *oproto.ValueStream) { defer wg.Done() // Sort values by timestamp value.By(func(a, b *oproto.Value) bool { return a.Timestamp < b.Timestamp }).Sort(stream.Value) // Run-length encode values stream = rle.Encode(stream) sl.Lock() newStreams[variable.ProtoToString(stream.Variable)] = stream outputValues += len(stream.Value) sl.Unlock() }(stream) } wg.Wait() openinstrument.Logf(ctx, "Run-length encoded %d streams to %d", len(newStreams), outputValues) return newStreams }
// readBlocks opens up every block file, reading it to determine the block layout. // This is only called once when the datastore is opened the first time. func (ds *Datastore) readBlocks(ctx context.Context) bool { startTime := time.Now() names, err := openinstrument.ReadDirNames(ds.Path) if err != nil { openinstrument.Logf(ctx, "Can't read existing blocks: %s\n", err) return false } // Index all the outstanding recordlogs in parallel waitgroup := new(sync.WaitGroup) for _, filename := range names { if matched, _ := regexp.MatchString("^block\\..+$", filename); matched { if matched, _ := regexp.MatchString("\\.(log|new\\.[0-9]+)$", filename); matched { continue } waitgroup.Add(1) go func(filename string) { defer waitgroup.Done() ds.readBlockHeader(ctx, filename) }(filename) } } waitgroup.Wait() waitgroup = new(sync.WaitGroup) for _, filename := range names { if matched, _ := regexp.MatchString("^block\\..+\\.log$", filename); matched { waitgroup.Add(1) go func(filename string) { defer waitgroup.Done() ds.readBlockLog(ctx, filename) }(filename) } } waitgroup.Wait() for _, block := range ds.Blocks() { block.SetState(ctx, oproto.Block_LIVE) block.UpdateSize() } openinstrument.Logf(ctx, "Read all datastore blocks in %v", time.Since(startTime)) return true }
func (ds *Datastore) background(ctx context.Context) { // Background processing of blocks flush_tick := time.Tick(5 * time.Second) compact_tick := time.Tick(1 * time.Minute) for { select { case <-ctx.Done(): log.Println("Context complete, closing background goroutine") return case <-flush_tick: ds.Flush() case <-compact_tick: logCtx, l := openinstrument.GetContextWithLog(ctx) for _, block := range ds.Blocks() { // Compact any blocks that need it if block.CompactRequired(logCtx) { if err := block.Compact(logCtx); err != nil { openinstrument.Logf(logCtx, "Error compacting block: %s\n", err) } } // Split any blocks that need it if block.SplitRequired(logCtx) { if _, _, err := ds.SplitBlock(logCtx, block); err != nil { openinstrument.Logf(logCtx, "Error splitting block: %s\n", err) } openinstrument.Logf(logCtx, "Finished splitting block %s", block) } } if len(l.Log) > 0 { log.Printf("Compact tick log:\n%s", openinstrument.StringLog(logCtx)) } } } }
// Reader builds a channel that will return streams for a supplied Variable. // If min/maxTimestamp are not nil, streams will only be returned if SOME values inside the stream match. // The supplied variable may be a search or a single. // The streams returned may be out of order with respect to variable names or timestamps. func (ds *Datastore) Reader(ctx context.Context, v *variable.Variable) <-chan *oproto.ValueStream { varName := v.String() openinstrument.Logf(ctx, "Creating Reader for %s between %d and %d\n", varName, v.MinTimestamp, v.MaxTimestamp) out := make(chan *oproto.ValueStream, 100) go func() { defer close(out) ds.blocksLock.RLock() defer ds.blocksLock.RUnlock() for _, block := range ds.blocks { for stream := range block.Reader(ctx, v) { out <- stream } } }() return out }
// Writer builds a channel that can accept ValueStreams for writing to the datastore. // Any ValueStreams written to this channel will eventually be flushed to disk, // but they will be immediately available for use. // The writes to disk are not guaranteed until Flush is called. func (ds *Datastore) Writer(ctx context.Context) chan<- *oproto.ValueStream { in := make(chan *oproto.ValueStream, 10) go func() { for stream := range in { // Write this stream varName := variable.ProtoToString(stream.Variable) if block := ds.findBlock(ctx, varName); block != nil { //openinstrument.Logf(ctx, "Writing stream for variable %s to block %s", varName, block.ID()) block.AddStream(stream) } else { openinstrument.Logf(ctx, "Unable to find block to write variable %s", varName) } } }() return in }
func InspectVariable(ctx context.Context, ds *datastore.Datastore, w http.ResponseWriter, req *http.Request) { t, err := template.ParseFiles(fmt.Sprintf("%s/inspect_variable.html", *templatePath)) if err != nil { openinstrument.Logf(ctx, "Couldn't find template file: %s", err) return } type varInfo struct { Name string FirstTimestamp time.Time LastTimestamp time.Time } p := struct { Title string Query string Variables []varInfo }{ Title: "Inspect Variable", Query: req.FormValue("q"), Variables: make([]varInfo, 0), } if p.Query == "" { w.WriteHeader(404) fmt.Fprintf(w, "Specify q=") return } v := variable.NewFromString(p.Query) c := ds.Reader(ctx, v) for stream := range c { lt := stream.Value[len(stream.Value)-1].EndTimestamp if lt == 0 { lt = stream.Value[len(stream.Value)-1].Timestamp } p.Variables = append(p.Variables, varInfo{ Name: variable.ProtoToString(stream.Variable), FirstTimestamp: time.Unix(int64(stream.Value[0].Timestamp/1000), 0), LastTimestamp: time.Unix(int64(lt/1000), 0), }) } err = t.Execute(w, p) if err != nil { log.Println(err) } }
// findBlock gets a datastore block that can have the variable written to. // If one doesn't exist, a new block is created. func (ds *Datastore) findBlock(ctx context.Context, variableName string) *Block { // Search for a block with end key greater than the current key // TODO(dparrish): Binary search for block ds.blocksLock.RLock() for _, key := range ds.blockKeys { if key >= variableName { ds.blocksLock.RUnlock() return ds.blocks[key] } } ds.blocksLock.RUnlock() // Create a new block block := NewBlock(ctx, variableName, "", ds.Path) ds.insertBlock(ctx, block) openinstrument.Logf(ctx, "Creating new block for %s\n", variableName) return block }
func (ds *Datastore) readBlockLog(ctx context.Context, filename string) { block := NewBlock(ctx, "", BlockIDFromFilename(filename), ds.Path) file, err := protofile.Read(block.logFilename()) if err != nil { openinstrument.Logf(ctx, "Error opening proto log file %s: %s", block.logFilename(), err) } defer file.Close() // Read all the streams from the log file reader := file.ValueStreamReader(ctx, 100) for stream := range reader { varName := variable.ProtoToString(stream.Variable) if varName > block.EndKey() { block.Block.EndKey = varName } locker := block.LogWriteLocker() locker.Lock() existingstream, found := block.LogStreams[varName] if found { existingstream.Value = append(existingstream.Value, stream.Value...) } else { block.LogStreams[varName] = stream } locker.Unlock() } if func() *Block { for _, existingblock := range ds.Blocks() { if existingblock.ID() == block.ID() { locker := existingblock.LogWriteLocker() locker.Lock() existingblock.LogStreams = block.LogStreams locker.Unlock() // Update cached number of streams and values existingblock.UpdateLoggedCount() return existingblock } } return nil }() == nil { // There is no existing block file for this log. block.UpdateLoggedCount() ds.insertBlock(ctx, block) } }
// Write writes a map of ValueStreams to a single block file on disk. // The values inside each ValueStream will be sorted and run-length-encoded before writing. func (block *Block) Write(ctx context.Context, streams map[string]*oproto.ValueStream) error { // Build the header with a 0-index for each variable block.Block.Header.Index = []*oproto.BlockHeaderIndex{} block.Block.Header.EndKey = "" block.Block.Header.StartTimestamp = 0 block.Block.Header.EndTimestamp = 0 streams = block.RunLengthEncodeStreams(ctx, streams) for v, stream := range streams { if v > block.Block.Header.EndKey { block.Block.Header.EndKey = v } // Add this stream to the index block.Block.Header.Index = append(block.Block.Header.Index, &oproto.BlockHeaderIndex{ Variable: stream.Variable, Offset: uint64(1), // This must be set non-zero so that the protobuf marshals it to non-empty MinTimestamp: stream.Value[0].Timestamp, MaxTimestamp: stream.Value[len(stream.Value)-1].Timestamp, NumValues: uint32(len(stream.Value)), }) if block.Block.Header.StartTimestamp == 0 || stream.Value[0].Timestamp < block.Block.Header.StartTimestamp { block.Block.Header.StartTimestamp = stream.Value[0].Timestamp } if stream.Value[len(stream.Value)-1].Timestamp > block.Block.Header.EndTimestamp { block.Block.Header.EndTimestamp = stream.Value[len(stream.Value)-1].Timestamp } } // Start writing to the new block file newfilename := fmt.Sprintf("%s.new.%d", block.Filename(), os.Getpid()) newfile, err := protofile.Write(newfilename) if err != nil { newfile.Close() return fmt.Errorf("Can't write to %s: %s\n", newfilename, err) } newfile.Write(block.Block.Header) blockEnd := newfile.Tell() // Write all the ValueStreams indexPos := make(map[string]uint64) var outValues uint32 for _, stream := range streams { indexPos[variable.ProtoToString(stream.Variable)] = uint64(newfile.Tell()) newfile.Write(stream) outValues += uint32(len(stream.Value)) } // Update the offsets in the header, now that all the data has been written for _, index := range block.Block.Header.Index { index.Offset = indexPos[variable.ProtoToString(index.Variable)] } newfile.WriteAt(0, block.Block.Header) if blockEnd < newfile.Tell() { // Sanity check, just in case goprotobuf breaks something again newfile.Close() os.Remove(newfilename) log.Fatalf("Error writing block file %s, header overwrote data", newfilename) } newfile.Sync() newfile.Close() block.UpdateIndexedCount() openinstrument.Logf(ctx, "Wrote %d streams / %d values to %s", len(streams), outValues, newfilename) openinstrument.Logf(ctx, "Block log contains %d stream", len(block.Block.Header.Index)) // Rename the temporary file into place if err := os.Rename(newfilename, block.Filename()); err != nil { return fmt.Errorf("Error renaming: %s", err) } return nil }
// SplitBlock splits a single block into multiple (usually 2) smaller blocks. // The new blocks' contents are immedately written to disk and reopened by the Datatstore. // The old block is removed from disk once the new contents are available. // This will block writes to a block for the duration of the reindexing. func (ds *Datastore) SplitBlock(ctx context.Context, block *Block) (*Block, *Block, error) { defer block.UpdateIndexedCount() defer block.UpdateLoggedCount() defer block.UpdateUnloggedCount() // Compact the block before continuing, to make sure everything is flushed to disk block.Compact(ctx) // Work out the optimal split point splitPoint, leftEndKey := block.GetOptimalSplitPoint(ctx) if splitPoint == 0 { return nil, nil, fmt.Errorf("Could not split block %s: not enough streams", block) } openinstrument.Logf(ctx, "Calculated optimal split point at %d (%s)", splitPoint, leftEndKey) // Read in the whole block leftBlock := NewBlock(ctx, leftEndKey, "", ds.Path) leftStreams := make(map[string]*oproto.ValueStream) rightStreams := make(map[string]*oproto.ValueStream) streams, err := block.GetIndexedStreams(ctx) if err != nil { return nil, nil, fmt.Errorf("Couldn't read old block file: %s", err) } var leftError, rightError error func() { locker := block.LogWriteLocker() locker.Lock() defer locker.Unlock() for stream := range streams { varName := variable.ProtoToString(stream.Variable) if varName <= leftBlock.EndKey() { leftStreams[varName] = stream } else { rightStreams[varName] = stream } } wg := new(sync.WaitGroup) wg.Add(2) go func() { leftError = leftBlock.Write(ctx, leftStreams); wg.Done() }() go func() { rightError = block.Write(ctx, rightStreams); wg.Done() }() wg.Wait() }() if leftError != nil { return nil, nil, fmt.Errorf("Error writing left block: %s", leftError) } if rightError != nil { return nil, nil, fmt.Errorf("Error writing right block: %s", rightError) } ds.insertBlock(ctx, leftBlock) defer leftBlock.UpdateIndexedCount() defer leftBlock.UpdateLoggedCount() defer leftBlock.UpdateUnloggedCount() openinstrument.Logf(ctx, "Split complete, left contains %d streams, right contains %d", len(leftStreams), len(rightStreams)) return leftBlock, block, nil }
func (block *Block) Compact(ctx context.Context) error { openinstrument.Logf(ctx, "Compacting block %s\n", block) startTime := time.Now() // Update cached number of streams and values defer block.UpdateIndexedCount() defer block.UpdateLoggedCount() defer block.UpdateUnloggedCount() block.protoLock.Lock() defer block.protoLock.Unlock() block.Block.State = oproto.Block_COMPACTING block.compactStartTime = time.Now() block.newStreamsLock.Lock() defer block.newStreamsLock.Unlock() block.logLock.Lock() defer block.logLock.Unlock() streams := make(map[string]*oproto.ValueStream, 0) // Apply the retention policy during compaction p, err := store_config.Get().GetRetentionPolicy(ctx) if err != nil { return fmt.Errorf("Error getting retention policy from config store: %s", err) } policy := retentionpolicy.New(&p) endKey := "" appendValues := func(stream *oproto.ValueStream) { if stream.Variable == nil { openinstrument.Logf(ctx, "Skipping reading stream that contains no variable") return } varName := variable.ProtoToString(stream.Variable) out := policy.Apply(stream) if len(out.Value) == 0 { //openinstrument.Logf(ctx, "Dropping stream for variable %s", varName) return } outstream, found := streams[varName] if found { outstream.Value = append(outstream.Value, stream.Value...) } else { streams[varName] = stream } if varName > endKey { endKey = varName } } // Append logged streams for _, stream := range block.LogStreams { appendValues(stream) } openinstrument.Logf(ctx, "Block log contains %d streams", len(streams)) // Append indexed streams reader, err := block.GetIndexedStreams(ctx) if err != nil { openinstrument.Logf(ctx, "Unable to read block: %s", err) } else { for stream := range reader { appendValues(stream) } openinstrument.Logf(ctx, "Compaction read block containing %d streams", len(streams)) } // Append unlogged (new) streams if len(block.NewStreams) > 0 { for _, stream := range block.NewStreams { appendValues(stream) } openinstrument.Logf(ctx, "Compaction added %d unlogged streams, total: %d streams", len(block.NewStreams), len(streams)) } // The end key may have changed if streams have been dropped block.Block.EndKey = endKey if err = block.Write(ctx, streams); err != nil { openinstrument.Logf(ctx, "Error writing: %s", err) return err } // Delete the log file os.Remove(block.logFilename()) openinstrument.Logf(ctx, "Deleted log file %s", block.logFilename()) block.LogStreams = make(map[string]*oproto.ValueStream) block.NewStreams = make([]*oproto.ValueStream, 0) block.compactEndTime = time.Now() block.Block.State = oproto.Block_LIVE block.UpdateSize() openinstrument.Logf(ctx, "Finished compaction of %s in %v", block, time.Since(startTime)) return nil }