// TODO: handle "no such file"
func (input *S3SplitFileInput) readS3File(runner pipeline.InputRunner, d *pipeline.Deliverer, sr *pipeline.SplitterRunner, s3Key string) (err error) {
	runner.LogMessage(fmt.Sprintf("Preparing to read: %s", s3Key))

	if input.bucket == nil {
		runner.LogMessage(fmt.Sprintf("No S3 bucket available for reading: %s", s3Key))
		return
	}

	var lastGoodOffset uint64
	var attempt uint32

RetryS3:
	for attempt = 1; attempt <= input.S3Retries; attempt++ {
		for r := range S3FileIterator(input.bucket, s3Key, lastGoodOffset) {
			record := r.Record
			// Assign to the named return value so the most recent error is
			// returned if all retries are exhausted.
			err = r.Err

			if err != nil && err != io.EOF {
				runner.LogError(fmt.Errorf("Error in attempt %d reading %s at offset %d: %s", attempt, s3Key, lastGoodOffset, err))
				atomic.AddInt64(&input.processMessageFailures, 1)
				continue RetryS3
			}
			if len(record) > 0 {
				lastGoodOffset += uint64(r.BytesRead)
				atomic.AddInt64(&input.processMessageCount, 1)
				atomic.AddInt64(&input.processMessageBytes, int64(len(record)))
				(*sr).DeliverRecord(record, *d)
			}
		}
		break
	}

	return
}
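// A standalone sketch of the iterator contract readS3File depends on: each
// result carries one record, the bytes consumed, and an error that is io.EOF
// at a clean end-of-stream. The names here (s3ReadResult, streamRecords) are
// hypothetical stand-ins, not part of the plugin; the real S3FileIterator
// additionally resumes from lastGoodOffset so a retry does not re-deliver
// records already seen.
package main

import (
	"bufio"
	"fmt"
	"io"
	"strings"
)

type s3ReadResult struct {
	Record    []byte // one complete record, if any
	BytesRead int    // bytes consumed from the stream for this record
	Err       error  // io.EOF on clean end-of-stream
}

// streamRecords emits newline-delimited records on a channel, mirroring the
// shape the consumer needs in order to maintain a last-good offset.
func streamRecords(r io.Reader) <-chan s3ReadResult {
	out := make(chan s3ReadResult)
	go func() {
		defer close(out)
		br := bufio.NewReader(r)
		for {
			line, err := br.ReadBytes('\n')
			out <- s3ReadResult{Record: line, BytesRead: len(line), Err: err}
			if err != nil {
				return // io.EOF or a real read error; either way, stop
			}
		}
	}()
	return out
}

func main() {
	var offset uint64
	for r := range streamRecords(strings.NewReader("a\nb\nc\n")) {
		if r.Err != nil && r.Err != io.EOF {
			break
		}
		offset += uint64(r.BytesRead)
	}
	fmt.Println("last good offset:", offset) // 6
}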
func (rpsi *RedisPubSubInput) Run(ir pipeline.InputRunner, h pipeline.PluginHelper) error {
	var (
		dRunner pipeline.DecoderRunner
		decoder pipeline.Decoder
		pack    *pipeline.PipelinePack
		e       error
		ok      bool
	)

	// Get the InputRunner's chan to receive empty PipelinePacks.
	packSupply := ir.InChan()

	if rpsi.conf.DecoderName != "" {
		if dRunner, ok = h.DecoderRunner(rpsi.conf.DecoderName, fmt.Sprintf("%s-%s", ir.Name(), rpsi.conf.DecoderName)); !ok {
			return fmt.Errorf("Decoder not found: %s", rpsi.conf.DecoderName)
		}
		decoder = dRunner.Decoder()
	}

	// Subscribe to the configured channel pattern.
	psc := redis.PubSubConn{Conn: rpsi.conn}
	psc.PSubscribe(rpsi.conf.Channel)

	for {
		switch n := psc.Receive().(type) {
		case redis.PMessage:
			// Grab an empty PipelinePack from the InputRunner.
			pack = <-packSupply
			pack.Message.SetType("redis_pub_sub")
			pack.Message.SetLogger(n.Channel)
			pack.Message.SetPayload(string(n.Data))
			pack.Message.SetTimestamp(time.Now().UnixNano())
			var packs []*pipeline.PipelinePack
			if decoder == nil {
				packs = []*pipeline.PipelinePack{pack}
			} else {
				packs, e = decoder.Decode(pack)
			}
			if packs != nil {
				for _, p := range packs {
					ir.Inject(p)
				}
			} else {
				if e != nil {
					ir.LogError(fmt.Errorf("Couldn't parse Redis message: %s", n.Data))
				}
				pack.Recycle(nil)
			}
		case redis.Subscription:
			ir.LogMessage(fmt.Sprintf("Subscription: %s %s %d\n", n.Kind, n.Channel, n.Count))
			if n.Count == 0 {
				return errors.New("no channels remain subscribed")
			}
		case error:
			ir.LogError(fmt.Errorf("Receive error: %v", n))
			return n
		}
	}
}
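// A standalone sketch of the redigo receive loop used above, stripped of the
// Heka plumbing, for experimenting with the subscription message types. It
// assumes the same redigo vintage as the plugin (github.com/garyburd/redigo,
// where pattern matches arrive as redis.PMessage); the address and pattern
// are examples only.
package main

import (
	"fmt"

	"github.com/garyburd/redigo/redis"
)

func main() {
	conn, err := redis.Dial("tcp", "localhost:6379")
	if err != nil {
		panic(err)
	}
	defer conn.Close()

	psc := redis.PubSubConn{Conn: conn}
	psc.PSubscribe("logs.*")

	for {
		switch n := psc.Receive().(type) {
		case redis.PMessage:
			// A message matching the subscribed pattern.
			fmt.Printf("pattern=%s channel=%s payload=%s\n", n.Pattern, n.Channel, n.Data)
		case redis.Subscription:
			// Emitted on (un)subscribe; Count is the number of active subscriptions.
			fmt.Printf("%s on %s, %d active\n", n.Kind, n.Channel, n.Count)
			if n.Count == 0 {
				return
			}
		case error:
			fmt.Printf("receive error: %v\n", n)
			return
		}
	}
}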
func (s *SandboxInput) Run(ir pipeline.InputRunner, h pipeline.PluginHelper) (err error) {
	s.sb.InjectMessage(func(payload, payload_type, payload_name string) int {
		pack := <-ir.InChan()
		if err := proto.Unmarshal([]byte(payload), pack.Message); err != nil {
			pack.Recycle()
			return 1
		}
		if s.tz != time.UTC {
			const layout = "2006-01-02T15:04:05.999999999" // remove the incorrect UTC tz info
			t := time.Unix(0, pack.Message.GetTimestamp())
			t = t.In(time.UTC)
			ct, _ := time.ParseInLocation(layout, t.Format(layout), s.tz)
			pack.Message.SetTimestamp(ct.UnixNano())
		}
		ir.Inject(pack)
		atomic.AddInt64(&s.processMessageCount, 1)
		atomic.AddInt64(&s.processMessageBytes, int64(len(payload)))
		return 0
	})

	ticker := ir.Ticker()

	for {
		retval := s.sb.ProcessMessage(nil)
		if retval <= 0 { // Sandbox is in polling mode
			if retval < 0 {
				atomic.AddInt64(&s.processMessageFailures, 1)
				em := s.sb.LastError()
				if len(em) > 0 {
					ir.LogError(errors.New(em))
				}
			}
			if ticker == nil {
				ir.LogMessage("single run completed")
				break
			}
			select { // block until stop or poll interval
			case <-s.stopChan:
			case <-ticker:
			}
		} else { // Sandbox is shutting down
			em := s.sb.LastError()
			if !strings.HasSuffix(em, "shutting down") {
				ir.LogError(errors.New(em))
			}
			break
		}
	}

	s.reportLock.Lock()
	if s.sbc.PreserveData {
		err = s.sb.Destroy(s.preservationFile)
	} else {
		err = s.sb.Destroy("")
	}
	s.sb = nil
	s.reportLock.Unlock()
	return
}
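// The timestamp rebase above is subtle: the wall-clock digits are kept and
// only the zone is swapped, because the stored timestamp carried a (wrong)
// UTC zone. A minimal standalone demonstration using only the standard
// library; the zone and date are arbitrary examples.
package main

import (
	"fmt"
	"time"
)

func main() {
	const layout = "2006-01-02T15:04:05.999999999" // no zone in the layout
	loc, err := time.LoadLocation("America/New_York")
	if err != nil {
		panic(err) // requires a tz database on the host
	}

	// A timestamp recorded as local wall-clock time but stored as UTC.
	t := time.Date(2014, 7, 1, 12, 0, 0, 0, time.UTC)

	// Format drops the zone; ParseInLocation re-reads the same digits in
	// the intended zone, shifting the absolute instant accordingly.
	ct, _ := time.ParseInLocation(layout, t.Format(layout), loc)
	fmt.Println(t.UnixNano(), "->", ct.UnixNano())
}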
func (input *S3SplitFileInput) Run(runner pipeline.InputRunner, helper pipeline.PluginHelper) error {
	// Begin listing the files (either straight from S3 or from a cache).
	// Write matching filenames on a "lister" channel.
	// Read from the lister channel:
	//   - fetch the filename
	//   - read records from it
	//   - write them to a "reader" channel

	var (
		wg sync.WaitGroup
		i  uint32
	)

	wg.Add(1)
	go func() {
		runner.LogMessage("Starting S3 list")
	iteratorLoop:
		for r := range S3Iterator(input.bucket, input.S3BucketPrefix, input.schema) {
			select {
			case <-input.stop:
				runner.LogMessage("Stopping S3 list")
				break iteratorLoop
			default:
			}
			if r.Err != nil {
				runner.LogError(fmt.Errorf("Error getting S3 list: %s", r.Err))
			} else {
				basename := r.Key.Key[strings.LastIndex(r.Key.Key, "/")+1:]
				if input.objectMatch == nil || input.objectMatch.MatchString(basename) {
					runner.LogMessage(fmt.Sprintf("Found: %s", r.Key.Key))
					input.listChan <- r.Key.Key
				} else {
					runner.LogMessage(fmt.Sprintf("Skipping: %s", r.Key.Key))
				}
			}
		}
		// All done listing, close the channel.
		runner.LogMessage("All done listing. Closing channel")
		close(input.listChan)
		wg.Done()
	}()

	// Run a pool of concurrent readers.
	for i = 0; i < input.S3WorkerCount; i++ {
		wg.Add(1)
		go input.fetcher(runner, &wg, i)
	}
	wg.Wait()

	return nil
}
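// Run above is an instance of a common Go pattern: one producer goroutine
// feeds a channel, a fixed pool of consumers drains it, and a WaitGroup
// holds the caller open until both sides finish. A stripped-down standalone
// sketch with illustrative names (keys, process are not from the plugin):
package main

import (
	"fmt"
	"sync"
)

func main() {
	keys := make(chan string)
	var wg sync.WaitGroup

	// Producer: lists work items, then closes the channel so consumers exit.
	wg.Add(1)
	go func() {
		defer wg.Done()
		for _, k := range []string{"a", "b", "c"} {
			keys <- k
		}
		close(keys)
	}()

	// Consumer pool, mirroring input.S3WorkerCount.
	const workers = 2
	for i := 0; i < workers; i++ {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			for k := range keys {
				fmt.Printf("worker %d processing %s\n", id, k)
			}
		}(i)
	}

	wg.Wait()
}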
func (input *S3SplitFileInput) fetcher(runner pipeline.InputRunner, wg *sync.WaitGroup, workerId uint32) {
	var (
		s3Key     string
		startTime time.Time
		duration  float64
	)

	fetcherName := fmt.Sprintf("S3Reader%d", workerId)
	deliverer := runner.NewDeliverer(fetcherName)
	defer deliverer.Done()
	splitterRunner := runner.NewSplitterRunner(fetcherName)

	ok := true
	for ok {
		select {
		case s3Key, ok = <-input.listChan:
			if !ok {
				// Channel is closed => we're shutting down, exit cleanly.
				// runner.LogMessage("Fetcher all done! shutting down.")
				break
			}

			startTime = time.Now().UTC()
			err := input.readS3File(runner, &deliverer, &splitterRunner, s3Key)
			atomic.AddInt64(&input.processFileCount, 1)
			leftovers := splitterRunner.GetRemainingData()
			lenLeftovers := len(leftovers)
			if lenLeftovers > 0 {
				atomic.AddInt64(&input.processFileDiscardedBytes, int64(lenLeftovers))
				runner.LogError(fmt.Errorf("Trailing data, possible corruption: %d bytes left in stream at EOF: %s", lenLeftovers, s3Key))
			}
			if err != nil && err != io.EOF {
				runner.LogError(fmt.Errorf("Error reading %s: %s", s3Key, err))
				atomic.AddInt64(&input.processFileFailures, 1)
				continue
			}
			duration = time.Now().UTC().Sub(startTime).Seconds()
			runner.LogMessage(fmt.Sprintf("Successfully fetched %s in %.2fs", s3Key, duration))
		case <-input.stop:
			for range input.listChan {
				// Drain the channel without processing the files.
				// Technically the S3Iterator can still add one back on to the
				// channel but this ensures there is room so it won't block.
			}
			ok = false
		}
	}

	wg.Done()
}
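// The stop branch above drains listChan so the lister's pending sends can
// never block after shutdown is requested. A minimal standalone reproduction
// of that select-then-drain shape (work and stop are illustrative names):
package main

import "fmt"

func main() {
	work := make(chan string)
	stop := make(chan struct{})

	// Producer: sends a fixed batch, then closes the channel.
	go func() {
		defer close(work)
		for _, k := range []string{"a", "b", "c"} {
			work <- k
		}
	}()

	close(stop) // request shutdown immediately, just for demonstration

	var k string
	ok := true
	for ok {
		select {
		case k, ok = <-work:
			if !ok {
				break // channel closed: clean shutdown
			}
			fmt.Println("processing", k)
		case <-stop:
			// Drain without processing so the producer never blocks.
			for range work {
			}
			ok = false
		}
	}
}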
func (input *S3OffsetInput) fetcher(runner pipeline.InputRunner, wg *sync.WaitGroup, workerId uint32) {
	var (
		loc       MessageLocation
		startTime time.Time
		duration  float64
		headers   map[string][]string
		record    []byte
		err       error
	)

	headers = map[string][]string{
		"Range": []string{""},
	}

	fetcherName := fmt.Sprintf("S3Reader%d", workerId)
	deliverer := runner.NewDeliverer(fetcherName)
	defer deliverer.Done()
	splitterRunner := runner.NewSplitterRunner(fetcherName)

	ok := true
	for ok {
		select {
		case loc, ok = <-input.offsetChan:
			if !ok {
				// Channel is closed => we're shutting down, exit cleanly.
				runner.LogMessage("Fetcher all done! shutting down.")
				break
			}

			startTime = time.Now().UTC()

			// Read one message from the given location.
			headers["Range"][0] = fmt.Sprintf("bytes=%d-%d", loc.Offset, loc.Offset+loc.Length-1)
			atomic.AddInt64(&input.processMessageCount, 1)
			atomic.AddInt64(&input.processMessageBytes, int64(loc.Length))
			for attempt := uint32(1); attempt <= input.S3Retries; attempt++ {
				record, err = getClientRecord(input.bucket, &loc, headers)
				if err != nil {
					runner.LogMessage(fmt.Sprintf("Error #%d fetching %s @ %d+%d: %s\n", attempt, loc.Key, loc.Offset, loc.Length, err))
				} else {
					break
				}
			}
			if err != nil {
				atomic.AddInt64(&input.processMessageFailures, 1)
				continue
			}
			splitterRunner.DeliverRecord(record, deliverer)
			duration = time.Now().UTC().Sub(startTime).Seconds()
			runner.LogMessage(fmt.Sprintf("Successfully fetched %s in %.2fs", loc.Key, duration))
		case <-input.stop:
			runner.LogMessage("Stopping fetcher...")
			for range input.offsetChan {
				// Drain the channel without processing anything.
			}
			ok = false
		}
	}

	wg.Done()
}
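// getClientRecord above fetches a single record via an HTTP Range request.
// A sketch of the same byte-range read against any HTTP server, using only
// the standard library; the URL in main is a placeholder, and a real S3 GET
// would also need request signing.
package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
)

func fetchRange(url string, offset, length uint32) ([]byte, error) {
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, err
	}
	// Range is inclusive on both ends, hence the -1 (same arithmetic as the
	// fetcher's header construction).
	req.Header.Set("Range", fmt.Sprintf("bytes=%d-%d", offset, offset+length-1))
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusPartialContent {
		return nil, fmt.Errorf("expected 206 Partial Content, got %s", resp.Status)
	}
	return ioutil.ReadAll(resp.Body)
}

func main() {
	data, err := fetchRange("http://example.com/some/file", 0, 16)
	fmt.Println(len(data), err)
}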
func (input *S3OffsetInput) Run(runner pipeline.InputRunner, helper pipeline.PluginHelper) error {
	// List offset metadata index files.
	// For each index D >= start and <= end:
	//   - read index D
	//   - write offsets for any desired clients to offsetChan
	// Meanwhile, for each item in offsetChan:
	//   - fetch that record, inject the resulting message into the pipeline.

	var (
		wg          sync.WaitGroup
		i           uint32
		emptySchema Schema
	)

	if input.metaFileName != "" {
		wg.Add(1)
		go func() {
			reader, err := os.Open(input.metaFileName)
			if err != nil {
				runner.LogError(fmt.Errorf("Error opening metadata file '%s': %s", input.metaFileName, err))
			} else {
				// Only touch the reader if Open succeeded.
				defer reader.Close()
				if err = input.parseMessageLocations(reader, input.metaFileName); err != nil {
					runner.LogError(fmt.Errorf("Error reading metadata: %s", err))
				}
			}
			// All done with metadata, close the channel.
			runner.LogMessage("All done with metadata. Closing channel")
			close(input.offsetChan)
			wg.Done()
		}()
	} else if input.metaBucket != nil {
		wg.Add(1)
		go func() {
			runner.LogMessage("Starting S3 list")
		iteratorLoop:
			for r := range S3Iterator(input.metaBucket, input.S3MetaBucketPrefix, emptySchema) {
				select {
				case <-input.stop:
					runner.LogMessage("Stopping S3 list")
					break iteratorLoop
				default:
				}
				if r.Err != nil {
					runner.LogError(fmt.Errorf("Error getting S3 list: %s", r.Err))
				} else {
					// Assumes keys begin with a YYYYMMDD basename.
					base := path.Base(r.Key.Key)[0:8]
					// Check if r is in the desired date range.
					if base >= input.StartDate && base <= input.EndDate {
						if err := input.grep(r); err != nil {
							runner.LogError(fmt.Errorf("Error reading index: %s", err))
						}
					}
				}
			}
			// All done listing, close the channel.
			runner.LogMessage("All done listing. Closing channel")
			close(input.offsetChan)
			wg.Done()
		}()
	} else {
		// No metadata source: close the channel so the fetcher pool exits
		// immediately. No wg.Done() here, since this branch has no matching
		// wg.Add(1).
		runner.LogMessage("Nothing to do, no metadata available. Closing channel")
		close(input.offsetChan)
	}

	// Run a pool of concurrent readers.
	for i = 0; i < input.S3WorkerCount; i++ {
		wg.Add(1)
		go input.fetcher(runner, &wg, i)
	}
	wg.Wait()

	return nil
}
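// The date filter above relies on keys beginning with a YYYYMMDD basename,
// so plain string comparison orders dates correctly. A quick standalone
// illustration (inRange is a hypothetical helper, not plugin code; it also
// guards the length that path.Base(...)[0:8] assumes):
package main

import (
	"fmt"
	"path"
)

func inRange(key, start, end string) bool {
	base := path.Base(key)
	if len(base) < 8 {
		return false // malformed key; slicing [0:8] would panic
	}
	d := base[0:8] // zero-padded YYYYMMDD compares correctly as a string
	return d >= start && d <= end
}

func main() {
	fmt.Println(inRange("prefix/20140701.idx", "20140601", "20140801")) // true
	fmt.Println(inRange("prefix/20141225.idx", "20140601", "20140801")) // false
}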