func main() {
	flagMatch := flag.String("match", "TRUE", "message_matcher filter expression")
	flagFormat := flag.String("format", "txt", "output format [txt|json|heka|count]")
	flagOutput := flag.String("output", "", "output filename, defaults to stdout")
	flagTail := flag.Bool("tail", false, "don't exit on EOF")
	flagOffset := flag.Int64("offset", 0, "starting offset for the input file in bytes")
	flagMaxMessageSize := flag.Uint64("max-message-size", 4*1024*1024, "maximum message size in bytes")
	flag.Parse()

	if flag.NArg() != 1 {
		flag.PrintDefaults()
		os.Exit(1)
	}

	if *flagMaxMessageSize < math.MaxUint32 {
		maxSize := uint32(*flagMaxMessageSize)
		message.SetMaxMessageSize(maxSize)
	} else {
		fmt.Printf("Message size is too large: %d\n", *flagMaxMessageSize)
		os.Exit(8)
	}

	var err error
	var match *message.MatcherSpecification
	if match, err = message.CreateMatcherSpecification(*flagMatch); err != nil {
		fmt.Printf("Match specification - %s\n", err)
		os.Exit(2)
	}

	var file *os.File
	if file, err = os.Open(flag.Arg(0)); err != nil {
		fmt.Printf("%s\n", err)
		os.Exit(3)
	}
	defer file.Close()

	var out *os.File
	if *flagOutput == "" {
		out = os.Stdout
	} else {
		if out, err = os.OpenFile(*flagOutput, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644); err != nil {
			fmt.Printf("%s\n", err)
			os.Exit(4)
		}
		defer out.Close()
	}

	var offset int64
	if offset, err = file.Seek(*flagOffset, 0); err != nil {
		fmt.Printf("%s\n", err)
		os.Exit(5)
	}

	sRunner, err := makeSplitterRunner()
	if err != nil {
		fmt.Println(err)
		os.Exit(7)
	}

	msg := new(message.Message)
	var processed, matched int64
	fmt.Printf("Input:%s Offset:%d Match:%s Format:%s Tail:%t Output:%s\n",
		flag.Arg(0), *flagOffset, *flagMatch, *flagFormat, *flagTail, *flagOutput)

	// n and record are declared outside the loop so the final error check
	// below sees the error that terminated the read loop.
	var (
		n      int
		record []byte
	)
	for {
		// Pull the next framed record off the stream. n is the number of bytes
		// consumed; if it exceeds the record length, some input was skipped.
		n, record, err = sRunner.GetRecordFromStream(file)
		if n > 0 && n != len(record) {
			fmt.Printf("Corruption detected at offset: %d bytes: %d\n", offset, n-len(record))
		}
		if err != nil {
			if err == io.EOF {
				if !*flagTail || *flagFormat == "count" {
					break
				}
				// In tail mode, wait for more data to be appended.
				time.Sleep(500 * time.Millisecond)
			} else {
				break
			}
		} else if len(record) > 0 {
			processed++
			// Skip the framing header to get at the protobuf-encoded message.
			headerLen := int(record[1]) + message.HEADER_FRAMING_SIZE
			if err = proto.Unmarshal(record[headerLen:], msg); err != nil {
				fmt.Printf("Error unmarshalling message at offset: %d error: %s\n", offset, err)
				continue
			}

			if !match.Match(msg) {
				continue
			}
			matched++

			switch *flagFormat {
			case "count":
				// no op
			case "json":
				contents, _ := json.Marshal(msg)
				fmt.Fprintf(out, "%s\n", contents)
			case "heka":
				fmt.Fprintf(out, "%s", record)
			default:
				fmt.Fprintf(out, "Timestamp: %s\n"+
					"Type: %s\n"+
					"Hostname: %s\n"+
					"Pid: %d\n"+
					"UUID: %s\n"+
					"Logger: %s\n"+
					"Payload: %s\n"+
					"EnvVersion: %s\n"+
					"Severity: %d\n"+
					"Fields: %+v\n\n",
					time.Unix(0, msg.GetTimestamp()), msg.GetType(), msg.GetHostname(),
					msg.GetPid(), msg.GetUuidString(), msg.GetLogger(), msg.GetPayload(),
					msg.GetEnvVersion(), msg.GetSeverity(), msg.Fields)
			}
		}
		offset += int64(n)
	}

	fmt.Printf("Processed: %d, matched: %d messages\n", processed, matched)
	if err != nil && err != io.EOF {
		fmt.Printf("%s\n", err)
		os.Exit(6)
	}
}
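// The call to makeSplitterRunner() above is not defined in this listing. Below
// is a minimal sketch of one way it could be built, assuming Heka's pipeline
// package (HekaFramingSplitter, CommonSplitterConfig, NewSplitterRunner); the
// exact names and signatures here are assumptions and may differ from the
// tool's real helper.
func makeSplitterRunner() (pipeline.SplitterRunner, error) {
	// Use Heka's framing splitter so GetRecordFromStream() yields one framed
	// record (header + protobuf message) at a time.
	splitter := &pipeline.HekaFramingSplitter{}
	config := splitter.ConfigStruct()
	if err := splitter.Init(config); err != nil {
		return nil, fmt.Errorf("Error initializing HekaFramingSplitter: %s", err)
	}
	srConfig := pipeline.CommonSplitterConfig{}
	return pipeline.NewSplitterRunner("HekaFramingSplitter", splitter, srConfig), nil
}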
// Save matching client records locally to the given output file in the given
// format.
func save(recordChannel <-chan s3splitfile.S3Record, match *message.MatcherSpecification,
	format string, out *os.File, done chan<- int) {
	processed := 0
	matched := 0
	bytes := 0
	msg := new(message.Message)

	for {
		r, ok := <-recordChannel
		if !ok {
			// Channel is closed; report the total byte count and stop.
			done <- bytes
			break
		}

		bytes += len(r.Record)
		processed++

		// Skip the framing header to get at the message body, which may be
		// Snappy-compressed; fall back to the raw bytes if it is not.
		headerLen := int(r.Record[1]) + message.HEADER_FRAMING_SIZE
		messageBytes := r.Record[headerLen:]
		unsnappy, decodeErr := snappy.Decode(nil, messageBytes)
		if decodeErr == nil {
			messageBytes = unsnappy
		}

		if err := proto.Unmarshal(messageBytes, msg); err != nil {
			fmt.Fprintf(os.Stderr, "Error unmarshalling message %d in %s, error: %s\n",
				processed, r.Key, err)
			continue
		}

		if !match.Match(msg) {
			continue
		}
		matched++

		switch format {
		case "count":
			// no op
		case "json":
			contents, _ := json.Marshal(msg)
			fmt.Fprintf(out, "%s\n", contents)
		case "heka":
			fmt.Fprintf(out, "%s", r.Record)
		case "offsets":
			// Use offsets mode for indexing the S3 files by clientId.
			clientId, ok := msg.GetFieldValue("clientId")
			recordLength := len(r.Record) - headerLen
			if ok {
				fmt.Fprintf(out, "%s\t%s\t%d\t%d\n", r.Key, clientId,
					r.Offset+uint64(headerLen), recordLength)
			} else {
				fmt.Fprintf(os.Stderr, "Missing client id in %s @ %d+%d\n",
					r.Key, r.Offset, recordLength)
			}
		default:
			fmt.Fprintf(out, "Timestamp: %s\n"+
				"Type: %s\n"+
				"Hostname: %s\n"+
				"Pid: %d\n"+
				"UUID: %s\n"+
				"Logger: %s\n"+
				"Payload: %s\n"+
				"EnvVersion: %s\n"+
				"Severity: %d\n"+
				"Fields: %+v\n\n",
				time.Unix(0, msg.GetTimestamp()), msg.GetType(), msg.GetHostname(),
				msg.GetPid(), msg.GetUuidString(), msg.GetLogger(), msg.GetPayload(),
				msg.GetEnvVersion(), msg.GetSeverity(), msg.Fields)
		}
	}

	fmt.Fprintf(os.Stderr, "Processed: %d, matched: %d messages (%.2f MB)\n",
		processed, matched, float64(bytes)/1024.0/1024.0)
}
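// A hedged usage sketch for save(): it is meant to run concurrently with a
// producer that feeds framed records into recordChannel and closes the channel
// when finished; save() then reports the total bytes it consumed on the done
// channel. Only the channel wiring is taken from the function above; the
// catRecords name and the produceRecords helper are hypothetical stand-ins for
// whatever actually reads records out of the S3 files.
func catRecords(keys []string, match *message.MatcherSpecification, format string, out *os.File) {
	recordChannel := make(chan s3splitfile.S3Record, 1000)
	done := make(chan int)

	// Consume and print matching records in the background.
	go save(recordChannel, match, format, out, done)

	for _, key := range keys {
		// Hypothetical producer: emit each framed record found in `key` on
		// recordChannel. Replace with the real S3 reading code.
		produceRecords(key, recordChannel)
	}
	close(recordChannel)

	// save() sends the total byte count once the channel is drained.
	totalBytes := <-done
	fmt.Fprintf(os.Stderr, "Done. Read %d bytes\n", totalBytes)
}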