Example #1
0
func main() {
	flagMatch := flag.String("match", "TRUE", "message_matcher filter expression")
	flagFormat := flag.String("format", "txt", "output format [txt|json|heka|count]")
	flagOutput := flag.String("output", "", "output filename, defaults to stdout")
	flagTail := flag.Bool("tail", false, "don't exit on EOF")
	flagOffset := flag.Int64("offset", 0, "starting offset for the input file in bytes")
	flagMaxMessageSize := flag.Uint64("max-message-size", 4*1024*1024, "maximum message size in bytes")
	flag.Parse()

	if flag.NArg() != 1 {
		flag.PrintDefaults()
		os.Exit(1)
	}

	if *flagMaxMessageSize < math.MaxUint32 {
		maxSize := uint32(*flagMaxMessageSize)
		message.SetMaxMessageSize(maxSize)
	} else {
		fmt.Printf("Message size is too large: %d\n", flagMaxMessageSize)
		os.Exit(8)
	}

	var err error
	var match *message.MatcherSpecification
	if match, err = message.CreateMatcherSpecification(*flagMatch); err != nil {
		fmt.Printf("Match specification - %s\n", err)
		os.Exit(2)
	}

	var file *os.File
	if file, err = os.Open(flag.Arg(0)); err != nil {
		fmt.Printf("%s\n", err)
		os.Exit(3)
	}
	defer file.Close()

	var out *os.File
	if "" == *flagOutput {
		out = os.Stdout
	} else {
		if out, err = os.OpenFile(*flagOutput, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644); err != nil {
			fmt.Printf("%s\n", err)
			os.Exit(4)
		}
		defer out.Close()
	}

	var offset int64
	if offset, err = file.Seek(*flagOffset, 0); err != nil {
		fmt.Printf("%s\n", err)
		os.Exit(5)
	}

	sRunner, err := makeSplitterRunner()
	if err != nil {
		fmt.Println(err)
		os.Exit(7)
	}
	msg := new(message.Message)
	var processed, matched int64

	fmt.Printf("Input:%s  Offset:%d  Match:%s  Format:%s  Tail:%t  Output:%s\n",
		flag.Arg(0), *flagOffset, *flagMatch, *flagFormat, *flagTail, *flagOutput)
	for true {
		n, record, err := sRunner.GetRecordFromStream(file)
		if n > 0 && n != len(record) {
			fmt.Printf("Corruption detected at offset: %d bytes: %d\n", offset, n-len(record))
		}
		if err != nil {
			if err == io.EOF {
				if !*flagTail || "count" == *flagFormat {
					break
				}
				time.Sleep(time.Duration(500) * time.Millisecond)
			} else {
				break
			}
		} else {
			if len(record) > 0 {
				processed += 1
				headerLen := int(record[1]) + message.HEADER_FRAMING_SIZE
				if err = proto.Unmarshal(record[headerLen:], msg); err != nil {
					fmt.Printf("Error unmarshalling message at offset: %d error: %s\n", offset, err)
					continue
				}

				if !match.Match(msg) {
					continue
				}
				matched += 1

				switch *flagFormat {
				case "count":
					// no op
				case "json":
					contents, _ := json.Marshal(msg)
					fmt.Fprintf(out, "%s\n", contents)
				case "heka":
					fmt.Fprintf(out, "%s", record)
				default:
					fmt.Fprintf(out, "Timestamp: %s\n"+
						"Type: %s\n"+
						"Hostname: %s\n"+
						"Pid: %d\n"+
						"UUID: %s\n"+
						"Logger: %s\n"+
						"Payload: %s\n"+
						"EnvVersion: %s\n"+
						"Severity: %d\n"+
						"Fields: %+v\n\n",
						time.Unix(0, msg.GetTimestamp()), msg.GetType(),
						msg.GetHostname(), msg.GetPid(), msg.GetUuidString(),
						msg.GetLogger(), msg.GetPayload(), msg.GetEnvVersion(),
						msg.GetSeverity(), msg.Fields)
				}
			}
		}
		offset += int64(n)
	}
	fmt.Printf("Processed: %d, matched: %d messages\n", processed, matched)
	if err != nil {
		fmt.Printf("%s\n", err)
		os.Exit(6)
	}
}
Example #2
0
// Save matching client records locally to the given output file in the given
// format.
func save(recordChannel <-chan s3splitfile.S3Record, match *message.MatcherSpecification, format string, out *os.File, done chan<- int) {
	processed := 0
	matched := 0
	bytes := 0
	msg := new(message.Message)
	ok := true
	for ok {
		r, ok := <-recordChannel
		if !ok {
			// Channel is closed
			done <- bytes
			break
		}

		bytes += len(r.Record)

		processed += 1
		headerLen := int(r.Record[1]) + message.HEADER_FRAMING_SIZE
		messageBytes := r.Record[headerLen:]
		unsnappy, decodeErr := snappy.Decode(nil, messageBytes)
		if decodeErr == nil {
			messageBytes = unsnappy
		}
		if err := proto.Unmarshal(messageBytes, msg); err != nil {
			fmt.Fprintf(os.Stderr, "Error unmarshalling message %d in %s, error: %s\n", processed, r.Key, err)
			continue
		}

		if !match.Match(msg) {
			continue
		}

		matched += 1

		switch format {
		case "count":
			// no op
		case "json":
			contents, _ := json.Marshal(msg)
			fmt.Fprintf(out, "%s\n", contents)
		case "heka":
			fmt.Fprintf(out, "%s", r.Record)
		case "offsets":
			// Use offsets mode for indexing the S3 files by clientId
			clientId, ok := msg.GetFieldValue("clientId")
			recordLength := len(r.Record) - headerLen
			if ok {
				fmt.Fprintf(out, "%s\t%s\t%d\t%d\n", r.Key, clientId, (r.Offset + uint64(headerLen)), recordLength)
			} else {
				fmt.Fprintf(os.Stderr, "Missing client id in %s @ %d+%d\n", r.Key, r.Offset, recordLength)
			}
		default:
			fmt.Fprintf(out, "Timestamp: %s\n"+
				"Type: %s\n"+
				"Hostname: %s\n"+
				"Pid: %d\n"+
				"UUID: %s\n"+
				"Logger: %s\n"+
				"Payload: %s\n"+
				"EnvVersion: %s\n"+
				"Severity: %d\n"+
				"Fields: %+v\n\n",
				time.Unix(0, msg.GetTimestamp()), msg.GetType(),
				msg.GetHostname(), msg.GetPid(), msg.GetUuidString(),
				msg.GetLogger(), msg.GetPayload(), msg.GetEnvVersion(),
				msg.GetSeverity(), msg.Fields)
		}
	}
	fmt.Fprintf(os.Stderr, "Processed: %d, matched: %d messages (%.2f MB)\n", processed, matched, (float64(bytes) / 1024.0 / 1024.0))
}