// Creates and returns a new MatchRunner if possible, or a relevant error if // not. func NewMatchRunner(filter, signer string) (matcher *MatchRunner, err error) { var spec *message.MatcherSpecification if spec, err = message.CreateMatcherSpecification(filter); err != nil { return } matcher = &MatchRunner{ spec: spec, signer: signer, inChan: make(chan *PipelinePack, Globals().PluginChanSize), } return }
// Creates and returns a new MatchRunner if possible, or a relevant error if // not. func NewMatchRunner(filter, signer string, runner PluginRunner, chanSize int, matchChan chan *PipelinePack) (matcher *MatchRunner, err error) { var spec *message.MatcherSpecification if spec, err = message.CreateMatcherSpecification(filter); err != nil { return } retry, _ := NewRetryHelper(RetryOptions{ MaxDelay: "1s", Delay: "50ms", MaxRetries: -1, }) matcher = &MatchRunner{ spec: spec, signer: signer, inChan: make(chan *PipelinePack, chanSize), matchChan: matchChan, pluginRunner: runner, retry: retry, } return }
func main() { flagMatch := flag.String("match", "TRUE", "message_matcher filter expression") flagFormat := flag.String("format", "txt", "output format [txt|json|heka|count]") flagOutput := flag.String("output", "", "output filename, defaults to stdout") flagTail := flag.Bool("tail", false, "don't exit on EOF") flagOffset := flag.Int64("offset", 0, "starting offset for the input file in bytes") flagMaxMessageSize := flag.Uint64("max-message-size", 4*1024*1024, "maximum message size in bytes") flag.Parse() if flag.NArg() != 1 { flag.PrintDefaults() os.Exit(1) } if *flagMaxMessageSize < math.MaxUint32 { maxSize := uint32(*flagMaxMessageSize) message.SetMaxMessageSize(maxSize) } else { fmt.Printf("Message size is too large: %d\n", flagMaxMessageSize) os.Exit(8) } var err error var match *message.MatcherSpecification if match, err = message.CreateMatcherSpecification(*flagMatch); err != nil { fmt.Printf("Match specification - %s\n", err) os.Exit(2) } var file *os.File if file, err = os.Open(flag.Arg(0)); err != nil { fmt.Printf("%s\n", err) os.Exit(3) } defer file.Close() var out *os.File if "" == *flagOutput { out = os.Stdout } else { if out, err = os.OpenFile(*flagOutput, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644); err != nil { fmt.Printf("%s\n", err) os.Exit(4) } defer out.Close() } var offset int64 if offset, err = file.Seek(*flagOffset, 0); err != nil { fmt.Printf("%s\n", err) os.Exit(5) } sRunner, err := makeSplitterRunner() if err != nil { fmt.Println(err) os.Exit(7) } msg := new(message.Message) var processed, matched int64 fmt.Printf("Input:%s Offset:%d Match:%s Format:%s Tail:%t Output:%s\n", flag.Arg(0), *flagOffset, *flagMatch, *flagFormat, *flagTail, *flagOutput) for true { n, record, err := sRunner.GetRecordFromStream(file) if n > 0 && n != len(record) { fmt.Printf("Corruption detected at offset: %d bytes: %d\n", offset, n-len(record)) } if err != nil { if err == io.EOF { if !*flagTail || "count" == *flagFormat { break } time.Sleep(time.Duration(500) * time.Millisecond) } else { break } } else { if len(record) > 0 { processed += 1 headerLen := int(record[1]) + message.HEADER_FRAMING_SIZE if err = proto.Unmarshal(record[headerLen:], msg); err != nil { fmt.Printf("Error unmarshalling message at offset: %d error: %s\n", offset, err) continue } if !match.Match(msg) { continue } matched += 1 switch *flagFormat { case "count": // no op case "json": contents, _ := json.Marshal(msg) fmt.Fprintf(out, "%s\n", contents) case "heka": fmt.Fprintf(out, "%s", record) default: fmt.Fprintf(out, "Timestamp: %s\n"+ "Type: %s\n"+ "Hostname: %s\n"+ "Pid: %d\n"+ "UUID: %s\n"+ "Logger: %s\n"+ "Payload: %s\n"+ "EnvVersion: %s\n"+ "Severity: %d\n"+ "Fields: %+v\n\n", time.Unix(0, msg.GetTimestamp()), msg.GetType(), msg.GetHostname(), msg.GetPid(), msg.GetUuidString(), msg.GetLogger(), msg.GetPayload(), msg.GetEnvVersion(), msg.GetSeverity(), msg.Fields) } } } offset += int64(n) } fmt.Printf("Processed: %d, matched: %d messages\n", processed, matched) if err != nil { fmt.Printf("%s\n", err) os.Exit(6) } }
func TestInjectMessage(t *testing.T) { var sbc SandboxConfig tests := []string{ "lua types", "cloudwatch metric", "external reference", "array only", "private keys", "special characters", "message field all types", "internal reference", } outputs := []string{ `{"value":1}1.2 string nil true false`, `{"StatisticValues":[{"Minimum":0,"SampleCount":0,"Sum":0,"Maximum":0},{"Minimum":0,"SampleCount":0,"Sum":0,"Maximum":0}],"Dimensions":[{"Name":"d1","Value":"v1"},{"Name":"d2","Value":"v2"}],"MetricName":"example","Timestamp":0,"Value":0,"Unit":"s"}`, `{"a":{"y":2,"x":1}}`, `[1,2,3]`, `{"x":1,"_m":1,"_private":[1,2]}`, `{"special\tcharacters":"\"\t\r\n\b\f\\\/"}`, "\x10\x80\x94\xeb\xdc\x03\x52\x13\x0a\x06\x6e\x75\x6d\x62\x65\x72\x10\x03\x39\x00\x00\x00\x00\x00\x00\xf0\x3f\x52\x2c\x0a\x07\x6e\x75\x6d\x62\x65\x72\x73\x10\x03\x1a\x05\x63\x6f\x75\x6e\x74\x3a\x18\x00\x00\x00\x00\x00\x00\xf0\x3f\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x08\x40\x52\x0e\x0a\x05\x62\x6f\x6f\x6c\x73\x10\x04\x42\x03\x01\x00\x00\x52\x0a\x0a\x04\x62\x6f\x6f\x6c\x10\x04\x40\x01\x52\x10\x0a\x06\x73\x74\x72\x69\x6e\x67\x22\x06\x73\x74\x72\x69\x6e\x67\x52\x15\x0a\x07\x73\x74\x72\x69\x6e\x67\x73\x22\x02\x73\x31\x22\x02\x73\x32\x22\x02\x73\x33", `{"y":[2],"x":[1,2,3],"ir":[1,2,3]}`, } if false { // lua jit values outputs[1] = `{"Timestamp":0,"Value":0,"StatisticValues":[{"SampleCount":0,"Sum":0,"Maximum":0,"Minimum":0},{"SampleCount":0,"Sum":0,"Maximum":0,"Minimum":0}],"Unit":"s","MetricName":"example","Dimensions":[{"Name":"d1","Value":"v1"},{"Name":"d2","Value":"v2"}]}` } sbc.ScriptFilename = "./testsupport/inject_message.lua" sbc.MemoryLimit = 100000 sbc.InstructionLimit = 1000 sbc.OutputLimit = 8000 pack := getTestPack() sb, err := lua.CreateLuaSandbox(&sbc) if err != nil { t.Errorf("%s", err) } err = sb.Init("", "") if err != nil { t.Errorf("%s", err) } cnt := 0 sb.InjectMessage(func(p, pt, pn string) int { if len(pt) == 0 { // no type is a Heka protobuf message if p[18:] != outputs[cnt] { // ignore the UUID t.Errorf("Output is incorrect, expected: \"%x\" received: \"%x\"", outputs[cnt], p[18:]) } } else { if p != outputs[cnt] { t.Errorf("Output is incorrect, expected: \"%s\" received: \"%s\"", outputs[cnt], p) } } if cnt == 6 { msg := new(message.Message) err := proto.Unmarshal([]byte(p), msg) if err != nil { t.Errorf("%s", err) } if msg.GetTimestamp() != 1e9 { t.Errorf("Timestamp expected %d received %d", int(1e9), pack.Message.GetTimestamp()) } if field := msg.FindFirstField("numbers"); field != nil { if field.GetRepresentation() != "count" { t.Errorf("'numbers' representation expected count received %s", 1e9, field.GetRepresentation()) } } else { t.Errorf("'numbers' field not found") } tests := []string{ "Timestamp == 1000000000", "Fields[number] == 1", "Fields[numbers][0][0] == 1 && Fields[numbers][0][1] == 2 && Fields[numbers][0][2] == 3", "Fields[string] == 'string'", "Fields[strings][0][0] == 's1' && Fields[strings][0][1] == 's2' && Fields[strings][0][2] == 's3'", "Fields[bool] == TRUE", "Fields[bools][0][0] == TRUE && Fields[bools][0][1] == FALSE && Fields[bools][0][2] == FALSE", } for _, v := range tests { ms, _ := message.CreateMatcherSpecification(v) match := ms.Match(msg) if !match { t.Errorf("Test failed %s", v) } } } cnt++ return 0 }) for _, v := range tests { pack.Message.SetPayload(v) r := sb.ProcessMessage(pack) if r != 0 { t.Errorf("ProcessMessage should return 0, received %d %s", r, sb.LastError()) } } sb.Destroy("") if cnt != len(tests) { t.Errorf("InjectMessage was called %d times, expected %d", cnt, len(tests)) } }
func main() { flagMatch := flag.String("match", "TRUE", "message_matcher filter expression") flagFormat := flag.String("format", "txt", "output format [txt|json|heka|count]") flagOutput := flag.String("output", "", "output filename, defaults to stdout") flagStdin := flag.Bool("stdin", false, "read list of s3 key names from stdin") flagBucket := flag.String("bucket", "default-bucket", "S3 Bucket name") flagAWSKey := flag.String("aws-key", "", "AWS Key") flagAWSSecretKey := flag.String("aws-secret-key", "", "AWS Secret Key") flagAWSRegion := flag.String("aws-region", "us-west-2", "AWS Region") flagMaxMessageSize := flag.Uint64("max-message-size", 4*1024*1024, "maximum message size in bytes") flagWorkers := flag.Uint64("workers", 16, "number of parallel workers") flagConnectTimeout := flag.Uint64("connect_timeout", 60, "Max seconds to wait for an S3 connection") flagReadTimeout := flag.Uint64("read_timeout", 300, "Max seconds to wait for an S3 file read to complete") flag.Parse() if !*flagStdin && flag.NArg() < 1 { flag.PrintDefaults() os.Exit(1) } if *flagMaxMessageSize < math.MaxUint32 { maxSize := uint32(*flagMaxMessageSize) message.SetMaxMessageSize(maxSize) } else { fmt.Fprintf(os.Stderr, "Message size is too large: %d\n", flagMaxMessageSize) os.Exit(8) } workers := 1 if *flagWorkers == 0 { fmt.Fprintf(os.Stderr, "Cannot run with zero workers. Using 1.\n") } else if *flagWorkers < 2000 { workers = int(*flagWorkers) } else { fmt.Fprintf(os.Stderr, "Too many workers: %d. Use a reasonable value (up to a few hundred).\n", flagWorkers) os.Exit(8) } var connectTimeout uint32 if *flagConnectTimeout < math.MaxUint32 { connectTimeout = uint32(*flagConnectTimeout) } else { fmt.Fprintf(os.Stderr, "Connection Timeout is too large:%d.\n", flagConnectTimeout) os.Exit(8) } var readTimeout uint32 if *flagReadTimeout < math.MaxUint32 { readTimeout = uint32(*flagReadTimeout) } else { fmt.Fprintf(os.Stderr, "Read Timeout is too large:%d.\n", flagReadTimeout) os.Exit(8) } var err error var match *message.MatcherSpecification if match, err = message.CreateMatcherSpecification(*flagMatch); err != nil { fmt.Fprintf(os.Stderr, "Match specification - %s\n", err) os.Exit(2) } var out *os.File if "" == *flagOutput { out = os.Stdout } else { if out, err = os.OpenFile(*flagOutput, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644); err != nil { fmt.Fprintf(os.Stderr, "%s\n", err) os.Exit(3) } defer out.Close() } auth, err := aws.GetAuth(*flagAWSKey, *flagAWSSecretKey, "", time.Now()) if err != nil { fmt.Fprintf(os.Stderr, "Authentication error: %s\n", err) os.Exit(4) } region, ok := aws.Regions[*flagAWSRegion] if !ok { fmt.Fprintf(os.Stderr, "Parameter 'aws-region' must be a valid AWS Region\n") os.Exit(5) } s := s3.New(auth, region) if connectTimeout > 0 { s.ConnectTimeout = time.Duration(connectTimeout) * time.Second } if readTimeout > 0 { s.ReadTimeout = time.Duration(readTimeout) * time.Second } bucket := s.Bucket(*flagBucket) filenameChannel := make(chan string, 1000) recordChannel := make(chan s3splitfile.S3Record, 1000) doneChannel := make(chan string, 1000) allDone := make(chan int) for i := 1; i <= workers; i++ { go cat(bucket, filenameChannel, recordChannel, doneChannel) } go save(recordChannel, match, *flagFormat, out, allDone) startTime := time.Now().UTC() totalFiles := 0 pendingFiles := 0 if *flagStdin { scanner := bufio.NewScanner(os.Stdin) for scanner.Scan() { filename := scanner.Text() totalFiles++ pendingFiles++ filenameChannel <- filename if pendingFiles >= 1000 { waitFor(doneChannel, 1) pendingFiles-- } } close(filenameChannel) } else { for _, filename := range flag.Args() { totalFiles++ pendingFiles++ filenameChannel <- filename if pendingFiles >= 1000 { waitFor(doneChannel, 1) pendingFiles-- } } close(filenameChannel) } fmt.Fprintf(os.Stderr, "Waiting for last %d files\n", pendingFiles) waitFor(doneChannel, pendingFiles) close(recordChannel) bytesRead := <-allDone // All done! Win! duration := time.Now().UTC().Sub(startTime).Seconds() mb := float64(bytesRead) / 1024.0 / 1024.0 if duration == 0.0 { duration = 1.0 } fmt.Fprintf(os.Stderr, "All done processing %d files, %.2fMB in %.2f seconds (%.2fMB/s)\n", totalFiles, mb, duration, (mb / duration)) }