Example #1
// NewMatchRunner creates and returns a new MatchRunner, or a relevant error
// if one cannot be created.
func NewMatchRunner(filter, signer string) (matcher *MatchRunner, err error) {
	var spec *message.MatcherSpecification
	if spec, err = message.CreateMatcherSpecification(filter); err != nil {
		return
	}
	matcher = &MatchRunner{
		spec:   spec,
		signer: signer,
		inChan: make(chan *PipelinePack, Globals().PluginChanSize),
	}
	return
}
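
The constructor above mostly wires fields together; the actual matching comes from the exported message package. A minimal standalone sketch of that flow, assuming the usual Heka import path and the SetType/SetPayload setters that mirror the getters used in the later examples:

package main

import (
	"fmt"

	"github.com/mozilla-services/heka/message"
)

func main() {
	// Compile a message_matcher expression and run it against a hand-built message.
	spec, err := message.CreateMatcherSpecification("Type == 'demo' && Payload =~ /error/")
	if err != nil {
		fmt.Println("bad filter:", err)
		return
	}
	msg := new(message.Message)
	msg.SetType("demo")
	msg.SetPayload("an error occurred")
	fmt.Println("matched:", spec.Match(msg)) // true for this message
}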
Example #2
// NewMatchRunner creates and returns a new MatchRunner, or a relevant error
// if one cannot be created.
func NewMatchRunner(filter, signer string, runner PluginRunner, chanSize int,
	matchChan chan *PipelinePack) (matcher *MatchRunner, err error) {

	var spec *message.MatcherSpecification
	if spec, err = message.CreateMatcherSpecification(filter); err != nil {
		return
	}
	retry, _ := NewRetryHelper(RetryOptions{
		MaxDelay:   "1s",
		Delay:      "50ms",
		MaxRetries: -1,
	})
	matcher = &MatchRunner{
		spec:         spec,
		signer:       signer,
		inChan:       make(chan *PipelinePack, chanSize),
		matchChan:    matchChan,
		pluginRunner: runner,
		retry:        retry,
	}
	return
}
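
Compared to Example #1, the caller supplies the channel size and match channel here, and the runner gets a RetryHelper configured with a 50ms initial delay, a 1s ceiling, and MaxRetries: -1 (presumably meaning retry indefinitely). An independent sketch of that backoff pattern, not the pipeline RetryHelper implementation itself:

package main

import (
	"errors"
	"fmt"
	"time"
)

// retryForever keeps calling op until it succeeds, doubling the delay between
// attempts up to maxDelay, which is roughly what MaxRetries: -1 appears to ask for.
func retryForever(op func() error, delay, maxDelay time.Duration) {
	for {
		if err := op(); err == nil {
			return
		}
		time.Sleep(delay)
		if delay *= 2; delay > maxDelay {
			delay = maxDelay
		}
	}
}

func main() {
	attempts := 0
	retryForever(func() error {
		attempts++
		if attempts < 3 {
			return errors.New("not yet")
		}
		return nil
	}, 50*time.Millisecond, time.Second)
	fmt.Println("succeeded after", attempts, "attempts")
}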
Example #3
func main() {
	flagMatch := flag.String("match", "TRUE", "message_matcher filter expression")
	flagFormat := flag.String("format", "txt", "output format [txt|json|heka|count]")
	flagOutput := flag.String("output", "", "output filename, defaults to stdout")
	flagTail := flag.Bool("tail", false, "don't exit on EOF")
	flagOffset := flag.Int64("offset", 0, "starting offset for the input file in bytes")
	flagMaxMessageSize := flag.Uint64("max-message-size", 4*1024*1024, "maximum message size in bytes")
	flag.Parse()

	if flag.NArg() != 1 {
		flag.PrintDefaults()
		os.Exit(1)
	}

	if *flagMaxMessageSize < math.MaxUint32 {
		maxSize := uint32(*flagMaxMessageSize)
		message.SetMaxMessageSize(maxSize)
	} else {
		fmt.Printf("Message size is too large: %d\n", flagMaxMessageSize)
		os.Exit(8)
	}

	var err error
	var match *message.MatcherSpecification
	if match, err = message.CreateMatcherSpecification(*flagMatch); err != nil {
		fmt.Printf("Match specification - %s\n", err)
		os.Exit(2)
	}

	var file *os.File
	if file, err = os.Open(flag.Arg(0)); err != nil {
		fmt.Printf("%s\n", err)
		os.Exit(3)
	}
	defer file.Close()

	var out *os.File
	if "" == *flagOutput {
		out = os.Stdout
	} else {
		if out, err = os.OpenFile(*flagOutput, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644); err != nil {
			fmt.Printf("%s\n", err)
			os.Exit(4)
		}
		defer out.Close()
	}

	var offset int64
	if offset, err = file.Seek(*flagOffset, 0); err != nil {
		fmt.Printf("%s\n", err)
		os.Exit(5)
	}

	sRunner, err := makeSplitterRunner()
	if err != nil {
		fmt.Println(err)
		os.Exit(7)
	}
	msg := new(message.Message)
	var processed, matched int64

	fmt.Printf("Input:%s  Offset:%d  Match:%s  Format:%s  Tail:%t  Output:%s\n",
		flag.Arg(0), *flagOffset, *flagMatch, *flagFormat, *flagTail, *flagOutput)
	for {
		n, record, err := sRunner.GetRecordFromStream(file)
		if n > 0 && n != len(record) {
			fmt.Printf("Corruption detected at offset: %d bytes: %d\n", offset, n-len(record))
		}
		if err != nil {
			if err == io.EOF {
				if !*flagTail || "count" == *flagFormat {
					break
				}
				time.Sleep(time.Duration(500) * time.Millisecond)
			} else {
				break
			}
		} else {
			if len(record) > 0 {
				processed += 1
				headerLen := int(record[1]) + message.HEADER_FRAMING_SIZE
				if err = proto.Unmarshal(record[headerLen:], msg); err != nil {
					fmt.Printf("Error unmarshalling message at offset: %d error: %s\n", offset, err)
					continue
				}

				if !match.Match(msg) {
					continue
				}
				matched += 1

				switch *flagFormat {
				case "count":
					// no op
				case "json":
					contents, _ := json.Marshal(msg)
					fmt.Fprintf(out, "%s\n", contents)
				case "heka":
					fmt.Fprintf(out, "%s", record)
				default:
					fmt.Fprintf(out, "Timestamp: %s\n"+
						"Type: %s\n"+
						"Hostname: %s\n"+
						"Pid: %d\n"+
						"UUID: %s\n"+
						"Logger: %s\n"+
						"Payload: %s\n"+
						"EnvVersion: %s\n"+
						"Severity: %d\n"+
						"Fields: %+v\n\n",
						time.Unix(0, msg.GetTimestamp()), msg.GetType(),
						msg.GetHostname(), msg.GetPid(), msg.GetUuidString(),
						msg.GetLogger(), msg.GetPayload(), msg.GetEnvVersion(),
						msg.GetSeverity(), msg.Fields)
				}
			}
		}
		offset += int64(n)
	}
	fmt.Printf("Processed: %d, matched: %d messages\n", processed, matched)
	if err != nil {
		fmt.Printf("%s\n", err)
		os.Exit(6)
	}
}
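
The read loop relies on Heka's stream framing: byte 1 of each framed record holds the header length, so the protobuf-encoded Message starts at record[1] + message.HEADER_FRAMING_SIZE. A minimal sketch of just that decoding step; the proto import path shown is an assumption and may differ from the build's vendored copy:

package framing

import (
	"github.com/gogo/protobuf/proto"

	"github.com/mozilla-services/heka/message"
)

// decodeRecord skips the framing header of a single record and unmarshals the
// protobuf Message that follows it, mirroring the loop body above.
func decodeRecord(record []byte) (*message.Message, error) {
	headerLen := int(record[1]) + message.HEADER_FRAMING_SIZE
	msg := new(message.Message)
	if err := proto.Unmarshal(record[headerLen:], msg); err != nil {
		return nil, err
	}
	return msg, nil
}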
Example #4
func TestInjectMessage(t *testing.T) {
	var sbc SandboxConfig
	tests := []string{
		"lua types",
		"cloudwatch metric",
		"external reference",
		"array only",
		"private keys",
		"special characters",
		"message field all types",
		"internal reference",
	}
	outputs := []string{
		`{"value":1}1.2 string nil true false`,
		`{"StatisticValues":[{"Minimum":0,"SampleCount":0,"Sum":0,"Maximum":0},{"Minimum":0,"SampleCount":0,"Sum":0,"Maximum":0}],"Dimensions":[{"Name":"d1","Value":"v1"},{"Name":"d2","Value":"v2"}],"MetricName":"example","Timestamp":0,"Value":0,"Unit":"s"}`,
		`{"a":{"y":2,"x":1}}`,
		`[1,2,3]`,
		`{"x":1,"_m":1,"_private":[1,2]}`,
		`{"special\tcharacters":"\"\t\r\n\b\f\\\/"}`,
		"\x10\x80\x94\xeb\xdc\x03\x52\x13\x0a\x06\x6e\x75\x6d\x62\x65\x72\x10\x03\x39\x00\x00\x00\x00\x00\x00\xf0\x3f\x52\x2c\x0a\x07\x6e\x75\x6d\x62\x65\x72\x73\x10\x03\x1a\x05\x63\x6f\x75\x6e\x74\x3a\x18\x00\x00\x00\x00\x00\x00\xf0\x3f\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x08\x40\x52\x0e\x0a\x05\x62\x6f\x6f\x6c\x73\x10\x04\x42\x03\x01\x00\x00\x52\x0a\x0a\x04\x62\x6f\x6f\x6c\x10\x04\x40\x01\x52\x10\x0a\x06\x73\x74\x72\x69\x6e\x67\x22\x06\x73\x74\x72\x69\x6e\x67\x52\x15\x0a\x07\x73\x74\x72\x69\x6e\x67\x73\x22\x02\x73\x31\x22\x02\x73\x32\x22\x02\x73\x33",
		`{"y":[2],"x":[1,2,3],"ir":[1,2,3]}`,
	}
	if false { // lua jit values
		outputs[1] = `{"Timestamp":0,"Value":0,"StatisticValues":[{"SampleCount":0,"Sum":0,"Maximum":0,"Minimum":0},{"SampleCount":0,"Sum":0,"Maximum":0,"Minimum":0}],"Unit":"s","MetricName":"example","Dimensions":[{"Name":"d1","Value":"v1"},{"Name":"d2","Value":"v2"}]}`
	}

	sbc.ScriptFilename = "./testsupport/inject_message.lua"
	sbc.MemoryLimit = 100000
	sbc.InstructionLimit = 1000
	sbc.OutputLimit = 8000
	pack := getTestPack()
	sb, err := lua.CreateLuaSandbox(&sbc)
	if err != nil {
		t.Errorf("%s", err)
	}
	err = sb.Init("", "")
	if err != nil {
		t.Errorf("%s", err)
	}
	cnt := 0
	sb.InjectMessage(func(p, pt, pn string) int {
		if len(pt) == 0 { // no type is a Heka protobuf message
			if p[18:] != outputs[cnt] { // ignore the UUID
				t.Errorf("Output is incorrect, expected: \"%x\" received: \"%x\"", outputs[cnt], p[18:])
			}
		} else {
			if p != outputs[cnt] {
				t.Errorf("Output is incorrect, expected: \"%s\" received: \"%s\"", outputs[cnt], p)
			}
		}
		if cnt == 6 {
			msg := new(message.Message)
			err := proto.Unmarshal([]byte(p), msg)
			if err != nil {
				t.Errorf("%s", err)
			}
			if msg.GetTimestamp() != 1e9 {
				t.Errorf("Timestamp expected %d received %d", int(1e9), pack.Message.GetTimestamp())
			}
			if field := msg.FindFirstField("numbers"); field != nil {
				if field.GetRepresentation() != "count" {
					t.Errorf("'numbers' representation expected count received %s", 1e9, field.GetRepresentation())
				}
			} else {
				t.Errorf("'numbers' field not found")
			}
			tests := []string{
				"Timestamp == 1000000000",
				"Fields[number] == 1",
				"Fields[numbers][0][0] == 1 && Fields[numbers][0][1] == 2 && Fields[numbers][0][2] == 3",
				"Fields[string] == 'string'",
				"Fields[strings][0][0] == 's1' && Fields[strings][0][1] == 's2' && Fields[strings][0][2] == 's3'",
				"Fields[bool] == TRUE",
				"Fields[bools][0][0] == TRUE && Fields[bools][0][1] == FALSE && Fields[bools][0][2] == FALSE",
			}
			for _, v := range tests {
				ms, _ := message.CreateMatcherSpecification(v)
				match := ms.Match(msg)
				if !match {
					t.Errorf("Test failed %s", v)
				}
			}
		}
		cnt++
		return 0
	})

	for _, v := range tests {
		pack.Message.SetPayload(v)
		r := sb.ProcessMessage(pack)
		if r != 0 {
			t.Errorf("ProcessMessage should return 0, received %d %s", r, sb.LastError())
		}
	}
	sb.Destroy("")
	if cnt != len(tests) {
		t.Errorf("InjectMessage was called %d times, expected %d", cnt, len(tests))
	}
}
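
The matcher expressions checked near the end of the callback can also be exercised directly against a hand-built message. A standalone sketch, assuming the exported message.NewField, AddField and SetTimestamp helpers behave as their names suggest:

package main

import (
	"fmt"

	"github.com/mozilla-services/heka/message"
)

func main() {
	msg := new(message.Message)
	msg.SetTimestamp(1000000000)
	// A double field named "number" with value 1, like the one the Lua script injects.
	if f, err := message.NewField("number", 1.0, ""); err == nil {
		msg.AddField(f)
	}
	for _, expr := range []string{
		"Timestamp == 1000000000",
		"Fields[number] == 1",
	} {
		ms, err := message.CreateMatcherSpecification(expr)
		if err != nil {
			fmt.Println("bad expression:", err)
			continue
		}
		fmt.Printf("%-25s -> %t\n", expr, ms.Match(msg))
	}
}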
Example #5
func main() {
	flagMatch := flag.String("match", "TRUE", "message_matcher filter expression")
	flagFormat := flag.String("format", "txt", "output format [txt|json|heka|count]")
	flagOutput := flag.String("output", "", "output filename, defaults to stdout")
	flagStdin := flag.Bool("stdin", false, "read list of s3 key names from stdin")
	flagBucket := flag.String("bucket", "default-bucket", "S3 Bucket name")
	flagAWSKey := flag.String("aws-key", "", "AWS Key")
	flagAWSSecretKey := flag.String("aws-secret-key", "", "AWS Secret Key")
	flagAWSRegion := flag.String("aws-region", "us-west-2", "AWS Region")
	flagMaxMessageSize := flag.Uint64("max-message-size", 4*1024*1024, "maximum message size in bytes")
	flagWorkers := flag.Uint64("workers", 16, "number of parallel workers")
	flagConnectTimeout := flag.Uint64("connect_timeout", 60, "Max seconds to wait for an S3 connection")
	flagReadTimeout := flag.Uint64("read_timeout", 300, "Max seconds to wait for an S3 file read to complete")
	flag.Parse()

	if !*flagStdin && flag.NArg() < 1 {
		flag.PrintDefaults()
		os.Exit(1)
	}

	if *flagMaxMessageSize < math.MaxUint32 {
		maxSize := uint32(*flagMaxMessageSize)
		message.SetMaxMessageSize(maxSize)
	} else {
		fmt.Fprintf(os.Stderr, "Message size is too large: %d\n", flagMaxMessageSize)
		os.Exit(8)
	}

	workers := 1
	if *flagWorkers == 0 {
		fmt.Fprintf(os.Stderr, "Cannot run with zero workers. Using 1.\n")
	} else if *flagWorkers < 2000 {
		workers = int(*flagWorkers)
	} else {
		fmt.Fprintf(os.Stderr, "Too many workers: %d. Use a reasonable value (up to a few hundred).\n", flagWorkers)
		os.Exit(8)
	}

	var connectTimeout uint32
	if *flagConnectTimeout < math.MaxUint32 {
		connectTimeout = uint32(*flagConnectTimeout)
	} else {
		fmt.Fprintf(os.Stderr, "Connection Timeout is too large:%d.\n", flagConnectTimeout)
		os.Exit(8)
	}

	var readTimeout uint32
	if *flagReadTimeout < math.MaxUint32 {
		readTimeout = uint32(*flagReadTimeout)
	} else {
		fmt.Fprintf(os.Stderr, "Read Timeout is too large:%d.\n", flagReadTimeout)
		os.Exit(8)
	}

	var err error
	var match *message.MatcherSpecification
	if match, err = message.CreateMatcherSpecification(*flagMatch); err != nil {
		fmt.Fprintf(os.Stderr, "Match specification - %s\n", err)
		os.Exit(2)
	}

	var out *os.File
	if "" == *flagOutput {
		out = os.Stdout
	} else {
		if out, err = os.OpenFile(*flagOutput, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644); err != nil {
			fmt.Fprintf(os.Stderr, "%s\n", err)
			os.Exit(3)
		}
		defer out.Close()
	}

	auth, err := aws.GetAuth(*flagAWSKey, *flagAWSSecretKey, "", time.Now())
	if err != nil {
		fmt.Fprintf(os.Stderr, "Authentication error: %s\n", err)
		os.Exit(4)
	}
	region, ok := aws.Regions[*flagAWSRegion]
	if !ok {
		fmt.Fprintf(os.Stderr, "Parameter 'aws-region' must be a valid AWS Region\n")
		os.Exit(5)
	}
	s := s3.New(auth, region)
	if connectTimeout > 0 {
		s.ConnectTimeout = time.Duration(connectTimeout) * time.Second
	}
	if readTimeout > 0 {
		s.ReadTimeout = time.Duration(readTimeout) * time.Second
	}
	bucket := s.Bucket(*flagBucket)

	filenameChannel := make(chan string, 1000)
	recordChannel := make(chan s3splitfile.S3Record, 1000)
	doneChannel := make(chan string, 1000)
	allDone := make(chan int)

	for i := 1; i <= workers; i++ {
		go cat(bucket, filenameChannel, recordChannel, doneChannel)
	}
	go save(recordChannel, match, *flagFormat, out, allDone)

	startTime := time.Now().UTC()
	totalFiles := 0
	pendingFiles := 0
	if *flagStdin {
		scanner := bufio.NewScanner(os.Stdin)
		for scanner.Scan() {
			filename := scanner.Text()
			totalFiles++
			pendingFiles++
			filenameChannel <- filename
			if pendingFiles >= 1000 {
				waitFor(doneChannel, 1)
				pendingFiles--
			}
		}
		close(filenameChannel)
	} else {
		for _, filename := range flag.Args() {
			totalFiles++
			pendingFiles++
			filenameChannel <- filename
			if pendingFiles >= 1000 {
				waitFor(doneChannel, 1)
				pendingFiles--
			}
		}
		close(filenameChannel)
	}

	fmt.Fprintf(os.Stderr, "Waiting for last %d files\n", pendingFiles)
	waitFor(doneChannel, pendingFiles)
	close(recordChannel)
	bytesRead := <-allDone
	// All done! Win!
	duration := time.Now().UTC().Sub(startTime).Seconds()
	mb := float64(bytesRead) / 1024.0 / 1024.0
	if duration == 0.0 {
		duration = 1.0
	}
	fmt.Fprintf(os.Stderr, "All done processing %d files, %.2fMB in %.2f seconds (%.2fMB/s)\n", totalFiles, mb, duration, (mb / duration))
}
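
waitFor is referenced but not defined in this listing. A hypothetical minimal implementation, assuming it simply blocks until n completion signals have arrived on doneChannel:

package main

import "fmt"

// waitFor drains n completion signals; this is an assumption about the
// helper's behaviour, not the tool's actual implementation.
func waitFor(doneChannel <-chan string, n int) {
	for i := 0; i < n; i++ {
		<-doneChannel
	}
}

func main() {
	done := make(chan string, 3)
	for i := 0; i < 3; i++ {
		done <- fmt.Sprintf("file-%d", i)
	}
	waitFor(done, 3)
	fmt.Println("all pending files accounted for")
}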