func (this *SandboxFilter) Run(fr pipeline.FilterRunner, h pipeline.PluginHelper) (err error) { inChan := fr.InChan() ticker := fr.Ticker() var ( ok = true terminated = false sample = true blocking = false backpressure = false pack *pipeline.PipelinePack retval int msgLoopCount uint injectionCount uint startTime time.Time slowDuration int64 = int64(this.pConfig.Globals.MaxMsgProcessDuration) duration int64 capacity = cap(inChan) - 1 ) // We assign to the return value of Run() for errors in the closure so that // the plugin runner can determine what caused the SandboxFilter to return. this.sb.InjectMessage(func(payload, payload_type, payload_name string) int { if injectionCount == 0 { err = pipeline.TerminatedError("exceeded InjectMessage count") return 2 } injectionCount-- pack := h.PipelinePack(msgLoopCount) if pack == nil { err = pipeline.TerminatedError(fmt.Sprintf("exceeded MaxMsgLoops = %d", this.pConfig.Globals.MaxMsgLoops)) return 3 } if len(payload_type) == 0 { // heka protobuf message hostname := pack.Message.GetHostname() err := proto.Unmarshal([]byte(payload), pack.Message) if err == nil { // do not allow filters to override the following pack.Message.SetType("heka.sandbox." + pack.Message.GetType()) pack.Message.SetLogger(fr.Name()) pack.Message.SetHostname(hostname) } else { return 1 } } else { pack.Message.SetType("heka.sandbox-output") pack.Message.SetLogger(fr.Name()) pack.Message.SetPayload(payload) ptype, _ := message.NewField("payload_type", payload_type, "file-extension") pack.Message.AddField(ptype) pname, _ := message.NewField("payload_name", payload_name, "") pack.Message.AddField(pname) } if !fr.Inject(pack) { return 4 } atomic.AddInt64(&this.injectMessageCount, 1) return 0 }) for ok { select { case pack, ok = <-inChan: if !ok { break } atomic.AddInt64(&this.processMessageCount, 1) injectionCount = this.pConfig.Globals.MaxMsgProcessInject msgLoopCount = pack.MsgLoopCount if this.manager != nil { // only check for backpressure on dynamic plugins // reading a channel length is generally fast ~1ns // we need to check the entire chain back to the router backpressure = len(inChan) >= capacity || fr.MatchRunner().InChanLen() >= capacity || len(h.PipelineConfig().Router().InChan()) >= capacity } // performing the timing is expensive ~40ns but if we are // backpressured we need a decent sample set before triggering // termination if sample || (backpressure && this.processMessageSamples < int64(capacity)) || this.sbc.Profile { startTime = time.Now() sample = true } retval = this.sb.ProcessMessage(pack) if sample { duration = time.Since(startTime).Nanoseconds() this.reportLock.Lock() this.processMessageDuration += duration this.processMessageSamples++ if this.sbc.Profile { this.profileMessageDuration = this.processMessageDuration this.profileMessageSamples = this.processMessageSamples if this.profileMessageSamples == int64(capacity)*10 { this.sbc.Profile = false // reset the normal sampling so it isn't heavily skewed by the profile values // i.e. process messages fast during profiling and then switch to malicious code this.processMessageDuration = this.profileMessageDuration / this.profileMessageSamples this.processMessageSamples = 1 } } this.reportLock.Unlock() } if retval <= 0 { if backpressure && this.processMessageSamples >= int64(capacity) { if this.processMessageDuration/this.processMessageSamples > slowDuration || fr.MatchRunner().GetAvgDuration() > slowDuration/5 { terminated = true blocking = true } } if retval < 0 { atomic.AddInt64(&this.processMessageFailures, 1) em := this.sb.LastError() if len(em) > 0 { fr.LogError(errors.New(em)) } } sample = 0 == rand.Intn(this.sampleDenominator) } else { terminated = true } pack.Recycle() case t := <-ticker: injectionCount = this.pConfig.Globals.MaxMsgTimerInject startTime = time.Now() if retval = this.sb.TimerEvent(t.UnixNano()); retval != 0 { terminated = true } duration = time.Since(startTime).Nanoseconds() this.reportLock.Lock() this.timerEventDuration += duration this.timerEventSamples++ this.reportLock.Unlock() } if terminated { pack := h.PipelinePack(0) pack.Message.SetType("heka.sandbox-terminated") pack.Message.SetLogger(pipeline.HEKA_DAEMON) message.NewStringField(pack.Message, "plugin", fr.Name()) if blocking { pack.Message.SetPayload("sandbox is running slowly and blocking the router") // no lock on the ProcessMessage variables here because there are no active writers message.NewInt64Field(pack.Message, "ProcessMessageCount", this.processMessageCount, "count") message.NewInt64Field(pack.Message, "ProcessMessageFailures", this.processMessageFailures, "count") message.NewInt64Field(pack.Message, "ProcessMessageSamples", this.processMessageSamples, "count") message.NewInt64Field(pack.Message, "ProcessMessageAvgDuration", this.processMessageDuration/this.processMessageSamples, "ns") message.NewInt64Field(pack.Message, "MatchAvgDuration", fr.MatchRunner().GetAvgDuration(), "ns") message.NewIntField(pack.Message, "FilterChanLength", len(inChan), "count") message.NewIntField(pack.Message, "MatchChanLength", fr.MatchRunner().InChanLen(), "count") message.NewIntField(pack.Message, "RouterChanLength", len(h.PipelineConfig().Router().InChan()), "count") } else { pack.Message.SetPayload(this.sb.LastError()) } fr.Inject(pack) break } } if this.manager != nil { this.manager.PluginExited() } this.reportLock.Lock() var destroyErr error if this.sbc.PreserveData { destroyErr = this.sb.Destroy(this.preservationFile) } else { destroyErr = this.sb.Destroy("") } if destroyErr != nil { err = destroyErr } this.sb = nil this.reportLock.Unlock() return }