// On Heka restarts this function reloads all previously running SandboxFilters // using the script, configuration, and preservation files in the working // directory. func (this *SandboxManagerFilter) restoreSandboxes(fr pipeline.FilterRunner, h pipeline.PluginHelper, dir string) { glob := fmt.Sprintf("%s-*.toml", getNormalizedName(fr.Name())) if matches, err := filepath.Glob(filepath.Join(dir, glob)); err == nil { for _, fn := range matches { var configFile pipeline.ConfigFile if _, err = toml.DecodeFile(fn, &configFile); err != nil { fr.LogError(fmt.Errorf("restoreSandboxes failed: %s\n", err)) continue } else { for _, conf := range configFile { var runner pipeline.FilterRunner name := path.Base(fn[:len(fn)-5]) fr.LogMessage(fmt.Sprintf("Loading: %s", name)) runner, err = this.createRunner(dir, name, conf) if err != nil { fr.LogError(fmt.Errorf("createRunner failed: %s\n", err.Error())) removeAll(dir, fmt.Sprintf("%s.*", name)) break } err = h.PipelineConfig().AddFilterRunner(runner) if err != nil { fr.LogError(err) } else { atomic.AddInt32(&this.currentFilters, 1) } break // only interested in the first item } } } } }
func (this *SandboxManagerFilter) Run(fr pipeline.FilterRunner, h pipeline.PluginHelper) (err error) { inChan := fr.InChan() var ok = true var pack *pipeline.PipelinePack var delta int64 this.restoreSandboxes(fr, h, this.workingDirectory) for ok { select { case pack, ok = <-inChan: if !ok { break } atomic.AddInt64(&this.processMessageCount, 1) delta = time.Now().UnixNano() - pack.Message.GetTimestamp() if math.Abs(float64(delta)) >= 5e9 { fr.LogError(fmt.Errorf("Discarded control message: %d seconds skew", delta/1e9)) pack.Recycle() break } action, _ := pack.Message.GetFieldValue("action") switch action { case "load": current := int(atomic.LoadInt32(&this.currentFilters)) if current < this.maxFilters { err := this.loadSandbox(fr, h, this.workingDirectory, pack.Message) if err != nil { fr.LogError(err) } } else { fr.LogError(fmt.Errorf("%s attempted to load more than %d filters", fr.Name(), this.maxFilters)) } case "unload": fv, _ := pack.Message.GetFieldValue("name") if name, ok := fv.(string); ok { name = getSandboxName(fr.Name(), name) if this.pConfig.RemoveFilterRunner(name) { removeAll(this.workingDirectory, fmt.Sprintf("%s.*", name)) } } } pack.Recycle() } } return }
// Parses a Heka message and extracts the information necessary to start a new // SandboxFilter func (this *SandboxManagerFilter) loadSandbox(fr pipeline.FilterRunner, h pipeline.PluginHelper, dir string, msg *message.Message) (err error) { fv, _ := msg.GetFieldValue("config") if config, ok := fv.(string); ok { var configFile pipeline.ConfigFile if _, err = toml.Decode(config, &configFile); err != nil { return fmt.Errorf("loadSandbox failed: %s\n", err) } else { for name, conf := range configFile { name = getSandboxName(fr.Name(), name) if _, ok := h.Filter(name); ok { // todo support reload return fmt.Errorf("loadSandbox failed: %s is already running", name) } fr.LogMessage(fmt.Sprintf("Loading: %s", name)) confFile := filepath.Join(dir, fmt.Sprintf("%s.toml", name)) err = ioutil.WriteFile(confFile, []byte(config), 0600) if err != nil { return } var sbc SandboxConfig if err = toml.PrimitiveDecode(conf, &sbc); err != nil { return fmt.Errorf("loadSandbox failed: %s\n", err) } scriptFile := filepath.Join(dir, fmt.Sprintf("%s.%s", name, sbc.ScriptType)) err = ioutil.WriteFile(scriptFile, []byte(msg.GetPayload()), 0600) if err != nil { removeAll(dir, fmt.Sprintf("%s.*", name)) return } // check/clear the old state preservation file // this avoids issues with changes to the data model since the last load // and prevents holes in the graph from looking like anomalies os.Remove(filepath.Join(pipeline.PrependBaseDir(DATA_DIR), name+DATA_EXT)) var runner pipeline.FilterRunner runner, err = this.createRunner(dir, name, conf) if err != nil { removeAll(dir, fmt.Sprintf("%s.*", name)) return } err = h.PipelineConfig().AddFilterRunner(runner) if err == nil { this.currentFilters++ } break // only interested in the first item } } } return }
// Parses a Heka message and extracts the information necessary to start a new // SandboxFilter func (this *SandboxManagerFilter) loadSandbox(fr pipeline.FilterRunner, h pipeline.PluginHelper, dir string, msg *message.Message) (err error) { fv, _ := msg.GetFieldValue("config") if config, ok := fv.(string); ok { var configFile pipeline.ConfigFile if _, err = toml.Decode(config, &configFile); err != nil { return fmt.Errorf("loadSandbox failed: %s\n", err) } for name, conf := range configFile { name = getSandboxName(fr.Name(), name) if _, ok := h.Filter(name); ok { // todo support reload return fmt.Errorf("loadSandbox failed: %s is already running", name) } fr.LogMessage(fmt.Sprintf("Loading: %s", name)) confFile := filepath.Join(dir, fmt.Sprintf("%s.toml", name)) err = ioutil.WriteFile(confFile, []byte(config), 0600) if err != nil { return } var sbc SandboxConfig // Default, will get overwritten if necessary sbc.ScriptType = "lua" if err = toml.PrimitiveDecode(conf, &sbc); err != nil { return fmt.Errorf("loadSandbox failed: %s\n", err) } scriptFile := filepath.Join(dir, fmt.Sprintf("%s.%s", name, sbc.ScriptType)) err = ioutil.WriteFile(scriptFile, []byte(msg.GetPayload()), 0600) if err != nil { removeAll(dir, fmt.Sprintf("%s.*", name)) return } var runner pipeline.FilterRunner runner, err = this.createRunner(dir, name, conf) if err != nil { removeAll(dir, fmt.Sprintf("%s.*", name)) return } err = this.pConfig.AddFilterRunner(runner) if err == nil { atomic.AddInt32(&this.currentFilters, 1) } break // only interested in the first item } } return }
func (this *SandboxFilter) Run(fr pipeline.FilterRunner, h pipeline.PluginHelper) (err error) { inChan := fr.InChan() ticker := fr.Ticker() var ( ok = true terminated = false sample = true blocking = false backpressure = false pack *pipeline.PipelinePack retval int msgLoopCount uint injectionCount uint startTime time.Time slowDuration int64 = int64(this.pConfig.Globals.MaxMsgProcessDuration) duration int64 capacity = cap(inChan) - 1 ) // We assign to the return value of Run() for errors in the closure so that // the plugin runner can determine what caused the SandboxFilter to return. this.sb.InjectMessage(func(payload, payload_type, payload_name string) int { if injectionCount == 0 { err = pipeline.TerminatedError("exceeded InjectMessage count") return 2 } injectionCount-- pack := h.PipelinePack(msgLoopCount) if pack == nil { err = pipeline.TerminatedError(fmt.Sprintf("exceeded MaxMsgLoops = %d", this.pConfig.Globals.MaxMsgLoops)) return 3 } if len(payload_type) == 0 { // heka protobuf message hostname := pack.Message.GetHostname() err := proto.Unmarshal([]byte(payload), pack.Message) if err == nil { // do not allow filters to override the following pack.Message.SetType("heka.sandbox." + pack.Message.GetType()) pack.Message.SetLogger(fr.Name()) pack.Message.SetHostname(hostname) } else { return 1 } } else { pack.Message.SetType("heka.sandbox-output") pack.Message.SetLogger(fr.Name()) pack.Message.SetPayload(payload) ptype, _ := message.NewField("payload_type", payload_type, "file-extension") pack.Message.AddField(ptype) pname, _ := message.NewField("payload_name", payload_name, "") pack.Message.AddField(pname) } if !fr.Inject(pack) { return 4 } atomic.AddInt64(&this.injectMessageCount, 1) return 0 }) for ok { select { case pack, ok = <-inChan: if !ok { break } atomic.AddInt64(&this.processMessageCount, 1) injectionCount = this.pConfig.Globals.MaxMsgProcessInject msgLoopCount = pack.MsgLoopCount if this.manager != nil { // only check for backpressure on dynamic plugins // reading a channel length is generally fast ~1ns // we need to check the entire chain back to the router backpressure = len(inChan) >= capacity || fr.MatchRunner().InChanLen() >= capacity || len(h.PipelineConfig().Router().InChan()) >= capacity } // performing the timing is expensive ~40ns but if we are // backpressured we need a decent sample set before triggering // termination if sample || (backpressure && this.processMessageSamples < int64(capacity)) || this.sbc.Profile { startTime = time.Now() sample = true } retval = this.sb.ProcessMessage(pack) if sample { duration = time.Since(startTime).Nanoseconds() this.reportLock.Lock() this.processMessageDuration += duration this.processMessageSamples++ if this.sbc.Profile { this.profileMessageDuration = this.processMessageDuration this.profileMessageSamples = this.processMessageSamples if this.profileMessageSamples == int64(capacity)*10 { this.sbc.Profile = false // reset the normal sampling so it isn't heavily skewed by the profile values // i.e. process messages fast during profiling and then switch to malicious code this.processMessageDuration = this.profileMessageDuration / this.profileMessageSamples this.processMessageSamples = 1 } } this.reportLock.Unlock() } if retval <= 0 { if backpressure && this.processMessageSamples >= int64(capacity) { if this.processMessageDuration/this.processMessageSamples > slowDuration || fr.MatchRunner().GetAvgDuration() > slowDuration/5 { terminated = true blocking = true } } if retval < 0 { atomic.AddInt64(&this.processMessageFailures, 1) em := this.sb.LastError() if len(em) > 0 { fr.LogError(errors.New(em)) } } sample = 0 == rand.Intn(this.sampleDenominator) } else { terminated = true } pack.Recycle() case t := <-ticker: injectionCount = this.pConfig.Globals.MaxMsgTimerInject startTime = time.Now() if retval = this.sb.TimerEvent(t.UnixNano()); retval != 0 { terminated = true } duration = time.Since(startTime).Nanoseconds() this.reportLock.Lock() this.timerEventDuration += duration this.timerEventSamples++ this.reportLock.Unlock() } if terminated { pack := h.PipelinePack(0) pack.Message.SetType("heka.sandbox-terminated") pack.Message.SetLogger(pipeline.HEKA_DAEMON) message.NewStringField(pack.Message, "plugin", fr.Name()) if blocking { pack.Message.SetPayload("sandbox is running slowly and blocking the router") // no lock on the ProcessMessage variables here because there are no active writers message.NewInt64Field(pack.Message, "ProcessMessageCount", this.processMessageCount, "count") message.NewInt64Field(pack.Message, "ProcessMessageFailures", this.processMessageFailures, "count") message.NewInt64Field(pack.Message, "ProcessMessageSamples", this.processMessageSamples, "count") message.NewInt64Field(pack.Message, "ProcessMessageAvgDuration", this.processMessageDuration/this.processMessageSamples, "ns") message.NewInt64Field(pack.Message, "MatchAvgDuration", fr.MatchRunner().GetAvgDuration(), "ns") message.NewIntField(pack.Message, "FilterChanLength", len(inChan), "count") message.NewIntField(pack.Message, "MatchChanLength", fr.MatchRunner().InChanLen(), "count") message.NewIntField(pack.Message, "RouterChanLength", len(h.PipelineConfig().Router().InChan()), "count") } else { pack.Message.SetPayload(this.sb.LastError()) } fr.Inject(pack) break } } if this.manager != nil { this.manager.PluginExited() } this.reportLock.Lock() var destroyErr error if this.sbc.PreserveData { destroyErr = this.sb.Destroy(this.preservationFile) } else { destroyErr = this.sb.Destroy("") } if destroyErr != nil { err = destroyErr } this.sb = nil this.reportLock.Unlock() return }