func (retry *ProgressiveRetryer) Wait(msg string) bool {
	var delay time.Duration

	// How long have we been retrying?
	retryDuration := time.Since(retry.firstRetry)
	// How long since the last retry?
	silenceDuration := time.Since(retry.lastRetry)

	if retry.firstRetry.IsZero() {
		// First retry; just do it without waiting.
		retry.reset()
		delay = 0
	} else if silenceDuration > RESET_AFTER {
		// Reset retry stats if Wait was not called within the last
		// RESET_AFTER interval (implying a sufficiently successful
		// period).
		retry.reset()
		delay = 0
	} else if retry.hasRetryLimit() && retryDuration > retry.retryLimit {
		// Respect retryLimit.
		log.Errorf("%s -- giving up after retrying for %v.", msg, retry.retryLimit)
		retry.reset()
		return false
	} else {
		switch {
		case retryDuration < time.Minute:
			// Once every 5 seconds for 1 minute.
			delay = 5 * time.Second
		case retryDuration < (1+5)*time.Minute:
			// Once every 30 seconds for the next 5 minutes.
			delay = 30 * time.Second
		case retryDuration < (1+5+10)*time.Minute:
			// Once every minute for the next 10 minutes.
			delay = time.Minute
		default:
			// Once every 5 minutes thereafter.
			delay = 5 * time.Minute
		}
	}

	// Log the retry action.
	if delay == 0 {
		log.Warnf("%s -- retrying now.", msg)
	} else {
		if retry.hasRetryLimit() {
			// If there is a retry limit -- as for the tmp. and
			// appdrain. drains -- this drain is considered
			// unimportant to sysadmins. So we do not generate a
			// WARN, which keeps it out of cloud events.
			log.Infof("%s -- retrying after %v (max %v).", msg, delay, retry.retryLimit)
		} else {
			log.Warnf("%s -- retrying after %v.", msg, delay)
		}
	}

	time.Sleep(delay)
	retry.lastRetry = time.Now()
	return true
}
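// The following is an illustrative sketch, not part of the original source:
// a typical caller would drive Wait from a reconnect loop, treating a false
// return as the retry budget being exhausted. The connect() helper is
// hypothetical.
func connectWithProgressiveRetry(retry *ProgressiveRetryer) error {
	for {
		err := connect() // hypothetical dial/connect function
		if err == nil {
			return nil
		}
		// Wait sleeps per the progressive schedule (5s, then 30s, then
		// 1m, then 5m) and returns false once retryLimit is exceeded.
		if !retry.Wait(fmt.Sprintf("connect failed: %v", err)) {
			return err
		}
	}
}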
func (instance *Instance) tailStream(stream string, filename string, stopCh chan bool, tracker storage.Tracker) {
	pub := logyard.Broker.NewPublisherMust()
	defer pub.Stop()

	limit, err := instance.getReadLimit(pub, stream, filename)
	if err != nil {
		log.Warn(err)
		instance.SendTimelineEvent("WARN -- %v", err)
		return
	}

	rateLimiter := GetConfig().GetLeakyBucket()

	reqUrl, err := url.Parse(fmt.Sprintf("http://localhost:4243/containers/%s/logs", instance.DockerId))
	if err != nil {
		log.Warn(err)
		return
	}
	q := reqUrl.Query()
	q.Set(stream, "true")
	q.Set("follow", "true")
	reqUrl.RawQuery = q.Encode()

	resp, err := http.Get(reqUrl.String())
	if err != nil {
		log.Warn(err)
		instance.SendTimelineEvent("WARN -- %v", err)
		return
	}
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusBadRequest {
		log.Warnf("HTTP error response %v from %v", resp.Status, reqUrl)
		resp.Body.Close()
		return
	}

	t, err := tail.TailReader(util.WrapReadSeekClose(resp.Body), tail.Config{
		MaxLineSize: GetConfig().MaxRecordSize,
		MustExist:   false,
		Follow:      true,
		Location:    &tail.SeekInfo{Offset: -limit, Whence: os.SEEK_END},
		ReOpen:      false,
		Poll:        false,
		RateLimiter: rateLimiter})
	if err != nil {
		log.Warnf("Cannot tail docker stream (%s); %s", stream, err)
		instance.SendTimelineEvent("ERROR -- Cannot tail docker stream (%s); %s", stream, err)
		return
	}

	instance.readFromTail(t, pub, stream, stopCh, filename, tracker)
}
func (instance *Instance) publishLineAs(pub *zmqpubsub.Publisher, source string, logname string, line *tail.Line) {
	if line == nil {
		panic("line is nil")
	}

	msg := &message.Message{
		LogFilename:   logname,
		Source:        source,
		InstanceIndex: instance.Index,
		AppGUID:       instance.AppGUID,
		AppName:       instance.AppName,
		AppSpace:      instance.AppSpace,
		MessageCommon: common.NewMessageCommon(line.Text, line.Time, util.LocalNodeId()),
	}

	if line.Err != nil {
		// Mark this as a special error record, as it is coming from
		// tail, not the app.
		msg.Source = fmt.Sprintf("%v[apptail]", util.GetBrandName())
		msg.LogFilename = ""
		log.Warnf("[%s] %s", instance.AppName, line.Text)
	}

	err := msg.Publish(pub, false)
	if err != nil {
		common.Fatal("Unable to publish: %v", err)
	}
}
// Fatalf sends the error back to the client, and closes the connection.
func (s *WebSocketStream) Fatalf(format string, v ...interface{}) {
	data := &wsStreamData{fmt.Sprintf(format, v...), ""}
	err := s.send(data)
	if err != nil {
		log.Warnf("Error sending error back to websocket client: %v", err)
	}
	s.Close()
}
func (c *Config) GetLeakyBucket() *ratelimiter.LeakyBucket {
	rate := c.MaxLinesPerSecond
	if rate < 1 {
		log.Warnf("max_lines_per_second must be a positive integer; using default (100)")
		rate = 100
	}
	burstSize := c.MaxLinesBurst
	if burstSize < 1 {
		log.Warnf("max_lines_burst must be a positive integer; using default (10000)")
		burstSize = 10000
	}
	// One token leaks every 1/rate seconds.
	interval := time.Duration(int64(time.Second) / rate)
	return ratelimiter.NewLeakyBucket(burstSize, interval)
}
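// Illustrative sketch (not part of the original source), showing the
// rate-to-interval arithmetic above with the default values: at 100
// lines/second the bucket leaks one token every time.Second/100 = 10ms,
// while burstSize (10000) bounds how far a reader may briefly run ahead.
func exampleLeakyBucketInterval() {
	rate := int64(100)
	interval := time.Duration(int64(time.Second) / rate)
	fmt.Println(interval) // prints 10ms
}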
// RegisterTailCleanup blocks until SIGINT or SIGTERM is received, then
// runs cleanup and exits.
func RegisterTailCleanup() {
	c := make(chan os.Signal, 1)
	signal.Notify(c, os.Interrupt, syscall.SIGTERM)
	for sig := range c {
		log.Warnf("captured signal %v; exiting after cleanup", sig)
		cleanup()
		os.Exit(1)
	}
}
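// Illustrative sketch (not part of the original source): since
// RegisterTailCleanup blocks on the signal channel, a caller would run it
// in its own goroutine alongside the main work; startTailing() is
// hypothetical.
func exampleMain() {
	go RegisterTailCleanup()
	startTailing() // hypothetical; cleanup() runs on SIGINT/SIGTERM
}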
func main() {
	c := NewCron(os.Args[1], os.Args[2], os.Args[3:])
	go c.Start()
	ch := make(chan os.Signal, 1)
	signal.Notify(ch, syscall.SIGINT, syscall.SIGTERM)
	log.Warnf("%v", <-ch)
	c.Stop()
	os.Exit(1)
}
func main() {
	major, minor, patch := gozmq.Version()
	log.Infof("Starting logyard_sieve (zeromq %d.%d.%d)", major, minor, patch)

	LoadConfig()

	parser := sieve.NewStackatoParser(getConfig().Events)
	parser.DeleteSamples()

	pub := logyard.Broker.NewPublisherMust()
	defer pub.Stop()

	sub := logyard.Broker.Subscribe("systail")
	defer sub.Stop()

	server.MarkRunning("logyard_sieve")

	log.Info("Watching the systail stream on this node")
	for message := range sub.Ch {
		var record systail.Message

		err := json.Unmarshal([]byte(message.Value), &record)
		if err != nil {
			log.Warnf("failed to parse json: %s; ignoring record: %s",
				err, message.Value)
			continue
		}

		event, err := parser.Parse(record.Name, record.Text)
		if err != nil {
			log.Warnf("failed to parse event from %s: %s -- source: %s",
				record.Name, err, record.Text)
			continue
		}
		if event != nil {
			event.MessageCommon = common.NewMessageCommon(
				event.Desc, time.Unix(record.UnixTime, 0), record.NodeID)
			event.MustPublish(pub)
		}
	}
}
func (instance *Instance) tailFile(name, filename string, stopCh chan bool, tracker storage.Tracker) {
	var location *tail.SeekInfo
	var shouldInitialize bool

	pub := logyard.Broker.NewPublisherMust()
	defer pub.Stop()

	if tracker.IsChildNodeInitialized(instance.getShortDockerId(), filename) {
		offset := tracker.GetFileCachedOffset(instance.getShortDockerId(), filename)
		location = &tail.SeekInfo{Offset: offset, Whence: os.SEEK_SET}
	} else {
		limit, err := instance.getReadLimit(pub, name, filename)
		if err != nil {
			log.Warn(err)
			instance.SendTimelineEvent("WARN -- %v", err)
			return
		}
		location = &tail.SeekInfo{Offset: -limit, Whence: os.SEEK_END}
		shouldInitialize = true
	}

	rateLimiter := GetConfig().GetLeakyBucket()

	t, err := tail.TailFile(filename, tail.Config{
		MaxLineSize: GetConfig().MaxRecordSize,
		MustExist:   true,
		Follow:      true,
		Location:    location,
		ReOpen:      false,
		Poll:        false,
		RateLimiter: rateLimiter})

	// IMPORTANT: this registration happens every time the app restarts.
	if shouldInitialize {
		tracker.InitializeChildNode(instance.getShortDockerId(), filename, INITIAL_OFFSET)
	}

	if err != nil {
		log.Warnf("Cannot tail file (%s); %s", filename, err)
		instance.SendTimelineEvent("ERROR -- Cannot tail file (%s); %s", name, err)
		return
	}

	instance.readFromTail(t, pub, name, stopCh, filename, tracker)
}
func getDockerEvents(retries int) *http.Response {
	c := http.Client{}
	for attempt := 0; attempt < retries; attempt++ {
		res, err := c.Get(events_url)
		if err != nil {
			if attempt+1 == retries {
				log.Fatalf("Failed to read from docker daemon; giving up retrying: %v", err)
			}
			log.Warnf("Docker connection error (%v); retrying after 1 second.", err)
			time.Sleep(time.Second)
		} else {
			return res
		}
	}
	panic("unreachable")
}
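// Illustrative sketch (not part of the original source): getDockerEvents
// returns a live *http.Response, so the caller owns the body and must
// close it when done consuming the event stream.
func exampleConsumeDockerEvents() {
	res := getDockerEvents(5)
	defer res.Body.Close()
	// ... decode the streamed JSON events from res.Body ...
}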
// RemoveOrphanedDrains removes all drains created by applog_endpoint.
func RemoveOrphanedDrains() {
	// Note that this is tricky to do when horizontally scaling
	// applog_endpoint. It could be solved easily by using the nodeID or
	// IP address in the drain name.
	logyardConfig := logyard.GetConfig()
	for name := range logyardConfig.Drains {
		if strings.HasPrefix(name, DRAIN_PREFIX) {
			log.Infof("Removing orphaned drain %v", name)
			err := logyard.DeleteDrain(name)
			if err != nil {
				log.Warnf("Failed to delete drain %v -- %v", name, err)
			}
		}
	}
}
func NewCron(schedule string, command string, args []string) *Cron {
	log.Infof("Running per schedule: %v", schedule)
	c := &Cron{cron.New(), &sync.WaitGroup{}}
	c.AddFunc(schedule, func() {
		c.wg.Add(1)
		log.Infof("Executing: %v %v", command, strings.Join(args, " "))
		err := execute(command, args)
		if err != nil {
			log.Warnf("Failed: %v", err)
		} else {
			log.Info("Succeeded")
		}
		c.wg.Done()
	})
	return c
}
func GetLiveDockerContainers(retries int) map[string]bool {
	allDockerIds := make(map[string]bool)
	c := http.Client{}
	for attempt := 0; attempt < retries; attempt++ {
		res, err := c.Get(containers_url)
		if err != nil {
			if attempt+1 == retries {
				log.Fatalf("Failed to read from docker daemon; giving up retrying: %v", err)
			}
			log.Warnf("Docker connection error (%v); retrying after 1 second.", err)
			time.Sleep(time.Second)
			continue
		}
		// Read and close the body for this attempt; res is non-nil
		// only when err is nil.
		httpResByte, err := ioutil.ReadAll(res.Body)
		res.Body.Close()
		if err != nil {
			log.Fatal(err)
		}
		var jsonData []Docker
		err = json.Unmarshal(httpResByte, &jsonData)
		if err != nil {
			log.Fatal(err)
		}
		for _, element := range jsonData {
			shortenedKey := element.Id[:ID_LENGTH]
			allDockerIds[shortenedKey] = true
		}
		// Success; stop retrying.
		return allDockerIds
	}
	return allDockerIds
}
func tailHandlerWs(
	w http.ResponseWriter,
	r *http.Request,
	stream *wsutil.WebSocketStream) {

	args, err := ParseArguments(r)
	if err != nil {
		stream.Fatalf("Invalid arguments; %v", err)
		return
	}

	if err := sendRecent(stream, args); err != nil {
		stream.Fatalf("%v", err)
		return
	}

	d, err := drain.NewAppLogDrain(args.GUID)
	if err != nil {
		stream.Fatalf("Unable to create drain: %v", err)
		return
	}
	ch, err := d.Start()
	if err != nil {
		stream.Fatalf("Unable to start drain: %v", err)
		return
	}

	err = stream.Forward(ch)
	if err != nil {
		log.Infof("%v", err)
		d.Stop(err)
	}

	// We expect drain.Wait to not block at this point.
	if err := d.Wait(); err != nil {
		if _, ok := err.(wsutil.WebSocketStreamError); !ok {
			log.Warnf("Error from app log drain server: %v", err)
		}
	}
}
func (instance *Instance) getLogFiles() map[string]string {
	var logfiles map[string]string

	rawMode := len(instance.LogFiles) > 0
	if rawMode {
		// If the logfiles list was explicitly passed, use it as is.
		logfiles = instance.LogFiles
	} else {
		// Use $STACKATO_LOG_FILES.
		logfiles = make(map[string]string)
		if env, err := docker.GetDockerAppEnv(instance.RootPath); err != nil {
			log.Errorf("Failed to read docker image env: %v", err)
		} else {
			if s, ok := env["STACKATO_LOG_FILES"]; ok {
				parts := strings.Split(s, ":")
				if len(parts) > 7 {
					parts = parts[len(parts)-7:]
					instance.SendTimelineEvent("WARN -- $STACKATO_LOG_FILES is large; using only last 7 logs: %v", parts)
				}
				for _, f := range parts {
					kv := strings.SplitN(f, "=", 2)
					if len(kv) != 2 {
						log.Warnf("Ignoring malformed $STACKATO_LOG_FILES entry: %v", f)
						continue
					}
					logfiles[kv[0]] = kv[1]
				}
			}
		}
	}

	// Expand paths, and securely ensure they fall within the app root.
	logfilesSecure := make(map[string]string)
	for name, path := range logfiles {
		var fullpath string

		// Treat relative paths as being relative to $STACKATO_APP_ROOT.
		if !filepath.IsAbs(path) {
			stackatoAppRoot := "/home/stackato/"
			fullpath = filepath.Join(instance.RootPath, stackatoAppRoot, path)
		} else {
			fullpath = filepath.Join(instance.RootPath, path)
		}

		fullpath, err := filepath.Abs(fullpath)
		if err != nil {
			log.Warnf("Cannot find Abs of %v <join> %v: %v", instance.RootPath, path, err)
			instance.SendTimelineEvent("WARN -- Failed to find absolute path for %v", path)
			continue
		}
		fullpath, err = filepath.EvalSymlinks(fullpath)
		if err != nil {
			log.Infof("Error reading log file %v: %v", fullpath, err)
			instance.SendTimelineEvent("WARN -- Ignoring missing/inaccessible path %v", path)
			continue
		}

		if !strings.HasPrefix(fullpath, instance.RootPath) {
			log.Warnf("Ignoring insecure log path %v (via %v) in instance %+v", fullpath, path, instance)
			// This user warning is exactly the same as the one above,
			// lest we provide a backdoor for a malicious user to list
			// the directory tree on the host.
			instance.SendTimelineEvent("WARN -- Ignoring missing/inaccessible path %v", path)
			continue
		}

		logfilesSecure[name] = fullpath
	}

	if len(logfilesSecure) == 0 && !instance.DockerStreams {
		instance.SendTimelineEvent("ERROR -- No valid log files detected for tailing")
	}

	return logfilesSecure
}