// Tail begins tailing the files for this instance.
func (instance *Instance) Tail(tracker storage.Tracker) {
	log.Infof("Tailing %v logs for %v -- %+v", instance.Type, instance.Identifier(), instance)

	stopCh := make(chan bool)
	instance.pubch = pubchannel.New("event.timeline", stopCh)

	logfiles := instance.getLogFiles()
	log.Infof("Determined log files: %+v", logfiles)

	shortDockerId := instance.getShortDockerId()
	tracker.RegisterInstance(shortDockerId)

	for name, filename := range logfiles {
		// Use tailStream for the standard docker streams and tailFile
		// for user-specified custom log files.
		if instance.DockerStreams && (name == STDOUT || name == STDERR) {
			go instance.tailStream(name, filename, stopCh, tracker)
		} else {
			go instance.tailFile(name, filename, stopCh, tracker)
		}
	}

	go func() {
		docker.DockerListener.BlockUntilContainerStops(instance.DockerId)
		log.Infof("Container for %v exited", instance.Identifier())
		close(stopCh)
		tracker.Remove(shortDockerId)
	}()
}
func main() {
	flag.Parse()
	go common.RegisterTailCleanup()
	apptail.LoadConfig()

	fstorage := storage.NewFileStorage(*stateFile_path)
	tracker := storage.NewTracker(fstorage, *debug)
	tracker.LoadTailers()
	interval := time.Duration(int64(apptail.GetConfig().PersistPositionIntervalSeconds))
	go tracker.StartSubmissionTimer(interval * time.Second)

	major, minor, patch := gozmq.Version()
	log.Infof("Starting apptail (zeromq %d.%d.%d)", major, minor, patch)
	log.Infof("Config: %+v\n", apptail.GetConfig())

	uid := getUID()

	natsclient := server.NewNatsClient(3)

	mux := &sync.Mutex{}
	n := 0
	started_instances := StartedInstance{}

	natsclient.Subscribe("logyard."+uid+".newinstance", func(instance *apptail.Instance) {
		n++
		if started_instances.checkInstanceAndUpdate(n, instance.DockerId, mux) {
			go func() {
				instance.Tail(tracker)
				started_instances.delete(instance.DockerId, mux)
			}()
		}
	})

	natsclient.Publish("logyard."+uid+".start", []byte("{}"))
	log.Infof("Waiting for app instances ...")

	go docker.DockerListener.Listen()

	// Clean up the cache after restart.
	docker.DockerListener.TrackerCleanUp(tracker)

	server.MarkRunning("apptail")

	apptail_event.MonitorCloudEvents()
}
func (instance *Instance) readFromTail(t *tail.Tail, pub *zmqpubsub.Publisher,
	name string, stopCh chan bool, filename string, tracker storage.Tracker) {
	var err error
FORLOOP:
	for {
		select {
		case line, ok := <-t.Lines:
			if !ok {
				err = t.Wait()
				break FORLOOP
			}
			currentOffset, err := t.Tell()
			if err != nil {
				log.Error(err.Error())
			}
			tracker.Update(instance.getShortDockerId(), filename, currentOffset)
			instance.publishLine(pub, name, line)
		case <-stopCh:
			err = t.Stop()
			break FORLOOP
		}
	}
	if err != nil {
		log.Warn(err)
		instance.SendTimelineEvent("WARN -- Error tailing file (%s); %s", name, err)
	}
	log.Infof("Completed tailing %v log for %v", name, instance.Identifier())
}
func (d *AppLogDrain) Stop(reason error) {
	log.Infof("Stopping drain %s for reason: %v", d.Id(), reason)
	if err := d.removeDrain(); err != nil {
		log.Errorf("Failed to remove drain %v: %v", d.Id(), err)
	}
	d.srv.Kill(reason)
}
func (r *ReadSeekCloseWrapper) Drain() {
	var err, err2 error
	buf := make([]byte, 0x10000)
	for {
		var count int
		count, err = r.reader.Read(buf)
		if count > 0 {
			_, err2 = r.buffer.Write(buf[:count])
		}
		if err != nil && err != io.EOF && err != io.ErrClosedPipe {
			log.Errorf("Error reading stream %v: %v", r.reader, err)
		}
		if err2 != nil {
			log.Errorf("Error writing buffer: %v: %v", r.buffer, err2)
		}
		if err != nil || err2 != nil {
			break
		}
		if r.buffer.Len() > 0 {
			r.available.L.Lock()
			r.available.Broadcast()
			r.available.L.Unlock()
		}
	}
	log.Infof("Read complete (error %v/%v)", err, err2)
}
func (l *dockerListener) BlockUntilContainerStops(id string) {
	var total int
	ch := make(chan bool)

	// Validate before truncating; the original truncate-then-check order
	// could never fail and would panic on short IDs.
	if len(id) < ID_LENGTH {
		common.Fatal("Invalid docker ID length: %v", len(id))
	}
	id = id[:ID_LENGTH]

	// Add a wait channel
	func() {
		l.mux.Lock()
		if _, ok := l.waiters[id]; ok {
			log.Warn("already added")
		} else {
			l.waiters[id] = ch
		}
		total = len(l.waiters)
		l.mux.Unlock()
		runtime.Gosched()
	}()

	// Wait
	log.Infof("Waiting for container %v to exit (total waiters: %d)", id, total)
	<-ch
}
func (retry *ProgressiveRetryer) Wait(msg string) bool {
	var delay time.Duration

	// How long has this retry sequence been going on?
	retryDuration := time.Now().Sub(retry.firstRetry)
	// How long since the last retry?
	silenceDuration := time.Now().Sub(retry.lastRetry)

	if retry.firstRetry.IsZero() {
		// First retry; do it immediately without waiting.
		retry.reset()
		delay = 0
	} else if silenceDuration > RESET_AFTER {
		// Reset retry stats if Wait was not called within RESET_AFTER,
		// implying a sufficiently successful period.
		retry.reset()
		delay = 0
	} else if retry.hasRetryLimit() && retryDuration > retry.retryLimit {
		// Respect retryLimit.
		log.Errorf("%s -- giving up after retrying for %v.", msg, retry.retryLimit)
		retry.reset()
		return false
	} else {
		switch {
		case retryDuration < time.Minute:
			// Once every 5 seconds for 1 minute.
			delay = 5 * time.Second
		case retryDuration < (1+5)*time.Minute:
			// Once every 30 seconds for the next 5 minutes.
			delay = 30 * time.Second
		case retryDuration < (1+5+10)*time.Minute:
			// Once every minute for the next 10 minutes.
			delay = time.Minute
		default:
			// Once every 5 minutes thereafter.
			delay = 5 * time.Minute
		}
	}

	// Log the retry action.
	if delay == 0 {
		log.Warnf("%s -- retrying now.", msg)
	} else {
		if retry.hasRetryLimit() {
			// Drains with a retry limit (the tmp. and appdrain. drains)
			// are considered unimportant to the sys admins, so we do not
			// generate a WARN, keeping this out of cloud events.
			log.Infof("%s -- retrying after %v (max %v).", msg, delay, retry.retryLimit)
		} else {
			log.Warnf("%s -- retrying after %v.", msg, delay)
		}
	}

	time.Sleep(delay)
	retry.lastRetry = time.Now()
	return true
}
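// A minimal usage sketch (not from this codebase): Wait is meant to be
// called in a loop around a failing operation, sleeping with the
// progressive backoff above until the operation succeeds or Wait returns
// false once the retry limit is exhausted. The Retryer interface and the
// connectDrain helper below are hypothetical names used for illustration.
func runWithRetry(retry Retryer, addr string) error {
	for {
		err := connectDrain(addr) // hypothetical operation that may fail
		if err == nil {
			return nil
		}
		if !retry.Wait(fmt.Sprintf("connecting to %s: %v", addr, err)) {
			return fmt.Errorf("giving up on %s: %v", addr, err)
		}
	}
}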
func (retry *ThriceRetryer) Wait(msg string) bool {
	if retry.count < 3 {
		retry.count += 1
		log.Infof("retry #%d -- %v.", retry.count, msg)
		return true
	}
	return false
}
func NewCron(schedule string, command string, args []string) *Cron {
	log.Infof("Running per schedule: %v", schedule)
	c := &Cron{cron.New(), &sync.WaitGroup{}}
	c.AddFunc(schedule, func() {
		c.wg.Add(1)
		log.Infof("Executing: %v %v", command, strings.Join(args, " "))
		err := execute(command, args)
		if err != nil {
			log.Warnf("Failed: %v", err)
		} else {
			log.Info("Succeeded")
		}
		c.wg.Done()
	})
	return c
}
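// A minimal usage sketch, assuming the embedded field comes from
// robfig/cron (so Start and Stop are promoted from *cron.Cron) and that
// the schedule string and command below are only examples; the wg field
// lets a caller in the same package wait for an in-flight command.
func exampleCronUsage() {
	c := NewCron("@every 1h", "logrotate", []string{"/etc/logrotate.conf"})
	c.Start()                 // begin firing the scheduled job
	time.Sleep(3 * time.Hour) // ... do other work ...
	c.Stop()                  // stop scheduling new runs
	c.wg.Wait()               // let any in-flight command complete
}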
func (t *tracker) Remove(key string) {
	if t.debug {
		log.Infof("Removing key %s from cached instances", key)
	}
	t.mux.Lock()
	delete(t.Cached.Instances, key)
	t.mux.Unlock()
	if err := t.Commit(); err != nil {
		log.Fatal(err)
	}
}
func (h *webSocketHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	ws, err := upgrader.Upgrade(w, r, nil)
	if err != nil {
		// The type assertion was previously inverted; label handshake
		// errors as such and everything else as unknown.
		var errString string
		if _, ok := err.(websocket.HandshakeError); ok {
			errString = fmt.Sprintf("Handshake error: %v", err)
		} else {
			errString = fmt.Sprintf("Unknown websocket error: %v", err)
		}
		log.Info(errString)
		http.Error(w, errString, 500)
		return
	}

	log.Infof("wsutil.ServeWS start - %v", getWsConnId(r, ws))
	defer log.Infof("wsutil.ServeWS finish - %v", getWsConnId(r, ws))

	h.handler.ServeWS(w, r, &WebSocketStream{ws})
	ws.Close()
}
func (t *tracker) dumpState(ops string) {
	// This walks the shared instance cache; callers are expected to hold
	// t.mux while invoking it.
	if t.debug {
		for k, v := range t.Cached.Instances {
			message := fmt.Sprintf("[%s] ContainerId: %s", ops, k)
			for fname, offset := range v {
				log.Infof(message+" File: %s --> TailOffset: %d", fname, offset)
			}
		}
	}
}
func NewStackatoParser(spec map[string]map[string]EventParserSpec) Parser {
	parserSpec := map[string]EventParserGroup{}
	for process, d := range spec {
		if _, ok := parserSpec[process]; !ok {
			parserSpec[process] = map[string]*EventParser{}
		}
		for eventName, evt := range d {
			log.Infof("Loading parse spec %s/%s", process, eventName)
			parserSpec[process][eventName] = evt.ToEventParser()
		}
	}
	parser := NewParser(parserSpec)
	parser.Build()
	return parser
}
// RemoveOrphanedDrains removes all drains created by applog_endpoint.
func RemoveOrphanedDrains() {
	// Note that this is tricky to do when horizontally scaling
	// applog_endpoint. It could be solved easily by using the node ID or
	// IP address in the drain name.
	logyardConfig := logyard.GetConfig()
	for name := range logyardConfig.Drains {
		if strings.HasPrefix(name, DRAIN_PREFIX) {
			log.Infof("Removing orphaned drain %v", name)
			if err := logyard.DeleteDrain(name); err != nil {
				log.Warnf("Failed to delete drain %v -- %v", name, err)
			}
		}
	}
}
// addDrain adds a logyard drain for the apptail.{appGUID} stream,
// pointing back to this server's line server port.
func (d *AppLogDrain) addDrain() error {
	uri := fmt.Sprintf("%s://%v:%v", LINESERVER_PROTO, server.LocalIPMust(), d.port)
	filter := fmt.Sprintf("apptail.%s", d.appGUID)
	drainURI, err := drain.ConstructDrainURI(d.drainName, uri, []string{filter}, nil)
	if err != nil {
		return err
	}
	if err = logyard.AddDrain(d.drainName, drainURI); err != nil {
		return err
	}
	log.Infof("Added drain %v => %v", d.drainName, drainURI)
	return nil
}
func main() {
	go common.RegisterTailCleanup()

	major, minor, patch := gozmq.Version()
	log.Infof("Starting systail (zeromq %d.%d.%d)", major, minor, patch)

	systail.LoadConfig()

	nodeid, err := server.LocalIP()
	if err != nil {
		common.Fatal("Failed to determine IP addr: %v", err)
	}
	log.Info("Host IP: ", nodeid)

	tailers := []*tail.Tail{}

	logFiles := systail.GetConfig().LogFiles
	fmt.Printf("%+v\n", logFiles)
	if len(logFiles) == 0 {
		common.Fatal("No log files exist in configuration.")
	}

	for name, logfile := range logFiles {
		t, err := tailLogFile(name, logfile, nodeid)
		if err != nil {
			common.Fatal("%v", err)
		}
		tailers = append(tailers, t)
	}

	server.MarkRunning("systail")

	for _, t := range tailers {
		err := t.Wait()
		if err != nil {
			log.Errorf("Cannot tail [%s]: %s", t.Filename, err)
		}
	}

	// We don't expect any of the tailers to exit, with or without an
	// error.
	log.Error("No file left to tail; exiting.")
	os.Exit(1)
}
// Clear clears the cache associated with the given process and the
// current host.
func (s *StateCache) Clear(name string) {
	log.Infof("[statecache] Clearing state of %s", name)
	allKey, thisKey := s.getKeys(name)

	// Note that redis automatically deletes the SET if it becomes empty
	// after an SREM.
	reply := s.Client.SRem(allKey, s.Host)
	if err := reply.Err(); err != nil {
		log.Errorf("Unable to clear state cache of %s in redis; %v", name, err)
	}

	reply2 := s.Client.Del(thisKey)
	if err := reply2.Err(); err != nil {
		log.Errorf("Unable to clear state cache of %s in redis; %v", name, err)
	}
}
func main() {
	major, minor, patch := gozmq.Version()
	log.Infof("Starting logyard_sieve (zeromq %d.%d.%d)", major, minor, patch)

	LoadConfig()

	parser := sieve.NewStackatoParser(getConfig().Events)
	parser.DeleteSamples()

	pub := logyard.Broker.NewPublisherMust()
	defer pub.Stop()

	sub := logyard.Broker.Subscribe("systail")
	defer sub.Stop()

	server.MarkRunning("logyard_sieve")

	log.Info("Watching the systail stream on this node")
	for message := range sub.Ch {
		var record systail.Message
		err := json.Unmarshal([]byte(message.Value), &record)
		if err != nil {
			log.Warnf("failed to parse json: %s; ignoring record: %s", err, message.Value)
			continue
		}

		event, err := parser.Parse(record.Name, record.Text)
		if err != nil {
			log.Warnf("failed to parse event from %s: %s -- source: %s", record.Name, err, record.Text)
			continue
		}
		if event != nil {
			event.MessageCommon = common.NewMessageCommon(event.Desc, time.Unix(record.UnixTime, 0), record.NodeID)
			event.MustPublish(pub)
		}
	}
}
// getUID returns the UID of the aggregator running on this node. The UID
// is also shared with the local dea/stager, so that we send/receive
// messages only from the local dea/stagers.
func getUID() string {
	var UID string
	uidFile := "/tmp/logyard.uid"
	if _, err := os.Stat(uidFile); os.IsNotExist(err) {
		uid, err := uuid.NewV4()
		if err != nil {
			common.Fatal("%v", err)
		}
		UID = uid.String()
		if err = ioutil.WriteFile(uidFile, []byte(UID), 0644); err != nil {
			common.Fatal("%v", err)
		}
	} else {
		data, err := ioutil.ReadFile(uidFile)
		if err != nil {
			common.Fatal("%v", err)
		}
		UID = string(data)
	}
	log.Infof("detected logyard UID: %s\n", UID)
	return UID
}
func (r *ReadSeekCloseWrapper) Seek(offset int64, whence int) (int64, error) {
	const MaxInt = int64((^uint(0)) >> 1)
	log.Infof("Attempting to seek to whence %v offset %v", whence, offset)
	if whence != 2 {
		return 0, fmt.Errorf("Seeking to %v not implemented", whence)
	}
	if offset > 0 {
		return 0, fmt.Errorf("Seeking past EOF to %v not supported", offset)
	}
	if offset == 0 {
		// Optimize discarding everything (the common case).
		r.buffer.Reset()
	} else {
		// Discard all but the last -offset bytes of the buffer.
		discard := int64(r.buffer.Len()) + offset
		for discard > 0 {
			chunkSize := discard % MaxInt
			r.buffer.Next(int(chunkSize))
			discard -= chunkSize
		}
	}
	return 0, nil
}
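// For illustration: this wrapper only supports seeking relative to the
// end of the stream (whence == 2, i.e. io.SeekEnd) with a non-positive
// offset, which it implements by discarding already-buffered data. The
// helper below is a hypothetical caller, not part of this package.
func exampleSeekUsage(r *ReadSeekCloseWrapper) {
	// Discard everything buffered so far (seek to the current end).
	if _, err := r.Seek(0, 2); err != nil {
		log.Errorf("seek failed: %v", err)
	}
	// Keep only the last 1024 buffered bytes.
	if _, err := r.Seek(-1024, 2); err != nil {
		log.Errorf("seek failed: %v", err)
	}
}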
func tailHandlerWs(w http.ResponseWriter, r *http.Request, stream *wsutil.WebSocketStream) {
	args, err := ParseArguments(r)
	if err != nil {
		stream.Fatalf("Invalid arguments; %v", err)
		return
	}

	if err := sendRecent(stream, args); err != nil {
		stream.Fatalf("%v", err)
		return
	}

	d, err := drain.NewAppLogDrain(args.GUID)
	if err != nil {
		stream.Fatalf("Unable to create drain: %v", err)
		return
	}
	ch, err := d.Start()
	if err != nil {
		stream.Fatalf("Unable to start drain: %v", err)
		return
	}

	err = stream.Forward(ch)
	if err != nil {
		log.Infof("%v", err)
		d.Stop(err)
	}

	// We expect drain.Wait not to block at this point.
	if err := d.Wait(); err != nil {
		if _, ok := err.(wsutil.WebSocketStreamError); !ok {
			log.Warnf("Error from app log drain server: %v", err)
		}
	}
}
func (s *FileStorage) Load(e interface{}) error {
	var err error
	if _, err = os.Stat(s.file_path); os.IsNotExist(err) {
		log.Infof("Creating %s since it does not exist", s.file_path)
		_, err = os.Create(s.file_path)
	} else {
		// Avoid shadowing err so read/decode failures are returned, not
		// just logged.
		var data []byte
		data, err = ioutil.ReadFile(s.file_path)
		if err != nil {
			log.Error(err)
			return err
		}
		dec := gob.NewDecoder(bytes.NewBuffer(data))
		if err = dec.Decode(e); err != nil {
			log.Error(err)
		}
	}
	return err
}
// SetState caches the given state of a process in redis.
func (s *StateCache) SetState(name string, state state.State, rev int64) {
	info := StateInfo(state.Info())
	info["rev"] = fmt.Sprintf("%d", rev)
	data, err := json.Marshal(info)
	if err != nil {
		log.Fatal(err)
	}

	allKey, thisKey := s.getKeys(name)

	log.Infof("[statecache] Caching state of %s", name)
	reply := s.Client.SAdd(allKey, s.Host)
	if err := reply.Err(); err != nil {
		log.Errorf("Unable to cache state of %s in redis; %v", name, err)
		return
	}
	reply2 := s.Client.Set(thisKey, string(data))
	if err := reply2.Err(); err != nil {
		log.Errorf("Unable to cache state of %s in redis; %v", name, err)
	}
}
func (instance *Instance) getLogFiles() map[string]string {
	var logfiles map[string]string

	rawMode := len(instance.LogFiles) > 0
	if rawMode {
		// If the logfiles list was explicitly passed, use it as is.
		logfiles = instance.LogFiles
	} else {
		// Use $STACKATO_LOG_FILES
		logfiles = make(map[string]string)
		if env, err := docker.GetDockerAppEnv(instance.RootPath); err != nil {
			log.Errorf("Failed to read docker image env: %v", err)
		} else {
			if s, ok := env["STACKATO_LOG_FILES"]; ok {
				parts := strings.Split(s, ":")
				if len(parts) > 7 {
					parts = parts[len(parts)-7:]
					instance.SendTimelineEvent("WARN -- $STACKATO_LOG_FILES is large; using only the last 7 logs: %v", parts)
				}
				for _, f := range parts {
					kv := strings.SplitN(f, "=", 2)
					if len(kv) != 2 {
						log.Warnf("Ignoring malformed $STACKATO_LOG_FILES entry: %q", f)
						continue
					}
					logfiles[kv[0]] = kv[1]
				}
			}
		}
	}

	// Expand paths, and securely ensure they fall within the app root.
	logfilesSecure := make(map[string]string)
	for name, path := range logfiles {
		var fullpath string
		// Treat relative paths as being relative to $STACKATO_APP_ROOT.
		if !filepath.IsAbs(path) {
			stackatoAppRoot := "/home/stackato/"
			fullpath = filepath.Join(instance.RootPath, stackatoAppRoot, path)
		} else {
			fullpath = filepath.Join(instance.RootPath, path)
		}
		fullpath, err := filepath.Abs(fullpath)
		if err != nil {
			log.Warnf("Cannot find Abs of %v <join> %v: %v", instance.RootPath, path, err)
			instance.SendTimelineEvent("WARN -- Failed to find absolute path for %v", path)
			continue
		}
		fullpath, err = filepath.EvalSymlinks(fullpath)
		if err != nil {
			log.Infof("Error reading log file %v: %v", fullpath, err)
			instance.SendTimelineEvent("WARN -- Ignoring missing/inaccessible path %v", path)
			continue
		}
		if !strings.HasPrefix(fullpath, instance.RootPath) {
			log.Warnf("Ignoring insecure log path %v (via %v) in instance %+v", fullpath, path, instance)
			// This user warning is identical to the one above, lest we
			// provide a backdoor for a malicious user to list the
			// directory tree on the host.
			instance.SendTimelineEvent("WARN -- Ignoring missing/inaccessible path %v", path)
			continue
		}
		logfilesSecure[name] = fullpath
	}

	if len(logfilesSecure) == 0 && !instance.DockerStreams {
		instance.SendTimelineEvent("ERROR -- No valid log files detected for tailing")
	}

	return logfilesSecure
}
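// For illustration: with LogFiles unset, getLogFiles falls back to the
// container's $STACKATO_LOG_FILES, a colon-separated list of name=path
// pairs. The value below is a hypothetical example, not from the source:
//
//	STACKATO_LOG_FILES="staging=logs/staging.log:app=app/logs/app.log"
//
// parses to {"staging": "logs/staging.log", "app": "app/logs/app.log"};
// relative paths are then joined under <RootPath>/home/stackato/, and any
// path that escapes RootPath or does not resolve is dropped.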
func (m *StateMachine) Log(msg string, v ...interface{}) {
	log.Infof(m.process.Logf("[STM] "+msg, v...))
}
func routerAdvertise(m interface{}) {
	info := newRouterRegisterInfo()
	log.Infof("Advertising ourself to router: %+v (router.start? %+v)", info, m)
	NATS.Publish("router.register", info)
}
func (d *AppLogDrain) removeDrain() error {
	if err := logyard.DeleteDrain(d.drainName); err != nil {
		return err
	}
	log.Infof("Removed drain %v", d.Id())
	return nil
}