Example #1
// Tail begins tailing the files for this instance.
func (instance *Instance) Tail(tracker storage.Tracker) {
	log.Infof("Tailing %v logs for %v -- %+v",
		instance.Type, instance.Identifier(), instance)

	stopCh := make(chan bool)

	instance.pubch = pubchannel.New("event.timeline", stopCh)

	logfiles := instance.getLogFiles()

	log.Infof("Determined log files: %+v", logfiles)

	shortDockerId := instance.getShortDockerId()

	tracker.RegisterInstance(shortDockerId)

	for name, filename := range logfiles {
		// Use tailStream for the standard docker streams and tailFile for custom user log files.
		if instance.DockerStreams && (name == STDOUT || name == STDERR) {
			go instance.tailStream(name, filename, stopCh, tracker)
		} else {
			go instance.tailFile(name, filename, stopCh, tracker)
		}
	}

	go func() {
		docker.DockerListener.BlockUntilContainerStops(instance.DockerId)
		log.Infof("Container for %v exited", instance.Identifier())
		close(stopCh)
		tracker.Remove(shortDockerId)
	}()

}
Example #2
func main() {
	flag.Parse()
	go common.RegisterTailCleanup()

	apptail.LoadConfig()

	fstorage := storage.NewFileStorage(*stateFile_path)
	tracker := storage.NewTracker(fstorage, *debug)
	tracker.LoadTailers()

	interval := time.Duration(int64(apptail.GetConfig().PersistPositionIntervalSeconds))
	go tracker.StartSubmissionTimer(interval * time.Second)

	major, minor, patch := gozmq.Version()
	log.Infof("Starting apptail (zeromq %d.%d.%d)", major, minor, patch)

	log.Infof("Config: %+v\n", apptail.GetConfig())

	uid := getUID()

	natsclient := server.NewNatsClient(3)

	mux := &sync.Mutex{}

	n := 0
	started_instances := StartedInstance{}

	natsclient.Subscribe("logyard."+uid+".newinstance", func(instance *apptail.Instance) {

		n++
		if started_instances.checkInstanceAndUpdate(n, instance.DockerId, mux) {
			go func() {
				instance.Tail(tracker)
				started_instances.delete(instance.DockerId, mux)
			}()
		}

	})

	natsclient.Publish("logyard."+uid+".start", []byte("{}"))
	log.Infof("Waiting for app instances ...")

	go docker.DockerListener.Listen()

	// clean up the cache after restart
	docker.DockerListener.TrackerCleanUp(tracker)

	server.MarkRunning("apptail")

	apptail_event.MonitorCloudEvents()

}
Example #3
func (instance *Instance) readFromTail(t *tail.Tail, pub *zmqpubsub.Publisher, name string, stopCh chan bool, filename string, tracker storage.Tracker) {
	var err error
FORLOOP:
	for {
		select {
		case line, ok := <-t.Lines:
			if !ok {
				err = t.Wait()
				break FORLOOP
			}
			currentOffset, err := t.Tell()
			if err != nil {
				log.Error(err.Error())
			}
			tracker.Update(instance.getShortDockerId(), filename, currentOffset)
			instance.publishLine(pub, name, line)
		case <-stopCh:
			err = t.Stop()
			break FORLOOP
		}
	}

	if err != nil {
		log.Warn(err)
		instance.SendTimelineEvent("WARN -- Error tailing file (%s); %s", name, err)
	}

	log.Infof("Completed tailing %v log for %v", name, instance.Identifier())
}
Example #4
func (d *AppLogDrain) Stop(reason error) {
	log.Infof("Stopping drain %s for reason: %v", d.Id(), reason)
	if err := d.removeDrain(); err != nil {
		log.Errorf("Failed to remove drain %v: %v", d.Id(), err)
	}
	d.srv.Kill(reason)
}
Example #5
func (r *ReadSeekCloseWrapper) Drain() {
	var err, err2 error
	buf := make([]byte, 0x10000)
	for {
		var count int
		count, err = r.reader.Read(buf)
		if count > 0 {
			_, err2 = r.buffer.Write(buf[:count])
		}
		if err != nil && err != io.EOF && err != io.ErrClosedPipe {
			log.Errorf("Error reading stream %v: %v", r.reader, err)
		}
		if err2 != nil {
			log.Errorf("Error writing buffer: %v: %v", r.buffer, err2)
		}
		if err != nil || err2 != nil {
			break
		}
		if r.buffer.Len() > 0 {
			r.available.L.Lock()
			r.available.Broadcast()
			r.available.L.Unlock()
		}
	}
	log.Infof("Read complete (error %v/%v)", err, err2)
}
Example #6
func (l *dockerListener) BlockUntilContainerStops(id string) {
	var total int
	ch := make(chan bool)
	// Check the length before truncating: slicing a shorter ID would
	// panic, and truncating first would make this check a no-op.
	if len(id) < ID_LENGTH {
		common.Fatal("Invalid docker ID length: %v", len(id))
	}
	id = id[:ID_LENGTH]

	// Add a wait channel
	func() {
		l.mux.Lock()
		if _, ok := l.waiters[id]; ok {
			log.Warn("already added")
		} else {
			l.waiters[id] = ch
		}
		total = len(l.waiters)
		l.mux.Unlock()
		runtime.Gosched()
	}()

	// Wait
	log.Infof("Waiting for container %v to exit (total waiters: %d)", id, total)
	<-ch
}
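
BlockUntilContainerStops only registers a waiter; the notification side, which is not among these examples, would close the channel once the container exits. A hypothetical sketch of that counterpart (the method name is assumed, not taken from logyard):

// Hypothetical counterpart -- not shown in the examples above. Wakes any
// goroutine blocked in BlockUntilContainerStops for this container.
func (l *dockerListener) notifyContainerStopped(id string) {
	if len(id) > ID_LENGTH {
		id = id[:ID_LENGTH]
	}
	l.mux.Lock()
	defer l.mux.Unlock()
	if ch, ok := l.waiters[id]; ok {
		close(ch)
		delete(l.waiters, id)
	}
}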
Example #7
func (retry *ProgressiveRetryer) Wait(msg string) bool {
	var delay time.Duration

	// How long have we been retrying?
	retryDuration := time.Since(retry.firstRetry)

	// How long since the last retry?
	silenceDuration := time.Since(retry.lastRetry)

	if retry.firstRetry.IsZero() {
		// first retry; just do it without waiting.
		retry.reset()
		delay = 0
	} else if silenceDuration > RESET_AFTER {
		// Reset retry stats if Wait was not called within RESET_AFTER
		// (20 minutes), implying a sufficiently long successful period.
		retry.reset()
		delay = 0
	} else if retry.hasRetryLimit() && retryDuration > retry.retryLimit {
		// respect retryLimit
		log.Errorf("%s -- giving up after retrying for %v.", msg, retry.retryLimit)
		retry.reset()
		return false
	} else {
		switch {
		case retryDuration < time.Minute:
			// once every 5 seconds for 1 minute
			delay = 5 * time.Second
		case retryDuration < (1+5)*time.Minute:
			// once every 30 seconds for next 5 minutes
			delay = 30 * time.Second
		case retryDuration < (1+5+10)*time.Minute:
			// once every 1 minute for next 10 minutes
			delay = time.Minute
		default:
			// once every 5 minutes thereafter
			delay = 5 * time.Minute
		}
	}

	// Log the retry action
	if delay == 0 {
		log.Warnf("%s -- retrying now.", msg)
	} else {
		if retry.hasRetryLimit() {
			// Drains with a retry limit (the tmp. and appdrain. drains)
			// are considered unimportant to the sysadmins, so log at INFO
			// instead of WARN to keep the message out of cloud events.
			log.Infof("%s -- retrying after %v (max %v).", msg, delay, retry.retryLimit)
		} else {
			log.Warnf("%s -- retrying after %v.", msg, delay)
		}
	}

	time.Sleep(delay)
	retry.lastRetry = time.Now()
	return true
}
Example #8
func (retry *ThriceRetryer) Wait(msg string) bool {
	if retry.count < 3 {
		retry.count += 1
		log.Infof("retry #%d -- %v.", retry.count, msg)
		return true
	}
	return false
}
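
Both retryers satisfy the same Wait(msg string) bool contract: the caller keeps attempting an operation until Wait returns false. A minimal, hypothetical caller loop under that assumption (the Retryer interface name and the connect helper are illustrative, not part of these examples):

// Hypothetical sketch of how a retryer is driven by a caller.
type Retryer interface {
	Wait(msg string) bool
}

func connectWithRetry(retry Retryer, connect func() error) error {
	for {
		err := connect()
		if err == nil {
			return nil
		}
		// Wait sleeps according to the retry policy and reports whether
		// another attempt should be made.
		if !retry.Wait(fmt.Sprintf("connection failed: %v", err)) {
			return err
		}
	}
}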
Example #9
func NewCron(schedule string, command string, args []string) *Cron {
	log.Infof("Running per schedule: %v", schedule)
	c := &Cron{cron.New(), &sync.WaitGroup{}}

	c.AddFunc(schedule, func() {
		c.wg.Add(1)

		log.Infof("Executing: %v %v", command, strings.Join(args, " "))
		err := execute(command, args)
		if err != nil {
			log.Warnf("Failed: %v", err)
		} else {
			log.Info("Succeeded")
		}
		c.wg.Done()
	})
	return c
}
Example #10
func (t *tracker) Remove(key string) {
	if t.debug {
		log.Infof("Removing the following key %s from cached instances", key)
	}
	t.mux.Lock()
	delete(t.Cached.Instances, key)
	t.mux.Unlock()
	if err := t.Commit(); err != nil {
		log.Fatal(err)
	}
}
Example #11
func (h *webSocketHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	ws, err := upgrader.Upgrade(w, r, nil)
	if err != nil {
		var errString string
		if _, ok := err.(websocket.HandshakeError); ok {
			errString = fmt.Sprintf("Handshake error: %v", err)
		} else {
			errString = fmt.Sprintf("Unknown websocket error: %v", err)
		}
		log.Info(errString)
		http.Error(w, errString, 500)
		return
	}

	log.Infof("wsutil.ServeWS start - %v", getWsConnId(r, ws))
	defer log.Infof("wsutil.ServeWS finish - %v", getWsConnId(r, ws))

	h.handler.ServeWS(w, r, &WebSocketStream{ws})

	ws.Close()
}
Example #12
func (t *tracker) dumpState(ops string) {
	// Important: callers must wrap this in t.mux, since it reads the
	// shared instance map.
	if t.debug {
		for k, v := range t.Cached.Instances {
			message := fmt.Sprintf("[%s] ContainerId: %s", ops, k)
			for fname, offset := range v {
				log.Infof(message+" File: %s --> TailOffset: %d", fname, offset)
			}
		}
	}
}
Example #13
func NewStackatoParser(spec map[string]map[string]EventParserSpec) Parser {
	parserSpec := map[string]EventParserGroup{}
	for process, d := range spec {
		if _, ok := parserSpec[process]; !ok {
			parserSpec[process] = map[string]*EventParser{}
		}
		for eventName, evt := range d {
			log.Infof("Loading parse spec %s/%s", process, eventName)
			parserSpec[process][eventName] = evt.ToEventParser()
		}
	}
	parser := NewParser(parserSpec)
	parser.Build()
	return parser
}
Example #14
// RemoveOrphanedDrains removes all drains created by applog_endpoint.
func RemoveOrphanedDrains() {
	// Note that this is tricky to do when horizontally scaling
	// applog_endpoint. It could be solved easily by including the node ID
	// or IP address in the drain name.
	logyardConfig := logyard.GetConfig()
	for name := range logyardConfig.Drains {
		if strings.HasPrefix(name, DRAIN_PREFIX) {
			log.Infof("Removing orphaned drain %v", name)
			err := logyard.DeleteDrain(name)
			if err != nil {
				log.Warnf("Failed to delete drain %v -- %v",
					name, err)
			}
		}
	}
}
Example #15
// addDrain adds a logyard drain for the apptail.{appGUID} stream
// pointing to ourself (port)
func (d *AppLogDrain) addDrain() error {
	uri := fmt.Sprintf("%s://%v:%v",
		LINESERVER_PROTO,
		server.LocalIPMust(),
		d.port)
	filter := fmt.Sprintf("apptail.%s", d.appGUID)
	drainURI, err := drain.ConstructDrainURI(
		d.drainName, uri, []string{filter}, nil)
	if err != nil {
		return err
	}
	if err = logyard.AddDrain(d.drainName, drainURI); err != nil {
		return err
	}
	log.Infof("Added drain %v => %v", d.drainName, drainURI)
	return nil
}
Example #16
func main() {
	go common.RegisterTailCleanup()

	major, minor, patch := gozmq.Version()
	log.Infof("Starting systail (zeromq %d.%d.%d)", major, minor, patch)

	systail.LoadConfig()

	nodeid, err := server.LocalIP()
	if err != nil {
		common.Fatal("Failed to determine IP addr: %v", err)
	}
	log.Info("Host IP: ", nodeid)

	tailers := []*tail.Tail{}

	logFiles := systail.GetConfig().LogFiles

	fmt.Printf("%+v\n", logFiles)
	if len(logFiles) == 0 {
		common.Fatal("No log files exist in configuration.")
	}

	for name, logfile := range logFiles {
		t, err := tailLogFile(name, logfile, nodeid)
		if err != nil {
			common.Fatal("%v", err)
		}
		tailers = append(tailers, t)
	}

	server.MarkRunning("systail")

	for _, t := range tailers {
		if err := t.Wait(); err != nil {
			log.Errorf("Cannot tail [%s]: %s", t.Filename, err)
		}
	}

	// None of the tailers are expected to exit, with or without error,
	// so reaching this point is itself an error.
	log.Error("No file left to tail; exiting.")
	os.Exit(1)
}
Example #17
// Clear clears the cache associated with the given process and
// current host.
func (s *StateCache) Clear(name string) {
	log.Infof("[statecache] Clearing state of %s", name)
	allKey, thisKey := s.getKeys(name)

	// Note that redis automatically deletes the SET if it will be
	// empty after an SREM.
	reply := s.Client.SRem(allKey, s.Host)
	if err := reply.Err(); err != nil {
		log.Errorf("Unable to clear state cache of %s in redis; %v",
			name, err)
	}

	reply2 := s.Client.Del(thisKey)
	if err := reply2.Err(); err != nil {
		log.Errorf("Unable to clear state cache of %s in redis; %v",
			name, err)
	}
}
Example #18
func main() {
	major, minor, patch := gozmq.Version()
	log.Infof("Starting logyard_sieve (zeromq %d.%d.%d)", major, minor, patch)

	LoadConfig()

	parser := sieve.NewStackatoParser(getConfig().Events)
	parser.DeleteSamples()

	pub := logyard.Broker.NewPublisherMust()
	defer pub.Stop()
	sub := logyard.Broker.Subscribe("systail")
	defer sub.Stop()

	server.MarkRunning("logyard_sieve")

	log.Info("Watching the systail stream on this node")
	for message := range sub.Ch {
		var record systail.Message
		err := json.Unmarshal([]byte(message.Value), &record)
		if err != nil {
			log.Warnf("failed to parse json: %s; ignoring record: %s",
				err, message.Value)
			continue
		}

		event, err := parser.Parse(record.Name, record.Text)
		if err != nil {
			log.Warnf(
				"failed to parse event from %s: %s -- source: %s",
				record.Name, err, record.Text)
			continue
		}
		if event != nil {
			event.MessageCommon = common.NewMessageCommon(
				event.Desc, time.Unix(record.UnixTime, 0), record.NodeID)
			event.MustPublish(pub)
		}

	}
}
Example #19
// getUID returns the UID of the aggregator running on this node. the UID is
// also shared between the local dea/stager, so that we send/receive messages
// only from the local dea/stagers.
func getUID() string {
	var UID string
	uidFile := "/tmp/logyard.uid"
	if _, err := os.Stat(uidFile); os.IsNotExist(err) {
		uid, err := uuid.NewV4()
		if err != nil {
			common.Fatal("%v", err)
		}
		UID = uid.String()
		if err = ioutil.WriteFile(uidFile, []byte(UID), 0644); err != nil {
			common.Fatal("%v", err)
		}
	} else {
		data, err := ioutil.ReadFile(uidFile)
		if err != nil {
			common.Fatal("%v", err)
		}
		UID = string(data)
	}
	log.Infof("Detected logyard UID: %s", UID)
	return UID
}
Example #20
func (r *ReadSeekCloseWrapper) Seek(offset int64, whence int) (int64, error) {
	const MaxInt = int64((^uint(0)) >> 1)
	log.Infof("Attempting to seek to whence %v offset %v", whence, offset)
	if whence != 2 {
		return 0, fmt.Errorf("Seeking to %v not implemented", whence)
	}
	if offset > 0 {
		return 0, fmt.Errorf("Seeking past EOF to %v not supported", offset)
	}
	if offset == 0 {
		// Optimize discarding everything (common case)
		r.buffer.Reset()
	} else {
		discard := int64(r.buffer.Len()) + offset
		for discard > 0 {
			// Drain in chunks capped at the platform's max int; a plain
			// modulo would yield 0 (and loop forever) when discard is an
			// exact multiple of MaxInt.
			chunkSize := discard
			if chunkSize > MaxInt {
				chunkSize = MaxInt
			}
			r.buffer.Next(int(chunkSize))
			discard -= chunkSize
		}
	}
	return 0, nil
}
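
Since only whence == 2 (seek relative to end-of-stream) is supported, a typical call discards everything currently buffered. A hypothetical usage line (wrapper stands in for a *ReadSeekCloseWrapper):

// Hypothetical usage: drop all buffered data by seeking to EOF.
if _, err := wrapper.Seek(0, 2); err != nil { // whence 2 == io.SeekEnd
	log.Errorf("Seek failed: %v", err)
}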
Example #21
func tailHandlerWs(
	w http.ResponseWriter, r *http.Request, stream *wsutil.WebSocketStream) {
	args, err := ParseArguments(r)
	if err != nil {
		stream.Fatalf("Invalid arguments; %v", err)
		return
	}

	if err := sendRecent(stream, args); err != nil {
		stream.Fatalf("%v", err)
		return
	}

	d, err := drain.NewAppLogDrain(args.GUID)
	if err != nil {
		stream.Fatalf("Unable to create drain: %v", err)
		return
	}
	ch, err := d.Start()
	if err != nil {
		stream.Fatalf("Unable to start drain: %v", err)
		return
	}

	err = stream.Forward(ch)
	if err != nil {
		log.Infof("%v", err)
		d.Stop(err)
	}

	// We expect drain.Wait to not block at this point.
	if err := d.Wait(); err != nil {
		if _, ok := err.(wsutil.WebSocketStreamError); !ok {
			log.Warnf("Error from app log drain server: %v", err)
		}
	}
}
Example #22
func (s *FileStorage) Load(e interface{}) error {
	var err error
	if _, err = os.Stat(s.file_path); os.IsNotExist(err) {
		log.Infof("Creating %s since it does not exist", s.file_path)
		_, err = os.Create(s.file_path)
	} else {
		// Avoid := here: a shadowed err would mask read/decode failures
		// and make Load return nil to the caller.
		var data []byte
		data, err = ioutil.ReadFile(s.file_path)
		if err != nil {
			log.Error(err)
			return err
		}
		dec := gob.NewDecoder(bytes.NewBuffer(data))
		if err = dec.Decode(e); err != nil {
			log.Error(err)
		}
	}
	return err
}
Example #23
// SetState caches the given state of a process in redis.
func (s *StateCache) SetState(
	name string, state state.State, rev int64) {
	info := StateInfo(state.Info())
	info["rev"] = fmt.Sprintf("%d", rev)
	data, err := json.Marshal(info)
	if err != nil {
		log.Fatal(err)
	}

	allKey, thisKey := s.getKeys(name)

	log.Infof("[statecache] Caching state of %s", name)
	reply := s.Client.SAdd(allKey, s.Host)
	if err := reply.Err(); err != nil {
		log.Errorf("Unable to cache state of %s in redis; %v",
			name, err)
		return
	}
	reply2 := s.Client.Set(thisKey, string(data))
	if err := reply2.Err(); err != nil {
		log.Errorf("Unable to cache state of %s in redis; %v",
			name, err)
	}
}
Example #24
func (instance *Instance) getLogFiles() map[string]string {
	var logfiles map[string]string

	rawMode := len(instance.LogFiles) > 0
	if rawMode {
		// If the logfiles list was explicitly passed, use it as is.
		logfiles = instance.LogFiles
	} else {
		// Use $STACKATO_LOG_FILES
		logfiles = make(map[string]string)
		if env, err := docker.GetDockerAppEnv(instance.RootPath); err != nil {
			log.Errorf("Failed to read docker image env: %v", err)
		} else {
			if s, ok := env["STACKATO_LOG_FILES"]; ok {
				parts := strings.Split(s, ":")
				if len(parts) > 7 {
					parts = parts[len(parts)-7:]
					instance.SendTimelineEvent("WARN -- $STACKATO_LOG_FILES is large; using only last 7 logs: %v", parts)
				}
				for _, f := range parts {
					kv := strings.SplitN(f, "=", 2)
					if len(kv) != 2 {
						log.Warnf("Ignoring malformed $STACKATO_LOG_FILES entry: %v", f)
						continue
					}
					logfiles[kv[0]] = kv[1]
				}
			}
		}
	}

	// Expand paths, and securely ensure they fall within the app root.
	logfilesSecure := make(map[string]string)
	for name, path := range logfiles {
		var fullpath string

		// Treat relative paths as being relative to $STACKATO_APP_ROOT
		if !filepath.IsAbs(path) {
			stackatoAppRoot := "/home/stackato/"
			fullpath = filepath.Join(instance.RootPath, stackatoAppRoot, path)
		} else {
			fullpath = filepath.Join(instance.RootPath, path)
		}

		fullpath, err := filepath.Abs(fullpath)
		if err != nil {
			log.Warnf("Cannot find Abs of %v <join> %v: %v", instance.RootPath, path, err)
			instance.SendTimelineEvent("WARN -- Failed to find absolute path for %v", path)
			continue
		}
		fullpath, err = filepath.EvalSymlinks(fullpath)
		if err != nil {
			log.Infof("Error reading log file %v: %v", fullpath, err)
			instance.SendTimelineEvent("WARN -- Ignoring missing/inaccessible path %v", path)
			continue
		}
		if !strings.HasPrefix(fullpath, instance.RootPath) {
			log.Warnf("Ignoring insecure log path %v (via %v) in instance %+v", fullpath, path, instance)
			// This user warning is exactly the same as above, lest we provide
			// a backdoor for a malicious user to list the directory tree on
			// the host.
			instance.SendTimelineEvent("WARN -- Ignoring missing/inaccessible path %v", path)
			continue
		}
		logfilesSecure[name] = fullpath
	}

	if len(logfilesSecure) == 0 && !instance.DockerStreams {
		instance.SendTimelineEvent("ERROR -- No valid log files detected for tailing")
	}

	return logfilesSecure
}
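
The parsing above implies that $STACKATO_LOG_FILES holds colon-separated name=path pairs. A small standalone sketch of that split logic, with a purely illustrative sample value:

package main

import (
	"fmt"
	"strings"
)

func main() {
	// Illustrative value only; the real value comes from the docker image env.
	s := "app=/app/logs/app.log:worker=logs/worker.log"
	for _, entry := range strings.Split(s, ":") {
		kv := strings.SplitN(entry, "=", 2)
		if len(kv) != 2 {
			continue // skip malformed entries
		}
		fmt.Printf("%s -> %s\n", kv[0], kv[1])
	}
}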
Example #25
func (m *StateMachine) Log(msg string, v ...interface{}) {
	// Use Info (not Infof) so any % in the already-formatted message is not reinterpreted.
	log.Info(m.process.Logf("[STM] "+msg, v...))
}
Example #26
func routerAdvertise(m interface{}) {
	info := newRouterRegisterInfo()
	log.Infof("Advertising ourself to router: %+v (router.start? %+v)",
		info, m)
	NATS.Publish("router.register", info)
}
Example #27
func (d *AppLogDrain) removeDrain() error {
	err := logyard.DeleteDrain(d.drainName)
	if err == nil {
		log.Infof("Removed drain %v", d.Id())
	}
	return err
}