Beispiel #1
0
// Drain copies data from r.reader into r.buffer, 64 KiB at a time, until a
// read returns any error (io.EOF included) or a write to the buffer fails.
//
// After every read that leaves the buffer non-empty, goroutines waiting on
// r.available are woken with a Broadcast. The notification is issued before
// the error check: an io.Reader may return the final bytes together with a
// non-nil error, and those bytes must be announced as well.
//
// Read errors other than io.EOF and io.ErrClosedPipe, and all buffer write
// errors, are logged. Drain returns nothing; the terminating errors are only
// reported through the final log line.
func (r *ReadSeekCloseWrapper) Drain() {
	var err, err2 error
	buf := make([]byte, 0x10000)
	for {
		var count int
		count, err = r.reader.Read(buf)
		// Consume the returned bytes before looking at err.
		if count > 0 {
			_, err2 = r.buffer.Write(buf[:count])
		}
		if err != nil && err != io.EOF && err != io.ErrClosedPipe {
			log.Errorf("Error reading stream %v: %v", r.reader, err)
		}
		if err2 != nil {
			log.Errorf("Error writing buffer: %v: %v", r.buffer, err2)
		}
		// Wake waiters even when this iteration ends the loop, so data
		// delivered alongside the terminating error is not left unannounced.
		if r.buffer.Len() > 0 {
			r.available.L.Lock()
			r.available.Broadcast()
			r.available.L.Unlock()
		}
		if err != nil || err2 != nil {
			break
		}
	}
	log.Infof("Read complete (error %v/%v)", err, err2)
}
Beispiel #2
0
// Stop shuts the drain down. It logs the given reason, attempts to remove
// the drain (a failure is logged and otherwise ignored), and finally kills
// d.srv with that same reason.
func (d *AppLogDrain) Stop(reason error) {
	log.Infof("Stopping drain %s for reason: %v", d.Id(), reason)

	removeErr := d.removeDrain()
	if removeErr != nil {
		log.Errorf("Failed to remove drain %v: %v", d.Id(), removeErr)
	}

	d.srv.Kill(reason)
}
Beispiel #3
0
// Wait blocks for a progressively longer delay before the caller's next
// retry attempt and reports whether the caller should retry at all.
//
// msg is used as the prefix of every log line emitted here. The return
// value is false only when a retry limit is configured and retrying has
// gone on for longer than that limit; the retry state is reset in that case.
// Otherwise Wait sleeps for the chosen delay, records the current time as
// the last retry and returns true.
func (retry *ProgressiveRetryer) Wait(msg string) bool {
	// Both spans are measured up front, before any reset below.
	sinceFirst := time.Since(retry.firstRetry) // how long retrying has been going on
	sinceLast := time.Since(retry.lastRetry)   // how long since the previous Wait

	var pause time.Duration // zero means "retry immediately"
	switch {
	case retry.firstRetry.IsZero(), sinceLast > RESET_AFTER:
		// Either the very first retry, or Wait has not been called for
		// longer than RESET_AFTER (implying a sufficiently successful
		// period): start over and retry without waiting.
		retry.reset()
	case retry.hasRetryLimit() && sinceFirst > retry.retryLimit:
		// Honour the configured retry limit.
		log.Errorf("%s -- giving up after retrying for %v.", msg, retry.retryLimit)
		retry.reset()
		return false
	case sinceFirst < time.Minute:
		// Every 5 seconds during the first minute.
		pause = 5 * time.Second
	case sinceFirst < (1+5)*time.Minute:
		// Every 30 seconds for the following 5 minutes.
		pause = 30 * time.Second
	case sinceFirst < (1+5+10)*time.Minute:
		// Every minute for the following 10 minutes.
		pause = time.Minute
	default:
		// Every 5 minutes from then on.
		pause = 5 * time.Minute
	}

	// Report what is about to happen.
	switch {
	case pause == 0:
		log.Warnf("%s -- retrying now.", msg)
	case retry.hasRetryLimit():
		// Retryers with a limit (the tmp. and appdrain. drains) are
		// considered unimportant to sys admins, so this stays at INFO
		// rather than WARN to keep it out of cloud events.
		log.Infof("%s -- retrying after %v (max %v).", msg, pause, retry.retryLimit)
	default:
		log.Warnf("%s -- retrying after %v.", msg, pause)
	}

	time.Sleep(pause)
	retry.lastRetry = time.Now()
	return true
}
Beispiel #4
0
// Clear drops the cached state of the named process for the current host.
//
// It removes s.Host from the set stored under the first key returned by
// getKeys and deletes the second key. Failures of either redis command are
// logged; the second command is attempted even if the first one failed.
func (s *StateCache) Clear(name string) {
	log.Infof("[statecache] Clearing state of %s", name)
	hostsKey, stateKey := s.getKeys(name)

	// Redis deletes the SET automatically once an SREM leaves it empty, so
	// no separate cleanup of hostsKey is needed.
	if err := s.Client.SRem(hostsKey, s.Host).Err(); err != nil {
		log.Errorf("Unable to clear state cache of %s in redis; %v",
			name, err)
	}

	if err := s.Client.Del(stateKey).Err(); err != nil {
		log.Errorf("Unable to clear state cache of %s in redis; %v",
			name, err)
	}
}
Beispiel #5
0
// SetState stores the given process state in redis.
//
// The state's Info() is converted to a StateInfo, extended with a "rev"
// entry holding rev in decimal, and JSON-encoded. s.Host is then added to
// the set under the first key from getKeys and the JSON is stored under the
// second key. A failure to encode goes to log.Fatal; redis failures are
// logged, and a failed SADD skips the subsequent SET.
func (s *StateCache) SetState(
	name string, state state.State, rev int64) {
	info := StateInfo(state.Info())
	info["rev"] = fmt.Sprintf("%d", rev)

	payload, err := json.Marshal(info)
	if err != nil {
		log.Fatal(err)
	}

	hostsKey, stateKey := s.getKeys(name)

	log.Infof("[statecache] Caching state of %s", name)

	if err := s.Client.SAdd(hostsKey, s.Host).Err(); err != nil {
		log.Errorf("Unable to cache state of %s in redis; %v",
			name, err)
		return
	}

	if err := s.Client.Set(stateKey, string(payload)).Err(); err != nil {
		log.Errorf("Unable to cache state of %s in redis; %v",
			name, err)
	}
}
Beispiel #6
0
// main is the systail entry point. It loads the configuration, starts one
// tailer per configured log file, marks the service as running and then
// waits on every tailer in turn. Tailers are not expected to finish, so
// reaching the end of main is treated as a failure (exit status 1).
func main() {
	go common.RegisterTailCleanup()

	zmqMajor, zmqMinor, zmqPatch := gozmq.Version()
	log.Infof("Starting systail (zeromq %d.%d.%d)", zmqMajor, zmqMinor, zmqPatch)

	systail.LoadConfig()

	hostIP, err := server.LocalIP()
	if err != nil {
		common.Fatal("Failed to determine IP addr: %v", err)
	}
	log.Info("Host IP: ", hostIP)

	configured := systail.GetConfig().LogFiles
	fmt.Printf("%+v\n", configured)
	if len(configured) == 0 {
		common.Fatal("No log files exist in configuration.")
	}

	// Start a tailer for each configured file; any failure is fatal.
	var running []*tail.Tail
	for name, logfile := range configured {
		started, startErr := tailLogFile(name, logfile, hostIP)
		if startErr != nil {
			common.Fatal("%v", startErr)
		}
		running = append(running, started)
	}

	server.MarkRunning("systail")

	// Wait on the tailers one after another, logging any that end in error.
	for _, tailer := range running {
		if waitErr := tailer.Wait(); waitErr != nil {
			log.Errorf("Cannot tail [%s]: %s", tailer.Filename, waitErr)
		}
	}

	// None of the tailers is expected to exit, with or without an error.
	log.Error("No file left to tail; exiting.")
	os.Exit(1)
}
Beispiel #7
0
// Publish publishes msg to logyard through pub, under the key
// "apptail.<AppGUID>", after sanity checks.
//
// If msg.Text is not valid UTF-8 it is rewritten in place by converting it
// to runes and back, which turns each invalid byte sequence into U+FFFD.
//
// If msg cannot be encoded as JSON the message is never published. With
// allowInvalidJson set, the failure is logged and nil is returned (the
// message is skipped); otherwise the encoding error is returned.
func (msg *Message) Publish(pub *zmqpubsub.Publisher, allowInvalidJson bool) error {
	// JSON must be a UTF-8 encoded string.
	if !utf8.ValidString(msg.Text) {
		msg.Text = string([]rune(msg.Text))
	}

	data, err := json.Marshal(msg)
	if err != nil {
		if !allowInvalidJson {
			return fmt.Errorf("Failed to encode app log record to JSON: %v", err)
		}
		log.Errorf("Cannot encode %+v into JSON -- %s. Skipping this message", msg, err)
		// Actually skip: publishing here would emit an empty payload.
		return nil
	}

	key := fmt.Sprintf("apptail.%v", msg.AppGUID)
	pub.MustPublish(key, string(data))
	return nil
}
Beispiel #8
0
// Wait never waits: it logs, at error level, that msg will not be retried
// and always returns false.
func (retry *NoopRetryer) Wait(msg string) bool {
	const shouldRetry = false

	log.Errorf("%s -- never retrying.", msg)

	return shouldRetry
}
Beispiel #9
0
// getLogFiles returns the log files to tail for this instance as a map from
// log name to a fully resolved path.
//
// The candidate list is instance.LogFiles when that is non-empty. Otherwise
// it is parsed from the STACKATO_LOG_FILES entry of the docker app env, a
// ":"-separated list of name=path pairs of which only the last 7 are used.
// Entries without "=" are reported and skipped.
//
// Each candidate path is joined onto instance.RootPath (relative paths are
// first placed under /home/stackato/), made absolute and has its symlinks
// resolved. A path that cannot be resolved, or that resolves to a location
// outside instance.RootPath, is dropped with a timeline warning. If nothing
// remains and DockerStreams is off, a timeline error is sent.
func (instance *Instance) getLogFiles() map[string]string {
	// Upper bound on the number of entries honoured from $STACKATO_LOG_FILES.
	const maxEnvLogFiles = 7

	var logfiles map[string]string

	rawMode := len(instance.LogFiles) > 0
	if rawMode {
		// If the logfiles list was explicitly passed, use it as is.
		logfiles = instance.LogFiles
	} else {
		// Use $STACKATO_LOG_FILES
		logfiles = make(map[string]string)
		if env, err := docker.GetDockerAppEnv(instance.RootPath); err != nil {
			log.Errorf("Failed to read docker image env: %v", err)
		} else if s, ok := env["STACKATO_LOG_FILES"]; ok {
			entries := strings.Split(s, ":")
			if len(entries) > maxEnvLogFiles {
				entries = entries[len(entries)-maxEnvLogFiles:]
				instance.SendTimelineEvent("WARN -- $STACKATO_LOG_FILES is large; using only last %d logs: %v", maxEnvLogFiles, entries)
			}
			for _, entry := range entries {
				kv := strings.SplitN(entry, "=", 2)
				if len(kv) != 2 {
					// An entry without "=" (e.g. an empty one produced by
					// a stray ":") has no path; indexing kv[1] would panic.
					log.Warnf("Ignoring malformed $STACKATO_LOG_FILES entry %q", entry)
					instance.SendTimelineEvent("WARN -- Ignoring malformed $STACKATO_LOG_FILES entry %q", entry)
					continue
				}
				logfiles[kv[0]] = kv[1]
			}
		}
	}

	// Expand paths, and securely ensure they fall within the app root.
	logfilesSecure := make(map[string]string)
	for name, path := range logfiles {
		var joined string

		// Treat relative paths as being relative to $STACKATO_APP_ROOT
		if !filepath.IsAbs(path) {
			stackatoAppRoot := "/home/stackato/"
			joined = filepath.Join(instance.RootPath, stackatoAppRoot, path)
		} else {
			joined = filepath.Join(instance.RootPath, path)
		}

		absPath, err := filepath.Abs(joined)
		if err != nil {
			log.Warnf("Cannot find Abs of %v <join> %v: %v", instance.RootPath, path, err)
			instance.SendTimelineEvent("WARN -- Failed to find absolute path for %v", path)
			continue
		}
		// Keep absPath in its own variable: on failure EvalSymlinks does not
		// return a usable path, and the log line below needs the one tried.
		fullpath, err := filepath.EvalSymlinks(absPath)
		if err != nil {
			log.Infof("Error reading log file %v: %v", absPath, err)
			instance.SendTimelineEvent("WARN -- Ignoring missing/inaccessible path %v", path)
			continue
		}
		// Containment is decided on path components, not on a raw string
		// prefix, so a sibling such as "<root>-other" is not mistaken for a
		// location inside the root.
		// NOTE(review): filepath.Rel fails when RootPath is empty or
		// relative, so such configurations now reject every path — confirm
		// RootPath is always absolute.
		rel, err := filepath.Rel(instance.RootPath, fullpath)
		if err != nil || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
			log.Warnf("Ignoring insecure log path %v (via %v) in instance %+v", fullpath, path, instance)
			// This user warning is exactly the same as above, lest we provide
			// a backdoor for a malicious user to list the directory tree on
			// the host.
			instance.SendTimelineEvent("WARN -- Ignoring missing/inaccessible path %v", path)
			continue
		}
		logfilesSecure[name] = fullpath
	}

	if len(logfilesSecure) == 0 && !instance.DockerStreams {
		instance.SendTimelineEvent("ERROR -- No valid log files detected for tailing")
	}

	return logfilesSecure
}