Beispiel #1
0
// Wait sleeps for a delay that grows with how long retrying has been going
// on, logs the decision prefixed with msg, and reports whether the caller
// should retry. It returns false (after logging an error and resetting) once
// a configured retry limit has been exceeded; otherwise it records the time
// of this retry and returns true.
func (retry *ProgressiveRetryer) Wait(msg string) bool {
	// Both durations are measured up front, before any reset() call below.
	elapsed := time.Since(retry.firstRetry)
	idle := time.Since(retry.lastRetry)

	// pause stays zero for the "retry immediately" cases.
	var pause time.Duration

	switch {
	case retry.firstRetry.IsZero():
		// Very first retry: no waiting.
		retry.reset()
	case idle > RESET_AFTER:
		// Wait has not been called for longer than RESET_AFTER, which is
		// taken to mean things were working; start over.
		retry.reset()
	case retry.hasRetryLimit() && elapsed > retry.retryLimit:
		// The retry limit has been exhausted.
		log.Errorf("%s -- giving up after retrying for %v.", msg, retry.retryLimit)
		retry.reset()
		return false
	case elapsed < time.Minute:
		// First minute: every 5 seconds.
		pause = 5 * time.Second
	case elapsed < (1+5)*time.Minute:
		// Following 5 minutes: every 30 seconds.
		pause = 30 * time.Second
	case elapsed < (1+5+10)*time.Minute:
		// Following 10 minutes: every minute.
		pause = time.Minute
	default:
		// From then on: every 5 minutes.
		pause = 5 * time.Minute
	}

	// Report what is about to happen.
	switch {
	case pause == 0:
		log.Warnf("%s -- retrying now.", msg)
	case retry.hasRetryLimit():
		// Per the original author: drains with a retry limit (the tmp. and
		// appdrain. drains) are unimportant to sys admins, so this is logged
		// at INFO rather than WARN to keep it out of cloud events.
		log.Infof("%s -- retrying after %v (max %v).", msg, pause, retry.retryLimit)
	default:
		log.Warnf("%s -- retrying after %v.", msg, pause)
	}

	time.Sleep(pause)
	retry.lastRetry = time.Now()
	return true
}
Beispiel #2
0
// tailStream requests the given log stream ("stream" is used as the query
// parameter name, e.g. stdout/stderr) of this instance's docker container
// from the local docker HTTP endpoint in follow mode, and hands the
// resulting tail to readFromTail for publishing.
//
// Failures are logged (and, for most of them, also reported as timeline
// events) and cause the function to return without tailing. The HTTP
// response body is closed on every failure path that occurs after the
// request succeeded, so the underlying connection is not leaked.
func (instance *Instance) tailStream(stream string, filename string, stopCh chan bool, tracker storage.Tracker) {
	pub := logyard.Broker.NewPublisherMust()
	defer pub.Stop()

	limit, err := instance.getReadLimit(pub, stream, filename)
	if err != nil {
		log.Warn(err)
		instance.SendTimelineEvent("WARN -- %v", err)
		return
	}

	rateLimiter := GetConfig().GetLeakyBucket()

	reqUrl, err := url.Parse(fmt.Sprintf("http://localhost:4243/containers/%s/logs", instance.DockerId))
	if err != nil {
		log.Warn(err)
		return
	}
	q := reqUrl.Query()
	q.Set(stream, "true")
	q.Set("follow", "true")
	reqUrl.RawQuery = q.Encode()

	resp, err := http.Get(reqUrl.String())
	if err != nil {
		log.Warn(err)
		instance.SendTimelineEvent("WARN -- %v", err)
		return
	}

	// Anything outside 1xx-3xx is treated as a failure.
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusBadRequest {
		// Nobody will read this body; release the connection.
		resp.Body.Close()
		log.Warnf("HTTP error response %v from %v", resp.Status, reqUrl)
		return
	}

	t, err := tail.TailReader(util.WrapReadSeekClose(resp.Body), tail.Config{
		MaxLineSize: GetConfig().MaxRecordSize,
		MustExist:   false,
		Follow:      true,
		Location:    &tail.SeekInfo{-limit, os.SEEK_END},
		ReOpen:      false,
		Poll:        false,
		RateLimiter: rateLimiter})
	if err != nil {
		// The tail was never set up, so the body would otherwise stay open.
		resp.Body.Close()
		log.Warnf("Cannot tail docker stream (%s); %s", stream, err)
		instance.SendTimelineEvent("ERROR -- Cannot tail file (%s); %s", stream, err)
		return
	}

	instance.readFromTail(t, pub, stream, stopCh, filename, tracker)
}
Beispiel #3
0
// publishLineAs wraps a tailed line in a message.Message carrying this
// instance's identifying fields and publishes it through pub. Lines that
// carry a tail error are re-labelled as coming from apptail itself (with an
// empty log filename) and are also logged as a warning. A nil line panics;
// a publish failure is passed to common.Fatal.
func (instance *Instance) publishLineAs(pub *zmqpubsub.Publisher, source string, logname string, line *tail.Line) {
	if line == nil {
		panic("line is nil")
	}

	record := &message.Message{
		LogFilename:   logname,
		Source:        source,
		InstanceIndex: instance.Index,
		AppGUID:       instance.AppGUID,
		AppName:       instance.AppName,
		AppSpace:      instance.AppSpace,
		MessageCommon: common.NewMessageCommon(line.Text, line.Time, util.LocalNodeId()),
	}

	// An error on the line means tail produced it, not the application;
	// mark the record accordingly.
	if line.Err != nil {
		record.Source = fmt.Sprintf("%v[apptail]", util.GetBrandName())
		record.LogFilename = ""
		log.Warnf("[%s] %s", instance.AppName, line.Text)
	}

	if err := record.Publish(pub, false); err != nil {
		common.Fatal("Unable to publish: %v", err)
	}
}
Beispiel #4
0
// Fatalf formats an error message, sends it to the websocket client, and
// then closes the stream. A failure to deliver the message is only logged;
// the stream is closed either way.
func (s *WebSocketStream) Fatalf(format string, v ...interface{}) {
	payload := &wsStreamData{fmt.Sprintf(format, v...), ""}
	if sendErr := s.send(payload); sendErr != nil {
		log.Warnf("Error sending error back to websocket client: %v", sendErr)
	}
	s.Close()
}
Beispiel #5
0
// GetLeakyBucket builds a leaky-bucket rate limiter from the configured
// lines-per-second rate and burst size. A value below 1 for either setting
// is replaced by a default (100 lines per second, burst of 10000) after
// logging a warning. The bucket interval is one second divided by the rate.
func (c *Config) GetLeakyBucket() *ratelimiter.LeakyBucket {
	linesPerSecond := c.MaxLinesPerSecond
	if linesPerSecond < 1 {
		log.Warnf("max_lines_per_second must be a positive integer; using default")
		linesPerSecond = 100
	}

	burst := c.MaxLinesBurst
	if burst < 1 {
		log.Warnf("max_lines_burst must be a positive integer; using default")
		burst = 10000
	}

	return ratelimiter.NewLeakyBucket(
		burst,
		time.Duration(int64(time.Second)/linesPerSecond))
}
Beispiel #6
0
// RegisterTailCleanup blocks until the process receives an interrupt or
// SIGTERM, then logs the signal, runs cleanup, and exits with status 1.
// Because it blocks the calling goroutine and never returns, callers
// presumably run it in its own goroutine.
func RegisterTailCleanup() {
	signals := make(chan os.Signal, 1)
	signal.Notify(signals, os.Interrupt, syscall.SIGTERM)

	// The process exits on the first signal, so a single receive suffices.
	received := <-signals
	log.Warnf("captured signal %v; exiting after cleanup", received)
	cleanup()
	os.Exit(1)
}
Beispiel #7
0
func main() {
	c := NewCron(os.Args[1], os.Args[2], os.Args[3:len(os.Args)])
	go c.Start()

	ch := make(chan os.Signal, 1)
	signal.Notify(ch, syscall.SIGINT, syscall.SIGTERM)
	log.Warnf("%v", <-ch)

	c.Stop()
	os.Exit(1)
}
Beispiel #8
0
// main runs the logyard_sieve loop: it subscribes to the "systail" stream,
// decodes each message as a systail.Message, asks the parser to turn it into
// an event, and publishes every non-nil event. Records that fail to decode
// or parse are logged and skipped.
func main() {
	zmqMajor, zmqMinor, zmqPatch := gozmq.Version()
	log.Infof("Starting logyard_sieve (zeromq %d.%d.%d)", zmqMajor, zmqMinor, zmqPatch)

	LoadConfig()

	parser := sieve.NewStackatoParser(getConfig().Events)
	parser.DeleteSamples()

	pub := logyard.Broker.NewPublisherMust()
	defer pub.Stop()
	sub := logyard.Broker.Subscribe("systail")
	defer sub.Stop()

	server.MarkRunning("logyard_sieve")

	log.Info("Watching the systail stream on this node")
	for incoming := range sub.Ch {
		var record systail.Message
		if err := json.Unmarshal([]byte(incoming.Value), &record); err != nil {
			log.Warnf("failed to parse json: %s; ignoring record: %s",
				err, incoming.Value)
			continue
		}

		event, err := parser.Parse(record.Name, record.Text)
		if err != nil {
			log.Warnf(
				"failed to parse event from %s: %s -- source: %s",
				record.Name, err, record.Text)
			continue
		}
		// A nil event with no error means there is nothing to publish.
		if event == nil {
			continue
		}

		when := time.Unix(record.UnixTime, 0)
		event.MessageCommon = common.NewMessageCommon(event.Desc, when, record.NodeID)
		event.MustPublish(pub)
	}
}
Beispiel #9
0
// tailFile tails the log file at filename (published under the given name)
// and hands the tail to readFromTail.
//
// If the tracker already knows this container/file pair, tailing resumes at
// the cached offset. Otherwise it starts a read-limit's distance before the
// end of the file, and the pair is registered with the tracker once the tail
// has been requested. Failures are logged, reported as timeline events, and
// end the function.
func (instance *Instance) tailFile(name, filename string, stopCh chan bool, tracker storage.Tracker) {
	pub := logyard.Broker.NewPublisherMust()
	defer pub.Stop()

	var seek *tail.SeekInfo
	needsRegistration := !tracker.IsChildNodeInitialized(instance.getShortDockerId(), filename)

	if needsRegistration {
		// Unknown file: begin at most `limit` bytes before its end.
		limit, err := instance.getReadLimit(pub, name, filename)
		if err != nil {
			log.Warn(err)
			instance.SendTimelineEvent("WARN -- %v", err)
			return
		}
		seek = &tail.SeekInfo{-limit, os.SEEK_END}
	} else {
		// Known file: continue where the tracker says we left off.
		cached := tracker.GetFileCachedOffset(instance.getShortDockerId(), filename)
		seek = &tail.SeekInfo{cached, os.SEEK_SET}
	}

	bucket := GetConfig().GetLeakyBucket()

	t, tailErr := tail.TailFile(filename, tail.Config{
		MaxLineSize: GetConfig().MaxRecordSize,
		MustExist:   true,
		Follow:      true,
		Location:    seek,
		ReOpen:      false,
		Poll:        false,
		RateLimiter: bucket})

	// IMPORTANT (per the original author): this registration happens every
	// time the app restarts. It runs before the TailFile error is examined,
	// i.e. even when tailing failed.
	if needsRegistration {
		tracker.InitializeChildNode(instance.getShortDockerId(), filename, INITIAL_OFFSET)
	}

	if tailErr != nil {
		log.Warnf("Cannot tail file (%s); %s", filename, tailErr)
		instance.SendTimelineEvent("ERROR -- Cannot tail file (%s); %s", name, tailErr)
		return
	}

	instance.readFromTail(t, pub, name, stopCh, filename, tracker)
}
Beispiel #10
0
// getDockerEvents issues a GET against events_url and returns the response
// of the first request that succeeds. A failed request is logged and
// retried after one second; when the last allowed attempt fails, the error
// is reported through log.Fatalf.
//
// retries is the total number of attempts. Values below 1 are treated as 1,
// so that the function always tries at least once instead of falling
// straight through to the trailing panic.
//
// The caller owns the returned response and is responsible for closing its
// body.
func getDockerEvents(retries int) *http.Response {
	if retries < 1 {
		retries = 1
	}

	c := http.Client{}
	for attempt := 0; attempt < retries; attempt++ {
		res, err := c.Get(events_url)
		if err == nil {
			return res
		}
		if (attempt + 1) == retries {
			log.Fatalf("Failed to read from docker daemon; giving up retrying: %v", err)
		}
		log.Warnf("Docker connection error (%v); retrying after 1 second.", err)
		time.Sleep(time.Second)
	}
	// Only reachable if log.Fatalf above returns instead of terminating.
	panic("unreachable")
}
Beispiel #11
0
// RemoveOrphanedDrains deletes every drain in the logyard configuration
// whose name starts with DRAIN_PREFIX, i.e. the drains created by
// applog_endpoint. A failed deletion is logged and does not stop the sweep.
func RemoveOrphanedDrains() {
	// Note that this is tricky to do when horizontally scaling
	// applog_endpoint. Could be solved easily by using nodeID or ip
	// addr in the drain name.
	drains := logyard.GetConfig().Drains
	for drainName := range drains {
		if !strings.HasPrefix(drainName, DRAIN_PREFIX) {
			continue
		}
		log.Infof("Removing orphaned drain %v", drainName)
		if err := logyard.DeleteDrain(drainName); err != nil {
			log.Warnf("Failed to delete drain %v -- %v",
				drainName, err)
		}
	}
}
Beispiel #12
0
// NewCron creates a Cron that runs command with args according to schedule.
// Each run is logged before it starts and again with its outcome, and is
// counted in the Cron's WaitGroup for the duration of the run.
//
// The WaitGroup is released with defer, so the count is decremented even if
// the run panics; otherwise a recovered panic would leave the counter
// permanently non-zero and block anyone waiting on it.
func NewCron(schedule string, command string, args []string) *Cron {
	log.Infof("Running per schedule: %v", schedule)
	c := &Cron{cron.New(), &sync.WaitGroup{}}

	c.AddFunc(schedule, func() {
		c.wg.Add(1)
		defer c.wg.Done()

		log.Infof("Executing: %v %v", command, strings.Join(args, " "))
		err := execute(command, args)
		if err != nil {
			log.Warnf("Failed: %v", err)
		} else {
			log.Info("Succeeded")
		}
	})
	return c
}
Beispiel #13
0
// GetLiveDockerContainers queries containers_url and returns a set (map to
// true) of the container IDs it lists, each truncated to ID_LENGTH
// characters.
//
// A failed request is logged and retried after one second, up to retries
// attempts in total; failure of the last attempt is reported through
// log.Fatalf. The first successful response is decoded and returned
// immediately. Errors while reading or decoding the response go to
// log.Fatal. With retries < 1 no request is made and the result is empty.
func GetLiveDockerContainers(retries int) map[string]bool {
	allDockerIds := make(map[string]bool)
	c := http.Client{}
	for attempt := 0; attempt < retries; attempt++ {
		res, err := c.Get(containers_url)
		if err != nil {
			// res is nil here, so it must not be touched.
			if (attempt + 1) == retries {
				log.Fatalf("Failed to read from docker daemon; giving up retrying: %v", err)
			}
			log.Warnf("Docker connection error (%v); retrying after 1 second.", err)
			time.Sleep(time.Second)
			continue
		}

		httpResByte, err := ioutil.ReadAll(res.Body)
		// Close right away rather than deferring inside the loop.
		res.Body.Close()
		if err != nil {
			log.Fatal(err)
		}

		var jsonData []Docker
		if err = json.Unmarshal(httpResByte, &jsonData); err != nil {
			log.Fatal(err)
		}

		for _, element := range jsonData {
			shortenedKey := element.Id[:ID_LENGTH]
			allDockerIds[shortenedKey] = true
		}

		// One successful listing is all that is needed; do not query again.
		return allDockerIds
	}
	return allDockerIds
}
Beispiel #14
0
// tailHandlerWs serves an application's log over a websocket stream: it
// parses the request arguments, sends the recent log records, then creates
// and starts an app log drain and forwards the drain's channel to the
// client until forwarding ends.
//
// Any failure during setup (arguments, recent records, creating or starting
// the drain) is reported to the client via stream.Fatalf and ends the
// handler. A forwarding error is logged and used to stop the drain. Errors
// from the drain itself are logged unless they are
// wsutil.WebSocketStreamError values.
func tailHandlerWs(
	w http.ResponseWriter, r *http.Request, stream *wsutil.WebSocketStream) {
	args, err := ParseArguments(r)
	if err != nil {
		stream.Fatalf("Invalid arguments; %v", err)
		return
	}

	if err := sendRecent(stream, args); err != nil {
		stream.Fatalf("%v", err)
		return
	}

	d, err := drain.NewAppLogDrain(args.GUID)
	if err != nil {
		stream.Fatalf("Unable to create drain: %v", err)
		return
	}
	ch, err := d.Start()
	if err != nil {
		stream.Fatalf("Unable to start drain: %v", err)
		// The drain never started; there is nothing to forward or wait for.
		return
	}

	err = stream.Forward(ch)
	if err != nil {
		log.Infof("%v", err)
		d.Stop(err)
	}

	// We expect drain.Wait to not block at this point.
	if err := d.Wait(); err != nil {
		if _, ok := err.(wsutil.WebSocketStreamError); !ok {
			log.Warnf("Error from app log drain server: %v", err)
		}
	}
}
Beispiel #15
0
// getLogFiles returns the log files to tail for this instance as a map from
// log name to a resolved path on the host.
//
// The candidates are instance.LogFiles when that is non-empty; otherwise
// they are read from the $STACKATO_LOG_FILES variable of the docker app
// environment, a ':'-separated list of name=path entries of which only the
// last 7 are used. Entries without '=' are skipped with a warning.
//
// Each candidate path is joined onto instance.RootPath (relative paths are
// first placed under /home/stackato/), made absolute, and has its symlinks
// resolved. Paths that cannot be resolved, or that resolve to somewhere
// outside instance.RootPath, are dropped with a timeline warning. If
// nothing is left and the instance does not use docker streams, an error
// timeline event is sent.
func (instance *Instance) getLogFiles() map[string]string {
	var logfiles map[string]string

	rawMode := len(instance.LogFiles) > 0
	if rawMode {
		// If the logfiles list was explicitly passed, use it as is.
		logfiles = instance.LogFiles
	} else {
		// Use $STACKATO_LOG_FILES
		logfiles = make(map[string]string)
		if env, err := docker.GetDockerAppEnv(instance.RootPath); err != nil {
			log.Errorf("Failed to read docker image env: %v", err)
		} else if s, ok := env["STACKATO_LOG_FILES"]; ok {
			parts := strings.Split(s, ":")
			if len(parts) > 7 {
				parts = parts[len(parts)-7:]
				instance.SendTimelineEvent("WARN -- $STACKATO_LOG_FILES is large; using only last 7 logs: %v", parts)
			}
			for _, f := range parts {
				kv := strings.SplitN(f, "=", 2)
				if len(kv) != 2 {
					// No '=' in this entry (this includes an empty
					// variable); indexing kv[1] would panic.
					log.Warnf("Ignoring malformed $STACKATO_LOG_FILES entry %v", f)
					instance.SendTimelineEvent("WARN -- Ignoring malformed $STACKATO_LOG_FILES entry %v", f)
					continue
				}
				logfiles[kv[0]] = kv[1]
			}
		}
	}

	// A path is inside the root only if it is the root itself or continues
	// past it with a path separator. A bare prefix match would also accept
	// sibling directories such as "<root>-other/...".
	rootPrefix := instance.RootPath
	if !strings.HasSuffix(rootPrefix, string(filepath.Separator)) {
		rootPrefix += string(filepath.Separator)
	}

	// Expand paths, and securely ensure they fall within the app root.
	logfilesSecure := make(map[string]string)
	for name, path := range logfiles {
		var fullpath string

		// Treat relative paths as being relative to $STACKATO_APP_ROOT
		if !filepath.IsAbs(path) {
			stackatoAppRoot := "/home/stackato/"
			fullpath = filepath.Join(instance.RootPath, stackatoAppRoot, path)
		} else {
			fullpath = filepath.Join(instance.RootPath, path)
		}

		fullpath, err := filepath.Abs(fullpath)
		if err != nil {
			log.Warnf("Cannot find Abs of %v <join> %v: %v", instance.RootPath, path, err)
			instance.SendTimelineEvent("WARN -- Failed to find absolute path for %v", path)
			continue
		}
		fullpath, err = filepath.EvalSymlinks(fullpath)
		if err != nil {
			log.Infof("Error reading log file %v: %v", fullpath, err)
			instance.SendTimelineEvent("WARN -- Ignoring missing/inaccessible path %v", path)
			continue
		}
		if fullpath != instance.RootPath && !strings.HasPrefix(fullpath, rootPrefix) {
			log.Warnf("Ignoring insecure log path %v (via %v) in instance %+v", fullpath, path, instance)
			// This user warning is exactly the same as above, lest we provide
			// a backdoor for a malicious user to list the directory tree on
			// the host.
			instance.SendTimelineEvent("WARN -- Ignoring missing/inaccessible path %v", path)
			continue
		}
		logfilesSecure[name] = fullpath
	}

	if len(logfilesSecure) == 0 && !instance.DockerStreams {
		instance.SendTimelineEvent("ERROR -- No valid log files detected for tailing")
	}

	return logfilesSecure
}