func sigaction__reopen_logs(sig os.Signal) { log.Infof("got %s, reopening logfile: %s", SignalName(sig), logPath) if err := reopenLogfile(logPath, logLevel); err != nil { log.Errorf("can't reopen log file: %s", err) } log.Infof("sigaction__reopen_logs: new log opened: %s", logPath) }
func sigaction__graceful_restart(sig os.Signal) { log.Infof("got %s, restarting gracefully", SignalName(sig)) if err := InitiateRestart(); err != nil { log.Errorf("can't initiate restart: %s", err) } }
func wait_for_signals() ExitMethod { c := make(chan os.Signal, 5) signal.Notify(c, syscall.SIGTERM, syscall.SIGQUIT, syscall.SIGUSR1, syscall.SIGINT, syscall.SIGUSR2) for { select { case sig := <-c: switch sig { case syscall.SIGTERM, syscall.SIGINT: sigaction__graceful_shutdown(sig) return EXIT_IMMEDIATELY case syscall.SIGUSR1: if RestartInprogress() == false { // FIXME: why ingore log reopen ? sigaction__reopen_logs(sig) } else { log.Infof("service is restarting. ignoring %s", SignalName(sig)) } case syscall.SIGUSR2: if RestartInprogress() == false { sigaction__graceful_restart(sig) } else { log.Infof("service is restarting. ignoring %s", SignalName(sig)) } case syscall.SIGQUIT: if RestartInprogress() == false { // not a restart sequence, someone's just sent us SIGQUIT sigaction__graceful_shutdown(sig) return EXIT_IMMEDIATELY } FinalizeRestartWithSuccess() return EXIT_GRACEFULLY } case wp := <-RestartChildWaitChan(): FinalizeRestartWithError(wp) case <-RestartChildTimeoutChan(): FinalizeRestartWithTimeout() } } }
func WriteLogsThread(filename string) { log.Infof("Started write logs thread to file=%s", filename) reopenTick := time.Tick(time.Second * 10) fp, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666) var wr *bufio.Writer if err != nil { log.Errorf("Could not open %s: %s", filename, err.Error()) } else { wr = bufio.NewWriterSize(fp, 65536) } for { select { case <-reopenTick: if fp != nil { fp.Close() } fp, err = os.OpenFile(filename, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666) if err != nil { log.Warnf("Could not reopen %s: %s", err.Error()) wr = nil fp = nil } else { wr = bufio.NewWriterSize(fp, 65536) } case ev := <-rqhLog: l := len(rqhLog) evs := make([]*FinishResult, 0, l+1) evs = append(evs, ev) for i := 0; i < l; i++ { evs = append(evs, <-rqhLog) } if wr != nil { encoder := json.NewEncoder(wr) for _, e := range evs { if err = encoder.Encode(e); err != nil { log.Errorf("Could not write to %s: %s", filename, err.Error()) } } if err = wr.Flush(); err != nil { log.Errorf("Could not flush contents to %s: %s", filename, err.Error()) } } else { log.Errorf("Failed to write %d events to rqh log because file %s could not be opened", len(evs), filename) } } } }
func getOrCreateDispatcherThread(className, location string) *DispatcherData { dispatchThreads.Lock() defer dispatchThreads.Unlock() el, ok := dispatchThreads.v[className] if !ok { el = make(map[string]*DispatcherData) dispatchThreads.v[className] = el } dt, ok := el[location] if !ok { dt = &DispatcherData{ className: className, location: location, newJobsCh: make(chan *NewJobs, 1), redispatchCh: make(chan bool, 1), rusageCh: make(chan *ScriptRusageEntry, 1), finishCh: make(chan *FinishEvent, 1), zeroTTCh: make(chan bool, 1), allJobsCh: make(chan []*TimetableEntry, 1), debugCh: make(chan *DebugPrintRequest, 1), killRequestCh: make(chan *KillRequest, 1), jobsCountCh: make(chan *JobsCountRequest, 1), tickCheckDeletedCh: make(chan bool, 1), } log.Infof("Creating a goroutine for class=%s and location=%s", className, location) el[location] = dt go dt.dispatchThread() } return dt }
func getOrCreateLauncherThread(hostname string) *LauncherData { launcherThreads.Lock() defer launcherThreads.Unlock() dt, ok := launcherThreads.v[hostname] if !ok { dt = &LauncherData{ hostname: hostname, waitingCh: make(chan *RunQueueEntry, 100), allRowsCh: make(chan []*RunQueueEntry, 1), debugCh: make(chan *LauncherDebugPrintRequest, 1), updateStatusRequestCh: make(chan *LauncherUpdateStatusRequest, 5), logFinishRequestCh: make(chan *LauncherLogFinishRequest, 5), } log.Infof("Creating a launcher goroutine for hostname=%s", hostname) launcherThreads.v[hostname] = dt go dt.mainThread() } return dt }
func sigaction__graceful_shutdown(sig os.Signal) { log.Infof("got %s, shutting down", SignalName(sig)) }
func (server *Server) serveConnection(conn net.Conn) { log.Debugf("accepted connection: %s -> %s", conn.LocalAddr(), conn.RemoteAddr()) defer func() { atomic.AddUint64(&server.Stats.ConnCur, ^uint64(0)) // decrements the value, ref: http://golang.org/pkg/sync/atomic/#AddUint64 log.Debugf("closing connection: %s -> %s", conn.LocalAddr(), conn.RemoteAddr()) if server.onDisconnect != nil { server.onDisconnect(RequestT{ Server: server, Conn: conn, }) } conn.Close() }() atomic.AddUint64(&server.Stats.ConnCur, 1) atomic.AddUint64(&server.Stats.ConnTotal, 1) for { // need these as var here, since goto might jump over their definition below var ( result ResultT request RequestT startTime time.Time pinbaReq pinba.Request requestTime time.Duration ) msgid, msg, bytes_read, status, err := server.Codec.ReadRequest(server.Proto, conn) if err != nil { if status == ConnEOF { // connection was closed gracefully from client side break } if status == ConnOK { // misunderstanding on protobuf level result = server.Proto.ErrorGeneric(err) // FIXME(antoxa): remove this goto, it's annoying to code around goto write_response } // IO error or similar log.Infof("aborting connection: %v", err) break } // FIXME(antoxa): stats will not be incremented when ReadRequest returns err != nil and status == ConnOK atomic.AddUint64(&server.Stats.BytesRead, uint64(bytes_read)) atomic.AddUint64(&server.Stats.Requests, 1) request = RequestT{ Server: server, Conn: conn, RequestId: atomic.AddUint64(&globalRequestId, 1), MessageId: msgid, Message: msg, PinbaReq: &pinbaReq, } if msgid < MaxMsgID { atomic.AddUint64(&server.Stats.RequestsIdStat[msgid], 1) } startTime = time.Now() result = server.Proto.Dispatch(request, server.Handler) requestTime = time.Since(startTime) // log slow request if needed if server.slowRequestTime > 0 && server.slowRequestTime <= requestTime { if msg != nil { requestInfo := func() string { reqName := MapRequestIdToName(server.Proto, msgid) body, err := json.Marshal(msg) if err != nil { return fmt.Sprintf("%s %v", reqName, err) } return fmt.Sprintf("%s %s", reqName, body) }() log.Warnf("slow request (%d ms): %s; addr: %s <- %s", requestTime/time.Millisecond, requestInfo, conn.LocalAddr(), conn.RemoteAddr()) } } server.sendToPinba(&pinbaReq, server.pinbaReqNames[msgid], requestTime) write_response: if result.Action == ACTION_RESULT { writeln, err := server.Codec.WriteResponse(server.Proto, conn, result.Message) if err != nil { // write error: can't recover log.Infof("unrecoverable error while writing: %v", err) break } atomic.AddUint64(&server.Stats.BytesWritten, uint64(writeln)) } if result.Action == ACTION_CLOSE { break } } }
func Initialize(default_config_path string, service_conf Config) { flag.StringVar(&flags.ConfFile, "c", default_config_path, "path to config file") flag.StringVar(&flags.LogFile, "l", "", "path to log file, special value '-' means 'stdout'") flag.StringVar(&flags.PidFile, "p", "", "path to pid file. if empty, pidfile will not be created") flag.BoolVar(&flags.Testconf, "t", false, "test configuration and exit") flag.BoolVar(&flags.Version, "v", false, "print version") flag.BoolVar(&flags.FullVersion, "V", false, "print full version info") flag.BoolVar(&flags.Debug, "debug", false, "force DEBUG log level") flag.Parse() if flags.Version { fmt.Printf("%s\n", VersionInfo.GetVersion()) os.Exit(0) } if flags.FullVersion { data, _ := json.MarshalIndent(VersionInfo, "", " ") fmt.Printf("%s\n", data) os.Exit(0) } var err error config = service_conf // save a pointer to service's config (NOT a copy, mon!) commandLine = strings.Join(os.Args, " ") // XXX(antoxa): couldn't think of a better way hostname = getHostname() // get hostname early // moved here from init(), just importing a package should not publish expvars initExpvars() // current executable full path (symlinks and shit sometimes complicate things) binaryPath = func() string { path, err := osutil.GetCurrentBinary() if err != nil { // log as notice, non-critical error (only stats affected) log.Infof("couldn't get current binary (using argv[0] = %q): %v", os.Args[0], err) return os.Args[0] } return path }() // config path confPath := func() string { if flags.ConfFile != "" { return flags.ConfFile } return default_config_path }() // resolve absolute config path, convenient for stats configPath = func(path string) string { var err error if path, err = filepath.Abs(path); err != nil { return path } if path, err = filepath.EvalSymlinks(path); err != nil { return path } return path }(confPath) // parse config and construct final config merged with command line flags // use path as supplied to us in args (i.e. unresolved), just to avoid 'too smart, outsmarted yourself' gotchas err = ParseConfigFromFile(confPath, service_conf) if err != nil { err_message := func(err error) string { switch real_err := err.(type) { case nil: return "syntax is ok" case *json.SyntaxError: return fmt.Sprintf("%v at offset %d", real_err, real_err.Offset) case *os.PathError: return fmt.Sprintf("%v", real_err) default: return fmt.Sprintf("(%T) %v", real_err, real_err) } }(err) stderrLogger.Fatalf("Error in config: %s", err_message) } else { if flags.Testconf { fmt.Printf("testconf %s: syntax is ok\n", configPath) } } mergeCommandlineFlagsToConfig(flags, config) daemonConfig := config.GetDaemonConfig() // antoxa: need the fancy wrapper function to have testconf behave properly // FIXME: testconf should check more stuff (below) and graceful restart also initPidfileLogfile := func() error { // FIXME(antoxa): this testconf thingy is everywhere! must... resist... full rewrite if flags.Testconf { err = PidfileTest(daemonConfig.GetPidFile()) } else { pidfile, err = PidfileOpen(daemonConfig.GetPidFile()) } if err != nil { return fmt.Errorf("can't open pidfile: %s", err) } // FIXME: this is shit ugly // need better integration between logger and daemon-config // or 1-to-1 mapping // or better log package :) log_level := daemonConfig.GetLogLevel() if log_level == 0 { return fmt.Errorf("unknown log_level, supported: %v", badoo_config.ServiceConfigDaemonConfigTLogLevels_name) } err = reopenLogfile(daemonConfig.GetLogFile(), log.Level(log_level)) if err != nil { return fmt.Errorf("can't open logfile: %s", err) } return nil } err = initPidfileLogfile() if err != nil { if flags.Testconf { stderrLogger.Errorf("%v", err) fmt.Printf("testconf failed\n") } else { stderrLogger.Errorf("%v", err) // always pidfile/logfile errors to stderr } os.Exit(1) } else { if flags.Testconf { fmt.Printf("testconf successful\n") os.Exit(0) } } // log some version info like libangel does versionString := func() string { vi := &VersionInfo version := func() string { if vi.GetAutoBuildTag() != "" { return fmt.Sprintf("%s-%s", vi.GetVersion(), vi.GetAutoBuildTag()) } else { return vi.GetVersion() } }() return fmt.Sprintf("%s version %s, git %s, built %s on %s", vi.GetVcsBasename(), version, vi.GetVcsShortHash(), vi.GetBuildDate(), vi.GetBuildHost()) }() log.Infof("%s", versionString) // max cpus, 0 = all of them numCPU := func() int { maxCpus := int(daemonConfig.GetMaxCpus()) if maxCpus <= 0 || maxCpus > runtime.NumCPU() { maxCpus = runtime.NumCPU() } return maxCpus }() runtime.GOMAXPROCS(numCPU) // gc percent, <0 - disables GC if daemonConfig.GcPercent != nil { debug.SetGCPercent(int(daemonConfig.GetGcPercent())) } // process pinba configuration and related stuff pinbaSender, err = func() (*PinbaSender, error) { // assigns a global if daemonConfig.GetPinbaAddress() == "" { return nil, nil // user doesn't want pinba configured } pi, err := PinbaInfoFromConfig(config) if err != nil { return nil, err } return NewPinbaSender(pi), nil }() if err != nil { log.Fatalf("pinba config error: %v", err) } // graceful restart handling // see restart.go and signals.go for more details restartData, err = ParseRestartDataFromEnv() if err != nil { log.Fatalf("can't parse restart data: %v", err) } if restartData != nil { log.Debugf("[CHILD] this is a restart, parent: %d, me: %d", restartData.PPid, os.Getpid()) } // start http pprof server (possibly - inherit fd from parent if this is a restart) err = func() (err error) { HttpServer, err = newHttpServer(config, restartData) // assigning a global here if err != nil { return err } if HttpServer != nil { // nil here means it has not been configured go HttpServer.Serve() } return nil }() if err != nil { log.Fatalf("can't start http_pprof server: %v", err) } }
// Call this when you want to start your servers and stuff func EventLoop(ports []Port) { defer log.Debug("exiting") initPhaseDuration = time.Since(startupTime) daemonConfig := config.GetDaemonConfig() // service-stats ports ports = append(ports, GpbPort("service-stats-gpb", stats_ctx, badoo_service.Gpbrpc)) ports = append(ports, JsonPort("service-stats-gpb/json", stats_ctx, badoo_service.Gpbrpc)) // build map of ports and do some sanity checks ph := make(map[string]*Port) for i := 0; i < len(ports); i++ { p := &ports[i] ph[p.Name] = p // json and gpb ports should have the same context // so try and warn user about passing plain values in (as it makes a copy) if reflect.ValueOf(p.Handler).Kind() != reflect.Ptr { log.Infof("port[%d].Handler should be a pointer (you want gpbs and json to use the same context, right?) (now: %T)", i, p.Handler) } } getRestartSocket := func(rcd *RestartChildData, portName, portAddr string) (*RestartSocket, *os.File) { if rcd == nil { return nil, nil } restartSocket, exists := rcd.GpbrpcSockets[portName] if exists == false { return nil, nil } restartFile := os.NewFile(restartSocket.Fd, "") if restartSocket.Address != portAddr { return nil, restartFile } return &restartSocket, restartFile } // start 'em all for _, lcf := range daemonConfig.GetListen() { portName, portAddr := lcf.GetProto(), lcf.GetAddress() port := ph[portName] if nil == port { log.Warnf("ignoring unknown port: %s at %s", portName, portAddr) continue } if port.IsStarted { log.Warnf("ignoring double startup for port: %s at %s", portName, portAddr) } listener, err := func() (listener net.Listener, err error) { // it's important that this should be a function, see defer inside restartSocket, restartFile := getRestartSocket(restartData, portName, portAddr) // this whole fd/file affair is very inconvenient to // since when getRestartSocket() returns fd - it can't close it yet, as it can be used by FileListener defer restartFile.Close() if restartSocket == nil { listener, err = net.Listen("tcp", portAddr) if err != nil { log.Errorf("listen failed for server %s at %s: %s", portName, portAddr, err) return } log.Infof("port %s bound to address %s", portName, listener.Addr()) } else { listener, err = net.FileListener(restartFile) // this dup()-s if err != nil { log.Errorf("failed to grab parent fd %d for %s at %s: %s", restartSocket.Fd, portName, portAddr, err) return } log.Infof("port %s bound to address %s (parent fd: %d)", portName, listener.Addr(), restartSocket.Fd) } return }() if err != nil { os.Exit(1) } // enable pinba only for ports that explicitly request it ps := func() gpbrpc.PinbaSender { if !lcf.GetPinbaEnabled() { return nil // explicit nil here } if pinbaSender == nil { log.Warnf("pinba is not configured, but pinba_enabled IS set for port %s: %s", portName, portAddr) return nil // explicit nil here } log.Infof("pinba configured for port %s:%s -> %s", portName, portAddr, pinbaSender.Address) return pinbaSender }() // slow request log time slowRequestTime := time.Duration(daemonConfig.GetSlowRequestMs()) * time.Millisecond srv := &Server{ Name: lcf.GetProto(), Address: lcf.GetAddress(), Server: gpbrpc.NewServer(listener, port.Proto, port.Codec, port.Handler, ps, slowRequestTime), } go srv.Server.Serve() port.IsStarted = true StartedServers[port.Name] = srv // save it for laterz } // kill parent if this is a child of graceful restart if restartData != nil { syscall.Kill(restartData.PPid, syscall.SIGQUIT) } log.Infof("entering event loop") exitMethod := wait_for_signals() if exitMethod == EXIT_GRACEFULLY { // wait for established connections to close and then die // FIXME: should stop servers from accepting new connections here! const ATTEMPTS_PER_SEC = 2 maxAttempts := daemonConfig.GetParentWaitTimeout() * ATTEMPTS_PER_SEC for i := uint32(0); i < maxAttempts; i++ { for _, srv := range StartedServers { currConn := atomic.LoadUint64(&srv.Server.Stats.ConnCur) if currConn > 0 { log.Debugf("%s still has %d connections", srv.Name, currConn) time.Sleep(time.Second / ATTEMPTS_PER_SEC) } } } } else { // do nothing for EXIT_IMMEDIATELY } // doing cleanups here // XXX: can this be moved to defer at the start of this function? if pidfile != nil { pidfile.CloseAndRemove() } }