Example #1
File: logwriter.go Project: badoo/thunder
func WriteLogsThread(filename string) {
	log.Infof("Started write logs thread to file=%s", filename)

	reopenTick := time.Tick(time.Second * 10)

	fp, err := os.OpenFile(filename, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
	var wr *bufio.Writer

	if err != nil {
		log.Errorf("Could not open %s: %s", filename, err.Error())
	} else {
		wr = bufio.NewWriterSize(fp, 65536)
	}

	for {
		select {
		case <-reopenTick:
			if fp != nil {
				fp.Close()
			}

			fp, err = os.OpenFile(filename, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0666)
			if err != nil {
				log.Warnf("Could not reopen %s: %s", err.Error())
				wr = nil
				fp = nil
			} else {
				wr = bufio.NewWriterSize(fp, 65536)
			}
		case ev := <-rqhLog:
			l := len(rqhLog)
			evs := make([]*FinishResult, 0, l+1)
			evs = append(evs, ev)

			for i := 0; i < l; i++ {
				evs = append(evs, <-rqhLog)
			}

			if wr != nil {
				encoder := json.NewEncoder(wr)

				for _, e := range evs {
					if err = encoder.Encode(e); err != nil {
						log.Errorf("Could not write to %s: %s", filename, err.Error())
					}
				}

				if err = wr.Flush(); err != nil {
					log.Errorf("Could not flush contents to %s: %s", filename, err.Error())
				}
			} else {
				log.Errorf("Failed to write %d events to rqh log because file %s could not be opened", len(evs), filename)
			}
		}

	}
}
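
The select branch above batches writes: it blocks for one event, then drains however many more len(rqhLog) reports as already queued, so a burst of events costs a single flush. A minimal, self-contained sketch of that drain idiom (the event type and channel here are hypothetical stand-ins, not thunder's types):

package main

import "fmt"

type event struct{ id int }

// drainBatch blocks for one event, then non-blockingly collects whatever
// else is already buffered, so one write can cover a whole burst.
func drainBatch(ch chan *event) []*event {
	ev := <-ch
	n := len(ch) // events already queued at this instant
	batch := make([]*event, 0, n+1)
	batch = append(batch, ev)
	for i := 0; i < n; i++ {
		batch = append(batch, <-ch)
	}
	return batch
}

func main() {
	ch := make(chan *event, 16)
	for i := 0; i < 5; i++ {
		ch <- &event{id: i}
	}
	fmt.Println(len(drainBatch(ch))) // 5
}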
Example #2
File: server.go Project: badoo/thunder
func (server *Server) Serve() {
	for {
		conn, err := server.Listener.Accept()
		if err != nil {

			if strings.Contains(err.Error(), "use of closed network connection") {
				// this error happens after we've called listener.Close() in other goroutine
				return
			}

			log.Errorf("accept() failed: \"%s\", will sleep for %v before trying again", err, SleepAfterAcceptError)
			time.Sleep(SleepAfterAcceptError)

			continue
		}

		if server.onConnect != nil {
			server.onConnect(RequestT{
				Server: server,
				Conn:   conn,
			})
		}

		go server.serveConnection(conn)
	}
}
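
Matching the error text "use of closed network connection" was the only portable check when this was written; since Go 1.16 the same condition is exposed as net.ErrClosed, so errors.Is survives any rewording of the message. A hedged sketch of the same accept loop using that check (handler and timings are placeholders):

package main

import (
	"errors"
	"log"
	"net"
	"time"
)

func handle(c net.Conn) {
	c.Close() // placeholder for real per-connection logic
}

func acceptLoop(ln net.Listener) {
	for {
		conn, err := ln.Accept()
		if err != nil {
			if errors.Is(err, net.ErrClosed) {
				return // listener was closed by another goroutine
			}
			log.Printf("accept() failed: %v, retrying", err)
			time.Sleep(100 * time.Millisecond)
			continue
		}
		go handle(conn)
	}
}

func main() {
	ln, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		log.Fatal(err)
	}
	go acceptLoop(ln)
	time.Sleep(50 * time.Millisecond)
	ln.Close() // acceptLoop observes net.ErrClosed and returns
	time.Sleep(50 * time.Millisecond)
}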
Example #3
File: launcher.go Project: badoo/thunder
func (d *LauncherData) processUpdateStatusRequest(req *LauncherUpdateStatusRequest) {
	var err error
	defer func() { req.errCh <- err }()

	el := d.allMap[req.RunId]

	if el == nil {
		err = fmt.Errorf("No such rq row id=%d", req.RunId)
		return
	}

	if el.RunStatus != req.PrevStatus {
		err = fmt.Errorf("Previous status mismatch for rq row id=%d: req.prev=%s, actual=%s", req.RunId, req.PrevStatus, el.RunStatus)
		return
	}

	err = db.DoInLazyTransaction(func(tx *db.LazyTrx) error {
		return updateRunStatus(tx, req.RunId, req.Status, req.PrevStatus)
	})

	if err != nil {
		log.Errorf("Could not update run status of run_id=%d to %s: %s", req.RunId, req.Status, err.Error())
		return
	}

	d.updateStatus(el, req.Status)
}
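
The deferred send on req.errCh is what makes this request/reply safe: every return path, including the early validation exits, delivers exactly one error (possibly nil) to the caller. A minimal sketch of the pattern with hypothetical types:

package main

import (
	"errors"
	"fmt"
)

type request struct {
	id    int
	errCh chan error
}

func process(req *request) {
	var err error
	defer func() { req.errCh <- err }() // runs on every return path

	if req.id == 0 {
		err = errors.New("no such row")
		return
	}
	// ... do the actual work; leave err == nil on success
}

func main() {
	req := &request{id: 0, errCh: make(chan error, 1)}
	process(req)
	fmt.Println(<-req.errCh) // "no such row"
}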
Example #4
File: launcher.go Project: badoo/thunder
func (d *LauncherData) processFinished() {
	var finishedIds []uint64
	var err error

	for run_id := range d.finishedMap {
		d.call(&badoo_phproxyd.RequestFree{Hash: proto.Uint64(run_id)})
		finishedIds = append(finishedIds, run_id)
	}

	if len(finishedIds) == 0 {
		return
	}

	sort.Sort(common.UInt64Slice(finishedIds))

	err = db.DoInLazyTransaction(func(tx *db.LazyTrx) error {
		return deleteFromRunQueue(tx, finishedIds, RUN_STATUS_FINISHED)
	})

	if err != nil {
		log.Errorf("Could not delete rows from run queue for hostname=%s: %s", d.hostname, err.Error())
		return
	}

	for _, v := range d.finishedMap {
		d.delFromMaps(v.Id)
	}
}
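
common.UInt64Slice is a hand-written sort.Interface; since Go 1.8 the same ordering can be expressed inline with sort.Slice. A sketch of the alternative (shown as an option, not a change to thunder's code):

package main

import (
	"fmt"
	"sort"
)

func main() {
	ids := []uint64{42, 7, 19}
	sort.Slice(ids, func(i, j int) bool { return ids[i] < ids[j] })
	fmt.Println(ids) // [7 19 42]
}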
Example #5
File: signals.go Project: badoo/thunder
func sigaction__graceful_restart(sig os.Signal) {
	log.Infof("got %s, restarting gracefully", SignalName(sig))

	if err := InitiateRestart(); err != nil {
		log.Errorf("can't initiate restart: %s", err)
	}
}
Example #6
File: restart.go Project: badoo/thunder
func (rctx *RestartContext) MovePidfileBack() (err error) {
	pidfile, err = currentRestart.Pidfile.MoveTo(pidfile.path)
	if err != nil {
		log.Errorf("can't move pidfile back: %v, %v", err, currentRestart.Pidfile)
	}

	return err
}
Example #7
File: vproc.go Project: badoo/thunder
func (vp *Vproc) Wait() (*os.ProcessState, error) {
	ps, err := vp.Cmd.Process.Wait()
	if err != nil {
		log.Errorf("%v\n", err)
		return ps, err
	}
	return ps, err
}
Example #8
File: vproc.go Project: badoo/thunder
func (vp *Vproc) Start() error {
	err := vp.Cmd.Start()
	if err != nil {
		log.Errorf("%v\n", err)
	}

	return err
}
Example #9
File: signals.go Project: badoo/thunder
func sigaction__reopen_logs(sig os.Signal) {
	log.Infof("got %s, reopening logfile: %s", SignalName(sig), logPath)

	if err := reopenLogfile(logPath, logLevel); err != nil {
		log.Errorf("can't reopen log file: %s", err)
	}

	log.Infof("sigaction__reopen_logs: new log opened: %s", logPath)
}
Example #10
File: client.go Project: badoo/thunder
func NewClient(address string, p Protocol, c ClientCodec, connect_timeout, request_timeout time.Duration) *Client {
	ips, err := dns.LookupHostPort(address)
	if err != nil {
		log.Errorf("dns.LookupHostPort() faield: %v", err)
		// FIXME(antoxa): just reusing existing ips here, which actually sucks
		//                this only works because cli.Call() uses net.Dial() which resolves the name again
	}

	canUseClient := func(client *Client) bool {
		// readLoop() might be modifying this conn
		// but don't really need to lock for ips comparison, since ips are never modified for existing client
		client.lk.Lock()
		defer client.lk.Unlock()

		// TODO(antoxa): can just use one ip for client and recheck not full equality
		//               but only if new ips contain old ip
		if !util.StrSliceEqual(client.ips, ips) {
			return false
		}

		if client.closed {
			return false
		}

		return true
	}

	const max_tries = 3 // arbitrary limit, i know

	for done_tries := 0; done_tries < max_tries; done_tries++ {
		client := Pcm.GetClient(address)
		if client == nil {
			break
		}

		if !canUseClient(client) {
			client.closeNoReuse()
			continue
		}

		log.Debugf("reused existing client %p for %s (after %d tries)", client, address, done_tries)
		return client
	}

	log.Debugf("creating new cli for %s", address)
	return &Client{
		address:         address,
		ips:             ips,
		Proto:           p,
		Codec:           c,
		connect_timeout: connect_timeout,
		request_timeout: request_timeout,
	}
}
Example #11
File: restart.go Project: badoo/thunder
func FinalizeRestartWithError(proc_status RestartProcStatus) {

	if proc_status.Err != nil {
		log.Errorf("couldn't collect state for child %d, %v", currentRestart.Child.Pid, proc_status.Err)
	}
	log.Warnf("child %d failed to start, collected %v", currentRestart.Child.Pid, proc_status.State)

	// not waiting for child, so have to release
	currentRestart.Child.Release()

	currentRestart.MovePidfileBack()
	currentRestart = nil
}
Example #12
File: client.go Project: badoo/thunder
func (client *Client) readLoop() {
	for {
		msgid, body, bodyLen, status, err := ReadGpbsPacket(client.conn)

		// FIXME(antoxa): add special checking for streaming gpbs responses (we don't support them :))

		client.lk.Lock() // no defer, but keep code in one place and save on creating lambda on every read
		if client.numExpectedResponses == 0 {
			if status == ConnOK {
				log.Errorf("unexpected read: %s -> %s: msgid %d, len: %d", client.conn.RemoteAddr(), client.conn.LocalAddr(), msgid, bodyLen)
			} else {
				log.Errorf("error on conn: %s -> %s, %v", client.conn.RemoteAddr(), client.conn.LocalAddr(), err)
			}
			client.closeNoReuseLocked()
			client.lk.Unlock()
			return
		}
		client.lk.Unlock()

		if status != ConnOK {
			client.closeNoReuse() // must be closed before channel communication happens

			client.respch <- response{0, nil, err}
			return
		}

		// do not timeout accidentally on next read/write
		client.conn.SetDeadline(time.Time{})

		// decrement the counter here
		// since otherwise we might read next message immediately and not react to it being unexpected
		client.lk.Lock()
		client.numExpectedResponses--
		client.lk.Unlock()

		client.respch <- response{msgid, body, nil}
	}
}
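
The SetDeadline(time.Time{}) call above relies on the net.Conn contract: a zero time value means "no deadline", so a deadline set for one read cannot accidentally time out a later one. A small runnable illustration (addresses and sleeps are arbitrary; error handling trimmed for brevity):

package main

import (
	"log"
	"net"
	"time"
)

func main() {
	ln, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		log.Fatal(err)
	}
	defer ln.Close()

	go func() {
		c, err := net.Dial("tcp", ln.Addr().String())
		if err != nil {
			return
		}
		defer c.Close()
		time.Sleep(50 * time.Millisecond)
		c.Write([]byte("x"))
		time.Sleep(50 * time.Millisecond)
	}()

	conn, err := ln.Accept()
	if err != nil {
		log.Fatal(err)
	}
	defer conn.Close()

	conn.SetDeadline(time.Now().Add(time.Second)) // deadline for this read only
	buf := make([]byte, 1)
	if _, err := conn.Read(buf); err != nil {
		log.Fatal(err)
	}
	conn.SetDeadline(time.Time{}) // zero time: no deadline for later reads
	log.Printf("read ok, deadline cleared")
}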
Example #13
File: servers.go Project: badoo/thunder
func updateHosts() {
	hosts, info, err := getAvailableHosts()
	if err != nil {
		log.Errorf("Could not get available hosts: %s", err.Error())
		return
	}

	rusageInfo.Lock()
	rusageInfo.groupHosts = hosts
	rusageInfo.hostsInfo = info
	rusageInfo.Unlock()

	// TODO: update max parrots as well
}
Example #14
File: jobgen.go Project: badoo/thunder
func loadFullState(funcs ...*LoadStateFunc) (err error) {
	for _, funEntry := range funcs {
		startTs := time.Now().UnixNano()
		err = funEntry.fun()

		if err != nil {
			log.Errorf("Could not load %s: %s", funEntry.name, err.Error())
			return err
		}

		log.Debugf("Selected from %s for %.5f sec", funEntry.name, float64(time.Now().UnixNano()-startTs)/1e9)
	}

	return nil
}
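
The float64(time.Now().UnixNano()-startTs)/1e9 arithmetic recurs throughout these examples; time.Since expresses the same measurement directly. A tiny sketch (the label is illustrative):

package main

import (
	"fmt"
	"time"
)

func main() {
	start := time.Now()
	time.Sleep(10 * time.Millisecond)
	fmt.Printf("Selected for %.5f sec\n", time.Since(start).Seconds())
}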
Example #15
File: model.go Project: badoo/thunder
// addToTimetable inserts rows into the timetable and sets the insert id on each inserted row
func addToTimetable(tx *db.LazyTrx, ttRows []*TimetableEntry) error {
	if len(ttRows) == 0 {
		return nil
	}

	values := make([]string, 0)

	for _, row := range ttRows {
		val := fmt.Sprintf(
			"('%s', %d, %s, '%s', %d, %s, %d, '%s', '%s', %d, FROM_UNIXTIME(%d), FROM_UNIXTIME(%d))",
			db.EscapeString(row.class_name),
			row.default_retry,
			db.QNullInt64(row.repeat).Data,
			row.method,
			row.finished_successfully,
			db.QNullInt64(row.generation_id).Data,
			row.settings_id,
			db.EscapeString(row.location),
			db.EscapeString(row.JobData),
			0,
			row.created,
			row.NextLaunchTs.Int64)

		values = append(values, val)
	}

	res, err := tx.Exec(QUERY_INSERT_INTO_TIMETABLE, "values", &db.RawData{Data: strings.Join(values, ", ")})
	if err != nil {
		return err
	}

	insId, err := res.LastInsertId()
	if err != nil {
		log.Errorf("Could not get insert id even though insert was successfull: %s", err.Error())
		return err
	}

	for _, row := range ttRows {
		row.id = uint64(insId)
		insId += autoIncrementIncrement
	}

	return nil
}
Example #16
File: model.go Project: badoo/thunder
func loadNewIds(newIds map[uint64]bool) error {
	if len(newIds) == 0 {
		return nil
	}

	var ids []string

	for id := range newIds {
		ids = append(ids, strconv.FormatUint(id, 10))
	}

	rows, err := db.Query(QUERY_GET_NEW_SETTINGS, "new_settings_ids", strings.Join(ids, ","))
	if err != nil {
		return err
	}
	defer rows.Close() // assuming database/sql-style rows: release them once scanning is done

	for rows.Next() {
		entry := new(ScriptSettings)

		var (
			jobsStr           string
			nextTsCallbackStr sql.NullString
		)

		err = rows.Scan(
			&entry.id,
			&entry.class_name,
			&entry.instance_count,
			&entry.max_time,
			&jobsStr,
			&nextTsCallbackStr,
			&entry.repeat,
			&entry.retry,
			&entry.ttl,
			&entry.repeat_job,
			&entry.retry_job,
			&entry.location,
			&entry.location_type,
			&entry.developer,
			&entry.max_retries,
			&entry.profiling_enabled,
			&entry.debug_enabled,
			&entry.named_params,
			&entry.created)

		if err != nil {
			log.Errorf("Invalid settings: %s", err.Error())
			err = nil
			continue
		}

		entry.jobs, err = parseJobs(jobsStr)
		if err != nil {
			log.Errorf("Could not parse Jobs for %s #%d: %s", entry.class_name, entry.id, err.Error())
			err = nil
			continue
		}

		if nextTsCallbackStr.Valid {
			entry.have_next_ts_callback = true
			entry.next_ts_callback, err = parseNextTsCallback(nextTsCallbackStr.String)
			if err != nil {
				log.Errorf("Could not parse next ts callback for %s #%d: %s", entry.class_name, entry.id, err.Error())
				err = nil
				continue
			}
		} else {
			entry.have_next_ts_callback = false
		}

		if err != nil {
			log.Errorf("Scan error in loadNewIds: %s", err.Error())
			err = nil
			continue
		}

		allSettingsMutex.Lock()
		allSettings[entry.id] = entry
		allSettingsMutex.Unlock()
	}

	return nil
}
Example #17
File: restart.go Project: badoo/thunder
// initiate graceful restart process
//  *CAN NOT BE CALLED concurrently* as 'restart in progress' flag is not set immediately
func InitiateRestart() error {

	if RestartInprogress() {
		return fmt.Errorf("restart already inprogress")
	}

	// XXX: tried to move gathering childData into its own function, hard to get closing all files right with just defer :(
	childData := &RestartChildData{
		PPid:          os.Getpid(),
		GpbrpcSockets: make(RestartSockets),
		files:         []*os.File{},
	}
	defer func() { // close dup()-d files on exit (needs to be before we start populating files list, in case of any errors)
		for _, file := range childData.files {
			file.Close()
		}
	}()

	addFd := func() func(addr string) RestartSocket {
		fdOffset := 3
		return func(addr string) RestartSocket {
			rs := RestartSocket{
				Address: addr,
				Fd:      uintptr(fdOffset),
			}
			fdOffset++
			return rs
		}
	}()

	if HttpServer != nil {
		dupFile, err := dupFdFromListener(HttpServer.Listener)
		if err != nil {
			return fmt.Errorf("can't export fd for http_pprof_addr, err: %v", err)
		}
		childData.files = append(childData.files, dupFile)
		childData.HttpPProfSocket = addFd(HttpServer.Addr)
	}

	for _, server := range StartedServers {
		dupFile, err := dupFdFromListener(server.Server.Listener)
		if err != nil {
			return fmt.Errorf("can't export fd for %s, err: %v", server.Name, err)
		}

		childData.files = append(childData.files, dupFile)
		childData.GpbrpcSockets[server.Name] = addFd(server.Address)
	}

	var tmpPidfile *Pidfile
	var err error

	// move parent's pidfile out of the way (the child won't start otherwise)
	if pidfile != nil && pidfile.path != "" {
		tmpPidfile, err = pidfile.MoveTo(pidfile.path + RESTART_PIDFILE_SUFFIX)
		if err != nil {
			return fmt.Errorf("can't move pidfile: %v", err)
		}

		// will need to move the pidfile back in case of any further errors
		defer func() {
			if err != nil && tmpPidfile != nil {
				var e1 error // want to reuse global pidfile below, not redefine it (and preserve original err to return it)

				pidfile, e1 = tmpPidfile.MoveTo(pidfile.path)
				if e1 != nil {
					log.Errorf("[you'll now work without pidfile] can't move pidfile back: %v", e1)
				}
			}
		}()
	}

	currentRestart, err = restartRunChild(childData)
	if err != nil {
		return err
	}

	currentRestart.Pidfile = tmpPidfile

	return nil
}
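
InitiateRestart only prepares the parent side: it dup()s each listener fd and records offsets starting at 3 for the child. The child half (not shown in this example) typically rebuilds listeners with net.FileListener. A hedged sketch of both halves under the usual ExtraFiles convention; the names here are hypothetical, not thunder's actual restart API:

package main

import (
	"net"
	"os"
	"os/exec"
)

// Parent side: dup the listener fd and hand it to the child.
func spawnChild(ln *net.TCPListener) error {
	f, err := ln.File() // dup()s the fd
	if err != nil {
		return err
	}
	defer f.Close()

	cmd := exec.Command(os.Args[0], "-child")
	cmd.ExtraFiles = []*os.File{f} // becomes fd 3 in the child (POSIX only)
	return cmd.Start()
}

// Child side: rebuild the listener from the inherited fd.
func inheritListener() (net.Listener, error) {
	f := os.NewFile(3, "listener")
	defer f.Close() // FileListener dup()s again, so this copy can go
	return net.FileListener(f)
}

func main() { /* wiring omitted; see InitiateRestart above for the real flow */ }

Both (*net.TCPListener).File and net.FileListener duplicate the descriptor, which is why each *os.File can be closed once the other side holds its own copy.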
Example #18
File: jobgen.go Project: badoo/thunder
func doCycle() bool {
	var (
		jiRows         map[string]map[string]*JobInfoEntry
		scripts        map[string]*ScriptEntry
		flags          map[string]*FlagEntry
		scriptsRusage  map[string]*ScriptRusageEntry
		classLocTTRows map[string]map[string][]*TimetableEntry
	)

	unifiedStartTs := time.Now().UnixNano()

	startTs := time.Now().UnixNano()
	err := loadFullState(
		&LoadStateFunc{name: "Scripts", fun: func() (err error) { scripts, err = getGroupedScriptsForPlatform(); return }},
		&LoadStateFunc{name: "JobInfo", fun: func() (err error) { jiRows, err = getGroupedJobInfo(); return }},
		&LoadStateFunc{name: "Flags", fun: func() (err error) { flags, err = getFlags(); return }},
		&LoadStateFunc{name: "ScriptsRusage", fun: func() (err error) { scriptsRusage, err = getScriptRusageStats(); return }},
		&LoadStateFunc{name: "ScriptTimetable", fun: func() (err error) { classLocTTRows, err = selectTimetable(); return }})

	if err != nil {
		log.Errorf("Failed to select state in doCycle: %s", err.Error())
		return false
	}

	log.Debugf("Loaded for %.5f sec", float64(time.Now().UnixNano()-startTs)/1e9)

	startTs = time.Now().UnixNano()
	err = loadSettingsFromRows(jiRows, scripts)
	if err != nil {
		log.Errorf("Could not load settings from rows: %s", err.Error())
		return false
	}

	func() {
		allSettingsMutex.Lock()
		defer allSettingsMutex.Unlock()

		for _, row := range scripts {
			row.settings = allSettings[row.settings_id]
		}
	}()

	scriptsMap.Lock()
	scriptsMap.v = scripts
	scriptsMap.Unlock()

	log.Debugf("  Selected %d rows from flags", len(flags))
	log.Debugf("  Selected %d rows from scripts rusage", len(scriptsRusage))
	log.Debugf("Load settings for %.5f sec", float64(time.Now().UnixNano()-startTs)/1e9)

	startTs = time.Now().UnixNano()

	// We should not try to generate jobs for scripts that are not present in Script table
	// But we should not forget settings (e.g. last generation_id) for that script
	for class_name := range jiRows {
		if _, ok := scripts[class_name]; !ok {
			delete(jiRows, class_name)
		}
	}

	log.Debugf("Selected all for %.5f sec", float64(time.Now().UnixNano()-unifiedStartTs)/1e9)

	startTs = time.Now().UnixNano()
	updateLoadEstimates()

	log.Debugf("Load estimates updated for %.5f sec", float64(time.Now().UnixNano()-startTs)/1e9)
	func() {
		rusageInfo.Lock()
		defer rusageInfo.Unlock()
		log.Debugf("Group hosts: %+v", rusageInfo.groupHosts)
	}()

	startTs = time.Now().UnixNano()

	failedLocationsMutex.Lock()
	failedLocations = make(map[string]bool)
	failedLocationsMutex.Unlock()

	success := true

	if len(scripts) > 0 {
		throttle.setIntervalCh <- time.Second / time.Duration(len(scripts))
	}

	trigger(throttle.c, "throttle, start of cycle")

	for className, script := range scripts {
		<-throttle.c

		tx := new(db.LazyTrx)
		err := tx.Begin()
		if err != nil {
			log.Errorf("Could not start transaction in job generate: %s", err.Error())
			success = false
			continue
		}

		have := make(map[string]bool)
		locTtRows := classLocTTRows[className]
		if locTtRows != nil {
			for rawLoc, v := range locTtRows {
				loc, err := getLocationIdx(script.settings.location_type, rawLoc)
				if err != nil {
					log.Warningf("Broken settings for class %s: %s", className, err.Error())
					loc = rawLoc
				}
				if len(v) > 0 {
					have[loc] = true
				}
			}
		}

		add_to_timetable, err := generateJobs(tx, className, script.settings, jiRows[className], have, flags[className])

		if err != nil {
			log.Errorf("Could generate jobs for class %s: %s", className, err.Error())
			tx.Rollback()
			success = false
			continue
		}

		err = tx.Commit()
		if err != nil {
			log.Errorf("Could not commit generate jobs for class %s: %s", className, err.Error())
			success = false
			continue
		}

		per_location := make(map[string][]*TimetableEntry)

		for _, row := range add_to_timetable {
			allSettingsMutex.Lock()
			row.settings = allSettings[row.settings_id]
			allSettingsMutex.Unlock()

			if row.settings == nil {
				log.Warningf("Internal inconsistency error: Invalid settings for generated row: %+v", row)
				continue
			}

			key := DEFAULT_LOCATION_IDX
			if row.settings.location_type == LOCATION_TYPE_EACH {
				key = row.location
			}

			if _, ok := per_location[key]; !ok {
				per_location[key] = make([]*TimetableEntry, 0)
			}

			per_location[key] = append(per_location[key], row)
		}

		for location, rows := range per_location {
			notifyAboutNewTTRows(className, location, rows, true)
		}
	}

	notifyForFullTTSelect(classLocTTRows, true)

	log.Debugf("Processed %d classes for %.5f sec", len(scripts), float64(time.Now().UnixNano()-startTs)/1e9)
	log.Debugf("Total %.5f sec", float64(time.Now().UnixNano()-unifiedStartTs)/1e9)

	return success
}
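
The throttle channel in doCycle spreads the per-class transactions across roughly one second (time.Second / len(scripts) between permits). A minimal stand-in for that pacing using time.Ticker (interval and items are made up):

package main

import (
	"fmt"
	"time"
)

func main() {
	scripts := []string{"a", "b", "c"}
	interval := 30 * time.Millisecond // time.Second / len(scripts) in the original
	tick := time.NewTicker(interval)
	defer tick.Stop()

	for _, s := range scripts {
		<-tick.C // at most one script processed per interval
		fmt.Println("processing", s)
	}
}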
Example #19
File: launcher.go Project: badoo/thunder
func (d *LauncherData) processWaiting() {
	//	invalidEntries := make([]uint64, 0)
	var rawResp proto.Message

	for run_id, row := range d.waitingMap {
		err := db.DoInLazyTransaction(func(tx *db.LazyTrx) error {
			return setRunStatusToInit(tx, run_id, row.settings.max_time)
		})

		if err != nil {
			log.Errorf("Could not update run status of run_id=%d to %s: %s", run_id, RUN_STATUS_INIT, err.Error())
			return
		}

		// TODO: add host unreachable check

		d.updateStatus(row, RUN_STATUS_INIT)
		row.max_finished_ts.Int64 = row.created.Int64 + int64(row.settings.max_time)
		row.max_finished_ts.Valid = true

		script := getScriptPath(row.settings)

		params := []string{
			fmt.Sprintf("--id=%d", row.Id),
			row.ClassName,
			fmt.Sprintf("--instance-count=%d", row.settings.instance_count),
			fmt.Sprintf("--settings-id=%d", row.settings_id),
			fmt.Sprintf("--method=%s", row.method),
			fmt.Sprintf("--token=%s", row.token),
			fmt.Sprintf("--retry-attempt=%d", row.retry_attempt),
			fmt.Sprintf("--max-retries=%d", row.settings.max_retries),
			fmt.Sprintf("--max-ts=%d", row.created.Int64+int64(row.settings.max_time)),
			fmt.Sprintf("--force-sf-db=%s", db.GetDbName()),
		}

		if row.settings.named_params.Valid && row.settings.named_params.String != "" {
			params = append(params, fmt.Sprintf("--named-params=%s", row.settings.named_params.String))
		}

		if row.JobData != "" {
			params = append(params, fmt.Sprintf("--job-data=%s", row.JobData))
		}

		if testId := os.Getenv("PHPUNIT_SELENIUM_TEST_ID"); testId != "" {
			params = append(params, fmt.Sprintf("--PHPUNIT_SELENIUM_TEST_ID=%s", testId))
		}

		if row.settings.debug_enabled == 1 && row.settings.created > time.Now().Unix()-DEBUG_TIMEOUT {
			params = append(params, "--debug-mode")
		}

		if row.settings.profiling_enabled == 1 && row.settings.created > time.Now().Unix()-PROFILING_TIMEOUT {
			params = append(params, "--enable-profiling")
		}

		if row.timetable_id.Valid && row.timetable_id.Int64 != 0 {
			params = append(params, fmt.Sprintf("--timetable-id=%d", row.timetable_id.Int64))
		}

		ev := &badoo_phproxyd.RequestRun{
			Script:       proto.String(script),
			Hash:         proto.Uint64(row.Id),
			Tag:          proto.String(PHPROXY_TAG),
			Force:        proto.Int32(1),
			Params:       params,
			Store:        badoo_phproxyd.StoreT_FILES.Enum(),
			FreeAfterRun: proto.Bool(false),
		}

		_, rawResp, err = d.call(ev)
		if err != nil {
			continue
		}

		resp, ok := rawResp.(*badoo_phproxyd.ResponseGeneric)
		if !ok {
			log.Errorf("Unexpected response from host %s when doing run, type: %T, response: %+v", d.hostname, rawResp, rawResp)
			continue
		}

		if resp.GetErrorCode() != 0 {
			log.Errorf("Unexpected response from host %s when doing run, got code %d and text %s", d.hostname, resp.GetErrorCode(), resp.GetErrorText())
			continue
		}
	}
}
Example #20
File: service.go Project: badoo/thunder
// Call this when you want to start your servers and stuff
func EventLoop(ports []Port) {
	defer log.Debug("exiting")

	initPhaseDuration = time.Since(startupTime)

	daemonConfig := config.GetDaemonConfig()

	// service-stats ports
	ports = append(ports, GpbPort("service-stats-gpb", stats_ctx, badoo_service.Gpbrpc))
	ports = append(ports, JsonPort("service-stats-gpb/json", stats_ctx, badoo_service.Gpbrpc))

	// build map of ports and do some sanity checks
	ph := make(map[string]*Port)
	for i := 0; i < len(ports); i++ {
		p := &ports[i]
		ph[p.Name] = p

		// json and gpb ports should have the same context
		//  so try and warn user about passing plain values in (as it makes a copy)
		if reflect.ValueOf(p.Handler).Kind() != reflect.Ptr {
			log.Infof("port[%d].Handler should be a pointer (you want gpbs and json to use the same context, right?) (now: %T)", i, p.Handler)
		}
	}

	getRestartSocket := func(rcd *RestartChildData, portName, portAddr string) (*RestartSocket, *os.File) {
		if rcd == nil {
			return nil, nil
		}

		restartSocket, exists := rcd.GpbrpcSockets[portName]
		if !exists {
			return nil, nil
		}

		restartFile := os.NewFile(restartSocket.Fd, "")

		if restartSocket.Address != portAddr {
			return nil, restartFile
		}

		return &restartSocket, restartFile
	}

	// start 'em all
	for _, lcf := range daemonConfig.GetListen() {
		portName, portAddr := lcf.GetProto(), lcf.GetAddress()
		port := ph[portName]

		if port == nil {
			log.Warnf("ignoring unknown port: %s at %s", portName, portAddr)
			continue
		}

		if port.IsStarted {
			log.Warnf("ignoring double startup for port: %s at %s", portName, portAddr)
			continue
		}

		listener, err := func() (listener net.Listener, err error) { // it's important that this should be a function, see defer inside
			restartSocket, restartFile := getRestartSocket(restartData, portName, portAddr)

			// this whole fd/file affair is very inconvenient,
			//  since when getRestartSocket() returns an fd we can't close it yet, as it may still be used by FileListener
			defer restartFile.Close()

			if restartSocket == nil {
				listener, err = net.Listen("tcp", portAddr)
				if err != nil {
					log.Errorf("listen failed for server %s at %s: %s", portName, portAddr, err)
					return
				}
				log.Infof("port %s bound to address %s", portName, listener.Addr())

			} else {

				listener, err = net.FileListener(restartFile) // this dup()-s
				if err != nil {
					log.Errorf("failed to grab parent fd %d for %s at %s: %s", restartSocket.Fd, portName, portAddr, err)
					return
				}

				log.Infof("port %s bound to address %s (parent fd: %d)", portName, listener.Addr(), restartSocket.Fd)
			}
			return
		}()

		if err != nil {
			os.Exit(1)
		}

		// enable pinba only for ports that explicitly request it
		ps := func() gpbrpc.PinbaSender {
			if !lcf.GetPinbaEnabled() {
				return nil // explicit nil here
			}

			if pinbaSender == nil {
				log.Warnf("pinba is not configured, but pinba_enabled IS set for port %s: %s", portName, portAddr)
				return nil // explicit nil here
			}

			log.Infof("pinba configured for port %s:%s -> %s", portName, portAddr, pinbaSender.Address)
			return pinbaSender
		}()

		// slow request log time
		slowRequestTime := time.Duration(daemonConfig.GetSlowRequestMs()) * time.Millisecond

		srv := &Server{
			Name:    lcf.GetProto(),
			Address: lcf.GetAddress(),
			Server:  gpbrpc.NewServer(listener, port.Proto, port.Codec, port.Handler, ps, slowRequestTime),
		}
		go srv.Server.Serve()

		port.IsStarted = true
		StartedServers[port.Name] = srv // save it for laterz
	}

	// kill parent if this is a child of graceful restart
	if restartData != nil {
		syscall.Kill(restartData.PPid, syscall.SIGQUIT)
	}

	log.Infof("entering event loop")

	exitMethod := wait_for_signals()

	if exitMethod == EXIT_GRACEFULLY { // wait for established connections to close and then die

		// FIXME: should stop servers from accepting new connections here!

		const ATTEMPTS_PER_SEC = 2
		maxAttempts := daemonConfig.GetParentWaitTimeout() * ATTEMPTS_PER_SEC

		for i := uint32(0); i < maxAttempts; i++ {
			for _, srv := range StartedServers {
				currConn := atomic.LoadUint64(&srv.Server.Stats.ConnCur)
				if currConn > 0 {
					log.Debugf("%s still has %d connections", srv.Name, currConn)
					time.Sleep(time.Second / ATTEMPTS_PER_SEC)
				}
			}
		}
	} else {
		// do nothing for EXIT_IMMEDIATELY
	}

	// doing cleanups here
	// XXX: can this be moved to defer at the start of this function?
	if pidfile != nil {
		pidfile.CloseAndRemove()
	}
}