示例#1
0
文件: jobgen.go 项目: badoo/thunder
// Setup initialises the package-level state from config and starts the
// background goroutines of the job generator. The steps run strictly in the
// order written below; the process is terminated via log.Fatalf when old
// heartbeats cannot be cleared.
func Setup(config common.FullConfig) {
	isDevelServer = config.GetIsDevel()

	// Per-class registries of dispatcher, launcher and killer state.
	dispatchThreads.v = make(map[string]map[string]*DispatcherData)
	launcherThreads.v = make(map[string]*LauncherData)
	killerThreads.v = make(map[string]map[string]bool)

	// The throttle channels are created before throttleThread is started.
	throttle.c = make(chan bool, THROTTLE_CHAN_CAPACITY)
	throttle.setIntervalCh = make(chan time.Duration, 1)
	go throttleThread()

	// Resource usage bookkeeping.
	rusageInfo.groupsMaxParrots = make(map[string]uint64)
	rusageInfo.groupHosts = make(map[string][]string)
	rusageInfo.hostsInfo = make(map[string]*ServerInfo)
	rusageInfo.loadEstimate = make(map[string]*ScriptRusageEntry)
	rusageInfo.timetableRusage = make(map[uint64]*ScriptRusageEntry)
	rusageInfo.groupIdx = make(map[string]uint64)

	// Defaults taken from the "default" section of the config.
	defaults := config.GetDefault()

	defaultParasiteMemory = defaults.GetParasiteMemory()
	defaultMinIdleCpu = defaults.GetMinIdleCpu()
	defaultMinMemory = defaults.GetMinMemory()
	defaultMinMemoryRatio = defaults.GetMinMemoryRatio()
	defaultMaxMemory = defaults.GetMaxMemory()
	defaultRusage = defaults.GetRusage()

	cycleMs = config.GetCycleMs()
	ttReloadIntervalMs = config.GetFullTimetableReloadIntervalMs()
	autoIncrementIncrement = getAutoIncrementIncrement()

	// Launcher paths; the developer path is optional and only recorded
	// when it is present in the config.
	launcherSection := config.GetLauncher()
	hostSuffix = launcherSection.GetHostSuffix()
	basePath = launcherSection.GetBasePath()
	if launcherSection.DeveloperPath != nil {
		haveDeveloper = true
		developerPath = launcherSection.GetDeveloperPath()
		log.Printf("We have developer dir: %s", developerPath)
	}

	// One synchronous host update first, then the periodic updater.
	log.Printf("Updating hosts")
	updateHosts()

	log.Printf("Launching update hosts thread")
	go updateHostsThread()

	log.Printf("Clearing old heartbeats")
	if heartbeatErr := clearOldHeartbeats(); heartbeatErr != nil {
		log.Fatalf("Could not clear old heartbeats: %s", heartbeatErr.Error())
	}

	log.Printf("Launching periodic run queue select thread")
	go runqueuePeriodicSelectThread()
	go forceCheckDeletedThread()
}
示例#2
0
// checkZero fires the "zerott" trigger once the dispatcher has neither added
// rows nor waiting rows left. If a kill request is pending at that moment, it
// is answered with a nil result on its ResCh and then cleared.
// src is only used in the debug log line.
func (d *DispatcherData) checkZero(src string) {
	// Still have work tracked for this class/location: nothing to report.
	if len(d.addedMap) != 0 || d.waitingList.Len() != 0 {
		return
	}

	log.Debugf("No rows left in class=%s, location=%s (%s)", d.className, d.location, src)
	trigger(d.zeroTTCh, "zerott")

	pending := d.killRequest
	if pending == nil {
		return
	}

	log.Printf("Killed all jobs in class=%s, location=%s, waiting on continue channel", d.className, d.location)
	// The send may block until the requester reads the result.
	pending.ResCh <- nil
	d.killRequest = nil
	log.Printf("Can continue dispatching in class=%s, location=%s", d.className, d.location)
}
示例#3
0
文件: launcher.go 项目: badoo/thunder
// updateStatus switches row to newStatus. It removes the row from the map
// that tracks its current status, inserts it into the map for newStatus (when
// such maps exist), updates row.RunStatus and stamps the timestamp field that
// belongs to the new status with the current Unix time.
//
// The call is a no-op when the row already has newStatus. Statuses other than
// waiting/init/running/finished do not touch any timestamp.
func (d *LauncherData) updateStatus(row *RunQueueEntry, newStatus string) {
	if row.RunStatus == newStatus {
		return
	}

	if LAUNCHER_DB_DEBUG {
		log.Printf("Updating status of row #%d (%s) from %s to %s", row.Id, row.ClassName, row.RunStatus, newStatus)
	}

	// Move the row between the per-status maps before changing RunStatus,
	// because the old map is looked up by the current status.
	if old := d.getMapByStatus(row.RunStatus); old != nil {
		delete(old, row.Id)
	}

	if m := d.getMapByStatus(newStatus); m != nil {
		m[row.Id] = row
	}

	row.RunStatus = newStatus

	now := time.Now().Unix()

	switch newStatus {
	case RUN_STATUS_WAITING:
		row.waiting_ts.Valid = true
		row.waiting_ts.Int64 = now
	case RUN_STATUS_INIT:
		row.init_ts.Valid = true
		row.init_ts.Int64 = now
	case RUN_STATUS_RUNNING:
		row.running_ts.Valid = true
		row.running_ts.Int64 = now
	case RUN_STATUS_FINISHED:
		// A finished row gets its own timestamp; writing running_ts here
		// would destroy the recorded start of the run.
		row.finished_ts.Valid = true
		row.finished_ts.Int64 = now
	}
}
示例#4
0
文件: launcher.go 项目: badoo/thunder
// addToMaps registers row in the map for its run status and in d.allMap.
// Rows without settings, rows with a status that has no map, and rows whose
// id is already present in either map are rejected with a warning and left
// unregistered.
func (d *LauncherData) addToMaps(row *RunQueueEntry) {
	if row.settings == nil {
		// Capture the current goroutine's stack so the caller that passed
		// the incomplete row can be identified from the log.
		stack := make([]byte, 5000)
		written := runtime.Stack(stack, false)
		log.Warningf("Incorrect row in run queue (addToMaps), settings are invalid: %+v\ntrace:%s", row, stack[0:written])
		return
	}

	statusMap := d.getMapByStatus(row.RunStatus)
	if statusMap == nil {
		log.Warnf("Broken run status: %+v", row)
		return
	}

	if LAUNCHER_DB_DEBUG {
		log.Printf("RQ row from db: id=%d, class=%s, job_data=%s, hostname=%s", row.Id, row.ClassName, row.JobData, row.hostname)
	}

	switch {
	case d.allMap[row.Id] != nil:
		log.Warnf("Trying to add already added into run_queue (all map): %+v", row)
	case statusMap[row.Id] != nil:
		log.Warnf("Trying to add already added into run_queue (own map): %+v", row)
	default:
		statusMap[row.Id] = row
		d.allMap[row.Id] = row
	}
}
示例#5
0
// printFreeResources logs one "freeresources" line per host in infoMap,
// ordered by hostname so that the output is stable between calls.
func printFreeResources(infoMap map[string]*ServerInfo) {
	hostnames := make([]string, 0, len(infoMap))
	for name := range infoMap {
		hostnames = append(hostnames, name)
	}

	// Map iteration order is random, so sort before printing.
	sort.Strings(hostnames)

	for i := range hostnames {
		name := hostnames[i]
		log.Printf("freeresources: host: %-5s serverInfo:%s", name, infoMap[name])
	}
}
示例#6
0
文件: settings.go 项目: badoo/thunder
// loadSettingsFromRows collects every settings id referenced by scripts and
// jiRows that is not yet present in allSettings and loads those ids through
// loadNewIds. It returns the error from loadNewIds, or nil when nothing had
// to be loaded or loading succeeded.
func loadSettingsFromRows(jiRows map[string]map[string]*JobInfoEntry, scripts map[string]*ScriptEntry) error {
	unknownIds := make(map[uint64]bool)
	jobInfoTotal := 0

	// allSettings is only inspected while holding allSettingsMutex; the
	// closure keeps the critical section (and its deferred unlock) short.
	func() {
		allSettingsMutex.Lock()
		defer allSettingsMutex.Unlock()

		remember := func(id uint64) {
			if _, known := allSettings[id]; !known {
				unknownIds[id] = true
			}
		}

		for _, script := range scripts {
			remember(script.settings_id)
		}

		for _, byLocation := range jiRows {
			for _, info := range byLocation {
				jobInfoTotal++
				remember(info.settings_id)
			}
		}
	}()

	if len(unknownIds) > 0 {
		startedNs := time.Now().UnixNano()
		if err := loadNewIds(unknownIds); err != nil {
			return err
		}
		elapsedNs := time.Now().UnixNano() - startedNs
		log.Printf("Loaded %d new ids for %.5f sec", len(unknownIds), float64(elapsedNs)/1e9)
	}

	log.Debugln("  Selected", len(scripts), "rows from scripts")
	log.Debugln("  Selected", jobInfoTotal, "rows from job info")
	return nil
}
示例#7
0
文件: killer.go 项目: badoo/thunder
// startKilling sends a kill request to every dispatcher of className that has
// not been sent one yet. Locations that accepted the request are remembered in
// killerThreads.v[className]. The send is non-blocking: when a dispatcher's
// kill channel is busy the location is skipped with a warning and stays
// unmarked, so a later call will try it again.
func startKilling(className string) {
	// Both registries are held for the whole call, killerThreads first.
	killerThreads.Lock()
	defer killerThreads.Unlock()

	dispatchThreads.Lock()
	defer dispatchThreads.Unlock()

	dispatchers, ok := dispatchThreads.v[className]
	if !ok {
		return
	}

	requested := killerThreads.v[className]
	if requested == nil {
		requested = make(map[string]bool)
		killerThreads.v[className] = requested
	}

	for location, dispatcher := range dispatchers {
		if requested[location] {
			// A request was already delivered to this location.
			continue
		}

		// Buffer of one on the result channel.
		request := &KillRequest{ResCh: make(chan error, 1)}

		log.Printf("Sending kill request to class=%s, location=%s", className, location)

		select {
		case dispatcher.killRequestCh <- request:
			requested[location] = true
		default:
			log.Warnf("Could not send kill request to class=%s, location=%s, kill channel was busy", className, location)
		}
	}
}
示例#8
0
文件: jobgen.go 项目: badoo/thunder
// generateJobs decides which timetable rows have to be created for className
// and writes the related job-info changes through tx.
//
// haveTTRows must be nil if there are no timetable entries for any location
// otherwise it must have only true entries like map["location"] => true
// (an empty non-nil map is normalised to nil at the top of the function).
// jiRows is looked up by the key returned from getLocationIdx and may be nil.
// flags may be nil; when set, pending kill, pause and run requests are
// processed before any generation happens.
//
// It returns the timetable entries that were handed to addToTimetable. When
// err is non-nil the slice may be partially filled and nothing guarantees
// that it was stored.
//
// probably jobs generation can be simplified, it is just the way it is
func generateJobs(tx *db.LazyTrx, className string, settings *ScriptSettings, jiRows map[string]*JobInfoEntry, haveTTRows map[string]bool, flags *FlagEntry) (add_to_timetable []*TimetableEntry, err error) {
	if haveTTRows != nil && len(haveTTRows) == 0 {
		haveTTRows = nil
	}

	now := time.Now().Unix()

	// Changes are accumulated in these lists and applied in one batch after
	// the per-location loop.
	add_to_timetable = make([]*TimetableEntry, 0)
	add_job_info := make([]*JobInfoEntry, 0)
	set_finish_jobs := make([]string, 0)
	set_init_jobs := make([]string, 0)
	set_jobs_generated_js := make([]string, 0)
	prepare_next_generation := make([]NextGenParams, 0)

	have_finish_jobs := settings.jobs.Have_finish_jobs
	is_any := (settings.location_type == LOCATION_TYPE_ANY)
	is_temporary := settings.jobs.Temporary
	temporary_can_run := false

	if flags != nil {
		// A pending kill request always ends the call: nothing is generated
		// in this branch.
		if flags.kill_requested_ts.Valid {
			// The class counts as done once no timetable rows are left.
			is_done := (haveTTRows == nil)
			if is_done {
				log.Printf("Class %s is done, all is ok", className)

				if !flags.killed_ts.Valid {
					// continueDispatchAfterKill is registered as a commit callback on tx.
					tx.AddCommitCallback(func() { continueDispatchAfterKill(className) })
					if err = setKilledFlag(tx, className); err != nil {
						return
					}

					if err = prepareNextGeneration(tx, have_finish_jobs, className, settings); err != nil {
						return
					}
				}
			} else {
				log.Printf("Class %s is not done", className)

				startKilling(className)

				// not the best place to put it, but it works
				if err = setMaxFinishedTs(tx, className, flags.kill_request_employee_id.Int64, flags.kill_requested_ts.Int64); err != nil {
					return
				}
			}

			return
		}

		// Stop generating new job generations when we are on pause
		if flags.pause_requested_ts.Valid {
			is_done := generationFinished(className, haveTTRows, jiRows, settings)

			if is_done && !flags.paused_ts.Valid {
				if err = setPausedFlag(tx, className); err != nil {
					return
				}

				// Mirror the stored flag in memory so the check below sees it.
				flags.paused_ts = sql.NullInt64{Int64: now, Valid: true}
			}

			// Only LOCATION_TYPE_ANY classes that are not paused yet continue
			// past this point while a pause is requested.
			if !is_any || flags.paused_ts.Valid {
				return
			}
		}

		if is_temporary && flags.run_requested_ts.Valid && is_any {
			// We accepted run request, which means that we already generated jobs
			if flags.run_accepted_ts.Valid {
				if generationFinished(className, haveTTRows, jiRows, settings) {
					if err = resetRunRequest(tx, className); err != nil {
						return
					}

					if err = prepareNextGeneration(tx, have_finish_jobs, className, settings); err != nil {
						return
					}

					return
				}
			} else {
				if err = setRunAccepted(tx, className); err != nil {
					return
				}
			}

			temporary_can_run = true
		}
	}

	// Temporary classes generate only when a run request enabled them above;
	// classes with JOBS_TYPE_NONE never generate.
	if is_temporary && !temporary_can_run || settings.jobs.Type == JOBS_TYPE_NONE {
		return
	}

	locations := make([]string, 0)

	if !is_any {
		// Per-location classes: generate only for locations that currently
		// have no timetable rows.
		all_locations := getLocations(settings)
		timetable_locations := make(map[string]bool)

		if haveTTRows != nil {
			for location, _ := range haveTTRows {
				timetable_locations[location] = true
			}
		}

		// there can be failed hosts that are still running: we must really compare host names, not just counts
		for _, loc := range all_locations {
			if _, ok := timetable_locations[loc]; !ok {
				locations = append(locations, loc)
			}
		}

		if len(locations) == 0 {
			return
		}
	} else {
		// "Any" classes: nothing is generated while timetable rows remain.
		if haveTTRows != nil && len(haveTTRows) > 0 {
			return
		}

		locations = getLocations(settings)
	}

	tt_location_type := LOCATION_TYPE_EACH
	if is_any {
		tt_location_type = LOCATION_TYPE_ANY
	}

	for _, location := range locations {
		// job_info_key is the key used for this location in jiRows.
		job_info_key, gliErr := getLocationIdx(tt_location_type, location)
		if gliErr != nil {
			// A location whose index cannot be resolved is skipped, not fatal.
			log.Warningf("Error getting location index for %s for location_type %s and location %s: %s", className, tt_location_type, location, gliErr.Error())
			continue
		}

		var row *JobInfoEntry

		// No job info for this key yet: build a new entry with generation 0
		// and queue it for insertion.
		if jiRows == nil || jiRows[job_info_key] == nil {
			row = &JobInfoEntry{generation_id: 0,
				class_name:           className,
				location:             job_info_key,
				next_generate_job_ts: sql.NullInt64{Int64: int64(getNextJobGenerateTs(className, true, 0, settings)), Valid: true},
				settings_id:          settings.id}

			add_job_info = append(add_job_info, row)
		} else {
			row = jiRows[job_info_key]
		}

		// Template timetable row; the branches below adjust method, JobData
		// and default_retry before it is queued.
		tt_row := &TimetableEntry{
			class_name:            className,
			default_retry:         settings.retry_job,
			repeat:                settings.repeat_job,
			method:                METHOD_RUN,
			finished_successfully: 0,
			generation_id:         sql.NullInt64{Int64: int64(row.generation_id), Valid: true},
			settings_id:           row.settings_id,
			location:              location,
			created:               uint64(now),
		}

		tt_row.NextLaunchTs.Valid = true
		tt_row.NextLaunchTs.Int64 = now

		// Jobs for the current generation were already generated (or the
		// initJobs row was already scheduled): either schedule finishJobs or
		// queue the location for the next generation.
		if row.jobs_generated_ts.Valid || row.init_jobs_ts.Valid {
			if have_finish_jobs && !row.finish_jobs_ts.Valid {
				set_finish_jobs = append(set_finish_jobs, job_info_key)

				tt_row.JobData = `"finishJobs"`
				tt_row.method = METHOD_FINISH_JOBS
				tt_row.default_retry = settings.retry_job

				add_to_timetable = append(add_to_timetable, tt_row)
			} else {
				prepare_next_generation = append(prepare_next_generation, NextGenParams{Location: job_info_key, JobInfo: row})
			}

			continue
		} else if row.next_generate_job_ts.Int64 > now {
			// The next generation time for this location is still in the future.
			continue
		}

		// Custom job types get a single initJobs row instead of a job list.
		if settings.jobs.Type == JOBS_TYPE_CUSTOM {
			set_init_jobs = append(set_init_jobs, job_info_key)

			tt_row.JobData = `"initJobs"`
			tt_row.method = METHOD_INIT_JOBS
			tt_row.default_retry = uint32(settings.retry.Int64)

			add_to_timetable = append(add_to_timetable, tt_row)
			continue
		}

		jobs, mjlErr := makeJobsList(settings.jobs, settings.instance_count, className)
		if mjlErr != nil {
			log.Warningf("Error generating jobs for %+v with instance_count=%d and jobs=%s: %s", className, settings.instance_count, settings.jobs, mjlErr.Error())
			continue
		}

		// One timetable row per job, each a copy of the template row.
		for _, job := range jobs {
			tt_row_copy := new(TimetableEntry)
			*tt_row_copy = *tt_row
			tt_row_copy.JobData = job
			add_to_timetable = append(add_to_timetable, tt_row_copy)
		}

		set_jobs_generated_js = append(set_jobs_generated_js, job_info_key)
	}

	// Apply the accumulated changes through tx; the first failure aborts the
	// rest and is returned to the caller.
	if err = addJobInfo(tx, add_job_info); err != nil {
		return
	}

	if err = setFinishJobsTs(tx, className, set_finish_jobs); err != nil {
		return
	}

	if err = batchPrepareNextGeneration(tx, have_finish_jobs, className, prepare_next_generation, settings); err != nil {
		return
	}

	if err = setInitJobsTs(tx, className, set_init_jobs); err != nil {
		return
	}

	if err = setJobsGeneratedTs(tx, className, set_jobs_generated_js); err != nil {
		return
	}

	if err = addToTimetable(tx, add_to_timetable); err != nil {
		return
	}

	return
}