func batchSetNextGenerateJobTs(tx *db.LazyTrx, className string, params []NextGenParams, settings *ScriptSettings) (err error) { if len(params) == 0 { return } fields := `(class_name, location, next_generate_job_ts, jobs_generated_ts, jobs_finished_ts, init_jobs_ts, finish_jobs_ts, generation_id, settings_id)` values := make([]string, 0, len(params)) for _, par := range params { firstRun := !par.JobInfo.jobs_generated_ts.Valid var prevTs, nextTs uint64 if !firstRun { prevTs = uint64(par.JobInfo.jobs_generated_ts.Int64) } nextTs = getNextJobGenerateTs(className, firstRun, prevTs, settings) values = append(values, fmt.Sprintf("('%s', '%s', FROM_UNIXTIME(%d), NULL, NULL, NULL, NULL, 1, %d)", db.EscapeString(className), db.EscapeString(par.Location), nextTs, settings.id)) } _, err = tx.Exec( QUERY_BATCH_SET_NEXT_GENERATE_JOB_TS, "fields", fields, "values", &db.RawData{Data: strings.Join(values, ", ")}) return }
func setRunStatusToInit(tx *db.LazyTrx, run_id uint64, max_time int) (err error) { _, err = tx.Exec( QUERY_UPDATE_RUN_STATUS_INIT, "max_time", max_time, "id", run_id) return }
func setMaxFinishedTs(tx *db.LazyTrx, className string, employeeId int64, ts int64) (err error) { _, err = tx.Exec( QUERY_SET_MAX_FINISHED_TS_NOW, "class_name", className, "employee_id", employeeId, "ts", ts) return }
func setNextGenerateJobTs(tx *db.LazyTrx, className string, settings *ScriptSettings) (err error) { nextTs := getNextJobGenerateTs(className, true, 0, settings) _, err = tx.Exec( QUERY_SET_NEXT_GENERATE_JOB_TS, "class_name", className, "settings_id", settings.id, "next_generate_job_ts", nextTs) return }
func clearJobsResultsForLocations(tx *db.LazyTrx, className string, locations []string) (err error) { if len(locations) == 0 { return } _, err = tx.Exec( QUERY_CLEAR_JOB_RESULTS_FOR_LOCATIONS, "class_name", className, "locations", db.INStr(locations)) return }
func setJobsGeneratedTs(tx *db.LazyTrx, className string, locations []string) (err error) { if len(locations) == 0 { return } _, err = tx.Exec( QUERY_SET_JOBS_GENERATED_TS, "class_name", className, "locations", db.INStr(locations)) return }
func setInitJobsTs(tx *db.LazyTrx, className string, locations []string) (err error) { if len(locations) == 0 { return } _, err = tx.Exec( QUERY_SET_INIT_JOBS_TS, "class_name", className, "locations", db.INStr(locations)) return }
func deleteAddedFromTimetable(tx *db.LazyTrx, ids []uint64) error { if len(ids) == 0 { return nil } res, err := tx.Exec(QUERY_DELETE_FROM_TIMETABLE, "ids", db.INUint64(ids), "add_where", " AND added_to_queue_ts IS NOT NULL") if err != nil { return err } aff, err := res.RowsAffected() if err != nil { return err } if aff != int64(len(ids)) { return fmt.Errorf("deleteAddedFromTimetable unexpected ttIds deleted count:%d instead of %d ids:%+v", aff, len(ids), ids) } return nil }
// method inserts rows into timetable and sets insert id func addToTimetable(tx *db.LazyTrx, ttRows []*TimetableEntry) error { if len(ttRows) == 0 { return nil } values := make([]string, 0) for _, row := range ttRows { val := fmt.Sprintf( "('%s', %d, %s, '%s', %d, %s, %d, '%s', '%s', %d, FROM_UNIXTIME(%d), FROM_UNIXTIME(%d))", db.EscapeString(row.class_name), row.default_retry, db.QNullInt64(row.repeat).Data, row.method, row.finished_successfully, db.QNullInt64(row.generation_id).Data, row.settings_id, db.EscapeString(row.location), db.EscapeString(row.JobData), 0, row.created, row.NextLaunchTs.Int64) values = append(values, val) } res, err := tx.Exec(QUERY_INSERT_INTO_TIMETABLE, "values", &db.RawData{Data: strings.Join(values, ", ")}) if err != nil { return err } insId, err := res.LastInsertId() if err != nil { log.Errorf("Could not get insert id even though insert was successfull: %s", err.Error()) return err } for _, row := range ttRows { row.id = uint64(insId) insId += autoIncrementIncrement } return nil }
func addJobInfo(tx *db.LazyTrx, rows []*JobInfoEntry) (err error) { if len(rows) == 0 { return } fields := `(class_name, location, next_generate_job_ts, settings_id)` values := make([]string, 0, len(rows)) for _, row := range rows { values = append(values, fmt.Sprintf( "('%s', '%s', FROM_UNIXTIME(%d), %d)", db.EscapeString(row.class_name), db.EscapeString(row.location), row.next_generate_job_ts.Int64, row.settings_id)) } _, err = tx.Exec( QUERY_INSERT_INTO_JOB_INFO, "fields", fields, "values", &db.RawData{Data: strings.Join(values, ",")}) return }
func deleteFromRunQueue(tx *db.LazyTrx, ids []uint64, prevStatus string) error { if len(ids) == 0 { return nil } res, err := tx.Exec( QUERY_DELETE_FROM_QUEUE, "ids", db.INUint64(ids), "status", prevStatus) if err != nil { return err } aff, err := res.RowsAffected() if err != nil { return err } if aff != int64(len(ids)) { return fmt.Errorf("deleteFromRunQueue failed aff:%d expected:%d ids:%+v prevStatus:%s", aff, len(ids), ids, prevStatus) } return nil }
// logTTFinish persists a timetable row's finish-state transition via
// QUERY_UPDATE_TIMETABLE_STATUS and verifies exactly one row was updated.
//
// The WHERE clause is built as a compare-and-swap guard: the update only
// matches if added_to_queue_ts and finish_count still hold the values the
// caller observed before this transition, so a concurrent update makes the
// affected-row count differ from 1 and surfaces as an error.
func logTTFinish(tx *db.LazyTrx, row *TimetableEntry, haveOldFinishCount bool, oldFinishCount uint) error {
	var addedToQueueWhere, addedFinishCountWhere string
	// The guard checks the PREVIOUS state: if the row now has
	// added_to_queue_ts set, it must previously have been NULL, and vice versa.
	if row.added_to_queue_ts.Valid {
		addedToQueueWhere = "AND added_to_queue_ts IS NULL"
	} else {
		addedToQueueWhere = "AND added_to_queue_ts IS NOT NULL"
	}
	// Expected previous finish_count: explicit when the caller supplies it,
	// otherwise the current value minus one (row.finish_count was already bumped).
	if haveOldFinishCount {
		addedFinishCountWhere = fmt.Sprintf("AND finish_count = %d", oldFinishCount)
	} else {
		addedFinishCountWhere = fmt.Sprintf("AND finish_count = %d", row.finish_count-1)
	}
	res, err := tx.Exec(QUERY_UPDATE_TIMETABLE_STATUS,
		"finished_ts", db.QNullInt64(row.finished_ts),
		"next_launch_ts", db.QNullInt64(row.NextLaunchTs),
		"added_to_queue_ts", db.QNullInt64(row.added_to_queue_ts),
		"finish_count", row.finish_count,
		"retry_count", row.retry_count,
		"finished_successfully", row.finished_successfully,
		"id", row.id,
		"add_where", fmt.Sprintf("%s %s", addedToQueueWhere, addedFinishCountWhere))
	if err != nil {
		return err
	}
	aff, err := res.RowsAffected()
	if err != nil {
		return err
	}
	// Exactly one row must have matched the CAS guard.
	if aff != 1 {
		return fmt.Errorf("unexpected affected rows = %d for tt_row = %+v", aff, row)
	}
	return nil
}
func updateRunStatus(tx *db.LazyTrx, run_id uint64, status, prevStatus string) error { res, err := tx.Exec( QUERY_UPDATE_RUN_STATUS, "status", status, "prev_status", prevStatus, "id", run_id) if err != nil { return err } aff, err := res.RowsAffected() if err != nil { return err } if aff != 1 { return fmt.Errorf("Previous status mismatch for rq row id=%d: tried %s -> %s", run_id, prevStatus, status) } return nil }
// doCycle runs one full scheduler pass: load all state, refresh settings and
// load estimates, then generate jobs per script class (each class in its own
// transaction) and notify listeners about new timetable rows.
// Returns false if any stage for any class failed; true otherwise.
func doCycle() bool {
	var (
		jiRows         map[string]map[string]*JobInfoEntry
		scripts        map[string]*ScriptEntry
		flags          map[string]*FlagEntry
		scriptsRusage  map[string]*ScriptRusageEntry
		classLocTTRows map[string]map[string][]*TimetableEntry
	)
	unifiedStartTs := time.Now().UnixNano()
	startTs := time.Now().UnixNano()
	// Load every piece of scheduler state; a failure of any loader aborts the cycle.
	err := loadFullState(
		&LoadStateFunc{name: "Scripts", fun: func() (err error) { scripts, err = getGroupedScriptsForPlatform(); return }},
		&LoadStateFunc{name: "JobInfo", fun: func() (err error) { jiRows, err = getGroupedJobInfo(); return }},
		&LoadStateFunc{name: "Flags", fun: func() (err error) { flags, err = getFlags(); return }},
		&LoadStateFunc{name: "ScriptsRusage", fun: func() (err error) { scriptsRusage, err = getScriptRusageStats(); return }},
		&LoadStateFunc{name: "ScriptTimetable", fun: func() (err error) { classLocTTRows, err = selectTimetable(); return }})
	if err != nil {
		log.Errorf("Failed to select state in doCycle: %s", err.Error())
		return false
	}
	log.Debugf("Loaded for %.5f sec", float64(time.Now().UnixNano()-startTs)/1e9)
	startTs = time.Now().UnixNano()
	err = loadSettingsFromRows(jiRows, scripts)
	if err != nil {
		log.Errorf("Could not load settings from rows: %s", err.Error())
		return false
	}
	// Attach freshly loaded settings to each script under the settings lock.
	func() {
		allSettingsMutex.Lock()
		defer allSettingsMutex.Unlock()
		for _, row := range scripts {
			row.settings = allSettings[row.settings_id]
		}
	}()
	// Publish the new scripts map for other goroutines.
	scriptsMap.Lock()
	scriptsMap.v = scripts
	scriptsMap.Unlock()
	log.Debugf(" Selected %d rows from flags", len(flags))
	log.Debugf(" Selected %d rows from scripts rusage", len(scriptsRusage))
	log.Debugf("Load settings for %.5f sec", float64(time.Now().UnixNano()-startTs)/1e9)
	startTs = time.Now().UnixNano()
	// We should not try to generate jobs for scripts that are not present in Script table
	// But we should not forget settings (e.g. last generation_id) for that script
	for class_name := range jiRows {
		if _, ok := scripts[class_name]; !ok {
			delete(jiRows, class_name)
		}
	}
	log.Debugf("Selected all for %.5f sec", float64(time.Now().UnixNano()-unifiedStartTs)/1e9)
	startTs = time.Now().UnixNano()
	updateLoadEstimates()
	log.Debugf("Load estimates updated for %.5f sec", float64(time.Now().UnixNano()-startTs)/1e9)
	func() {
		rusageInfo.Lock()
		defer rusageInfo.Unlock()
		log.Debugf("Group hosts: %+v", rusageInfo.groupHosts)
	}()
	startTs = time.Now().UnixNano()
	// Reset per-cycle failure bookkeeping.
	failedLocationsMutex.Lock()
	failedLocations = make(map[string]bool)
	failedLocationsMutex.Unlock()
	success := true
	// Spread class processing evenly across one second via the throttle channel.
	if len(scripts) > 0 {
		throttle.setIntervalCh <- time.Second / time.Duration(len(scripts))
	}
	trigger(throttle.c, "throttle, start of cycle")
	for className, script := range scripts {
		<-throttle.c
		tx := new(db.LazyTrx)
		err := tx.Begin()
		if err != nil {
			log.Errorf("Could not start transaction in job generate: %s", err.Error())
			success = false
			continue
		}
		// Build the set of locations that already have timetable rows.
		have := make(map[string]bool)
		locTtRows := classLocTTRows[className]
		if locTtRows != nil {
			for rawLoc, v := range locTtRows {
				loc, err := getLocationIdx(script.settings.location_type, rawLoc)
				if err != nil {
					log.Warningf("Broken settings for class %s: %s", className, err.Error())
					// Fall back to the raw location on broken settings.
					loc = rawLoc
				}
				if len(v) > 0 {
					have[loc] = true
				}
			}
		}
		add_to_timetable, err := generateJobs(tx, className, script.settings, jiRows[className], have, flags[className])
		if err != nil {
			// NOTE(review): message likely intends "Could not generate jobs".
			log.Errorf("Could generate jobs for class %s: %s", className, err.Error())
			tx.Rollback()
			success = false
			continue
		}
		err = tx.Commit()
		if err != nil {
			log.Errorf("Could not commit generate jobs for class %s: %s", className, err.Error())
			success = false
			continue
		}
		// Group freshly generated rows by notification key: the concrete
		// location for LOCATION_TYPE_EACH, otherwise the default index.
		per_location := make(map[string][]*TimetableEntry)
		for _, row := range add_to_timetable {
			allSettingsMutex.Lock()
			row.settings = allSettings[row.settings_id]
			allSettingsMutex.Unlock()
			if row.settings == nil {
				log.Warningf("Internal inconsistency error: Invalid settings for generated row: %+v", row)
				continue
			}
			key := DEFAULT_LOCATION_IDX
			if row.settings.location_type == LOCATION_TYPE_EACH {
				key = row.location
			}
			if _, ok := per_location[key]; !ok {
				per_location[key] = make([]*TimetableEntry, 0)
			}
			per_location[key] = append(per_location[key], row)
		}
		for location, rows := range per_location {
			notifyAboutNewTTRows(className, location, rows, true)
		}
	}
	notifyForFullTTSelect(classLocTTRows, true)
	log.Debugf("Processed %d classes for %.5f sec", len(scripts), float64(time.Now().UnixNano()-startTs)/1e9)
	log.Debugf("Total %.5f sec", float64(time.Now().UnixNano()-unifiedStartTs)/1e9)
	return success
}
// generateJobs decides, inside the caller's transaction, which timetable rows
// to create for one script class and records the accompanying job_info state
// changes. It returns the rows to add (already queued into the DB via
// addToTimetable at the end) and the first error encountered.
//
// haveTTRows must be nil if there are no timetable entries for any location
// otherwise it must have only true entries like map["location"] => true
// probably jobs generation can be simplified, it is just the way it is
func generateJobs(tx *db.LazyTrx, className string, settings *ScriptSettings, jiRows map[string]*JobInfoEntry, haveTTRows map[string]bool, flags *FlagEntry) (add_to_timetable []*TimetableEntry, err error) {
	// Normalize: an empty map means "no rows", same as nil.
	if haveTTRows != nil && len(haveTTRows) == 0 {
		haveTTRows = nil
	}
	now := time.Now().Unix()
	// Accumulators; flushed to the DB in one batch each at the bottom.
	add_to_timetable = make([]*TimetableEntry, 0)
	add_job_info := make([]*JobInfoEntry, 0)
	set_finish_jobs := make([]string, 0)
	set_init_jobs := make([]string, 0)
	set_jobs_generated_js := make([]string, 0)
	prepare_next_generation := make([]NextGenParams, 0)
	have_finish_jobs := settings.jobs.Have_finish_jobs
	is_any := (settings.location_type == LOCATION_TYPE_ANY)
	is_temporary := settings.jobs.Temporary
	temporary_can_run := false
	if flags != nil {
		// Kill flow: when a kill was requested, either finalize the kill (no
		// rows left) or keep killing; either way no new jobs are generated.
		if flags.kill_requested_ts.Valid {
			is_done := (haveTTRows == nil)
			if is_done {
				log.Printf("Class %s is done, all is ok", className)
				if !flags.killed_ts.Valid {
					tx.AddCommitCallback(func() { continueDispatchAfterKill(className) })
					if err = setKilledFlag(tx, className); err != nil {
						return
					}
					if err = prepareNextGeneration(tx, have_finish_jobs, className, settings); err != nil {
						return
					}
				}
			} else {
				log.Printf("Class %s is not done", className)
				startKilling(className)
				// not the best place to put it, but it works
				if err = setMaxFinishedTs(tx, className, flags.kill_request_employee_id.Int64, flags.kill_requested_ts.Int64); err != nil {
					return
				}
			}
			return
		}
		// Stop generating new job generations when we are on pause
		if flags.pause_requested_ts.Valid {
			is_done := generationFinished(className, haveTTRows, jiRows, settings)
			if is_done && !flags.paused_ts.Valid {
				if err = setPausedFlag(tx, className); err != nil {
					return
				}
				flags.paused_ts = sql.NullInt64{Int64: now, Valid: true}
			}
			if !is_any || flags.paused_ts.Valid {
				return
			}
		}
		// Temporary "any-location" scripts only run on an explicit run request.
		if is_temporary && flags.run_requested_ts.Valid && is_any {
			// We accepted run request, which means that we already generated jobs
			if flags.run_accepted_ts.Valid {
				if generationFinished(className, haveTTRows, jiRows, settings) {
					if err = resetRunRequest(tx, className); err != nil {
						return
					}
					if err = prepareNextGeneration(tx, have_finish_jobs, className, settings); err != nil {
						return
					}
					return
				}
			} else {
				if err = setRunAccepted(tx, className); err != nil {
					return
				}
			}
			temporary_can_run = true
		}
	}
	if is_temporary && !temporary_can_run || settings.jobs.Type == JOBS_TYPE_NONE {
		return
	}
	// Determine which locations still need jobs.
	locations := make([]string, 0)
	if !is_any {
		all_locations := getLocations(settings)
		timetable_locations := make(map[string]bool)
		if haveTTRows != nil {
			for location, _ := range haveTTRows {
				timetable_locations[location] = true
			}
		}
		// there can be failed hosts that are still running: we must really compare host names, not just counts
		for _, loc := range all_locations {
			if _, ok := timetable_locations[loc]; !ok {
				locations = append(locations, loc)
			}
		}
		if len(locations) == 0 {
			return
		}
	} else {
		// LOCATION_TYPE_ANY: any existing timetable row means we are done.
		if haveTTRows != nil && len(haveTTRows) > 0 {
			return
		}
		locations = getLocations(settings)
	}
	tt_location_type := LOCATION_TYPE_EACH
	if is_any {
		tt_location_type = LOCATION_TYPE_ANY
	}
	for _, location := range locations {
		job_info_key, gliErr := getLocationIdx(tt_location_type, location)
		if gliErr != nil {
			log.Warningf("Error getting location index for %s for location_type %s and location %s: %s", className, tt_location_type, location, gliErr.Error())
			continue
		}
		// Find (or lazily create) the job_info row for this location.
		var row *JobInfoEntry
		if jiRows == nil || jiRows[job_info_key] == nil {
			row = &JobInfoEntry{generation_id: 0,
				class_name:           className,
				location:             job_info_key,
				next_generate_job_ts: sql.NullInt64{Int64: int64(getNextJobGenerateTs(className, true, 0, settings)), Valid: true},
				settings_id:          settings.id}
			add_job_info = append(add_job_info, row)
		} else {
			row = jiRows[job_info_key]
		}
		// Template timetable row; method/JobData/default_retry are adjusted below.
		tt_row := &TimetableEntry{
			class_name:            className,
			default_retry:         settings.retry_job,
			repeat:                settings.repeat_job,
			method:                METHOD_RUN,
			finished_successfully: 0,
			generation_id:         sql.NullInt64{Int64: int64(row.generation_id), Valid: true},
			settings_id:           row.settings_id,
			location:              location,
			created:               uint64(now),
		}
		tt_row.NextLaunchTs.Valid = true
		tt_row.NextLaunchTs.Int64 = now
		if row.jobs_generated_ts.Valid || row.init_jobs_ts.Valid {
			// Jobs already generated/initialized: either schedule finishJobs
			// or queue this location for its next generation.
			if have_finish_jobs && !row.finish_jobs_ts.Valid {
				set_finish_jobs = append(set_finish_jobs, job_info_key)
				tt_row.JobData = `"finishJobs"`
				tt_row.method = METHOD_FINISH_JOBS
				tt_row.default_retry = settings.retry_job
				add_to_timetable = append(add_to_timetable, tt_row)
			} else {
				prepare_next_generation = append(prepare_next_generation, NextGenParams{Location: job_info_key, JobInfo: row})
			}
			continue
		} else if row.next_generate_job_ts.Int64 > now {
			// Not yet due for generation.
			continue
		}
		if settings.jobs.Type == JOBS_TYPE_CUSTOM {
			// Custom jobs: emit a single initJobs row instead of a jobs list.
			set_init_jobs = append(set_init_jobs, job_info_key)
			tt_row.JobData = `"initJobs"`
			tt_row.method = METHOD_INIT_JOBS
			tt_row.default_retry = uint32(settings.retry.Int64)
			add_to_timetable = append(add_to_timetable, tt_row)
			continue
		}
		jobs, mjlErr := makeJobsList(settings.jobs, settings.instance_count, className)
		if mjlErr != nil {
			log.Warningf("Error generating jobs for %+v with instance_count=%d and jobs=%s: %s", className, settings.instance_count, settings.jobs, mjlErr.Error())
			continue
		}
		// One timetable row per generated job, copied from the template.
		for _, job := range jobs {
			tt_row_copy := new(TimetableEntry)
			*tt_row_copy = *tt_row
			tt_row_copy.JobData = job
			add_to_timetable = append(add_to_timetable, tt_row_copy)
		}
		set_jobs_generated_js = append(set_jobs_generated_js, job_info_key)
	}
	// Flush all accumulated state changes; order matters (job_info rows must
	// exist before timestamps are set, timetable rows are inserted last).
	if err = addJobInfo(tx, add_job_info); err != nil {
		return
	}
	if err = setFinishJobsTs(tx, className, set_finish_jobs); err != nil {
		return
	}
	if err = batchPrepareNextGeneration(tx, have_finish_jobs, className, prepare_next_generation, settings); err != nil {
		return
	}
	if err = setInitJobsTs(tx, className, set_init_jobs); err != nil {
		return
	}
	if err = setJobsGeneratedTs(tx, className, set_jobs_generated_js); err != nil {
		return
	}
	if err = addToTimetable(tx, add_to_timetable); err != nil {
		return
	}
	return
}
func setPausedFlag(tx *db.LazyTrx, className string) (err error) { _, err = tx.Exec( `UPDATE `+TABLE_SCRIPT_FLAGS+` SET paused_ts = NOW() WHERE class_name = '#class_name#'`, "class_name", className) return }
func setRunAccepted(tx *db.LazyTrx, className string) (err error) { _, err = tx.Exec(QUERY_SET_RUN_ACCEPTED, "class_name", className) return }
func resetRunRequest(tx *db.LazyTrx, className string) (err error) { _, err = tx.Exec(QUERY_RESET_RUN_REQUEST, "class_name", className) return }
func clearJobsResults(tx *db.LazyTrx, className string) (err error) { _, err = tx.Exec( QUERY_CLEAR_JOB_RESULTS, "class_name", className) return }
// addToQueueAndDeleteExpired inserts the given run-queue rows (assigning their
// auto-increment ids back onto the entries and logging the enqueue against the
// originating timetable rows) and deletes expired timetable rows that were
// never handed to the queue. Both halves are optional; empty slices are no-ops.
func addToQueueAndDeleteExpired(tx *db.LazyTrx, rows []*RunQueueEntry, toDelete []*TimetableEntry) error {
	if len(rows) > 0 {
		values := make([]string, 0, len(rows))
		ttIds := make([]uint64, 0, len(rows))
		fields := `(class_name, timetable_id, generation_id, hostname, hostname_idx, job_data, method, created, run_status, waiting_ts, should_init_ts, token, retry_attempt, settings_id)`
		for _, row := range rows {
			ttIds = append(ttIds, uint64(row.timetable_id.Int64))
			// fmt.Sprint only inserts spaces between adjacent operands when
			// NEITHER is a string, so these pieces concatenate tightly into
			// one SQL tuple.
			val := fmt.Sprint(
				"('"+db.EscapeString(row.ClassName)+"',",
				db.QNullInt64(row.timetable_id).Data, ",",
				row.generation_id.Int64, ",",
				"'"+db.EscapeString(row.hostname)+"',",
				row.hostname_idx, ",",
				"'"+db.EscapeString(row.JobData)+"',",
				"'"+db.EscapeString(row.method)+"',",
				"FROM_UNIXTIME(", row.created.Int64, "),",
				"'"+db.EscapeString(row.RunStatus)+"',",
				"FROM_UNIXTIME(", row.waiting_ts.Int64, "),",
				"FROM_UNIXTIME(", row.should_init_ts.Int64, "),",
				"'"+db.EscapeString(row.token), "',",
				row.retry_attempt, ",",
				row.settings_id, ")")
			values = append(values, val)
		}
		res, err := tx.Exec(QUERY_INSERT_INTO_RUN_QUEUE, "fields", fields, "values", &db.RawData{Data: strings.Join(values, ",")})
		if err != nil {
			return err
		}
		insId, err := res.LastInsertId()
		if err != nil {
			return err
		}
		// Reconstruct per-row ids from the first insert id; assumes the
		// multi-row insert received a consecutive id range.
		for _, row := range rows {
			row.Id = uint64(insId)
			insId += autoIncrementIncrement
		}
		// Sorted ids give the query a deterministic IN() list.
		sort.Sort(common.UInt64Slice(ttIds))
		res, err = tx.Exec(QUERY_LOG_ADD_TO_QUEUE, "ids", db.INUint64(ttIds))
		if err != nil {
			return err
		}
		aff, err := res.RowsAffected()
		if err != nil {
			return err
		}
		// Every timetable row must have been marked; otherwise state diverged.
		if aff != int64(len(ttIds)) {
			return fmt.Errorf("update ur cache bro aff: %d ttIds:%+v", aff, ttIds)
		}
	}
	if len(toDelete) > 0 {
		ttIds := make([]uint64, 0, len(toDelete))
		for _, row := range toDelete {
			ttIds = append(ttIds, row.id)
		}
		sort.Sort(common.UInt64Slice(ttIds))
		// Only rows never handed to the queue may be deleted here
		// (added_to_queue_ts IS NULL); queued rows go through deleteAddedFromTimetable.
		res, err := tx.Exec(QUERY_DELETE_FROM_TIMETABLE, "ids", db.INUint64(ttIds), "add_where", " AND added_to_queue_ts IS NULL")
		if err != nil {
			return err
		}
		aff, err := res.RowsAffected()
		if err != nil {
			return err
		}
		if aff != int64(len(ttIds)) {
			return fmt.Errorf("addToQueueAndDeleteExpired unexpected ttIds deleted count:%d instead of %d ids:%+v", aff, len(ttIds), ttIds)
		}
	}
	return nil
}