// getHeartbeat returns a 200 func getHeartbeat(respWriter http.ResponseWriter, request *http.Request) { opid := mig.GenID() loc := fmt.Sprintf("%s%s", ctx.Server.Host, request.URL.String()) resource := cljs.New(loc) defer func() { if e := recover(); e != nil { ctx.Channels.Log <- mig.Log{OpID: opid, Desc: fmt.Sprintf("%v", e)}.Err() resource.SetError(cljs.Error{Code: fmt.Sprintf("%.0f", opid), Message: fmt.Sprintf("%v", e)}) respond(http.StatusInternalServerError, resource, respWriter, request) } ctx.Channels.Log <- mig.Log{OpID: opid, Desc: "leaving getHeartbeat()"}.Debug() }() err := resource.AddItem(cljs.Item{ Href: request.URL.String(), Data: []cljs.Data{ { Name: "heartbeat", Value: "gatorz say hi", }, }}) if err != nil { panic(err) } respond(http.StatusOK, resource, respWriter, request) }
// getIP returns a the public IP of the caller as read from X-Forwarded-For func getIP(respWriter http.ResponseWriter, request *http.Request) { opid := mig.GenID() defer func() { ctx.Channels.Log <- mig.Log{OpID: opid, Desc: "leaving getIP()"}.Debug() }() respond(http.StatusOK, []byte(remotePublicIP(request)), respWriter, request) }
// issueKillAction issues an `agentdestroy` action targeted to a specific agent // and updates the status of the agent in the database func issueKillAction(agent mig.Agent, ctx Context) (err error) { defer func() { if e := recover(); e != nil { err = fmt.Errorf("issueKillAction() -> %v", e) } ctx.Channels.Log <- mig.Log{OpID: ctx.OpID, Desc: "leaving issueKillAction()"}.Debug() }() // generate an `agentdestroy` action for this agent killAction := mig.Action{ ID: mig.GenID(), Name: fmt.Sprintf("Kill agent %s", agent.Name), Target: fmt.Sprintf("queueloc='%s'", agent.QueueLoc), ValidFrom: time.Now().Add(-60 * time.Second).UTC(), ExpireAfter: time.Now().Add(30 * time.Minute).UTC(), SyntaxVersion: 2, } var opparams struct { PID int `json:"pid"` Version string `json:"version"` } opparams.PID = agent.PID opparams.Version = agent.Version killOperation := mig.Operation{ Module: "agentdestroy", Parameters: opparams, } killAction.Operations = append(killAction.Operations, killOperation) // sign the action with the scheduler PGP key secring, err := getSecring(ctx) if err != nil { panic(err) } pgpsig, err := killAction.Sign(ctx.PGP.PrivKeyID, secring) if err != nil { panic(err) } killAction.PGPSignatures = append(killAction.PGPSignatures, pgpsig) var jsonAction []byte jsonAction, err = json.Marshal(killAction) if err != nil { panic(err) } // write the action to the spool for scheduling dest := fmt.Sprintf("%s/%.0f.json", ctx.Directories.Action.New, killAction.ID) err = safeWrite(ctx, dest, jsonAction) if err != nil { panic(err) } // mark the agent as `destroyed` in the database err = ctx.DB.MarkAgentDestroyed(agent) if err != nil { panic(err) } ctx.Channels.Log <- mig.Log{Desc: fmt.Sprintf("issued kill action for agent '%s' "+ "with PID '%d'", agent.Name, agent.PID)}.Warning() return }
// InsertAgent creates a new agent in the database // // If useTx is not nil, the transaction will be used instead of the standard // connection func (db *DB) InsertAgent(agt mig.Agent, useTx *sql.Tx) (err error) { jEnv, err := json.Marshal(agt.Env) if err != nil { err = fmt.Errorf("Failed to marshal agent environment: '%v'", err) return } jTags, err := json.Marshal(agt.Tags) if err != nil { err = fmt.Errorf("Failed to marshal agent tags: '%v'", err) return } agtid := mig.GenID() if useTx != nil { _, err = useTx.Exec(`INSERT INTO agents (id, name, queueloc, mode, version, pid, starttime, destructiontime, heartbeattime, refreshtime, status, environment, tags) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)`, agtid, agt.Name, agt.QueueLoc, agt.Mode, agt.Version, agt.PID, agt.StartTime, agt.DestructionTime, agt.HeartBeatTS, agt.RefreshTS, agt.Status, jEnv, jTags) } else { _, err = db.c.Exec(`INSERT INTO agents (id, name, queueloc, mode, version, pid, starttime, destructiontime, heartbeattime, refreshtime, status, environment, tags) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)`, agtid, agt.Name, agt.QueueLoc, agt.Mode, agt.Version, agt.PID, agt.StartTime, agt.DestructionTime, agt.HeartBeatTS, agt.RefreshTS, agt.Status, jEnv, jTags) } if err != nil { return fmt.Errorf("Failed to insert agent in database: '%v'", err) } return }
// getIP returns a the public IP of the caller as read from X-Forwarded-For func getIP(respWriter http.ResponseWriter, request *http.Request) { opid := mig.GenID() defer func() { ctx.Channels.Log <- mig.Log{OpID: opid, Desc: "leaving getIP()"}.Debug() }() if request.Header.Get("X-FORWARDED-FOR") != "" { respond(http.StatusOK, []byte(request.Header.Get("X-FORWARDED-FOR")), respWriter, request) } else { // request.RemoteAddr contains IP:Port, so strip the port and return just the IP respond(http.StatusOK, []byte(request.RemoteAddr[:strings.LastIndex(request.RemoteAddr, ":")]), respWriter, request) } }
// safeWrite performs a two steps write: // 1) a temp file is written // 2) the temp file is moved into the target folder // this prevents the dir watcher from waking up before the file is fully written func safeWrite(ctx Context, destination string, data []byte) (err error) { if len(data) == 0 { return fmt.Errorf("data slice is empty. file not written") } // write the file temp dir tmp := fmt.Sprintf("%s/%.0f", ctx.Directories.Tmp, mig.GenID()) err = ioutil.WriteFile(tmp, data, 0640) if err != nil { return fmt.Errorf("safeWrite: %v", err) } // move to destination err = os.Rename(tmp, destination) if err != nil { return fmt.Errorf("safeWrite: %v", err) } return }
func createCommand(ctx Context, action mig.Action, agent mig.Agent, emptyResults []modules.Result) (err error) { cmdid := mig.GenID() defer func() { if e := recover(); e != nil { err = fmt.Errorf("createCommand() -> %v", e) } ctx.Channels.Log <- mig.Log{OpID: ctx.OpID, ActionID: action.ID, CommandID: cmdid, Desc: "leaving createCommand()"}.Debug() }() var cmd mig.Command cmd.Status = "sent" cmd.Action = action cmd.Agent = agent cmd.ID = cmdid cmd.StartTime = time.Now().UTC() cmd.Results = emptyResults ctx.Channels.CommandReady <- cmd return }
// processNewAction is called when a new action is available. It pulls // the action from the directory, parse it, retrieve a list of targets from // the backend database, and create individual command for each target. func processNewAction(actionPath string, ctx Context) (err error) { var action mig.Action defer func() { if e := recover(); e != nil { err = fmt.Errorf("processNewAction() -> %v", e) } ctx.Channels.Log <- mig.Log{OpID: ctx.OpID, ActionID: action.ID, Desc: "leaving processNewAction()"}.Debug() }() // load the action file action, err = mig.ActionFromFile(actionPath) if err != nil { panic(err) } action.StartTime = time.Now() // generate an action id if action.ID < 1 { action.ID = mig.GenID() } desc := fmt.Sprintf("new action received: Name='%s' Target='%s' ValidFrom='%s' ExpireAfter='%s'", action.Name, action.Target, action.ValidFrom, action.ExpireAfter) ctx.Channels.Log <- mig.Log{OpID: ctx.OpID, ActionID: action.ID, Desc: desc} // TODO: replace with action.Validate(), to include signature verification if time.Now().Before(action.ValidFrom) { // queue new action desc := fmt.Sprintf("action '%s' is not ready for scheduling", action.Name) ctx.Channels.Log <- mig.Log{OpID: ctx.OpID, ActionID: action.ID, Desc: desc}.Debug() return } if time.Now().After(action.ExpireAfter) { ctx.Channels.Log <- mig.Log{OpID: ctx.OpID, ActionID: action.ID, Desc: fmt.Sprintf("action '%s' is expired. invalidating.", action.Name)} err = invalidAction(ctx, action, actionPath) if err != nil { panic(err) } return } // find target agents for the action agents, err := ctx.DB.ActiveAgentsByTarget(action.Target) if err != nil { panic(err) } action.Counters.Sent = len(agents) if action.Counters.Sent == 0 { err = fmt.Errorf("No agents found for target '%s'. invalidating action.", action.Target) err = invalidAction(ctx, action, actionPath) if err != nil { panic(err) } } ctx.Channels.Log <- mig.Log{OpID: ctx.OpID, ActionID: action.ID, Desc: fmt.Sprintf("Found %d target agents", action.Counters.Sent)} action.Status = "preparing" inserted, err := ctx.DB.InsertOrUpdateAction(action) if err != nil { panic(err) } if inserted { // action was inserted, and not updated, so we need to insert // the signatures as well astr, err := action.String() if err != nil { panic(err) } for _, sig := range action.PGPSignatures { pubring, err := getPubring(ctx) if err != nil { panic(err) } fp, err := pgp.GetFingerprintFromSignature(astr, sig, pubring) if err != nil { panic(err) } inv, err := ctx.DB.InvestigatorByFingerprint(fp) if err != nil { panic(err) } err = ctx.DB.InsertSignature(action.ID, inv.ID, sig) if err != nil { panic(err) } } } ctx.Channels.Log <- mig.Log{OpID: ctx.OpID, ActionID: action.ID, Desc: "Action written to database"}.Debug() // create an array of empty results to serve as default for all commands emptyResults := make([]modules.Result, len(action.Operations)) created := 0 for _, agent := range agents { err := createCommand(ctx, action, agent, emptyResults) if err != nil { ctx.Channels.Log <- mig.Log{OpID: ctx.OpID, ActionID: action.ID, Desc: "Failed to create commmand on agent" + agent.Name}.Err() continue } created++ } if created == 0 { // no command created found ctx.Channels.Log <- mig.Log{OpID: ctx.OpID, ActionID: action.ID, Desc: "No command created. Invalidating action."}.Err() err = invalidAction(ctx, action, actionPath) if err != nil { panic(err) } return nil } // move action to flying state err = flyAction(ctx, action, actionPath) if err != nil { panic(err) } return }
// MakeSignedToken encrypts a timestamp and a random number with the users GPG key // to use as an auth token with the API func (cli Client) MakeSignedToken() (token string, err error) { defer func() { if e := recover(); e != nil { err = fmt.Errorf("MakeSignedToken() -> %v", e) } }() tokenVersion := 1 str := fmt.Sprintf("%d;%s;%.0f", tokenVersion, time.Now().UTC().Format(time.RFC3339), mig.GenID()) secringFile, err := os.Open(cli.Conf.GPG.Home + "/secring.gpg") if err != nil { panic(err) } defer secringFile.Close() sig, err := pgp.Sign(str+"\n", cli.Conf.GPG.KeyID, secringFile) if err != nil { panic(err) } token = str + ";" + sig return }
// getOpID returns an operation ID from a request context, and if not found, generates one func getOpID(r *http.Request) float64 { if opid := context.Get(r, opID); opid != nil { return opid.(float64) } return mig.GenID() }
// createAction receives a signed action in a POST request, validates it, // and write it into the scheduler spool func createAction(respWriter http.ResponseWriter, request *http.Request) { var ( err error action mig.Action ) opid := getOpID(request) loc := fmt.Sprintf("%s%s", ctx.Server.Host, request.URL.String()) resource := cljs.New(loc) defer func() { if e := recover(); e != nil { ctx.Channels.Log <- mig.Log{OpID: opid, ActionID: action.ID, Desc: fmt.Sprintf("%v", e)}.Err() resource.SetError(cljs.Error{Code: fmt.Sprintf("%.0f", opid), Message: fmt.Sprintf("%v", e)}) respond(http.StatusInternalServerError, resource, respWriter, request) } ctx.Channels.Log <- mig.Log{OpID: opid, ActionID: action.ID, Desc: "leaving createAction()"}.Debug() }() // parse the POST body into a mig action err = request.ParseForm() if err != nil { panic(err) } postAction := request.FormValue("action") err = json.Unmarshal([]byte(postAction), &action) if err != nil { panic(err) } ctx.Channels.Log <- mig.Log{OpID: opid, Desc: fmt.Sprintf("Received action for creation '%s'", action)}.Debug() // Init action fields action.ID = mig.GenID() date0 := time.Date(0011, time.January, 11, 11, 11, 11, 11, time.UTC) date1 := time.Date(9998, time.January, 11, 11, 11, 11, 11, time.UTC) action.StartTime = date0 action.FinishTime = date1 action.LastUpdateTime = date0 action.Status = "pending" // load keyring and validate action keyring, err := getKeyring() if err != nil { panic(err) } err = action.Validate() if err != nil { panic(err) } err = action.VerifySignatures(keyring) if err != nil { panic(err) } ctx.Channels.Log <- mig.Log{OpID: opid, ActionID: action.ID, Desc: "Received new action with valid signature"} // write action to database err = ctx.DB.InsertAction(action) if err != nil { panic(err) } // write signatures to database astr, err := action.String() if err != nil { panic(err) } for _, sig := range action.PGPSignatures { k, err := getKeyring() if err != nil { panic(err) } fp, err := pgp.GetFingerprintFromSignature(astr, sig, k) if err != nil { panic(err) } inv, err := ctx.DB.InvestigatorByFingerprint(fp) if err != nil { panic(err) } err = ctx.DB.InsertSignature(action.ID, inv.ID, sig) if err != nil { panic(err) } } ctx.Channels.Log <- mig.Log{OpID: opid, ActionID: action.ID, Desc: "Action written to database"} err = resource.AddItem(cljs.Item{ Href: fmt.Sprintf("%s/action?actionid=%.0f", ctx.Server.BaseURL, action.ID), Data: []cljs.Data{{Name: "action ID " + fmt.Sprintf("%.0f", action.ID), Value: action}}, }) if err != nil { panic(err) } // return a 202 Accepted. the action will be processed asynchronously, and may fail later. respond(http.StatusAccepted, resource, respWriter, request) }
func startRoutines(ctx Context) { // Goroutine that handles events, such as logs and panics, // and decides what to do with them go func() { for event := range ctx.Channels.Log { stop, err := mig.ProcessLog(ctx.Logging, event) if err != nil { panic("Unable to process logs") } // if ProcessLog says we should stop now, feed the Terminate chan if stop { ctx.Channels.Terminate <- errors.New(event.Desc) } } }() ctx.Channels.Log <- mig.Log{Desc: "mig.ProcessLog() routine started"} // Goroutine that loads actions dropped into ctx.Directories.Action.New go func() { for actionPath := range ctx.Channels.NewAction { ctx.OpID = mig.GenID() err := processNewAction(actionPath, ctx) // if something fails in the action processing, move it to the invalid folder if err != nil { // move action to INVALID folder and log dest := fmt.Sprintf("%s/%d.json", ctx.Directories.Action.Invalid, time.Now().UTC().UnixNano()) os.Rename(actionPath, dest) reason := fmt.Sprintf("%v. '%s' moved to '%s'", err, actionPath, dest) ctx.Channels.Log <- mig.Log{OpID: ctx.OpID, Desc: reason}.Warning() } } }() ctx.Channels.Log <- mig.Log{Desc: "processNewAction() routine started"} // Goroutine that loads and sends commands dropped in ready state // it uses a select and a timeout to load a batch of commands instead of // sending them one by one go func() { ctx.OpID = mig.GenID() readyCmd := make(map[float64]mig.Command) ctr := 0 for { select { case cmd := <-ctx.Channels.CommandReady: ctr++ readyCmd[cmd.ID] = cmd case <-time.After(1 * time.Second): if ctr > 0 { var cmds []mig.Command for id, cmd := range readyCmd { cmds = append(cmds, cmd) delete(readyCmd, id) } err := sendCommands(cmds, ctx) if err != nil { ctx.Channels.Log <- mig.Log{OpID: ctx.OpID, Desc: fmt.Sprintf("%v", err)}.Err() } } // reinit ctx.OpID = mig.GenID() ctr = 0 } } }() ctx.Channels.Log <- mig.Log{Desc: "sendCommands() routine started"} // Goroutine that loads commands from the ctx.Directories.Command.Returned and marks // them as finished or cancelled go func() { ctx.OpID = mig.GenID() returnedCmd := make(map[uint64]string) var ctr uint64 = 0 for { select { case cmdFile := <-ctx.Channels.CommandReturned: ctr++ returnedCmd[ctr] = cmdFile case <-time.After(1 * time.Second): if ctr > 0 { var cmdFiles []string for id, cmdFile := range returnedCmd { cmdFiles = append(cmdFiles, cmdFile) delete(returnedCmd, id) } err := returnCommands(cmdFiles, ctx) if err != nil { ctx.Channels.Log <- mig.Log{OpID: ctx.OpID, Desc: fmt.Sprintf("%v", err)}.Err() } } // reinit ctx.OpID = mig.GenID() ctr = 0 } } }() ctx.Channels.Log <- mig.Log{Desc: "terminateCommand() routine started"} // Goroutine that updates an action when a command is done go func() { ctx.OpID = mig.GenID() doneCmd := make(map[float64]mig.Command) ctr := 0 for { select { case cmd := <-ctx.Channels.CommandDone: ctr++ doneCmd[cmd.ID] = cmd case <-time.After(1 * time.Second): if ctr > 0 { var cmds []mig.Command for id, cmd := range doneCmd { cmds = append(cmds, cmd) delete(doneCmd, id) } err := updateAction(cmds, ctx) if err != nil { ctx.Channels.Log <- mig.Log{OpID: ctx.OpID, Desc: fmt.Sprintf("%v", err)}.Err() } } // reinit ctx.OpID = mig.GenID() ctr = 0 } } }() ctx.Channels.Log <- mig.Log{Desc: "updateAction() routine started"} // start a listening channel to receive heartbeats from agents heartbeatsChan, err := startHeartbeatsListener(ctx) if err != nil { panic(err) } go func() { for msg := range heartbeatsChan { ctx.OpID = mig.GenID() err := getHeartbeats(msg, ctx) if err != nil { ctx.Channels.Log <- mig.Log{Desc: fmt.Sprintf("heartbeat routine failed with error '%v'", err)}.Err() } } }() ctx.Channels.Log <- mig.Log{Desc: "agents heartbeats listener routine started"} // start a listening channel to results from agents agtResultsChan, err := startResultsListener(ctx) if err != nil { panic(err) } go func() { for delivery := range agtResultsChan { ctx.OpID = mig.GenID() // validate the size of the data received, and make sure its first and // last bytes are valid json enclosures. if not, discard the message. if len(delivery.Body) < 10 || delivery.Body[0] != '{' || delivery.Body[len(delivery.Body)-1] != '}' { ctx.Channels.Log <- mig.Log{ OpID: ctx.OpID, Desc: fmt.Sprintf("discarding invalid message received in results channel"), }.Err() continue } // write to disk in Returned directory, discard and continue on failure dest := fmt.Sprintf("%s/%.0f", ctx.Directories.Command.Returned, ctx.OpID) err = safeWrite(ctx, dest, delivery.Body) if err != nil { ctx.Channels.Log <- mig.Log{ OpID: ctx.OpID, Desc: fmt.Sprintf("failed to write agent results to disk: %v", err), }.Err() continue } // publish an event in the command results queue err = sendEvent(mig.Ev_Q_Cmd_Res, delivery.Body, ctx) if err != nil { panic(err) } } }() ctx.Channels.Log <- mig.Log{Desc: "agents results listener routine started"} // launch the routine that regularly walks through the local directories go func() { collectorSleeper, err := time.ParseDuration(ctx.Collector.Freq) if err != nil { panic(err) } for { ctx.OpID = mig.GenID() err := collector(ctx) if err != nil { ctx.Channels.Log <- mig.Log{Desc: fmt.Sprintf("collector routined failed with error '%v'", err)}.Err() } time.Sleep(collectorSleeper) } }() ctx.Channels.Log <- mig.Log{Desc: "collector routine started"} // launch the routine that periodically runs jobs go func() { periodicSleeper, err := time.ParseDuration(ctx.Periodic.Freq) if err != nil { panic(err) } for { ctx.OpID = mig.GenID() err := periodic(ctx) if err != nil { ctx.Channels.Log <- mig.Log{Desc: fmt.Sprintf("period routine failed with error '%v'", err)}.Err() } time.Sleep(periodicSleeper) } }() ctx.Channels.Log <- mig.Log{Desc: "periodic routine started"} // launch the routine that cleans up unused amqp queues go func() { sleeper, err := time.ParseDuration(ctx.Periodic.QueuesCleanupFreq) if err != nil { panic(err) } for { ctx.OpID = mig.GenID() err = QueuesCleanup(ctx) if err != nil { ctx.Channels.Log <- mig.Log{Desc: fmt.Sprintf("queues cleanup routine failed with error '%v'", err)}.Err() } time.Sleep(sleeper) } }() ctx.Channels.Log <- mig.Log{Desc: "queue cleanup routine started"} // launch the routine that handles multi agents on same queue if ctx.Agent.KillDupAgents { go func() { for queueLoc := range ctx.Channels.DetectDupAgents { ctx.OpID = mig.GenID() err = killDupAgents(queueLoc, ctx) if err != nil { ctx.Channels.Log <- mig.Log{Desc: fmt.Sprintf("%v", err)}.Err() } } }() ctx.Channels.Log <- mig.Log{Desc: "killDupAgents() routine started"} } // block here until a terminate message is received exitReason := <-ctx.Channels.Terminate fmt.Fprintf(os.Stderr, "Scheduler is shutting down. Reason: %s", exitReason) Destroy(ctx) return }