func connectToNatsServer(logger lager.Logger, c *config.Config, startMsg chan<- struct{}) *nats.Conn { var natsClient *nats.Conn var natsHost atomic.Value var err error options := natsOptions(logger, c, &natsHost, startMsg) attempts := 3 for attempts > 0 { natsClient, err = options.Connect() if err == nil { break } else { attempts-- time.Sleep(100 * time.Millisecond) } } if err != nil { logger.Fatal("nats-connection-error", err) } var natsHostStr string natsUrl, err := url.Parse(natsClient.ConnectedUrl()) if err == nil { natsHostStr = natsUrl.Host } logger.Info("Successfully-connected-to-nats", lager.Data{"host": natsHostStr}) natsHost.Store(natsHostStr) return natsClient }
func (db *SQLDB) FailTask(logger lager.Logger, taskGuid, failureReason string) (*models.Task, error) { logger = logger.Session("fail-task", lager.Data{"task_guid": taskGuid}) logger.Info("starting") defer logger.Info("complete") var task *models.Task err := db.transact(logger, func(logger lager.Logger, tx *sql.Tx) error { var err error task, err = db.fetchTaskForUpdate(logger, taskGuid, tx) if err != nil { logger.Error("failed-locking-task", err) return err } if err = task.ValidateTransitionTo(models.Task_Completed); err != nil { if task.State != models.Task_Pending { logger.Error("failed-to-transition-task-to-completed", err) return err } } return db.completeTask(logger, task, true, failureReason, "", tx) }) return task, err }
func (h *TaskHandler) commonTasks(logger lager.Logger, w http.ResponseWriter, req *http.Request, version format.Version) { var err error logger = logger.Session("tasks", lager.Data{"revision": 0}) request := &models.TasksRequest{} response := &models.TasksResponse{} defer func() { exitIfUnrecoverable(logger, h.exitChan, response.Error) }() defer writeResponse(w, response) err = parseRequest(logger, req, request) if err != nil { logger.Error("failed-parsing-request", err) response.Error = models.ConvertError(err) return } filter := models.TaskFilter{Domain: request.Domain, CellID: request.CellId} response.Tasks, err = h.controller.Tasks(logger, filter.Domain, filter.CellID) if err != nil { response.Error = models.ConvertError(err) return } for i := range response.Tasks { task := response.Tasks[i] if task.TaskDefinition == nil { continue } response.Tasks[i] = task.VersionDownTo(version) } }
func initializeDropsonde(logger lager.Logger) { dropsondeDestination := fmt.Sprint("localhost:", *dropsondePort) err := dropsonde.Initialize(dropsondeDestination, dropsondeOrigin) if err != nil { logger.Error("failed-to-initialize-dropsonde", err) } }
// NewPoller builds a Poller that checks Cloud Controller job status on
// the given interval using the supplied HTTP client.
func NewPoller(logger lager.Logger, httpClient *http.Client, pollInterval time.Duration) Poller {
	p := &poller{
		logger:       logger.Session("poller"),
		client:       httpClient,
		pollInterval: pollInterval,
	}
	return p
}
func (db *ETCDDB) DesireTask(logger lager.Logger, taskDef *models.TaskDefinition, taskGuid, domain string) error { logger = logger.WithData(lager.Data{"task_guid": taskGuid}) logger.Info("starting") defer logger.Info("finished") now := db.clock.Now().UnixNano() task := &models.Task{ TaskDefinition: taskDef, TaskGuid: taskGuid, Domain: domain, State: models.Task_Pending, CreatedAt: now, UpdatedAt: now, } value, err := db.serializeModel(logger, task) if err != nil { return err } logger.Debug("persisting-task") _, err = db.client.Create(TaskSchemaPathByGuid(task.TaskGuid), value, NO_TTL) if err != nil { return ErrorFromEtcdError(logger, err) } logger.Debug("succeeded-persisting-task") return nil }
func (db *ETCDDB) resolveRestartableCrashedActualLRPS(logger lager.Logger, actualLRP *models.ActualLRP, starts *startRequests) func() { return func() { actualKey := actualLRP.ActualLRPKey logger = logger.Session("restart-crash", lager.Data{ "process_guid": actualKey.ProcessGuid, "index": actualKey.Index, }) if actualLRP.State != models.ActualLRPStateCrashed { logger.Error("failed-actual-lrp-state-is-not-crashed", nil) return } logger.Debug("unclaiming-actual-lrp", lager.Data{"process_guid": actualLRP.ActualLRPKey.ProcessGuid, "index": actualLRP.ActualLRPKey.Index}) _, err := db.unclaimActualLRP(logger, &actualLRP.ActualLRPKey, &actualLRP.ActualLRPInstanceKey) if err != nil { logger.Error("failed-unclaiming-crash", err) return } logger.Debug("succeeded-unclaiming-actual-lrp") starts.Add(logger, &actualKey) } }
func (h *ActualLRPLifecycleHandler) StartActualLRP(logger lager.Logger, w http.ResponseWriter, req *http.Request) { var err error logger = logger.Session("start-actual-lrp") request := &models.StartActualLRPRequest{} response := &models.ActualLRPLifecycleResponse{} defer func() { exitIfUnrecoverable(logger, h.exitChan, response.Error) }() defer writeResponse(w, response) err = parseRequest(logger, req, request) if err != nil { response.Error = models.ConvertError(err) return } before, after, err := h.db.StartActualLRP(logger, request.ActualLrpKey, request.ActualLrpInstanceKey, request.ActualLrpNetInfo) if err != nil { response.Error = models.ConvertError(err) return } if before == nil { go h.actualHub.Emit(models.NewActualLRPCreatedEvent(after)) } else if !before.Equal(after) { go h.actualHub.Emit(models.NewActualLRPChangedEvent(before, after)) } }
func initializeDropsonde(logger lager.Logger, uploaderConfig config.UploaderConfig) { dropsondeDestination := fmt.Sprint("localhost:", uploaderConfig.DropsondePort) err := dropsonde.Initialize(dropsondeDestination, dropsondeOrigin) if err != nil { logger.Error("failed to initialize dropsonde: %v", err) } }
func (h *ActualLRPLifecycleHandler) RemoveActualLRP(logger lager.Logger, w http.ResponseWriter, req *http.Request) { var err error logger = logger.Session("remove-actual-lrp") request := &models.RemoveActualLRPRequest{} response := &models.ActualLRPLifecycleResponse{} defer func() { exitIfUnrecoverable(logger, h.exitChan, response.Error) }() defer writeResponse(w, response) err = parseRequest(logger, req, request) if err != nil { response.Error = models.ConvertError(err) return } beforeActualLRPGroup, err := h.db.ActualLRPGroupByProcessGuidAndIndex(logger, request.ProcessGuid, request.Index) if err != nil { response.Error = models.ConvertError(err) return } err = h.db.RemoveActualLRP(logger, request.ProcessGuid, request.Index, request.ActualLrpInstanceKey) if err != nil { response.Error = models.ConvertError(err) return } go h.actualHub.Emit(models.NewActualLRPRemovedEvent(beforeActualLRPGroup)) }
func (h *ActualLRPLifecycleHandler) ClaimActualLRP(logger lager.Logger, w http.ResponseWriter, req *http.Request) { var err error logger = logger.Session("claim-actual-lrp") request := &models.ClaimActualLRPRequest{} response := &models.ActualLRPLifecycleResponse{} defer func() { exitIfUnrecoverable(logger, h.exitChan, response.Error) }() defer writeResponse(w, response) err = parseRequest(logger, req, request) if err != nil { response.Error = models.ConvertError(err) return } before, after, err := h.db.ClaimActualLRP(logger, request.ProcessGuid, request.Index, request.ActualLrpInstanceKey) if err != nil { response.Error = models.ConvertError(err) return } if !after.Equal(before) { go h.actualHub.Emit(models.NewActualLRPChangedEvent(before, after)) } }
func (a *AUFSDiffSizer) DiffSize(logger lager.Logger, containerRootFSPath string) (uint64, error) { _, err := os.Stat(containerRootFSPath) if os.IsNotExist(err) { return 0, fmt.Errorf("get usage: %s", err) } log := logger.Session("diff-size", lager.Data{"path": containerRootFSPath}) log.Debug("start") command := fmt.Sprintf("df -B 1 %s | tail -n1 | awk -v N=3 '{print $N}'", a.AUFSDiffPathFinder.GetDiffLayerPath((containerRootFSPath))) outbytes, err := exec.Command("sh", "-c", command).CombinedOutput() if err != nil { log.Error("df-failed", err) return 0, fmt.Errorf("get usage: df: %s, %s", err, string(outbytes)) } var bytesUsed uint64 if _, err := fmt.Sscanf(string(outbytes), "%d", &bytesUsed); err != nil { log.Error("scanf-failed", err, lager.Data{"out": string(outbytes)}) return 0, nil } log.Debug("finished", lager.Data{"bytes": bytesUsed}) return bytesUsed, nil }
func (s *CgroupStarter) mountCgroup(logger lager.Logger, cgroupPath, subsystems string) error { logger = logger.Session("mount-cgroup", lager.Data{ "path": cgroupPath, "subsystems": subsystems, }) logger.Info("started") if !s.isMountPoint(cgroupPath) { if err := os.MkdirAll(cgroupPath, 0755); err != nil { return fmt.Errorf("mkdir '%s': %s", cgroupPath, err) } cmd := exec.Command("mount", "-n", "-t", "cgroup", "-o", subsystems, "cgroup", cgroupPath) cmd.Stderr = logging.Writer(logger.Session("mount-cgroup-cmd")) if err := s.CommandRunner.Run(cmd); err != nil { return fmt.Errorf("mounting subsystems '%s' in '%s': %s", subsystems, cgroupPath, err) } } else { logger.Info("subsystems-already-mounted") } logger.Info("finished") return nil }
// NewMetrics builds a Metrics reporter over the backing-store and depot
// directories.
func NewMetrics(logger lager.Logger, backingStoresPath, depotPath string) Metrics {
	m := &metrics{
		logger:            logger.Session("metrics"),
		backingStoresPath: backingStoresPath,
		depotPath:         depotPath,
	}
	return m
}
func NewPresence( logger lager.Logger, consulClient consuladapter.Client, lockKey string, lockValue []byte, clock clock.Clock, retryInterval time.Duration, lockTTL time.Duration, ) Presence { uuid, err := uuid.NewV4() if err != nil { logger.Fatal("create-uuid-failed", err) } session, err := NewSessionNoChecks(uuid.String(), lockTTL, consulClient) if err != nil { logger.Fatal("consul-session-failed", err) } return Presence{ consul: session, key: lockKey, value: lockValue, clock: clock, retryInterval: retryInterval, logger: logger, } }
func initializeServer(logger lager.Logger, uploaderConfig config.UploaderConfig) ifrit.Runner { transport := &http.Transport{ Proxy: http.ProxyFromEnvironment, Dial: (&net.Dialer{ Timeout: ccUploadDialTimeout, KeepAlive: ccUploadKeepAlive, }).Dial, TLSClientConfig: &tls.Config{ InsecureSkipVerify: uploaderConfig.SkipCertVerify, }, TLSHandshakeTimeout: ccUploadTLSHandshakeTimeout, } pollerHttpClient := cfhttp.NewClient() pollerHttpClient.Transport = transport uploader := ccclient.NewUploader(logger, &http.Client{Transport: transport}) poller := ccclient.NewPoller(logger, pollerHttpClient, time.Duration(uploaderConfig.CCJobPollingInterval)) ccUploaderHandler, err := handlers.New(uploader, poller, logger) if err != nil { logger.Error("router-building-failed", err) os.Exit(1) } return http_server.New(uploaderConfig.ListenAddress, ccUploaderHandler) }
// The stager calls this when it wants to claim a completed task. This ensures that only one // stager ever attempts to handle a completed task func (db *ETCDDB) ResolvingTask(logger lager.Logger, taskGuid string) error { logger = logger.WithData(lager.Data{"task_guid": taskGuid}) logger.Info("starting") defer logger.Info("finished") task, index, err := db.taskByGuidWithIndex(logger, taskGuid) if err != nil { logger.Error("failed-getting-task", err) return err } err = task.ValidateTransitionTo(models.Task_Resolving) if err != nil { logger.Error("invalid-state-transition", err) return err } task.UpdatedAt = db.clock.Now().UnixNano() task.State = models.Task_Resolving value, err := db.serializeModel(logger, task) if err != nil { return err } _, err = db.client.CompareAndSwap(TaskSchemaPathByGuid(taskGuid), value, NO_TTL, index) if err != nil { return ErrorFromEtcdError(logger, err) } return nil }
// transact runs f inside a database transaction, retrying the whole
// transaction up to three times when it fails with a deadlock error.
// Any other error (or success) ends the loop immediately.
func (db *SQLDB) transact(logger lager.Logger, f func(logger lager.Logger, tx *sql.Tx) error) error {
	var err error

	for attempts := 0; attempts < 3; attempts++ {
		// Each attempt is wrapped in a closure so the deferred Rollback
		// fires at the end of the attempt, not at function return.
		err = func() error {
			tx, err := db.db.Begin()
			if err != nil {
				return err
			}
			// Safe to defer unconditionally: per database/sql, Rollback
			// after a successful Commit is a no-op (returns ErrTxDone).
			defer tx.Rollback()

			err = f(logger, tx)
			if err != nil {
				return err
			}

			return tx.Commit()
		}()

		// Retry only deadlocks, and only while attempts remain. On
		// success err is nil, convertSQLError(nil) is not ErrDeadlock,
		// so this break also covers the happy path.
		if attempts >= 2 || db.convertSQLError(err) != models.ErrDeadlock {
			break
		} else {
			logger.Error("deadlock-transaction", err, lager.Data{"attempts": attempts})
			// Back off briefly before retrying the deadlocked transaction.
			time.Sleep(500 * time.Millisecond)
		}
	}

	return err
}
func (db *ETCDDB) batchDeleteTasks(taskGuids []string, logger lager.Logger) { if len(taskGuids) == 0 { return } works := []func(){} for _, taskGuid := range taskGuids { taskGuid := taskGuid works = append(works, func() { _, err := db.client.Delete(taskGuid, true) if err != nil { logger.Error("failed-to-delete", err, lager.Data{ "task_guid": taskGuid, }) } }) } throttler, err := workpool.NewThrottler(db.convergenceWorkersSize, works) if err != nil { logger.Error("failed-to-create-throttler", err) } throttler.Work() return }
func (db *ETCDDB) UnclaimActualLRP(logger lager.Logger, key *models.ActualLRPKey) (*models.ActualLRPGroup, *models.ActualLRPGroup, error) { actualLRP, modifiedIndex, err := db.rawActualLRPByProcessGuidAndIndex(logger, key.ProcessGuid, key.Index) bbsErr := models.ConvertError(err) if bbsErr != nil { return nil, nil, bbsErr } beforeActualLRP := *actualLRP if actualLRP.State == models.ActualLRPStateUnclaimed { logger.Debug("already-unclaimed") return nil, nil, models.ErrActualLRPCannotBeUnclaimed } actualLRP.State = models.ActualLRPStateUnclaimed actualLRP.ActualLRPKey = *key actualLRP.ActualLRPInstanceKey = models.ActualLRPInstanceKey{} actualLRP.ActualLRPNetInfo = models.EmptyActualLRPNetInfo() actualLRP.Since = db.clock.Now().UnixNano() actualLRP.ModificationTag.Increment() data, err := db.serializeModel(logger, actualLRP) if err != nil { return nil, nil, err } _, err = db.client.CompareAndSwap(ActualLRPSchemaPath(key.ProcessGuid, key.Index), data, 0, modifiedIndex) if err != nil { logger.Error("failed-compare-and-swap", err) return nil, nil, ErrorFromEtcdError(logger, err) } return &models.ActualLRPGroup{Instance: &beforeActualLRP}, &models.ActualLRPGroup{Instance: actualLRP}, nil }
func appendSSLConnectionStringParam(logger lager.Logger, driverName, databaseConnectionString, sqlCACertFile string) string { switch driverName { case "mysql": if sqlCACertFile != "" { certBytes, err := ioutil.ReadFile(sqlCACertFile) if err != nil { logger.Fatal("failed-to-read-sql-ca-file", err) } caCertPool := x509.NewCertPool() if ok := caCertPool.AppendCertsFromPEM(certBytes); !ok { logger.Fatal("failed-to-parse-sql-ca", err) } tlsConfig := &tls.Config{ InsecureSkipVerify: false, RootCAs: caCertPool, } mysql.RegisterTLSConfig("bbs-tls", tlsConfig) databaseConnectionString = fmt.Sprintf("%s?tls=bbs-tls", databaseConnectionString) } case "postgres": if sqlCACertFile == "" { databaseConnectionString = fmt.Sprintf("%s?sslmode=disable", databaseConnectionString) } else { databaseConnectionString = fmt.Sprintf("%s?sslmode=verify-ca&sslrootcert=%s", databaseConnectionString, sqlCACertFile) } } return databaseConnectionString }
func (db *ETCDDB) FailActualLRP(logger lager.Logger, key *models.ActualLRPKey, errorMessage string) (*models.ActualLRPGroup, *models.ActualLRPGroup, error) { logger = logger.WithData(lager.Data{"actual_lrp_key": key, "error_message": errorMessage}) logger.Info("starting") lrp, prevIndex, err := db.rawActualLRPByProcessGuidAndIndex(logger, key.ProcessGuid, key.Index) if err != nil { logger.Error("failed-to-get-actual-lrp", err) return nil, nil, err } beforeActualLRP := *lrp if lrp.State != models.ActualLRPStateUnclaimed { return nil, nil, models.ErrActualLRPCannotBeFailed } lrp.ModificationTag.Increment() lrp.PlacementError = errorMessage lrp.Since = db.clock.Now().UnixNano() lrpData, serialErr := db.serializeModel(logger, lrp) if serialErr != nil { return nil, nil, serialErr } _, err = db.client.CompareAndSwap(ActualLRPSchemaPath(key.ProcessGuid, key.Index), lrpData, 0, prevIndex) if err != nil { logger.Error("failed", err) return nil, nil, models.ErrActualLRPCannotBeFailed } logger.Info("succeeded") return &models.ActualLRPGroup{Instance: &beforeActualLRP}, &models.ActualLRPGroup{Instance: lrp}, nil }
func (n *networker) Destroy(log lager.Logger, handle string) error { cfg, err := load(n.configStore, handle) if err != nil { log.Error("no-properties-for-container-skipping-destroy-network", err) return nil } if err := n.configurer.DestroyIPTablesRules(log, cfg); err != nil { return err } if err := n.subnetPool.Release(cfg.Subnet, cfg.ContainerIP); err != nil && err != subnets.ErrReleasedUnallocatedSubnet { log.Error("release-failed", err) return err } if ports, ok := n.configStore.Get(handle, gardener.MappedPortsKey); ok { mappings, err := portsFromJson(ports) if err != nil { return err } for _, m := range mappings { n.portPool.Release(m.HostPort) } } err = n.subnetPool.RunIfFree(cfg.Subnet, func() error { return n.configurer.DestroyBridge(log, cfg) }) return err }
func (db *ETCDDB) rawActualLRPGroupByProcessGuidAndIndex(logger lager.Logger, processGuid string, index int32) (*models.ActualLRPGroup, error) { node, err := db.fetchRecursiveRaw(logger, ActualLRPIndexDir(processGuid, index)) if err != nil { return nil, err } group := models.ActualLRPGroup{} for _, instanceNode := range node.Nodes { var lrp models.ActualLRP deserializeErr := db.deserializeModel(logger, instanceNode, &lrp) if deserializeErr != nil { logger.Error("failed-parsing-actual-lrp", deserializeErr, lager.Data{"key": instanceNode.Key}) return nil, deserializeErr } if isInstanceActualLRPNode(instanceNode) { group.Instance = &lrp } if isEvacuatingActualLRPNode(instanceNode) { group.Evacuating = &lrp } } if group.Evacuating == nil && group.Instance == nil { return nil, models.ErrResourceNotFound } return &group, nil }
func (db *SQLDB) DesireTask(logger lager.Logger, taskDef *models.TaskDefinition, taskGuid, domain string) error { logger = logger.Session("desire-task", lager.Data{"task_guid": taskGuid}) logger.Info("starting") defer logger.Info("complete") taskDefData, err := db.serializeModel(logger, taskDef) if err != nil { logger.Error("failed-serializing-task-definition", err) return err } return db.transact(logger, func(logger lager.Logger, tx *sql.Tx) error { now := db.clock.Now().UnixNano() _, err = db.insert(logger, tx, tasksTable, SQLAttributes{ "guid": taskGuid, "domain": domain, "created_at": now, "updated_at": now, "first_completed_at": 0, "state": models.Task_Pending, "task_definition": taskDefData, }, ) if err != nil { logger.Error("failed-inserting-task", err) return db.convertSQLError(err) } return nil }) }
func (db *ETCDDB) createActualLRP(logger lager.Logger, desiredLRP *models.DesiredLRP, index int32) error { logger = logger.Session("create-actual-lrp") var err error if index >= desiredLRP.Instances { err = models.NewError(models.Error_InvalidRecord, "Index too large") logger.Error("actual-lrp-index-too-large", err, lager.Data{"actual_index": index, "desired_instances": desiredLRP.Instances}) return err } guid, err := uuid.NewV4() if err != nil { return err } actualLRP := &models.ActualLRP{ ActualLRPKey: models.NewActualLRPKey( desiredLRP.ProcessGuid, index, desiredLRP.Domain, ), State: models.ActualLRPStateUnclaimed, Since: db.clock.Now().UnixNano(), ModificationTag: models.ModificationTag{ Epoch: guid.String(), Index: 0, }, } err = db.createRawActualLRP(logger, actualLRP) if err != nil { return err } return nil }
func (db *SQLDB) completeTask(logger lager.Logger, task *models.Task, failed bool, failureReason, result string, tx *sql.Tx) error { now := db.clock.Now().UnixNano() _, err := db.update(logger, tx, tasksTable, SQLAttributes{ "failed": failed, "failure_reason": failureReason, "result": result, "state": models.Task_Completed, "first_completed_at": now, "updated_at": now, "cell_id": "", }, "guid = ?", task.TaskGuid, ) if err != nil { logger.Error("failed-updating-tasks", err) return db.convertSQLError(err) } task.State = models.Task_Completed task.UpdatedAt = now task.FirstCompletedAt = now task.Failed = failed task.FailureReason = failureReason task.Result = result task.CellId = "" return nil }
func (db *SQLDB) fetchActualLRPForUpdate(logger lager.Logger, processGuid string, index int32, evacuating bool, tx *sql.Tx) (*models.ActualLRP, error) { expireTime := db.clock.Now().Round(time.Second).UnixNano() wheres := "process_guid = ? AND instance_index = ? AND evacuating = ?" bindings := []interface{}{processGuid, index, evacuating} if evacuating { wheres += " AND expire_time > ?" bindings = append(bindings, expireTime) } rows, err := db.all(logger, tx, actualLRPsTable, actualLRPColumns, LockRow, wheres, bindings...) if err != nil { logger.Error("failed-query", err) return nil, db.convertSQLError(err) } groups, err := db.scanAndCleanupActualLRPs(logger, tx, rows) if err != nil { return nil, db.convertSQLError(err) } if len(groups) == 0 { return nil, models.ErrResourceNotFound } actualLRP, _ := groups[0].Resolve() return actualLRP, nil }
func (h *TaskHandler) commonTaskByGuid(logger lager.Logger, w http.ResponseWriter, req *http.Request, version format.Version) { var err error logger = logger.Session("task-by-guid", lager.Data{"revision": 0}) request := &models.TaskByGuidRequest{} response := &models.TaskResponse{} defer func() { exitIfUnrecoverable(logger, h.exitChan, response.Error) }() defer writeResponse(w, response) err = parseRequest(logger, req, request) if err != nil { logger.Error("failed-parsing-request", err) response.Error = models.ConvertError(err) return } response.Task, err = h.controller.TaskByGuid(logger, request.TaskGuid) if err != nil { response.Error = models.ConvertError(err) return } if response.Task.TaskDefinition != nil { response.Task = response.Task.VersionDownTo(version) } }
func (db *serviceClient) Cells(logger lager.Logger) (models.CellSet, error) { kvPairs, _, err := db.consulClient.KV().List(CellSchemaRoot(), nil) if err != nil { bbsErr := models.ConvertError(convertConsulError(err)) if bbsErr.Type != models.Error_ResourceNotFound { return nil, bbsErr } } if kvPairs == nil { err = consuladapter.NewPrefixNotFoundError(CellSchemaRoot()) bbsErr := models.ConvertError(convertConsulError(err)) if bbsErr.Type != models.Error_ResourceNotFound { return nil, bbsErr } } cellPresences := models.NewCellSet() for _, kvPair := range kvPairs { if kvPair.Session == "" { continue } cell := kvPair.Value presence := new(models.CellPresence) err := models.FromJSON(cell, presence) if err != nil { logger.Error("failed-to-unmarshal-cells-json", err) continue } cellPresences.Add(presence) } return cellPresences, nil }