// AutoCluster creates a new cluster func AutoCluster(w rest.ResponseWriter, r *rest.Request) { dbConn, err := util.GetConnection(CLUSTERADMIN_DB) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), 400) return } defer dbConn.Close() logit.Info.Println("AUTO CLUSTER PROFILE starts") params := AutoClusterInfo{} err = r.DecodeJsonPayload(¶ms) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), http.StatusInternalServerError) return } err = secimpl.Authorize(dbConn, params.Token, "perm-cluster") if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), http.StatusUnauthorized) return } if params.Name == "" { logit.Error.Println("AutoCluster: error in Name") rest.Error(w, "cluster name required", http.StatusBadRequest) return } if params.ClusterType == "" { logit.Error.Println("AutoCluster: error in ClusterType") rest.Error(w, "ClusterType name required", http.StatusBadRequest) return } if params.ProjectID == "" { logit.Error.Println("AutoCluster: error in ProjectID") rest.Error(w, "ProjectID name required", http.StatusBadRequest) return } if params.ClusterProfile == "" { logit.Error.Println("AutoCluster: error in ClusterProfile") rest.Error(w, "ClusterProfile name required", http.StatusBadRequest) return } logit.Info.Println("AutoCluster: Name=" + params.Name + " ClusterType=" + params.ClusterType + " Profile=" + params.ClusterProfile + " ProjectID=" + params.ProjectID) //create cluster definition dbcluster := types.Cluster{} dbcluster.ID = "" dbcluster.ProjectID = params.ProjectID dbcluster.Name = util.CleanName(params.Name) dbcluster.ClusterType = params.ClusterType dbcluster.Status = "uninitialized" dbcluster.Containers = make(map[string]string) var ival int ival, err = admindb.InsertCluster(dbConn, dbcluster) clusterID := strconv.Itoa(ival) dbcluster.ID = clusterID //logit.Info.Println(clusterID) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, "Insert Cluster error:"+err.Error(), http.StatusBadRequest) return } //lookup profile profile, err2 := getClusterProfileInfo(dbConn, params.ClusterProfile) if err2 != nil { logit.Error.Println(err2.Error()) rest.Error(w, "AutoCluster error"+err2.Error(), http.StatusBadRequest) return } //var masterServer types.Server //var chosenServers []types.Server if profile.Algo == "round-robin" { //masterServer, chosenServers, err2 = roundRobin(dbConn, profile) } else { logit.Error.Println("AutoCluster: error-unsupported algorithm request") rest.Error(w, "AutoCluster error: unsupported algorithm", http.StatusBadRequest) return } //create master container dockermaster := swarmapi.DockerRunRequest{} dockermaster.Image = "cpm-node" dockermaster.ContainerName = params.Name + "-master" dockermaster.ProjectID = params.ProjectID dockermaster.Standalone = "false" dockermaster.Profile = profile.MasterProfile if err != nil { logit.Error.Println("AutoCluster: error-create master node " + err.Error()) rest.Error(w, "AutoCluster error"+err.Error(), http.StatusBadRequest) return } // provision the master logit.Info.Println("dockermaster profile is " + dockermaster.Profile) _, err2 = provisionImpl(dbConn, &dockermaster, false) if err2 != nil { logit.Error.Println("AutoCluster: error-provision master " + err2.Error()) rest.Error(w, "AutoCluster error"+err2.Error(), http.StatusBadRequest) return } logit.Info.Println("AUTO CLUSTER PROFILE master container created") var node types.Container //update node with cluster iD node, err2 = admindb.GetContainerByName(dbConn, dockermaster.ContainerName) if err2 != nil { logit.Error.Println("AutoCluster: error-get node by name " + err2.Error()) rest.Error(w, "AutoCluster error"+err2.Error(), http.StatusBadRequest) return } node.ClusterID = clusterID node.Role = "master" err2 = admindb.UpdateContainer(dbConn, node) if err2 != nil { logit.Error.Println("AutoCluster: error-update standby node " + err2.Error()) rest.Error(w, "AutoCluster error"+err2.Error(), http.StatusBadRequest) return } var sleepSetting types.Setting sleepSetting, err2 = admindb.GetSetting(dbConn, "SLEEP-PROV") if err2 != nil { logit.Error.Println("SLEEP-PROV setting error " + err2.Error()) rest.Error(w, err2.Error(), http.StatusInternalServerError) return } var sleepTime time.Duration sleepTime, err2 = time.ParseDuration(sleepSetting.Value) if err2 != nil { logit.Error.Println(err2.Error()) rest.Error(w, err2.Error(), http.StatusInternalServerError) return } //create standby containers var count int count, err2 = strconv.Atoi(profile.Count) if err2 != nil { logit.Error.Println(err2.Error()) rest.Error(w, err2.Error(), http.StatusBadRequest) return } dockerstandby := make([]swarmapi.DockerRunRequest, count) for i := 0; i < count; i++ { logit.Info.Println("working on standby ....") // loop - provision standby dockerstandby[i].ProjectID = params.ProjectID dockerstandby[i].Image = "cpm-node" dockerstandby[i].ContainerName = params.Name + "-" + STANDBY + "-" + strconv.Itoa(i) dockerstandby[i].Standalone = "false" dockerstandby[i].Profile = profile.StandbyProfile _, err2 = provisionImpl(dbConn, &dockerstandby[i], true) if err2 != nil { logit.Error.Println("AutoCluster: error-provision master " + err2.Error()) rest.Error(w, "AutoCluster error"+err2.Error(), http.StatusBadRequest) return } //update node with cluster iD node, err2 = admindb.GetContainerByName(dbConn, dockerstandby[i].ContainerName) if err2 != nil { logit.Error.Println("AutoCluster: error-get node by name " + err2.Error()) rest.Error(w, "AutoCluster error"+err2.Error(), http.StatusBadRequest) return } node.ClusterID = clusterID node.Role = STANDBY err2 = admindb.UpdateContainer(dbConn, node) if err2 != nil { logit.Error.Println("AutoCluster: error-update standby node " + err2.Error()) rest.Error(w, "AutoCluster error"+err2.Error(), http.StatusBadRequest) return } } logit.Info.Println("AUTO CLUSTER PROFILE standbys created") //create pgpool container // provision dockerpgpool := swarmapi.DockerRunRequest{} dockerpgpool.ContainerName = params.Name + "-pgpool" dockerpgpool.Image = "cpm-pgpool" dockerpgpool.ProjectID = params.ProjectID dockerpgpool.Standalone = "false" dockerpgpool.Profile = profile.StandbyProfile _, err2 = provisionImpl(dbConn, &dockerpgpool, true) if err2 != nil { logit.Error.Println("AutoCluster: error-provision pgpool " + err2.Error()) rest.Error(w, "AutoCluster error"+err2.Error(), http.StatusBadRequest) return } logit.Info.Println("AUTO CLUSTER PROFILE pgpool created") //update node with cluster ID node, err2 = admindb.GetContainerByName(dbConn, dockerpgpool.ContainerName) if err2 != nil { logit.Error.Println("AutoCluster: error-get pgpool node by name " + err2.Error()) rest.Error(w, "AutoCluster error"+err2.Error(), http.StatusBadRequest) return } node.ClusterID = clusterID node.Role = "pgpool" err2 = admindb.UpdateContainer(dbConn, node) if err2 != nil { logit.Error.Println("AutoCluster: error-update pgpool node " + err2.Error()) rest.Error(w, "AutoCluster error"+err2.Error(), http.StatusBadRequest) return } //init the master DB // provision the master dockermaster.Profile = profile.MasterProfile err2 = provisionImplInit(dbConn, &dockermaster, false) if err2 != nil { logit.Error.Println("AutoCluster: error-provisionInit master " + err2.Error()) rest.Error(w, "AutoCluster error"+err2.Error(), http.StatusBadRequest) return } //make sure every node is ready err2 = waitTillAllReady(dockermaster, dockerpgpool, dockerstandby, sleepTime) if err2 != nil { logit.Error.Println("cluster members not responding in time") rest.Error(w, "AutoCluster error"+err2.Error(), http.StatusBadRequest) return } //configure cluster // ConfigureCluster logit.Info.Println("AUTO CLUSTER PROFILE configure cluster ") err2 = configureCluster(profile.MasterProfile, dbConn, dbcluster, true) if err2 != nil { logit.Error.Println("AutoCluster: error-configure cluster " + err2.Error()) rest.Error(w, "AutoCluster error"+err2.Error(), http.StatusBadRequest) return } logit.Info.Println("AUTO CLUSTER PROFILE done") w.WriteHeader(http.StatusOK) status := types.SimpleStatus{} status.Status = "OK" w.WriteJson(&status) }
func provisionImpl(dbConn *sql.DB, params *swarmapi.DockerRunRequest, standby bool) (string, error) { logit.Info.Println("PROFILE: provisionImpl starts 1") var errorStr string //make sure the container name is not already taken _, err := admindb.GetContainerByName(dbConn, params.ContainerName) if err != nil { if err != sql.ErrNoRows { return "", err } } else { errorStr = "container name " + params.ContainerName + " already used can't provision" logit.Error.Println(errorStr) return "", errors.New(errorStr) } //get the pg data path var pgdatapath types.Setting pgdatapath, err = admindb.GetSetting(dbConn, "PG-DATA-PATH") if err != nil { logit.Error.Println(err.Error()) return "", err } var infoResponse swarmapi.DockerInfoResponse infoResponse, err = swarmapi.DockerInfo() servers := make([]types.Server, len(infoResponse.Output)) i := 0 for i = range infoResponse.Output { servers[i].ID = infoResponse.Output[i] servers[i].Name = infoResponse.Output[i] servers[i].IPAddress = infoResponse.Output[i] i++ } //for database nodes, on the target server, we need to allocate //a disk volume on all CPM servers for the /pgdata container volume to work with //this causes a volume to be created with the directory //named the same as the container name params.PGDataPath = pgdatapath.Value + "/" + params.ContainerName logit.Info.Println("PROFILE provisionImpl 2 about to provision volume " + params.PGDataPath) if params.Image != "cpm-pgpool" { preq := &cpmserverapi.DiskProvisionRequest{} preq.Path = params.PGDataPath var response cpmserverapi.DiskProvisionResponse for _, each := range servers { logit.Info.Println("Provision: provisionvolume on server " + each.Name) response, err = cpmserverapi.DiskProvisionClient(each.Name, preq) if err != nil { logit.Info.Println("Provision: provisionvolume error" + err.Error()) logit.Error.Println(err.Error()) return "", err } logit.Info.Println("Provision: provisionvolume call response=" + response.Status) } } logit.Info.Println("PROFILE provisionImpl 3 provision volume completed") //run docker run to create the container params.CPU, params.MEM, err = getDockerResourceSettings(dbConn, params.Profile) if err != nil { logit.Error.Println(err.Error()) return "", err } //inspect and remove any existing container logit.Info.Println("PROFILE provisionImpl inspect 4") inspectReq := &swarmapi.DockerInspectRequest{} inspectReq.ContainerName = params.ContainerName var inspectResponse swarmapi.DockerInspectResponse inspectResponse, err = swarmapi.DockerInspect(inspectReq) if err != nil { logit.Error.Println(err.Error()) return "", err } if inspectResponse.RunningState != "not-found" { logit.Info.Println("PROFILE provisionImpl remove existing container 4a") rreq := &swarmapi.DockerRemoveRequest{} rreq.ContainerName = params.ContainerName _, err = swarmapi.DockerRemove(rreq) if err != nil { logit.Error.Println(err.Error()) return "", err } } //pass any restore env vars to the new container if params.RestoreJob != "" { if params.EnvVars == nil { //logit.Info.Println("making envvars map") params.EnvVars = make(map[string]string) } params.EnvVars["RestoreJob"] = params.RestoreJob params.EnvVars["RestoreRemotePath"] = params.RestoreRemotePath params.EnvVars["RestoreRemoteHost"] = params.RestoreRemoteHost params.EnvVars["RestoreRemoteUser"] = params.RestoreRemoteUser params.EnvVars["RestoreDbUser"] = params.RestoreDbUser params.EnvVars["RestoreDbPass"] = params.RestoreDbPass params.EnvVars["RestoreSet"] = params.RestoreSet } // runReq := swarmapi.DockerRunRequest{} runReq.PGDataPath = params.PGDataPath runReq.Profile = params.Profile runReq.Image = params.Image runReq.ContainerName = params.ContainerName runReq.EnvVars = params.EnvVars //logit.Info.Println("CPU=" + params.CPU) //logit.Info.Println("MEM=" + params.MEM) runReq.CPU = "0" runReq.MEM = "0" var runResp swarmapi.DockerRunResponse runResp, err = swarmapi.DockerRun(&runReq) if err != nil { logit.Error.Println(err.Error()) return "", err } logit.Info.Println("PROFILE provisionImpl created container 5 " + runResp.ID) dbnode := types.Container{} dbnode.ID = "" dbnode.Name = params.ContainerName dbnode.Image = params.Image dbnode.ClusterID = "-1" dbnode.ProjectID = params.ProjectID if params.Standalone == "true" { dbnode.Role = "standalone" } else { dbnode.Role = "unassigned" } var strid int strid, err = admindb.InsertContainer(dbConn, dbnode) newid := strconv.Itoa(strid) if err != nil { logit.Error.Println(err.Error()) return "", err } dbnode.ID = newid if params.Image != "cpm-node-proxy" { //register default db users on the new node err = createDBUsers(dbConn, dbnode) } return newid, err }
// ScaleUpCluster increases the count of standby containers in a cluster func ScaleUpCluster(w rest.ResponseWriter, r *rest.Request) { dbConn, err := util.GetConnection(CLUSTERADMIN_DB) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), 400) return } defer dbConn.Close() err = secimpl.Authorize(dbConn, r.PathParam("Token"), "perm-read") if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), http.StatusUnauthorized) return } ID := r.PathParam("ID") cluster, err := admindb.GetCluster(dbConn, ID) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), http.StatusBadRequest) return } var containers []types.Container containers, err = admindb.GetAllContainersForCluster(dbConn, ID) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), http.StatusBadRequest) return } //determine number of standby nodes currently standbyCnt := 0 for i := range containers { if containers[i].Role == STANDBY { standbyCnt++ } } //logit.Info.Printf("standbyCnt ends at %d\n", standbyCnt) //provision new container params := new(swarmapi.DockerRunRequest) params.Image = "cpm-node" //TODO make the server choice smart params.ProjectID = cluster.ProjectID params.ContainerName = cluster.Name + "-" + STANDBY + "-" + fmt.Sprintf("%d", standbyCnt) params.Standalone = "false" var standby = true params.Profile = "LG" //logit.Info.Printf("here with ProjectID %s\n", cluster.ProjectID) _, err = provisionImpl(dbConn, params, standby) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), http.StatusBadRequest) return } err = provisionImplInit(dbConn, params, false) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), http.StatusBadRequest) return } //need to update the new container's ClusterID var node types.Container node, err = admindb.GetContainerByName(dbConn, params.ContainerName) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, "error"+err.Error(), http.StatusBadRequest) return } node.ClusterID = cluster.ID node.Role = STANDBY err = admindb.UpdateContainer(dbConn, node) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, "error"+err.Error(), http.StatusBadRequest) return } err = configureCluster(params.Profile, dbConn, cluster, false) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), http.StatusBadRequest) return } w.WriteHeader(http.StatusOK) status := types.SimpleStatus{} status.Status = "OK" w.WriteJson(&status) }
// ExecuteNow executes a task schedule on demand allowing an immediate task execution func ExecuteNow(w rest.ResponseWriter, r *rest.Request) { dbConn, err := util.GetConnection(CLUSTERADMIN_DB) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), 400) return } defer dbConn.Close() postMsg := BackupNowPost{} err = r.DecodeJsonPayload(&postMsg) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), http.StatusInternalServerError) return } err = secimpl.Authorize(dbConn, postMsg.Token, "perm-backup") if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), http.StatusUnauthorized) return } if postMsg.ProfileName == "" { logit.Error.Println("node ProfileName required") rest.Error(w, "ProfileName required", 400) return } if postMsg.ScheduleID == "" { logit.Error.Println("schedule ID required") rest.Error(w, "schedule ID required", 400) return } schedule, err2 := task.GetSchedule(dbConn, postMsg.ScheduleID) if err2 != nil { logit.Error.Println(err2.Error()) rest.Error(w, err2.Error(), 400) return } request := task.TaskRequest{} request.ScheduleID = postMsg.ScheduleID //in the case of a restore job, the user can supply a new containername if postMsg.ContainerName == "" { request.ContainerName = schedule.ContainerName } else { request.ContainerName = postMsg.ContainerName } //the restore command requires the task schedule statusID request.StatusID = postMsg.StatusID request.ProfileName = postMsg.ProfileName //for restore jobs, we go ahead and create the new //database container here, could possible move to the //restore job task later on if postMsg.ProfileName == "restore" { var newid string provisionParams := swarmapi.DockerRunRequest{} provisionParams.Profile = "SM" provisionParams.ProjectID = postMsg.ProjectID provisionParams.ContainerName = postMsg.ContainerName provisionParams.Image = "cpm-node" provisionParams.IPAddress = schedule.Serverip //logit.Info.Println("before restore provision with...") //logit.Info.Println("profile=" + provisionParams.Profile) //logit.Info.Println("projectid=" + provisionParams.ProjectID) //logit.Info.Println("containername=" + provisionParams.ContainerName) //logit.Info.Println("image=" + provisionParams.Image) //logit.Info.Println("ipaddress=" + provisionParams.IPAddress) newid, err = provisionImpl(dbConn, &provisionParams, false) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), http.StatusInternalServerError) return } logit.Info.Printf("created node for restore job id = " + newid) } output, err := task.ExecuteNowClient(&request) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), http.StatusInternalServerError) return } logit.Info.Println("output=" + output.Output) w.WriteHeader(http.StatusOK) status := types.SimpleStatus{} status.Status = "OK" w.WriteJson(&status) }