// AutoCluster creates a new cluster func AutoCluster(w rest.ResponseWriter, r *rest.Request) { dbConn, err := util.GetConnection(CLUSTERADMIN_DB) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), 400) return } defer dbConn.Close() logit.Info.Println("AUTO CLUSTER PROFILE starts") params := AutoClusterInfo{} err = r.DecodeJsonPayload(¶ms) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), http.StatusInternalServerError) return } err = secimpl.Authorize(dbConn, params.Token, "perm-cluster") if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), http.StatusUnauthorized) return } if params.Name == "" { logit.Error.Println("AutoCluster: error in Name") rest.Error(w, "cluster name required", http.StatusBadRequest) return } if params.ClusterType == "" { logit.Error.Println("AutoCluster: error in ClusterType") rest.Error(w, "ClusterType name required", http.StatusBadRequest) return } if params.ProjectID == "" { logit.Error.Println("AutoCluster: error in ProjectID") rest.Error(w, "ProjectID name required", http.StatusBadRequest) return } if params.ClusterProfile == "" { logit.Error.Println("AutoCluster: error in ClusterProfile") rest.Error(w, "ClusterProfile name required", http.StatusBadRequest) return } logit.Info.Println("AutoCluster: Name=" + params.Name + " ClusterType=" + params.ClusterType + " Profile=" + params.ClusterProfile + " ProjectID=" + params.ProjectID) //create cluster definition dbcluster := types.Cluster{} dbcluster.ID = "" dbcluster.ProjectID = params.ProjectID dbcluster.Name = util.CleanName(params.Name) dbcluster.ClusterType = params.ClusterType dbcluster.Status = "uninitialized" dbcluster.Containers = make(map[string]string) var ival int ival, err = admindb.InsertCluster(dbConn, dbcluster) clusterID := strconv.Itoa(ival) dbcluster.ID = clusterID //logit.Info.Println(clusterID) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, "Insert Cluster error:"+err.Error(), http.StatusBadRequest) return } //lookup profile profile, err2 := getClusterProfileInfo(dbConn, params.ClusterProfile) if err2 != nil { logit.Error.Println(err2.Error()) rest.Error(w, "AutoCluster error"+err2.Error(), http.StatusBadRequest) return } //var masterServer types.Server //var chosenServers []types.Server if profile.Algo == "round-robin" { //masterServer, chosenServers, err2 = roundRobin(dbConn, profile) } else { logit.Error.Println("AutoCluster: error-unsupported algorithm request") rest.Error(w, "AutoCluster error: unsupported algorithm", http.StatusBadRequest) return } //create master container dockermaster := swarmapi.DockerRunRequest{} dockermaster.Image = "cpm-node" dockermaster.ContainerName = params.Name + "-master" dockermaster.ProjectID = params.ProjectID dockermaster.Standalone = "false" dockermaster.Profile = profile.MasterProfile if err != nil { logit.Error.Println("AutoCluster: error-create master node " + err.Error()) rest.Error(w, "AutoCluster error"+err.Error(), http.StatusBadRequest) return } // provision the master logit.Info.Println("dockermaster profile is " + dockermaster.Profile) _, err2 = provisionImpl(dbConn, &dockermaster, false) if err2 != nil { logit.Error.Println("AutoCluster: error-provision master " + err2.Error()) rest.Error(w, "AutoCluster error"+err2.Error(), http.StatusBadRequest) return } logit.Info.Println("AUTO CLUSTER PROFILE master container created") var node types.Container //update node with cluster iD node, err2 = admindb.GetContainerByName(dbConn, dockermaster.ContainerName) if err2 != nil { logit.Error.Println("AutoCluster: error-get node by name " + err2.Error()) rest.Error(w, "AutoCluster error"+err2.Error(), http.StatusBadRequest) return } node.ClusterID = clusterID node.Role = "master" err2 = admindb.UpdateContainer(dbConn, node) if err2 != nil { logit.Error.Println("AutoCluster: error-update standby node " + err2.Error()) rest.Error(w, "AutoCluster error"+err2.Error(), http.StatusBadRequest) return } var sleepSetting types.Setting sleepSetting, err2 = admindb.GetSetting(dbConn, "SLEEP-PROV") if err2 != nil { logit.Error.Println("SLEEP-PROV setting error " + err2.Error()) rest.Error(w, err2.Error(), http.StatusInternalServerError) return } var sleepTime time.Duration sleepTime, err2 = time.ParseDuration(sleepSetting.Value) if err2 != nil { logit.Error.Println(err2.Error()) rest.Error(w, err2.Error(), http.StatusInternalServerError) return } //create standby containers var count int count, err2 = strconv.Atoi(profile.Count) if err2 != nil { logit.Error.Println(err2.Error()) rest.Error(w, err2.Error(), http.StatusBadRequest) return } dockerstandby := make([]swarmapi.DockerRunRequest, count) for i := 0; i < count; i++ { logit.Info.Println("working on standby ....") // loop - provision standby dockerstandby[i].ProjectID = params.ProjectID dockerstandby[i].Image = "cpm-node" dockerstandby[i].ContainerName = params.Name + "-" + STANDBY + "-" + strconv.Itoa(i) dockerstandby[i].Standalone = "false" dockerstandby[i].Profile = profile.StandbyProfile _, err2 = provisionImpl(dbConn, &dockerstandby[i], true) if err2 != nil { logit.Error.Println("AutoCluster: error-provision master " + err2.Error()) rest.Error(w, "AutoCluster error"+err2.Error(), http.StatusBadRequest) return } //update node with cluster iD node, err2 = admindb.GetContainerByName(dbConn, dockerstandby[i].ContainerName) if err2 != nil { logit.Error.Println("AutoCluster: error-get node by name " + err2.Error()) rest.Error(w, "AutoCluster error"+err2.Error(), http.StatusBadRequest) return } node.ClusterID = clusterID node.Role = STANDBY err2 = admindb.UpdateContainer(dbConn, node) if err2 != nil { logit.Error.Println("AutoCluster: error-update standby node " + err2.Error()) rest.Error(w, "AutoCluster error"+err2.Error(), http.StatusBadRequest) return } } logit.Info.Println("AUTO CLUSTER PROFILE standbys created") //create pgpool container // provision dockerpgpool := swarmapi.DockerRunRequest{} dockerpgpool.ContainerName = params.Name + "-pgpool" dockerpgpool.Image = "cpm-pgpool" dockerpgpool.ProjectID = params.ProjectID dockerpgpool.Standalone = "false" dockerpgpool.Profile = profile.StandbyProfile _, err2 = provisionImpl(dbConn, &dockerpgpool, true) if err2 != nil { logit.Error.Println("AutoCluster: error-provision pgpool " + err2.Error()) rest.Error(w, "AutoCluster error"+err2.Error(), http.StatusBadRequest) return } logit.Info.Println("AUTO CLUSTER PROFILE pgpool created") //update node with cluster ID node, err2 = admindb.GetContainerByName(dbConn, dockerpgpool.ContainerName) if err2 != nil { logit.Error.Println("AutoCluster: error-get pgpool node by name " + err2.Error()) rest.Error(w, "AutoCluster error"+err2.Error(), http.StatusBadRequest) return } node.ClusterID = clusterID node.Role = "pgpool" err2 = admindb.UpdateContainer(dbConn, node) if err2 != nil { logit.Error.Println("AutoCluster: error-update pgpool node " + err2.Error()) rest.Error(w, "AutoCluster error"+err2.Error(), http.StatusBadRequest) return } //init the master DB // provision the master dockermaster.Profile = profile.MasterProfile err2 = provisionImplInit(dbConn, &dockermaster, false) if err2 != nil { logit.Error.Println("AutoCluster: error-provisionInit master " + err2.Error()) rest.Error(w, "AutoCluster error"+err2.Error(), http.StatusBadRequest) return } //make sure every node is ready err2 = waitTillAllReady(dockermaster, dockerpgpool, dockerstandby, sleepTime) if err2 != nil { logit.Error.Println("cluster members not responding in time") rest.Error(w, "AutoCluster error"+err2.Error(), http.StatusBadRequest) return } //configure cluster // ConfigureCluster logit.Info.Println("AUTO CLUSTER PROFILE configure cluster ") err2 = configureCluster(profile.MasterProfile, dbConn, dbcluster, true) if err2 != nil { logit.Error.Println("AutoCluster: error-configure cluster " + err2.Error()) rest.Error(w, "AutoCluster error"+err2.Error(), http.StatusBadRequest) return } logit.Info.Println("AUTO CLUSTER PROFILE done") w.WriteHeader(http.StatusOK) status := types.SimpleStatus{} status.Status = "OK" w.WriteJson(&status) }
// TODO func EventJoinCluster(w rest.ResponseWriter, r *rest.Request) { dbConn, err := util.GetConnection(CLUSTERADMIN_DB) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), 400) return } defer dbConn.Close() err = secimpl.Authorize(dbConn, r.PathParam("Token"), "perm-cluster") if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), http.StatusUnauthorized) return } IDList := r.PathParam("IDList") if IDList == "" { logit.Error.Println("IDList required") rest.Error(w, "IDList required", http.StatusBadRequest) return } else { logit.Info.Println("EventJoinCluster: IDList=[" + IDList + "]") } MasterID := r.PathParam("MasterID") if MasterID == "" { logit.Error.Println("MasterID required") rest.Error(w, "MasterID required", http.StatusBadRequest) return } else { logit.Info.Println("EventJoinCluster: MasterID=[" + MasterID + "]") } ClusterID := r.PathParam("ClusterID") if ClusterID == "" { logit.Error.Println("ClusterID required") rest.Error(w, "node ClusterID required", http.StatusBadRequest) return } else { logit.Info.Println("EventJoinCluster: ClusterID=[" + ClusterID + "]") } var idList = strings.Split(IDList, "_") i := 0 pgpoolCount := 0 origDBNode := types.Container{} for i = range idList { if idList[i] != "" { logit.Info.Println("EventJoinCluster: idList[" + strconv.Itoa(i) + "]=" + idList[i]) origDBNode, err = admindb.GetContainer(dbConn, idList[i]) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), http.StatusBadRequest) return } //update the node to be in the cluster origDBNode.ClusterID = ClusterID if origDBNode.Image == "cpm-node" { origDBNode.Role = STANDBY } else { origDBNode.Role = "pgpool" pgpoolCount++ } if pgpoolCount > 1 { logit.Error.Println("EventJoinCluster: more than 1 pgpool is in the cluster") rest.Error(w, "only 1 pgpool is allowed in a cluster", http.StatusBadRequest) return } err = admindb.UpdateContainer(dbConn, origDBNode) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), http.StatusBadRequest) return } } i++ } //we use the -1 value to indicate that we are only adding //to an existing cluster, the UI doesn't know who the master //is at this point if MasterID != "-1" { //update the master node origDBNode, err = admindb.GetContainer(dbConn, MasterID) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), http.StatusBadRequest) return } origDBNode.ClusterID = ClusterID origDBNode.Role = "master" err = admindb.UpdateContainer(dbConn, origDBNode) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), http.StatusBadRequest) return } } w.WriteHeader(http.StatusOK) status := types.SimpleStatus{} status.Status = "OK" w.WriteJson(&status) }
// ScaleUpCluster increases the count of standby containers in a cluster func ScaleUpCluster(w rest.ResponseWriter, r *rest.Request) { dbConn, err := util.GetConnection(CLUSTERADMIN_DB) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), 400) return } defer dbConn.Close() err = secimpl.Authorize(dbConn, r.PathParam("Token"), "perm-read") if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), http.StatusUnauthorized) return } ID := r.PathParam("ID") cluster, err := admindb.GetCluster(dbConn, ID) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), http.StatusBadRequest) return } var containers []types.Container containers, err = admindb.GetAllContainersForCluster(dbConn, ID) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), http.StatusBadRequest) return } //determine number of standby nodes currently standbyCnt := 0 for i := range containers { if containers[i].Role == STANDBY { standbyCnt++ } } //logit.Info.Printf("standbyCnt ends at %d\n", standbyCnt) //provision new container params := new(swarmapi.DockerRunRequest) params.Image = "cpm-node" //TODO make the server choice smart params.ProjectID = cluster.ProjectID params.ContainerName = cluster.Name + "-" + STANDBY + "-" + fmt.Sprintf("%d", standbyCnt) params.Standalone = "false" var standby = true params.Profile = "LG" //logit.Info.Printf("here with ProjectID %s\n", cluster.ProjectID) _, err = provisionImpl(dbConn, params, standby) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), http.StatusBadRequest) return } err = provisionImplInit(dbConn, params, false) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), http.StatusBadRequest) return } //need to update the new container's ClusterID var node types.Container node, err = admindb.GetContainerByName(dbConn, params.ContainerName) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, "error"+err.Error(), http.StatusBadRequest) return } node.ClusterID = cluster.ID node.Role = STANDBY err = admindb.UpdateContainer(dbConn, node) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, "error"+err.Error(), http.StatusBadRequest) return } err = configureCluster(params.Profile, dbConn, cluster, false) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), http.StatusBadRequest) return } w.WriteHeader(http.StatusOK) status := types.SimpleStatus{} status.Status = "OK" w.WriteJson(&status) }
// AdminFailover causes a cluster failorver to be performed for a given cluster func AdminFailover(w rest.ResponseWriter, r *rest.Request) { dbConn, err := util.GetConnection(CLUSTERADMIN_DB) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), 400) return } defer dbConn.Close() err = secimpl.Authorize(dbConn, r.PathParam("Token"), "perm-cluster") if err != nil { logit.Error.Println("authorize error " + err.Error()) rest.Error(w, err.Error(), http.StatusUnauthorized) return } ID := r.PathParam("ID") if ID == "" { logit.Error.Println("node ID required error") rest.Error(w, "node ID required", http.StatusBadRequest) return } //dbNode is the standby node we are going to fail over and //make the new master in the cluster var dbNode types.Container dbNode, err = admindb.GetContainer(dbConn, ID) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), http.StatusBadRequest) return } cluster, err := admindb.GetCluster(dbConn, dbNode.ClusterID) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), http.StatusBadRequest) return } var failoverResp cpmcontainerapi.FailoverResponse failoverResp, err = cpmcontainerapi.FailoverClient(dbNode.Name) if err != nil { logit.Error.Println("fail-over error " + err.Error()) rest.Error(w, err.Error(), http.StatusBadRequest) return } logit.Info.Println("AdminFailover: fail-over output " + failoverResp.Output) //update the old master to standalone role oldMaster := types.Container{} oldMaster, err = admindb.GetContainerMaster(dbConn, dbNode.ClusterID) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), http.StatusBadRequest) return } oldMaster.Role = "standalone" oldMaster.ClusterID = "-1" err = admindb.UpdateContainer(dbConn, oldMaster) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), http.StatusBadRequest) return } //update the failover node to master role dbNode.Role = "master" err = admindb.UpdateContainer(dbConn, dbNode) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), http.StatusBadRequest) return } //stop pg on the old master //params.IPAddress1 = oldMaster.IPAddress var stopPGResp cpmcontainerapi.StopPGResponse stopPGResp, err = cpmcontainerapi.StopPGClient(oldMaster.Name) if err != nil { logit.Error.Println(err.Error() + stopPGResp.Output) rest.Error(w, err.Error(), http.StatusBadRequest) return } err = configureCluster("SM", dbConn, cluster, false) if err != nil { logit.Error.Println(err.Error()) rest.Error(w, err.Error(), http.StatusBadRequest) return } w.WriteHeader(http.StatusOK) status := types.SimpleStatus{} status.Status = "OK" w.WriteJson(&status) return }
func provisionImpl(dbConn *sql.DB, params *swarmapi.DockerRunRequest, standby bool) (string, error) { logit.Info.Println("PROFILE: provisionImpl starts 1") var errorStr string //make sure the container name is not already taken _, err := admindb.GetContainerByName(dbConn, params.ContainerName) if err != nil { if err != sql.ErrNoRows { return "", err } } else { errorStr = "container name " + params.ContainerName + " already used can't provision" logit.Error.Println(errorStr) return "", errors.New(errorStr) } //get the pg data path var pgdatapath types.Setting pgdatapath, err = admindb.GetSetting(dbConn, "PG-DATA-PATH") if err != nil { logit.Error.Println(err.Error()) return "", err } var infoResponse swarmapi.DockerInfoResponse infoResponse, err = swarmapi.DockerInfo() servers := make([]types.Server, len(infoResponse.Output)) i := 0 for i = range infoResponse.Output { servers[i].ID = infoResponse.Output[i] servers[i].Name = infoResponse.Output[i] servers[i].IPAddress = infoResponse.Output[i] i++ } //for database nodes, on the target server, we need to allocate //a disk volume on all CPM servers for the /pgdata container volume to work with //this causes a volume to be created with the directory //named the same as the container name params.PGDataPath = pgdatapath.Value + "/" + params.ContainerName logit.Info.Println("PROFILE provisionImpl 2 about to provision volume " + params.PGDataPath) if params.Image != "cpm-pgpool" { preq := &cpmserverapi.DiskProvisionRequest{} preq.Path = params.PGDataPath var response cpmserverapi.DiskProvisionResponse for _, each := range servers { logit.Info.Println("Provision: provisionvolume on server " + each.Name) response, err = cpmserverapi.DiskProvisionClient(each.Name, preq) if err != nil { logit.Info.Println("Provision: provisionvolume error" + err.Error()) logit.Error.Println(err.Error()) return "", err } logit.Info.Println("Provision: provisionvolume call response=" + response.Status) } } logit.Info.Println("PROFILE provisionImpl 3 provision volume completed") //run docker run to create the container params.CPU, params.MEM, err = getDockerResourceSettings(dbConn, params.Profile) if err != nil { logit.Error.Println(err.Error()) return "", err } //inspect and remove any existing container logit.Info.Println("PROFILE provisionImpl inspect 4") inspectReq := &swarmapi.DockerInspectRequest{} inspectReq.ContainerName = params.ContainerName var inspectResponse swarmapi.DockerInspectResponse inspectResponse, err = swarmapi.DockerInspect(inspectReq) if err != nil { logit.Error.Println(err.Error()) return "", err } if inspectResponse.RunningState != "not-found" { logit.Info.Println("PROFILE provisionImpl remove existing container 4a") rreq := &swarmapi.DockerRemoveRequest{} rreq.ContainerName = params.ContainerName _, err = swarmapi.DockerRemove(rreq) if err != nil { logit.Error.Println(err.Error()) return "", err } } //pass any restore env vars to the new container if params.RestoreJob != "" { if params.EnvVars == nil { //logit.Info.Println("making envvars map") params.EnvVars = make(map[string]string) } params.EnvVars["RestoreJob"] = params.RestoreJob params.EnvVars["RestoreRemotePath"] = params.RestoreRemotePath params.EnvVars["RestoreRemoteHost"] = params.RestoreRemoteHost params.EnvVars["RestoreRemoteUser"] = params.RestoreRemoteUser params.EnvVars["RestoreDbUser"] = params.RestoreDbUser params.EnvVars["RestoreDbPass"] = params.RestoreDbPass params.EnvVars["RestoreSet"] = params.RestoreSet } // runReq := swarmapi.DockerRunRequest{} runReq.PGDataPath = params.PGDataPath runReq.Profile = params.Profile runReq.Image = params.Image runReq.ContainerName = params.ContainerName runReq.EnvVars = params.EnvVars //logit.Info.Println("CPU=" + params.CPU) //logit.Info.Println("MEM=" + params.MEM) runReq.CPU = "0" runReq.MEM = "0" var runResp swarmapi.DockerRunResponse runResp, err = swarmapi.DockerRun(&runReq) if err != nil { logit.Error.Println(err.Error()) return "", err } logit.Info.Println("PROFILE provisionImpl created container 5 " + runResp.ID) dbnode := types.Container{} dbnode.ID = "" dbnode.Name = params.ContainerName dbnode.Image = params.Image dbnode.ClusterID = "-1" dbnode.ProjectID = params.ProjectID if params.Standalone == "true" { dbnode.Role = "standalone" } else { dbnode.Role = "unassigned" } var strid int strid, err = admindb.InsertContainer(dbConn, dbnode) newid := strconv.Itoa(strid) if err != nil { logit.Error.Println(err.Error()) return "", err } dbnode.ID = newid if params.Image != "cpm-node-proxy" { //register default db users on the new node err = createDBUsers(dbConn, dbnode) } return newid, err }