func checkAndUpdateAutoExpand(t *task.Task, clusterId uuid.UUID, ctxt string) {
	sessionCopy := db.GetDatastore().Copy()
	defer sessionCopy.Close()
	coll := sessionCopy.DB(conf.SystemConfig.DBConfig.Database).C(models.COLL_NAME_STORAGE_LOGICAL_UNITS)
	var slus []models.StorageLogicalUnit
	if err := coll.Find(bson.M{"clusterid": clusterId}).All(&slus); err != nil {
		logger.Get().Error(
			"%s-Error getting SLUs of cluster: %v for colocation check. error: %v",
			ctxt,
			clusterId,
			err)
		return
	}
	for _, slu := range slus {
		// If any SLU has its journal colocated on its data disk,
		// disable auto expand for the cluster
		journalDet := slu.Options["journal"].(map[string]interface{})
		if slu.Options["device"] == journalDet["journaldisk"].(string) {
			coll1 := sessionCopy.DB(conf.SystemConfig.DBConfig.Database).C(models.COLL_NAME_STORAGE_CLUSTERS)
			if err := coll1.Update(
				bson.M{"clusterid": clusterId},
				bson.M{"$set": bson.M{"autoexpand": false}}); err != nil {
				logger.Get().Error(
					"%s-Error setting autoexpand flag for cluster: %v. error: %v",
					ctxt,
					clusterId,
					err)
				return
			}
			t.UpdateStatus("Disabled auto expand for cluster")
			break
		}
	}
	return
}
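// Illustration only (not part of the provider code): the SLU document shape the
// colocation check above expects. The keys "device", "journal" and "journaldisk"
// are the ones checkAndUpdateAutoExpand reads; the models.StorageLogicalUnit
// field names and the disk paths used here are assumptions for the sketch.
func exampleColocatedSlu(clusterId uuid.UUID) models.StorageLogicalUnit {
	return models.StorageLogicalUnit{
		ClusterId: clusterId,
		Options: map[string]interface{}{
			"device": "/dev/sdb",
			"journal": map[string]interface{}{
				// Journal on the same disk as the data device, so the
				// check above would disable auto expand for the cluster
				"journaldisk": "/dev/sdb",
			},
		},
	}
}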
func createBlockDevices(
	ctxt string,
	mon string,
	cluster models.Cluster,
	poolId uuid.UUID,
	request models.AddStorageRequest,
	t *task.Task) {
	t.UpdateStatus("Creating block devices")
	var failedBlkDevices []string
	for _, entry := range request.BlockDevices {
		blockDevice := models.BlockDevice{
			Name:             entry.Name,
			Tags:             entry.Tags,
			ClusterId:        cluster.ClusterId,
			ClusterName:      cluster.Name,
			StorageId:        poolId,
			StorageName:      request.Name,
			Size:             entry.Size,
			SnapshotsEnabled: entry.SnapshotsEnabled,
			// TODO: Populate the schedule ids once schedule created
			// SnapshotScheduleIds = <created schedule ids>
			QuotaEnabled: entry.QuotaEnabled,
			QuotaParams:  entry.QuotaParams,
			Options:      entry.Options,
		}
		if ok := createBlockStorage(
			ctxt,
			mon,
			cluster.ClusterId,
			cluster.Name,
			request.Name,
			blockDevice,
			t); !ok {
			failedBlkDevices = append(failedBlkDevices, entry.Name)
		}
	}
	if len(failedBlkDevices) > 0 {
		t.UpdateStatus("Block device creation failed for: %v", failedBlkDevices)
	}
}
func createPool(ctxt string, clusterId uuid.UUID, request models.AddStorageRequest, t *task.Task) (*uuid.UUID, bool) {
	sessionCopy := db.GetDatastore().Copy()
	defer sessionCopy.Close()

	t.UpdateStatus("Getting cluster details")
	// Get cluster details
	var cluster models.Cluster
	coll := sessionCopy.DB(conf.SystemConfig.DBConfig.Database).C(models.COLL_NAME_STORAGE_CLUSTERS)
	if err := coll.Find(bson.M{"clusterid": clusterId}).One(&cluster); err != nil {
		utils.FailTask(fmt.Sprintf("Error getting the cluster details for :%v", clusterId), fmt.Errorf("%s - %v", ctxt, err), t)
		return nil, false
	}

	t.UpdateStatus("Getting a mon from cluster")
	monnode, err := GetCalamariMonNode(clusterId, ctxt)
	if err != nil {
		utils.FailTask(fmt.Sprintf("Error getting mon node details for cluster: %v", clusterId), fmt.Errorf("%s - %v", ctxt, err), t)
		return nil, false
	}

	t.UpdateStatus("Creating pool")

	// If quota is enabled, pick up the quota config values from the request
	var quotaMaxObjects int
	var quotaMaxBytes uint64
	if request.QuotaEnabled {
		var err error
		if request.QuotaParams["quota_max_objects"] != "" {
			if quotaMaxObjects, err = strconv.Atoi(request.QuotaParams["quota_max_objects"]); err != nil {
				utils.FailTask(fmt.Sprintf("Error parsing quota config value quota_max_objects for pool %s on cluster: %v", request.Name, clusterId), fmt.Errorf("%s - %v", ctxt, err), t)
				return nil, false
			}
		}
		if request.QuotaParams["quota_max_bytes"] != "" {
			if quotaMaxBytes, err = strconv.ParseUint(request.QuotaParams["quota_max_bytes"], 10, 64); err != nil {
				utils.FailTask(fmt.Sprintf("Error parsing quota config value quota_max_bytes for pool %s on cluster: %v", request.Name, clusterId), fmt.Errorf("%s - %v", ctxt, err), t)
				return nil, false
			}
		}
	}

	// Invoke backend api to create pool
	var pgNum uint
	if request.Options["pgnum"] == "" {
		utils.FailTask("", fmt.Errorf("%s - Pg num not provided", ctxt), t)
		return nil, false
	} else {
		val, err := strconv.ParseUint(request.Options["pgnum"], 10, 32)
		if err != nil {
			utils.FailTask("", fmt.Errorf("%s - Invalid pg num value: %s. error: %v", ctxt, request.Options["pgnum"], err), t)
			return nil, false
		}
		pgNum = uint(val)
	}
	if request.Type == models.STORAGE_TYPE_ERASURE_CODED {
		ok, err := validECProfile(ctxt, monnode.Hostname, cluster, request.Options["ecprofile"])
		if err != nil {
			utils.FailTask("", fmt.Errorf("%s - Error checking validity of ec profile value. error: %v", ctxt, err), t)
			return nil, false
		}
		if !ok {
			utils.FailTask(
				"",
				fmt.Errorf(
					"%s-Invalid EC profile value: %s passed for pool: %s creation on cluster: %s",
					ctxt,
					request.Options["ecprofile"],
					request.Name,
					cluster.Name),
				t)
			return nil, false
		}
	}
	rulesetmapval, ok := cluster.Options["rulesetmap"]
	if !ok {
		logger.Get().Error("Error getting the ruleset for cluster: %s", cluster.Name)
		utils.FailTask("", fmt.Errorf("%s - Error getting the ruleset for cluster: %s", ctxt, cluster.Name), t)
		return nil, false
	}
	rulesetmap := rulesetmapval.(map[string]interface{})
	rulesetval, ok := rulesetmap[request.Profile]
	if !ok {
		logger.Get().Error("Error getting the ruleset for cluster: %s", cluster.Name)
		return nil, false
	}
	ruleset := rulesetval.(map[string]interface{})

	if request.Type == models.STORAGE_TYPE_ERASURE_CODED {
		// cmd := fmt.Sprintf("ceph --cluster %s osd pool create %s %d %d erasure %s", cluster.Name, request.Name, uint(pgNum), uint(pgNum), request.Options["ecprofile"])
		// ok, _, err = cephapi_backend.ExecCmd(monnode.Hostname, clusterId, cmd, ctxt)
		// time.Sleep(10 * time.Second)
		ok, err = cephapi_backend.CreateECPool(
			request.Name,
			monnode.Hostname,
			cluster.Name,
			uint(pgNum),
			request.Replicas,
			quotaMaxObjects,
			quotaMaxBytes,
			request.Options["ecprofile"],
			ruleset,
			request.Profile,
			ctxt)
	} else {
		ok, err = cephapi_backend.CreatePool(
			request.Name,
			monnode.Hostname,
			cluster.Name,
			uint(pgNum),
			request.Replicas,
			quotaMaxObjects,
			quotaMaxBytes,
			ruleset["rulesetid"].(int),
			ctxt)
	}
	if err == cephapi.ErrTimedOut || err == nil {
		pools, err := cephapi_backend.GetPools(monnode.Hostname, clusterId, ctxt)
		if err != nil {
			utils.FailTask("Error getting created pools", fmt.Errorf("%s - %v", ctxt, err), t)
			return nil, false
		}
		storage_id, err := uuid.New()
		if err != nil {
			utils.FailTask("Error creating id for pool", fmt.Errorf("%s - %v", ctxt, err), t)
			return nil, false
		}
		for _, pool := range pools {
			if request.Name == pool.Name {
				t.UpdateStatus("Persisting the storage entity")
				var storage models.Storage
				storage.StorageId = *storage_id
				storage.Name = request.Name
				storage.Type = request.Type
				storage.Tags = request.Tags
				storage.ClusterId = clusterId
				storage.Size = request.Size
				storage.Status = models.STORAGE_STATUS_OK
				storage.Replicas = request.Replicas
				storage.Profile = request.Profile
				storage.SnapshotsEnabled = request.SnapshotsEnabled
				// TODO: Populate the schedule ids once schedule created
				// storage.SnapshotScheduleIds = <created schedule ids>
				storage.QuotaEnabled = request.QuotaEnabled
				storage.QuotaParams = request.QuotaParams
				options := make(map[string]string)
				options["id"] = strconv.Itoa(pool.Id)
				options["pg_num"] = strconv.Itoa(pool.PgNum)
				options["pgp_num"] = strconv.Itoa(pool.PgpNum)
				options["full"] = strconv.FormatBool(pool.Full)
				options["hashpspool"] = strconv.FormatBool(pool.HashPsPool)
				options["min_size"] = strconv.FormatUint(pool.MinSize, 10)
				options["crash_replay_interval"] = strconv.Itoa(pool.CrashReplayInterval)
				options["crush_ruleset"] = strconv.Itoa(pool.CrushRuleSet)
				if request.Type == models.STORAGE_TYPE_ERASURE_CODED {
					options["ecprofile"] = request.Options["ecprofile"]
				}
				storage.Options = options

				coll := sessionCopy.DB(conf.SystemConfig.DBConfig.Database).C(models.COLL_NAME_STORAGE)
				if _, err := coll.Upsert(bson.M{"name": storage.Name, "clusterid": storage.ClusterId}, bson.M{"$set": storage}); err != nil {
					utils.FailTask(fmt.Sprintf("Error persisting pool %s for cluster: %s", request.Name, cluster.Name), fmt.Errorf("%s - %v", ctxt, err), t)
					return nil, false
				}
				break
			}
		}
		return storage_id, true
	} else {
		utils.FailTask(fmt.Sprintf("Create pool %s failed on cluster: %s", request.Name, cluster.Name), fmt.Errorf("%s - %v", ctxt, err), t)
		return nil, false
	}
}
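// A minimal sketch (not from the codebase) of the models.AddStorageRequest fields
// createPool consumes, as a reference for the option and quota keys it parses.
// Field names mirror the usage above; the concrete values, the "sas" profile and
// the "default" EC profile names are made up for illustration.
func exampleAddStorageRequest() models.AddStorageRequest {
	return models.AddStorageRequest{
		Name:         "rbd_pool_1",
		Type:         models.STORAGE_TYPE_ERASURE_CODED, // EC pools also need Options["ecprofile"]
		Replicas:     3,
		Profile:      "sas", // must match a key in the cluster's "rulesetmap"
		QuotaEnabled: true,
		QuotaParams: map[string]string{
			"quota_max_objects": "100000",     // parsed with strconv.Atoi
			"quota_max_bytes":   "1073741824", // parsed with strconv.ParseUint
		},
		Options: map[string]string{
			"pgnum":     "128",     // mandatory; createPool fails the task if empty
			"ecprofile": "default", // validated via validECProfile for EC pools
		},
	}
}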
func initializeStorageNode(node string, t *task.Task, ctxt string) error {
	sProfiles, err := GetDbProvider().StorageProfileInterface().StorageProfiles(ctxt, nil, models.QueryOps{})
	if err != nil {
		logger.Get().Error("%s-Unable to get the storage profiles. May not be able to apply storage profiles for node: %v err:%v", ctxt, node, err)
	}
	if storage_node, ok := saltnodemanager.GetStorageNodeInstance(node, sProfiles, ctxt); ok {
		if nodeErrorMap, configureError := GetCoreNodeManager().SetUpMonitoring(
			node,
			curr_hostname,
			ctxt); configureError != nil || len(nodeErrorMap) != 0 {
			if len(nodeErrorMap) != 0 {
				logger.Get().Error("%s-Unable to setup collectd on %s because of %v", ctxt, node, nodeErrorMap)
				t.UpdateStatus("Unable to setup collectd on %s because of %v", node, nodeErrorMap)
				skyringutils.UpdateNodeState(ctxt, node, models.NODE_STATE_FAILED)
				skyringutils.UpdateNodeStatus(ctxt, node, models.NODE_STATUS_UNKNOWN)
				return fmt.Errorf("Unable to setup collectd on %s because of %v", node, nodeErrorMap)
			} else {
				logger.Get().Error("%s-Config Error during monitoring setup for node:%s Error:%v", ctxt, node, configureError)
				t.UpdateStatus("Config Error during monitoring setup for node:%s Error:%v", node, configureError)
				skyringutils.UpdateNodeState(ctxt, node, models.NODE_STATE_FAILED)
				skyringutils.UpdateNodeStatus(ctxt, node, models.NODE_STATUS_UNKNOWN)
				return configureError
			}
		}
		util.AppendServiceToNode(bson.M{"hostname": node}, models.SkyringServices[0], models.STATUS_UP, ctxt)
		if ok, err := GetCoreNodeManager().SyncModules(node, ctxt); !ok || err != nil {
			logger.Get().Error("%s-Failed to sync modules on the node: %s. error: %v", ctxt, node, err)
			t.UpdateStatus("Failed to sync modules")
			skyringutils.UpdateNodeState(ctxt, node, models.NODE_STATE_FAILED)
			skyringutils.UpdateNodeStatus(ctxt, node, models.NODE_STATUS_UNKNOWN)
			return err
		}
		if err := saltnodemanager.SetupSkynetService(node, ctxt); err != nil {
			logger.Get().Error("%s-Failed to setup skynet service on the node: %s. error: %v", ctxt, node, err)
			t.UpdateStatus("Failed to setup skynet service")
			skyringutils.UpdateNodeState(ctxt, node, models.NODE_STATE_FAILED)
			skyringutils.UpdateNodeStatus(ctxt, node, models.NODE_STATUS_UNKNOWN)
			return err
		}
		if err := updateStorageNodeToDB(*storage_node, ctxt); err != nil {
			logger.Get().Error("%s-Unable to add details of node: %s to DB. error: %v", ctxt, node, err)
			t.UpdateStatus("Unable to add details of node: %s to DB. error: %v", node, err)
			skyringutils.UpdateNodeState(ctxt, node, models.NODE_STATE_FAILED)
			skyringutils.UpdateNodeStatus(ctxt, node, models.NODE_STATUS_UNKNOWN)
			return err
		}
		if ok, err := GetCoreNodeManager().EnableNode(node, ctxt); !ok || err != nil {
			logger.Get().Error("%s- Failed to enable the node. Error: %v", ctxt, err)
		}
		return nil
	} else {
		logger.Get().Critical("%s-Error getting the details for node: %s", ctxt, node)
		t.UpdateStatus("Error getting the details for node: %s", node)
		skyringutils.UpdateNodeState(ctxt, node, models.NODE_STATE_FAILED)
		skyringutils.UpdateNodeStatus(ctxt, node, models.NODE_STATUS_UNKNOWN)
		return fmt.Errorf("Error getting the details for node: %s", node)
	}
}
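// Hypothetical call pattern (not the actual skyring call site): initializing a
// batch of accepted nodes serially and collecting the ones that failed, so the
// caller can surface them on the task. The node hostnames and the task come from
// the surrounding cluster-create flow.
func initializeNodes(nodes []string, t *task.Task, ctxt string) []string {
	var failed []string
	for _, node := range nodes {
		if err := initializeStorageNode(node, t, ctxt); err != nil {
			failed = append(failed, node)
		}
	}
	if len(failed) > 0 {
		t.UpdateStatus("Initialization failed for nodes: %v", failed)
	}
	return failed
}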
func FailTask(msg string, err error, t *task.Task) {
	logger.Get().Error("%s: %v", msg, err)
	t.UpdateStatus("Failed. error: %v", err)
	t.Done(models.TASK_STATUS_FAILURE)
}
func (c CephInstaller) Install(ctxt string, t *task.Task, providerName string, nodes []models.ClusterNode) []models.ClusterNode {
	db_nodes, err := util.GetNodesByIdStr(nodes)
	if err != nil {
		logger.Get().Error("%s-Error getting nodes while package installation: %v", ctxt, err)
		return nodes
	}
	var (
		//wg sync.WaitGroup
		failedNodes []models.ClusterNode
		//syncMutex sync.Mutex
	)
	// The code segment below invokes ceph-installer in parallel, as separate
	// go-routines, one per node. But ceph-installer is not yet capable of
	// installing nodes in parallel, so until that capability is added the
	// installation is invoked serially and this block stays commented out.
	/*
		for _, node := range nodes {
			wg.Add(1)
			go func(node models.ClusterNode, hostname string) {
				defer wg.Done()
				var route models.ApiRoute
				hosts := []string{hostname}
				data := make(map[string]interface{})
				if util.StringInSlice(MON, node.NodeType) {
					route = CEPH_INSTALLER_API_ROUTES["monInstall"]
					data["calamari"] = true
				} else if util.StringInSlice(OSD, node.NodeType) {
					route = CEPH_INSTALLER_API_ROUTES["osdInstall"]
				}
				data["hosts"] = hosts
				data["redhat_storage"] = conf.SystemConfig.Provisioners[providerName].RedhatStorage
				reqData, err := formConfigureData(data)
				if err != nil {
					failedNodes = append(failedNodes, node)
					return
				}
				resp, body, errs := httprequest.Post(formUrl(route)).Send(reqData).End()
				respData, err := parseInstallResponseData(ctxt, resp, body, errs)
				if err != nil {
					syncMutex.Lock()
					defer syncMutex.Unlock()
					failedNodes = append(failedNodes, node)
					return
				}
				logger.Get().Info(
					"%s-Started installation on node :%s. TaskId: %s. Request Data: %v. Route: %s",
					ctxt,
					hostname,
					respData.Identifier,
					data,
					formUrl(route))
				if ok := syncRequestStatus(ctxt, respData.Identifier); !ok {
					syncMutex.Lock()
					defer syncMutex.Unlock()
					failedNodes = append(failedNodes, node)
					return
				}
				t.UpdateStatus("Installed packages on: %s", hostname)
			}(node, db_nodes[node.NodeId].Hostname)
		}
		wg.Wait()
	*/
	for _, node := range nodes {
		if ok := installNode(node, db_nodes[node.NodeId].Hostname, providerName, ctxt); !ok {
			t.UpdateStatus("Package installation failed on: %s", db_nodes[node.NodeId].Hostname)
			failedNodes = append(failedNodes, node)
		} else {
			t.UpdateStatus("Installed packages on: %s", db_nodes[node.NodeId].Hostname)
		}
	}
	return failedNodes
}