Ejemplo n.º 1
0
// GetRunningServicesForService fills services with the running instances
// reported by zkservice.LoadRunningServicesByService for serviceID, using a
// connection scoped to the pool that owns the service. services is reset to an
// empty, non-nil slice before any lookup, so it is usable on every return path.
func (this *ControlPlaneDao) GetRunningServicesForService(serviceID string, services *[]dao.RunningService) error {
	// Reset the output first; the caller may have handed us an uninitialized slice.
	*services = []dao.RunningService{}

	poolID, err := this.facade.GetPoolForService(datastore.Get(), serviceID)
	if err != nil {
		glog.Errorf("Unable to get service %v: %v", serviceID, err)
		return err
	}

	conn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(poolID))
	if err != nil {
		glog.Errorf("Error in getting a connection based on pool %v: %v", poolID, err)
		return err
	}

	running, err := zkservice.LoadRunningServicesByService(conn, serviceID)
	if err != nil {
		glog.Errorf("LoadRunningServicesByService failed (conn: %+v serviceID: %v): %v", conn, serviceID, err)
		return err
	}

	*services = append(*services, running...)
	return nil
}
Ejemplo n.º 2
0
// RemoveVirtualIP removes virtualIP.IP through a coordinator connection scoped
// to the pool named by virtualIP.PoolID. Connection errors and removal errors
// are returned unchanged.
func (z *zkf) RemoveVirtualIP(virtualIP *pool.VirtualIP) error {
	poolPath := zzk.GeneratePoolPath(virtualIP.PoolID)
	poolConn, connErr := zzk.GetLocalConnection(poolPath)
	if connErr != nil {
		return connErr
	}
	return zkvirtualip.RemoveVirtualIP(poolConn, virtualIP.IP)
}
Ejemplo n.º 3
0
// GetRunningServicesForHost stores in services whatever
// zkservice.LoadRunningServicesByHost returns for hostID, using a connection
// scoped to the host's pool. If the facade returns no host (nil, no error) the
// call succeeds and services is left as an empty, non-nil slice.
func (this *ControlPlaneDao) GetRunningServicesForHost(hostID string, services *[]dao.RunningService) error {
	// Reset the output first; the caller may have handed us an uninitialized slice.
	*services = []dao.RunningService{}

	targetHost, err := this.facade.GetHost(datastore.Get(), hostID)
	if err != nil {
		glog.Errorf("Unable to get host %v: %v", hostID, err)
		return err
	}
	if targetHost == nil {
		// No such host: nothing to report, and not treated as an error.
		return nil
	}

	conn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(targetHost.PoolID))
	if err != nil {
		glog.Errorf("Error in getting a connection based on pool %v: %v", targetHost.PoolID, err)
		return err
	}

	// The loader's result is stored even when it also reports an error.
	loaded, err := zkservice.LoadRunningServicesByHost(conn, hostID)
	*services = loaded
	if err != nil {
		glog.Errorf("zkservice.LoadRunningServicesByHost (conn: %+v host: %v) failed: %v", conn, hostID, err)
		return err
	}

	return nil
}
Ejemplo n.º 4
0
// UpdateHost forwards host to zkhost.UpdateHost over a coordinator connection
// scoped to the pool named by host.PoolID.
func (z *zkf) UpdateHost(host *host.Host) error {
	poolPath := zzk.GeneratePoolPath(host.PoolID)
	poolConn, connErr := zzk.GetLocalConnection(poolPath)
	if connErr != nil {
		return connErr
	}
	return zkhost.UpdateHost(poolConn, host)
}
Ejemplo n.º 5
0
// GetRunningService looks up the running instance identified by
// request.ServiceID and request.ServiceStateID and copies it into running.
// running is reset to its zero value first and stays that way when the lookup
// returns nil without an error.
func (this *ControlPlaneDao) GetRunningService(request dao.ServiceStateRequest, running *dao.RunningService) error {
	glog.V(3).Infof("ControlPlaneDao.GetRunningService: request=%v", request)
	*running = dao.RunningService{}

	poolID, err := this.facade.GetPoolForService(datastore.Get(), request.ServiceID)
	if err != nil {
		glog.Errorf("Unable to get service %v: %v", request.ServiceID, err)
		return err
	}

	conn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(poolID))
	if err != nil {
		glog.Errorf("Error in getting a connection based on pool %v: %v", poolID, err)
		return err
	}

	found, err := zkservice.LoadRunningService(conn, request.ServiceID, request.ServiceStateID)
	if err != nil {
		glog.Errorf("zkservice.LoadRunningService failed (conn: %+v serviceID: %v): %v", conn, request.ServiceID, err)
		return err
	}
	if found != nil {
		*running = *found
	}

	return nil
}
Ejemplo n.º 6
0
// GetRunningServices collects the running services of every resource pool
// into allRunningServices.
//
// allRunningServices is reset to an empty, non-nil slice first. The pools come
// from the facade; for each pool a pool-scoped coordinator connection is
// opened and its running services are appended. An error is returned if the
// pools cannot be listed, if there are no pools, or if any pool fails; in the
// last case allRunningServices keeps whatever earlier pools contributed.
// request is not used by this implementation.
func (this *ControlPlaneDao) GetRunningServices(request dao.EntityRequest, allRunningServices *[]dao.RunningService) error {
	// we initialize the data container to something here in case it has not been initialized yet
	*allRunningServices = make([]dao.RunningService, 0)
	allPools, err := this.facade.GetResourcePools(datastore.Get())
	if err != nil {
		glog.Error("runningservice.go failed to get resource pool")
		return err
	}
	// len of a nil slice is 0, so this also covers a nil result.
	if len(allPools) == 0 {
		return fmt.Errorf("no resource pools found")
	}

	for _, aPool := range allPools {
		poolBasedConn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(aPool.ID))
		if err != nil {
			// Errorf, not Error: the message has format verbs. The cause is logged too.
			glog.Errorf("runningservice.go Failed to get connection based on pool %v: %v", aPool.ID, err)
			return err
		}

		singlePoolRunningServices, err := zkservice.LoadRunningServices(poolBasedConn)
		if err != nil {
			glog.Errorf("Failed GetAllRunningServices: %v", err)
			return err
		}

		*allRunningServices = append(*allRunningServices, singlePoolRunningServices...)
	}

	return nil
}
Ejemplo n.º 7
0
// RemoveService removes a service from the coordinator. It removes the
// service's vhosts through a root ("/") connection, then ensures the service
// lock on the pool connection and removes the service itself. The first error
// encountered is returned unchanged.
func (zk *zkf) RemoveService(service *service.Service) error {
	// Pool-scoped connection: used for both the service lock and the removal,
	// so the service cannot be scheduled while it is being deleted.
	poolConn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(service.PoolID))
	if err != nil {
		return err
	}

	// The list of deployed vhosts is global, so it is cleaned up via the root path.
	rootConn, err := zzk.GetLocalConnection("/")
	if err != nil {
		return err
	}
	if err = zkservice.RemoveServiceVhosts(rootConn, service); err != nil {
		return err
	}

	// Ensure that the service's pool is locked for the duration; done is
	// closed when this function returns.
	done := make(chan interface{})
	defer close(done)
	if err = zkservice.EnsureServiceLock(nil, done, poolConn); err != nil {
		return err
	}

	// FIXME: this may be a long-running operation, should we institute a timeout?
	return zkservice.RemoveService(poolConn, service.ID)
}
Ejemplo n.º 8
0
// StopServiceInstance forwards hostID and stateID to
// zkservice.StopServiceInstance over a coordinator connection scoped to poolID.
func (zk *zkf) StopServiceInstance(poolID, hostID, stateID string) error {
	poolPath := zzk.GeneratePoolPath(poolID)
	poolConn, connErr := zzk.GetLocalConnection(poolPath)
	if connErr != nil {
		return connErr
	}

	return zkservice.StopServiceInstance(poolConn, hostID, stateID)
}
Ejemplo n.º 9
0
// WaitService delegates to zkservice.WaitService for service.ID and the given
// desired state, over a coordinator connection scoped to service.PoolID. The
// cancel channel is passed through as-is.
func (zk *zkf) WaitService(service *service.Service, state service.DesiredState, cancel <-chan interface{}) error {
	poolPath := zzk.GeneratePoolPath(service.PoolID)
	poolConn, connErr := zzk.GetLocalConnection(poolPath)
	if connErr != nil {
		return connErr
	}

	return zkservice.WaitService(cancel, poolConn, service.ID, state)
}
Ejemplo n.º 10
0
// GetActiveHosts stores in hosts the result of zkhost.GetActiveHosts for the
// pool identified by poolID. If the connection cannot be obtained, hosts is
// left untouched; otherwise it is overwritten even when the lookup reports an
// error.
func (z *zkf) GetActiveHosts(poolID string, hosts *[]string) error {
	poolPath := zzk.GeneratePoolPath(poolID)
	poolConn, err := zzk.GetLocalConnection(poolPath)
	if err != nil {
		return err
	}

	active, err := zkhost.GetActiveHosts(poolConn)
	*hosts = active
	return err
}
Ejemplo n.º 11
0
// GetServiceStates stores in states the result of zkservice.GetServiceStates
// for the given serviceIDs, using a connection scoped to poolID. If the
// connection cannot be obtained, states is left untouched; otherwise it is
// overwritten even when the lookup reports an error.
func (zk *zkf) GetServiceStates(poolID string, states *[]servicestate.ServiceState, serviceIDs ...string) error {
	poolPath := zzk.GeneratePoolPath(poolID)
	poolConn, err := zzk.GetLocalConnection(poolPath)
	if err != nil {
		return err
	}

	found, err := zkservice.GetServiceStates(poolConn, serviceIDs...)
	*states = found
	return err
}
Ejemplo n.º 12
0
// desynchronize flags running instances that predate the given image as out
// of sync.
//
// It inspects image, lists the services of the tenant named by image.ID.User,
// and for every service whose ImageID parses to image.ID it loads the service
// states from the coordinator. Each state that is running and was started
// before the image's creation time gets InSync set to false and is written
// back.
//
// Only the image inspection and the service listing are fatal. Per-service and
// per-state failures are logged as warnings and skipped, so a nil return does
// not guarantee every matching instance was updated.
func (dfs *DistributedFilesystem) desynchronize(image *docker.Image) error {
	// inspect the image
	dImg, err := image.Inspect()
	if err != nil {
		glog.Errorf("Could not inspect image %s (%s): %s", image.ID, image.UUID, err)
		return err
	}

	// look up services for that tenant
	svcs, err := dfs.facade.GetServices(datastore.Get(), dao.ServiceRequest{TenantID: image.ID.User})
	if err != nil {
		glog.Errorf("Could not get services for tenant %s from %s (%s): %s", image.ID.User, image.ID, image.UUID, err)
		return err
	}

	for _, svc := range svcs {
		// figure out which services are using the provided image
		svcImageID, err := commons.ParseImageID(svc.ImageID)
		if err != nil {
			// The format has four verbs; the error must be passed as the last argument.
			glog.Warningf("Could not parse image %s for %s (%s): %s", svc.ImageID, svc.Name, svc.ID, err)
			continue
		} else if !svcImageID.Equals(image.ID) {
			continue
		}

		// TODO: we need to switch to using dao.ControlPlane
		conn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(svc.PoolID))
		if err != nil {
			glog.Warningf("Could not acquire connection to the coordinator (%s): %s", svc.PoolID, err)
			continue
		}

		states, err := zkservice.GetServiceStates(conn, svc.ID)
		if err != nil {
			// The format has three verbs; the error must be passed as the last argument.
			glog.Warningf("Could not get running services for %s (%s): %s", svc.Name, svc.ID, err)
			continue
		}

		for _, state := range states {
			// check if the instance has been running since before the commit
			if state.IsRunning() && state.Started.Before(dImg.Created) {
				state.InSync = false
				if err := zkservice.UpdateServiceState(conn, &state); err != nil {
					glog.Warningf("Could not update service state %s for %s (%s) as out of sync: %s", state.ID, svc.Name, svc.ID, err)
				}
			}
		}
	}
	return nil
}
Ejemplo n.º 13
0
// getPoolBasedConnection resolves the pool that owns serviceID through the
// facade and returns a coordinator connection scoped to that pool. On failure
// the connection is nil and the underlying error is returned.
func (this *ControlPlaneDao) getPoolBasedConnection(serviceID string) (client.Connection, error) {
	poolID, lookupErr := this.facade.GetPoolForService(datastore.Get(), serviceID)
	if lookupErr != nil {
		glog.V(2).Infof("ControlPlaneDao.GetPoolForService service=%+v err=%s", serviceID, lookupErr)
		return nil, lookupErr
	}

	conn, connErr := zzk.GetLocalConnection(zzk.GeneratePoolPath(poolID))
	if connErr != nil {
		glog.Errorf("Error in getting a connection based on pool %v: %v", poolID, connErr)
		return nil, connErr
	}
	return conn, nil
}
Ejemplo n.º 14
0
// UpdateService writes service through zkservice.UpdateService on a connection
// scoped to service.PoolID, then updates its vhosts through a root ("/")
// connection. The vhost update is attempted only after the service update
// succeeds.
func (zk *zkf) UpdateService(service *service.Service) error {
	poolConn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(service.PoolID))
	if err != nil {
		return err
	}

	err = zkservice.UpdateService(poolConn, service)
	if err != nil {
		return err
	}

	rootConn, err := zzk.GetLocalConnection("/")
	if err != nil {
		return err
	}

	return zkservice.UpdateServiceVhosts(rootConn, service)
}
Ejemplo n.º 15
0
// RemoveHost removes host from the coordinator under the pool's service lock.
// A cancel channel that closes after two minutes is handed to both the lock
// acquisition and the removal.
func (z *zkf) RemoveHost(host *host.Host) error {
	// The lock is taken on the pool connection to prevent services from being
	// scheduled to that pool while the host is removed.
	poolConn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(host.PoolID))
	if err != nil {
		return err
	}

	// timeout closes two minutes from now; this goroutine is the only closer.
	timeout := make(chan interface{})
	go func() {
		time.Sleep(2 * time.Minute)
		close(timeout)
	}()

	// Ensure that the service's pool is locked for the duration; done is
	// closed when this function returns.
	done := make(chan interface{})
	defer close(done)
	err = zkservice.EnsureServiceLock(timeout, done, poolConn)
	if err != nil {
		return err
	}
	return zkhost.RemoveHost(timeout, poolConn, host.ID)
}
Ejemplo n.º 16
0
// StopRunningInstance stops the instance request.ServiceStateID on host
// request.HostID by calling zkservice.StopServiceInstance over a connection
// scoped to the host's pool. An error is returned when the host lookup fails
// or the host does not exist. The unused argument is ignored.
func (this *ControlPlaneDao) StopRunningInstance(request dao.HostServiceRequest, unused *int) error {
	targetHost, err := this.facade.GetHost(datastore.Get(), request.HostID)
	switch {
	case err != nil:
		glog.Errorf("Unable to get host %v: %v", request.HostID, err)
		return err
	case targetHost == nil:
		return fmt.Errorf("Host %s does not exist", request.HostID)
	}

	conn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(targetHost.PoolID))
	if err != nil {
		glog.Errorf("Error in getting a connection based on pool %v: %v", targetHost.PoolID, err)
		return err
	}

	err = zkservice.StopServiceInstance(conn, request.HostID, request.ServiceStateID)
	if err != nil {
		glog.Errorf("zkservice.StopServiceInstance failed (conn: %+v hostID: %v serviceStateID: %v): %v", conn, request.HostID, request.ServiceStateID, err)
		return err
	}

	return nil
}
Ejemplo n.º 17
0
// Action sends a named service action to a running instance.
//
// It loads the service for request.Running.ServiceID, evaluates its action
// templates for the instance, and looks up request.Command in svc.Actions. The
// resolved action followed by request.Args becomes the command of a
// zkdocker.Action, which is sent over a connection scoped to the service's
// pool. An empty command or an unknown action yields an error. The unused
// argument is ignored.
func (this *ControlPlaneDao) Action(request dao.AttachRequest, unused *int) error {
	ctx := datastore.Get()
	svc, err := this.facade.GetService(ctx, request.Running.ServiceID)
	if err != nil {
		return err
	}

	if request.Command == "" {
		return fmt.Errorf("missing command")
	}

	// Templates are evaluated before the lookup so that svc.Actions holds the
	// evaluated values.
	err = svc.EvaluateActionsTemplate(serviceGetter(ctx, this.facade), childFinder(ctx, this.facade), request.Running.InstanceID)
	if err != nil {
		return err
	}

	action, found := svc.Actions[request.Command]
	if !found {
		return fmt.Errorf("action not found for service %s: %s", svc.ID, request.Command)
	}

	conn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(svc.PoolID))
	if err != nil {
		return err
	}

	_, err = zkdocker.SendAction(conn, &zkdocker.Action{
		HostID:   request.Running.HostID,
		DockerID: request.Running.DockerID,
		Command:  append([]string{action}, request.Args...),
	})
	return err
}
Ejemplo n.º 18
0
// localSync repeatedly pushes the scheduler's view of pools, hosts, services
// and virtual IPs into the coordinator until shutdown is signalled.
//
// Each pass syncs the resource pools over rootConn, then for every pool opens
// a pool-scoped connection and syncs that pool's hosts, services and virtual
// IPs. The first pass starts immediately. Any failure abandons the current
// pass and schedules the next one after minWait; a fully successful pass
// schedules the next one after maxWait.
func (s *scheduler) localSync(shutdown <-chan interface{}, rootConn client.Connection) {
	wait := time.After(0)

retry:
	for {
		select {
		case <-wait:
		case <-shutdown:
			return
		}

		pools, err := s.GetResourcePools()
		if err != nil {
			glog.Errorf("Could not get resource pools: %s", err)
			wait = time.After(minWait)
			continue
		} else if err := zkservice.SyncResourcePools(rootConn, pools); err != nil {
			glog.Errorf("Could not do a local sync of resource pools: %s", err)
			wait = time.After(minWait)
			continue
		}

		for _, pool := range pools {
			conn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(pool.ID))
			if err != nil {
				glog.Errorf("Could not get a pool-based connection for %s to zookeeper: %s", pool.ID, err)
				wait = time.After(minWait)
				continue retry
			}

			// Update the hosts
			if hosts, err := s.GetHostsByPool(pool.ID); err != nil {
				glog.Errorf("Could not get hosts in pool %s: %s", pool.ID, err)
				wait = time.After(minWait)
				continue retry
			} else if err := zkservice.SyncHosts(conn, hosts); err != nil {
				glog.Errorf("Could not do a local sync of hosts: %s", err)
				wait = time.After(minWait)
				continue retry
			}

			// Update the services. The SyncServices result must be bound to err
			// here; otherwise the condition re-tests the (nil) error from
			// GetServicesByPool and sync failures go unnoticed.
			if svcs, err := s.GetServicesByPool(pool.ID); err != nil {
				glog.Errorf("Could not get services: %s", err)
				wait = time.After(minWait)
				continue retry
			} else if err := zkservice.SyncServices(conn, svcs); err != nil {
				glog.Errorf("Could not do a local sync of services: %s", err)
				wait = time.After(minWait)
				continue retry
			}

			// Update Virtual IPs
			if err := zkvirtualips.SyncVirtualIPs(conn, pool.VirtualIPs); err != nil {
				glog.Errorf("Could not sync virtual ips for %s: %s", pool.ID, err)
				wait = time.After(minWait)
				continue retry
			}
		}

		wait = time.After(maxWait)
	}
}
Ejemplo n.º 19
0
// Restore rebuilds the system state from the backup archive at filename.
//
// The restore refuses to run while any service has running instances. It then
// extracts the archive into a scratch "restore" directory under the backup
// directory (removed again on return), checks that the backup's filesystem
// type matches dfs.fsType, and restores in order: resource pools, hosts,
// service templates, docker images, and finally the per-tenant snapshots.
//
// Every step is fatal: the first error is logged and returned, leaving the
// steps already applied in place. If at least one service template was
// restored, the logstash container reloader is started in the background when
// the function returns, whether or not later steps succeeded.
func (dfs *DistributedFilesystem) Restore(filename string) error {
	// fail if any services are running
	dfs.log("Checking running services")
	svcs, err := dfs.facade.GetServices(datastore.Get(), dao.ServiceRequest{})
	if err != nil {
		glog.Errorf("Could not acquire the list of all services: %s", err)
		return err
	}

	for _, svc := range svcs {
		conn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(svc.PoolID))
		if err != nil {
			glog.Errorf("Could not acquire connection to coordinator (%s): %s", svc.PoolID, err)
			return err
		}
		if states, err := zkservice.GetServiceStates(conn, svc.ID); err != nil {
			glog.Errorf("Could not look up running instances for %s (%s): %s", svc.Name, svc.ID, err)
			return err
		} else if running := len(states); running > 0 {
			err := fmt.Errorf("service %s (%s) has %d running instances", svc.Name, svc.ID, running)
			glog.Errorf("Cannot restore to %s: %s", filename, err)
			return err
		}
	}

	// Set once a template has been restored; checked by the deferred reload.
	var reloadLogstashContainer bool
	defer func() {
		if reloadLogstashContainer {
			go facade.LogstashContainerReloader(datastore.Get(), dfs.facade) // don't block main thread
		}
	}()

	// Start from a clean scratch directory.
	dirpath := filepath.Join(utils.BackupDir(dfs.varpath), "restore")
	if err := os.RemoveAll(dirpath); err != nil {
		glog.Errorf("Could not remove %s: %s", dirpath, err)
		return err
	}

	if err := mkdir(dirpath); err != nil {
		glog.Errorf("Could neither find nor create %s: %s", dirpath, err)
		return err
	}

	defer func() {
		if err := os.RemoveAll(dirpath); err != nil {
			glog.Warningf("Could not remove %s: %s", dirpath, err)
		}
	}()

	dfs.log("Extracting backup file %s", filename)
	if err := importTGZ(dirpath, filename); err != nil {
		glog.Errorf("Could not expand %s to %s: %s", filename, dirpath, err)
		return err
	}

	var metadata metadata
	if err := importJSON(filepath.Join(dirpath, meta), &metadata); err != nil {
		glog.Errorf("Could not import %s: %s", meta, err)
		return err
	} else if metadata.FSType != dfs.fsType {
		err = fmt.Errorf("this backup can only be run on %s", metadata.FSType)
		glog.Errorf("Could not extract backup %s: %s", filename, err)
		return err
	}

	var pools []pool.ResourcePool
	if err := importJSON(filepath.Join(dirpath, poolJSON), &pools); err != nil {
		glog.Errorf("Could not read resource pools from %s: %s", filename, err)
		return err
	}

	// restore the resource pools (and virtual ips)
	dfs.log("Loading resource pools")
	for _, pool := range pools {
		pool.DatabaseVersion = 0

		glog.V(1).Infof("Restoring resource pool %s", pool.ID)
		// Add the pool, falling back to an update when it already exists.
		var err error
		if err = dfs.facade.AddResourcePool(datastore.Get(), &pool); err == facade.ErrPoolExists {
			err = dfs.facade.UpdateResourcePool(datastore.Get(), &pool)
		}
		if err != nil {
			glog.Errorf("Could not restore resource pool %s: %s", pool.ID, err)
			return err
		}
	}
	dfs.log("Resource pool load successful")

	var hosts []host.Host
	if err := importJSON(filepath.Join(dirpath, hostJSON), &hosts); err != nil {
		glog.Errorf("Could not read hosts from %s: %s", filename, err)
		return err
	}

	// restore the hosts (add it if it doesn't exist; assume hosts don't need to be updated from backup)
	dfs.log("Loading static ips")
	for _, host := range hosts {
		host.DatabaseVersion = 0
		glog.V(1).Infof("Restoring host %s (%s)", host.ID, host.IPAddr)
		if exists, err := dfs.facade.HasIP(datastore.Get(), host.PoolID, host.IPAddr); err != nil {
			glog.Errorf("Could not check IP %s for pool %s: %s", host.IPAddr, host.PoolID, err)
			return err
		} else if exists {
			glog.Warningf("Could not restore host %s (%s): ip already exists", host.ID, host.IPAddr)
		} else if err := dfs.facade.AddHost(datastore.Get(), &host); err != nil {
			glog.Errorf("Could not add host %s: %s", host.ID, err)
			return err
		}
	}
	dfs.log("Static ip load successful")

	var templates map[string]servicetemplate.ServiceTemplate
	if err := importJSON(filepath.Join(dirpath, templateJSON), &templates); err != nil {
		glog.Errorf("Could not read templates from %s: %s", filename, err)
		return err
	}

	// restore the service templates
	dfs.log("Loading service templates")
	for templateID, template := range templates {
		template.DatabaseVersion = 0

		glog.V(1).Infof("Restoring service template %s", templateID)
		template.ID = templateID
		if err := dfs.facade.UpdateServiceTemplate(datastore.Get(), template); err != nil {
			glog.Errorf("Could not restore template %s: %s", templateID, err)
			return err
		}
		reloadLogstashContainer = true
	}
	dfs.log("Service template load successful")

	// Get the tenant of all the services to be restored. A listing failure
	// must abort the restore: continuing with an empty list would skip every
	// snapshot and still report success.
	snapshotFiles, err := ls(filepath.Join(dirpath, snapshotDir))
	if err != nil {
		glog.Errorf("Could not list snapshots from %s: %s", filename, err)
		return err
	}
	tenantIDs := make(map[string]struct{})
	for _, f := range snapshotFiles {
		tenantIDs[strings.TrimSuffix(f, ".tgz")] = struct{}{}
	}

	// restore docker images
	dfs.log("Loading docker images")
	var images []imagemeta
	if err := importJSON(filepath.Join(dirpath, imageJSON), &images); err != nil {
		glog.Errorf("Could not read images from %s: %s", filename, err)
		return err
	}

	if err := dfs.importImages(filepath.Join(dirpath, imageDir), images, tenantIDs); err != nil {
		glog.Errorf("Could not import images from %s: %s", filename, err)
		return err
	}
	dfs.log("Docker image load successful")

	// restore snapshots
	glog.V(1).Infof("Restoring services and snapshots")
	for _, f := range snapshotFiles {
		dfs.log("Loading %s", f)
		if err := dfs.loadSnapshots(strings.TrimSuffix(f, ".tgz"), filepath.Join(dirpath, snapshotDir, f)); err != nil {
			glog.Errorf("Could not import snapshot from %s: %s", f, err)
			return err
		}
		dfs.log("Successfully loaded %s", f)
	}

	glog.Infof("Restore succeeded with fsType:%s from file:%s", dfs.fsType, filename)
	return nil
}
Ejemplo n.º 20
0
// startAgent brings up the agent side of the daemon.
//
// Synchronously it creates the TCP mux, builds the local host record,
// determines the host ID, registers the "Agent" RPC service and starts the
// process-executor HTTP server on :50000. The rest runs in a background
// goroutine, because it has to wait until the master knows this host's pool:
//
//  1. poll the master every five seconds (until d.shutdown) for this host's pool ID;
//  2. initialize the local coordinator client rooted at that pool;
//  3. unless the NFS client is disabled, wait for the storage client to become ready;
//  4. create and start the HostAgent and register it as "ControlPlaneAgent";
//  5. optionally start the container statistics reporter.
//
// The returned error covers only the synchronous part; failures in the
// background goroutine are logged or fatal.
func (d *daemon) startAgent() error {
	muxListener, err := createMuxListener()
	if err != nil {
		return err
	}
	mux, err := proxy.NewTCPMux(muxListener)
	if err != nil {
		return err
	}

	agentIP := options.OutboundIP
	if agentIP == "" {
		var err error
		agentIP, err = utils.GetIPAddress()
		if err != nil {
			glog.Fatalf("Failed to acquire ip address: %s", err)
		}
	}

	// Use the port from the listen address when one is given, "0" otherwise.
	rpcPort := "0"
	parts := strings.Split(options.Listen, ":")
	if len(parts) > 1 {
		rpcPort = parts[1]
	}

	thisHost, err := host.Build(agentIP, rpcPort, "unknown")
	if err != nil {
		panic(err)
	}

	myHostID, err := utils.HostID()
	if err != nil {
		return fmt.Errorf("HostID failed: %v", err)
	} else if err := validation.ValidHostID(myHostID); err != nil {
		// NOTE(review): an invalid host ID is only logged; startup continues.
		glog.Errorf("invalid hostid: %s", myHostID)
	}

	go func() {
		var poolID string
		for {
			// One discovery attempt; returns "" when the pool is not known yet.
			poolID = func() string {
				glog.Infof("Trying to discover my pool...")
				var myHost *host.Host
				masterClient, err := master.NewClient(d.servicedEndpoint)
				if err != nil {
					glog.Errorf("master.NewClient failed (endpoint %+v) : %v", d.servicedEndpoint, err)
					return ""
				}
				defer masterClient.Close()
				myHost, err = masterClient.GetHost(myHostID)
				if err != nil {
					glog.Warningf("masterClient.GetHost %v failed: %v (has this host been added?)", myHostID, err)
					return ""
				}
				poolID = myHost.PoolID
				glog.Infof(" My PoolID: %v", poolID)
				//send updated host info
				updatedHost, err := host.UpdateHostInfo(*myHost)
				if err != nil {
					glog.Infof("Could not send updated host information: %v", err)
					return poolID
				}
				err = masterClient.UpdateHost(updatedHost)
				if err != nil {
					glog.Warningf("Could not update host information: %v", err)
					return poolID
				}
				glog.V(2).Infof("Sent updated host info %#v", updatedHost)
				return poolID
			}()
			if poolID != "" {
				break
			}
			select {
			case <-d.shutdown:
				return
			case <-time.After(5 * time.Second):
				continue
			}
		}

		thisHost.PoolID = poolID

		basePoolPath := "/pools/" + poolID
		dsn := coordzk.NewDSN(options.Zookeepers, time.Second*15).String()
		glog.Infof("zookeeper dsn: %s", dsn)
		zClient, err := coordclient.New("zookeeper", dsn, basePoolPath, nil)
		if err != nil {
			// NOTE(review): the failure is only logged and zClient is still
			// installed below — confirm this is intended.
			glog.Errorf("failed create a new coordclient: %v", err)
		}
		zzk.InitializeLocalClient(zClient)

		poolBasedConn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(poolID))
		if err != nil {
			// NOTE(review): only logged; poolBasedConn is later handed to the
			// stats reporter regardless.
			glog.Errorf("Error in getting a connection based on pool %v: %v", poolID, err)
		}

		if options.NFSClient != "0" {
			nfsClient, err := storage.NewClient(thisHost, path.Join(options.VarPath, "volumes"))
			if err != nil {
				glog.Fatalf("could not create an NFS client: %s", err)
			}

			go func() {
				<-d.shutdown
				glog.Infof("shutting down storage client")
				nfsClient.Close()
			}()

			//loop and log waiting for Storage Leader
			nfsDone := make(chan struct{})
			go func() {
				defer close(nfsDone)
				nfsClient.Wait()
			}()
			//wait indefinitely(?) for storage to work before starting
			glog.Info("Waiting for Storage Leader")
			nfsUp := false
			for !nfsUp {
				select {
				case <-nfsDone:
					nfsUp = true
					glog.Info("Found Storage Leader")
				case <-time.After(time.Second * 30):
					glog.Info("Waiting for Storage Leader, will not be available for running services. ")
				}
			}
		} else {
			glog.Info("NFS Client disabled")
		}

		agentOptions := node.AgentOptions{
			PoolID:               thisHost.PoolID,
			Master:               options.Endpoint,
			UIPort:               options.UIPort,
			RPCPort:              options.RPCPort,
			DockerDNS:            options.DockerDNS,
			VarPath:              options.VarPath,
			Mount:                options.Mount,
			FSType:               options.FSType,
			Zookeepers:           options.Zookeepers,
			Mux:                  mux,
			UseTLS:               options.TLS,
			DockerRegistry:       dockerRegistry,
			MaxContainerAge:      time.Duration(int(time.Second) * options.MaxContainerAge),
			VirtualAddressSubnet: options.VirtualAddressSubnet,
		}
		// creates a zClient that is not pool based!
		hostAgent, err := node.NewHostAgent(agentOptions)
		if err != nil {
			// Without a host agent nothing below can work, so stop here
			// instead of starting and registering an invalid agent.
			glog.Fatalf("Could not start ControlPlane agent: %v", err)
		}
		d.hostAgent = hostAgent

		d.waitGroup.Add(1)
		go func() {
			hostAgent.Start(d.shutdown)
			glog.Info("Host Agent has shutdown")
			d.waitGroup.Done()
		}()

		// register the API
		glog.V(0).Infoln("registering ControlPlaneAgent service")
		if err = d.rpcServer.RegisterName("ControlPlaneAgent", hostAgent); err != nil {
			glog.Fatalf("could not register ControlPlaneAgent RPC server: %v", err)
		}

		if options.ReportStats {
			statsdest := fmt.Sprintf("http://%s/api/metrics/store", options.HostStats)
			statsduration := time.Duration(options.StatsPeriod) * time.Second
			glog.V(1).Infoln("Staring container statistics reporter")
			statsReporter, err := stats.NewStatsReporter(statsdest, statsduration, poolBasedConn)
			if err != nil {
				glog.Errorf("Error kicking off stats reporter %v", err)
			} else {
				// Close the reporter once shutdown is signalled.
				go func() {
					defer statsReporter.Close()
					<-d.shutdown
				}()
			}
		}
	}()

	glog.Infof("agent start staticips: %v [%d]", d.staticIPs, len(d.staticIPs))
	if err = d.rpcServer.RegisterName("Agent", agent.NewServer(d.staticIPs)); err != nil {
		glog.Fatalf("could not register Agent RPC server: %v", err)
	}

	// TODO: Integrate this server into the rpc server, or something.
	// Currently its only use is for command execution.
	go func() {
		sio := shell.NewProcessExecutorServer(options.Endpoint, dockerRegistry)
		// ListenAndServe only returns on failure; make that visible.
		if err := http.ListenAndServe(":50000", sio); err != nil {
			glog.Errorf("process executor server on :50000 stopped: %v", err)
		}
	}()

	return nil
}
Ejemplo n.º 21
0
// Start is the main loop of the HostAgent. It launches the old-container
// reaper, raises the conntrack limit, installs the SERVICED iptables chain and
// then runs the coordinator listeners, reconnecting after each listener run
// ends, until shutdown is signalled. On return the iptables chain is removed
// and the reaper goroutine is waited for.
func (a *HostAgent) Start(shutdown <-chan interface{}) {
	glog.Info("Starting HostAgent")

	var routines sync.WaitGroup
	defer func() {
		glog.Info("Waiting for agent routines...")
		routines.Wait()
		glog.Info("Agent routines ended")
	}()

	routines.Add(1)
	go func() {
		defer routines.Done()
		glog.Info("reapOldContainersLoop starting")
		a.reapOldContainersLoop(time.Minute, shutdown)
		glog.Info("reapOldContainersLoop Done")
	}()

	// Increase the number of maximal tracked connections for iptables; the
	// environment may override the built-in default.
	maxConnections := strings.TrimSpace(os.Getenv("SERVICED_IPTABLES_MAX_CONNECTIONS"))
	if maxConnections == "" {
		maxConnections = "655360"
	}
	glog.Infof("Set sysctl maximum tracked connections for iptables to %s", maxConnections)
	utils.SetSysctl("net.netfilter.nf_conntrack_max", maxConnections)

	// Clean up any extant iptables chain, just in case, then add our chain
	// for assigned IP rules.
	a.servicedChain.Remove()
	if injectErr := a.servicedChain.Inject(); injectErr != nil {
		glog.Errorf("Error creating SERVICED iptables chain (%v)", injectErr)
	}
	// Clean up when we're done
	defer a.servicedChain.Remove()

	for {
		// Wait for a zk connection, but stay responsive to shutdown.
		connected := zzk.Connect(zzk.GeneratePoolPath(a.poolID), zzk.GetLocalConnection)
		var conn coordclient.Connection
		select {
		case conn = <-connected:
		case <-shutdown:
			return
		}
		if conn == nil {
			continue
		}

		glog.Info("Got a connected client")

		// Listeners for virtual IP nodes and docker action nodes.
		vipListener := virtualips.NewVirtualIPListener(a, a.hostID)
		dockerActions := zkdocker.NewActionListener(a, a.hostID)

		// watch the host state nodes
		// this blocks until
		// 1) has a connection
		// 2) its node is registered
		// 3) receives signal to shutdown or breaks
		hostStates := zkservice.NewHostStateListener(a, a.hostID)

		glog.Infof("Host Agent successfully started")
		zzk.Start(shutdown, conn, hostStates, vipListener, dockerActions)

		// Non-blocking check: leave on shutdown, otherwise loop and reconnect.
		select {
		case <-shutdown:
			glog.Infof("Host Agent shutting down")
			return
		default:
			glog.Infof("Host Agent restarting")
		}
	}
}
Ejemplo n.º 22
0
// getEndpoints populates the controller's endpoint tables for service.
//
// It fetches the zookeeper info from the serviced endpoint, initializes the
// local coordinator client at the root path, and opens a connection scoped to
// the service's pool. When SERVICED_IS_SERVICE_SHELL is "true" (serviced
// shell/run), the docker ID is the local hostname and only importedEndpoints
// is built, from a temporary service state. Otherwise the stored service state
// supplies the docker ID and both exportedEndpoints and importedEndpoints are
// built.
func (c *Controller) getEndpoints(service *service.Service) error {
	var err error
	if c.zkInfo, err = getAgentZkInfo(c.options.ServicedEndpoint); err != nil {
		glog.Errorf("Invalid zk info: %v", err)
		return err //ErrInvalidZkInfo
	}
	glog.Infof(" c.zkInfo: %+v", c.zkInfo)

	// endpoints are created at the root level (not pool aware), hence the
	// empty base path
	zClient, err := coordclient.New("zookeeper", c.zkInfo.ZkDSN, "", nil)
	if err != nil {
		glog.Errorf("failed create a new coordclient: %v", err)
		return err
	}
	zzk.InitializeLocalClient(zClient)

	// get zookeeper connection
	conn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(service.PoolID))
	if err != nil {
		return fmt.Errorf("getEndpoints zzk.GetLocalConnection failed: %v", err)
	}

	if os.Getenv("SERVICED_IS_SERVICE_SHELL") == "true" {
		// this is not a running service, i.e. serviced shell/run
		hostname, err := os.Hostname()
		if err != nil {
			glog.Errorf("could not get hostname: %s", err)
			return fmt.Errorf("getEndpoints failed could not get hostname: %v", err)
		}
		c.dockerID = hostname

		// TODO: deal with exports in the future when there is a use case for it

		tmpState, err := servicestate.BuildFromService(service, c.hostID)
		if err != nil {
			return fmt.Errorf("Unable to create temporary service state")
		}

		// initialize importedEndpoints
		if c.importedEndpoints, err = buildImportedEndpoints(conn, c.tenantID, tmpState); err != nil {
			glog.Errorf("Invalid ImportedEndpoints")
			return ErrInvalidImportedEndpoints
		}
		return nil
	}

	// Running service: start from its stored service state.
	glog.Infof("getting service state: %s %v", c.options.Service.ID, c.options.Service.InstanceID)
	liveState, err := getServiceState(conn, c.options.Service.ID, c.options.Service.InstanceID)
	if err != nil {
		return fmt.Errorf("getEndpoints getServiceState failed: %v", err)
	}
	c.dockerID = liveState.DockerID

	// keep a copy of the service EndPoint exports
	if c.exportedEndpoints, err = buildExportedEndpoints(conn, c.tenantID, liveState); err != nil {
		glog.Errorf("Invalid ExportedEndpoints")
		return ErrInvalidExportedEndpoints
	}

	// initialize importedEndpoints
	if c.importedEndpoints, err = buildImportedEndpoints(conn, c.tenantID, liveState); err != nil {
		glog.Errorf("Invalid ImportedEndpoints")
		return ErrInvalidImportedEndpoints
	}

	return nil
}