// Snapshots() returns the current snapshots on the volume
func (v *BtrfsVolume) Snapshots() (labels []string, err error) {
	labels = make([]string, 0)
	glog.Info("about to execute subvolume list command")
	if output, err := BtrfsCmd("subvolume", "list", "-apucr", v.baseDir).CombinedOutput(); err != nil {
		glog.Errorf("got an error with subvolume list: %s", string(output))
		return labels, err
	} else {
		glog.Infof("btrfs subvolume list, baseDir: %s", v.baseDir)
		prefixedName := v.name + "_"
		for _, line := range strings.Split(string(output), "\n") {
			glog.Infof("btrfs subvolume list: %s", line)
			fields := strings.Fields(line)
			for i, field := range fields {
				// the snapshot label is the last element of the "path" column
				if field == "path" && i+1 < len(fields) {
					fstree := fields[i+1]
					parts := strings.Split(fstree, "/")
					label := parts[len(parts)-1]
					if strings.HasPrefix(label, prefixedName) {
						labels = append(labels, label)
						break
					}
				}
			}
		}
	}
	return labels, err
}
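// The parsing above keys off the "path" column of `btrfs subvolume list -apucr` output and
// keeps only entries whose final path element starts with "<volume name>_". Below is a
// minimal, standalone sketch of that extraction; the sample line and volume name are
// illustrative assumptions, not real command output, and it assumes the standard library
// "fmt" and "strings" packages are imported.
func exampleParseSnapshotLabel() {
	line := "ID 258 gen 31 cgen 31 parent 5 top level 5 parent_uuid - uuid 7cd047ce-0000-0000-0000-000000000000 path <FS_TREE>/volumes/myvolume_snapshot1"
	prefixedName := "myvolume" + "_"

	fields := strings.Fields(line)
	for i, field := range fields {
		if field == "path" && i+1 < len(fields) {
			parts := strings.Split(fields[i+1], "/")
			label := parts[len(parts)-1]
			if strings.HasPrefix(label, prefixedName) {
				fmt.Printf("found snapshot label: %s\n", label) // prints "myvolume_snapshot1"
			}
		}
	}
}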
// elasticsearchHealthCheck() determines if elasticsearch is healthy
func elasticsearchHealthCheck() error {
	start := time.Now()
	lastError := time.Now()
	minUptime := time.Second * 2
	timeout := time.Second * 30
	schemaFile := localDir("resources/controlplane.json")
	for {
		if healthResponse, err := cluster.Health(true); err == nil && (healthResponse.Status == "green" || healthResponse.Status == "yellow") {
			if buffer, err := os.Open(schemaFile); err != nil {
				glog.Fatalf("problem reading %s", err)
				return err
			} else {
				http.Post("http://localhost:9200/controlplane", "application/json", buffer)
				buffer.Close()
			}
		} else {
			lastError = time.Now()
			glog.V(2).Infof("Still trying to connect to elastic: %v: %s", err, healthResponse)
		}
		// make sure the service has been healthy for at least minUptime
		if time.Since(lastError) > minUptime {
			break
		}
		if time.Since(start) > timeout {
			return fmt.Errorf("Could not startup elastic search container.")
		}
		time.Sleep(time.Millisecond * 1000)
	}
	glog.Info("elasticsearch container started, browser at http://localhost:9200/_plugin/head/")
	return nil
}
// a health check for zookeeper
func zkHealthCheck() error {
	start := time.Now()
	lastError := time.Now()
	minUptime := time.Second * 2
	timeout := time.Second * 30
	zookeepers := []string{"127.0.0.1:2181"}
	for {
		if conn, _, err := zk.Connect(zookeepers, time.Second*10); err == nil {
			conn.Close()
		} else {
			// conn may be nil when Connect fails; guard before closing
			if conn != nil {
				conn.Close()
			}
			glog.V(1).Infof("Could not connect to zookeeper: %s", err)
			lastError = time.Now()
		}
		// make sure that service has been good for at least minUptime
		if time.Since(lastError) > minUptime {
			break
		}
		if time.Since(start) > timeout {
			return fmt.Errorf("Zookeeper did not respond.")
		}
		time.Sleep(time.Millisecond * 1000)
	}
	glog.Info("zookeeper container started, browser at http://localhost:12181/exhibitor/v1/ui/index.html")
	return nil
}
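// Both health checks above share the same shape: probe the service about once a second,
// require it to stay healthy for minUptime, and give up after timeout. A minimal,
// standalone sketch of that pattern, with the probe supplied by the caller (standard
// library plus glog, which this code already uses), might look like this:
func waitHealthy(probe func() error, minUptime, timeout time.Duration) error {
	start := time.Now()
	lastError := time.Now()
	for {
		if err := probe(); err != nil {
			glog.V(1).Infof("health probe failed: %s", err)
			lastError = time.Now()
		}
		// healthy continuously for at least minUptime
		if time.Since(lastError) > minUptime {
			return nil
		}
		if time.Since(start) > timeout {
			return fmt.Errorf("service did not become healthy within %s", timeout)
		}
		time.Sleep(time.Second)
	}
}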
func (sr StatsReporter) postStats(stats []byte) error {
	statsreq, err := http.NewRequest("POST", sr.destination, bytes.NewBuffer(stats))
	if err != nil {
		glog.V(3).Info("Couldn't create stats request: ", err)
		return err
	}
	statsreq.Header["User-Agent"] = []string{"Zenoss Metric Publisher"}
	statsreq.Header["Content-Type"] = []string{"application/json"}
	if glog.V(4) {
		glog.Info(string(stats))
	}
	resp, reqerr := http.DefaultClient.Do(statsreq)
	if reqerr != nil {
		glog.V(3).Info("Couldn't post stats: ", reqerr)
		return reqerr
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		glog.V(3).Info("Non-success: ", resp.Status)
		return fmt.Errorf("couldn't post stats: %s", resp.Status)
	}
	return nil
}
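// A hedged usage sketch for the reporter above: stand up an httptest server to play the
// metrics endpoint and post a payload at it. The handler, the payload, and constructing
// StatsReporter with only its destination field set are illustrative assumptions
// (net/http/httptest and fmt from the standard library).
func examplePostStats() {
	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		fmt.Printf("received %s, Content-Type=%s, User-Agent=%s\n",
			r.Method, r.Header.Get("Content-Type"), r.Header.Get("User-Agent"))
		w.WriteHeader(http.StatusOK)
	}))
	defer ts.Close()

	sr := StatsReporter{destination: ts.URL} // assumption: other fields are not needed by postStats
	if err := sr.postStats([]byte(`[{"metric":"example.metric","value":1}]`)); err != nil {
		fmt.Println("postStats failed:", err)
	}
}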
func (d *daemon) runScheduler() {
	for {
		sched, err := scheduler.NewScheduler(d.masterPoolID, d.hostID, d.storageHandler, d.cpDao, d.facade, options.SnapshotTTL)
		if err != nil {
			glog.Errorf("Could not start scheduler: %s", err)
			return
		}
		sched.Start()
		select {
		case <-d.shutdown:
			glog.Info("Shutting down scheduler")
			sched.Stop()
			glog.Info("Scheduler stopped")
			return
		}
	}
}
func (s *Server) Run(shutdown <-chan interface{}, conn client.Connection) error {
	node := &Node{
		Host:       *s.host,
		ExportPath: fmt.Sprintf("%s:%s", s.host.IPAddr, s.driver.ExportPath()),
		ExportTime: strconv.FormatInt(time.Now().UnixNano(), 16),
	}

	// Create the storage leader and client nodes
	if exists, _ := conn.Exists("/storage/leader"); !exists {
		conn.CreateDir("/storage/leader")
	}
	storageClientsPath := "/storage/clients"
	if exists, _ := conn.Exists(storageClientsPath); !exists {
		conn.CreateDir(storageClientsPath)
	}
	leader := conn.NewLeader("/storage/leader", node)
	leaderW, err := leader.TakeLead()
	if err != zookeeper.ErrDeadlock && err != nil {
		glog.Errorf("Could not take storage lead: %s", err)
		return err
	}

	// monitor dfs; log warnings each cycle; restart dfs if needed
	go s.monitor.MonitorDFSVolume(path.Join("/exports", s.driver.ExportPath()), s.host.IPAddr, node.ExportTime, shutdown, s.monitor.DFSVolumeMonitorPollUpdateFunc)

	// loop until shutdown event
	defer leader.ReleaseLead()
	for {
		clients, clientW, err := conn.ChildrenW(storageClientsPath)
		if err != nil {
			glog.Errorf("Could not set up watch for storage clients: %s", err)
			return err
		}

		s.monitor.SetMonitorStorageClients(conn, storageClientsPath)

		s.driver.SetClients(clients...)
		if err := s.driver.Sync(); err != nil {
			glog.Errorf("Error syncing driver: %s", err)
			return err
		}

		select {
		case e := <-clientW:
			glog.Infof("storage.server: received event: %s", e)
		case <-leaderW:
			err := fmt.Errorf("storage.server: lost lead")
			return err
		case <-shutdown:
			return nil
		}
	}
}
// Spawn attaches to a container and performs the requested action
func (l *ActionListener) Spawn(shutdown <-chan interface{}, actionID string) {
	defer func() {
		glog.V(2).Infof("Action %s complete", actionID)
		if err := l.conn.Delete(l.GetPath(actionID)); err != nil {
			glog.Errorf("Could not delete %s: %s", l.GetPath(actionID), err)
		}
	}()

	var action Action
	if err := l.conn.Get(l.GetPath(actionID), &action); err != nil {
		glog.V(1).Infof("Could not get action %s: %s", l.GetPath(actionID), err)
		return
	}

	result, err := l.handler.AttachAndRun(action.DockerID, action.Command)
	if len(result) > 0 {
		glog.Info(string(result))
	}
	if err != nil {
		glog.Warningf("Error running command `%s` on container %s: %s", action.Command, action.DockerID, err)
	} else {
		glog.V(1).Infof("Successfully ran command `%s` on container %s", action.Command, action.DockerID)
	}
}
// main loop of the HostAgent
func (a *HostAgent) Start(shutdown <-chan interface{}) {
	glog.Info("Starting HostAgent")

	var wg sync.WaitGroup
	defer func() {
		glog.Info("Waiting for agent routines...")
		wg.Wait()
		glog.Info("Agent routines ended")
	}()

	wg.Add(1)
	go func() {
		glog.Info("reapOldContainersLoop starting")
		a.reapOldContainersLoop(time.Minute, shutdown)
		glog.Info("reapOldContainersLoop Done")
		wg.Done()
	}()

	// Increase the number of maximal tracked connections for iptables
	maxConnections := "655360"
	if cnxns := strings.TrimSpace(os.Getenv("SERVICED_IPTABLES_MAX_CONNECTIONS")); cnxns != "" {
		maxConnections = cnxns
	}
	glog.Infof("Set sysctl maximum tracked connections for iptables to %s", maxConnections)
	utils.SetSysctl("net.netfilter.nf_conntrack_max", maxConnections)

	// Clean up any extant iptables chain, just in case
	a.servicedChain.Remove()
	// Add our chain for assigned IP rules
	if err := a.servicedChain.Inject(); err != nil {
		glog.Errorf("Error creating SERVICED iptables chain (%v)", err)
	}
	// Clean up when we're done
	defer a.servicedChain.Remove()

	for {
		// handle shutdown if we are waiting for a zk connection
		var conn coordclient.Connection
		select {
		case conn = <-zzk.Connect(zzk.GeneratePoolPath(a.poolID), zzk.GetLocalConnection):
		case <-shutdown:
			return
		}
		if conn == nil {
			continue
		}

		glog.Info("Got a connected client")

		// watch virtual IP zookeeper nodes
		virtualIPListener := virtualips.NewVirtualIPListener(a, a.hostID)

		// watch docker action nodes
		actionListener := zkdocker.NewActionListener(a, a.hostID)

		// watch the host state nodes
		// this blocks until
		// 1) has a connection
		// 2) its node is registered
		// 3) receives signal to shutdown or breaks
		hsListener := zkservice.NewHostStateListener(a, a.hostID)

		glog.Infof("Host Agent successfully started")
		zzk.Start(shutdown, conn, hsListener, virtualIPListener, actionListener)

		select {
		case <-shutdown:
			glog.Infof("Host Agent shutting down")
			return
		default:
			glog.Infof("Host Agent restarting")
		}
	}
}
// createVolumeDir() creates a directory on the running host using the user ids
// found within the specified image. For example, it can create a directory owned
// by the mysql user (as seen by the container) despite there being no mysql user
// on the host system.
// Assumes that the local docker image (imageSpec) exists and has been sync'd
// with the registry.
func createVolumeDir(hostPath, containerSpec, imageSpec, userSpec, permissionSpec string) error {
	createVolumeDirMutex.Lock()
	defer createVolumeDirMutex.Unlock()

	dotfileCompatibility := path.Join(hostPath, ".serviced.initialized") // for compatibility with previous versions of serviced
	dotfileHostPath := path.Join(filepath.Dir(hostPath), fmt.Sprintf(".%s.serviced.initialized", filepath.Base(hostPath)))
	dotfiles := []string{dotfileCompatibility, dotfileHostPath}
	for _, dotfileHostPath := range dotfiles {
		_, err := os.Stat(dotfileHostPath)
		if err == nil {
			glog.V(2).Infof("DFS volume initialized earlier for src:%s dst:%s image:%s user:%s perm:%s", hostPath, containerSpec, imageSpec, userSpec, permissionSpec)
			return nil
		}
	}

	starttime := time.Now()

	var err error
	var output []byte
	command := [...]string{
		"docker", "run",
		"--rm",
		"-v", hostPath + ":/mnt/dfs",
		imageSpec,
		"/bin/bash", "-c",
		fmt.Sprintf(`
set -e
if [ ! -d "%s" ]; then
	echo "WARNING: DFS mount %s does not exist in image %s"
else
	cp -rp %s/. /mnt/dfs/
fi
chown %s /mnt/dfs
chmod %s /mnt/dfs
sync
`, containerSpec, containerSpec, imageSpec, containerSpec, userSpec, permissionSpec),
	}

	for i := 0; i < 2; i++ {
		docker := exec.Command(command[0], command[1:]...)
		output, err = docker.CombinedOutput()
		if err == nil {
			duration := time.Since(starttime)
			if strings.Contains(string(output), "WARNING:") {
				glog.Warning(string(output))
			} else {
				glog.Info(string(output))
			}
			glog.Infof("DFS volume init #%d took %s for src:%s dst:%s image:%s user:%s perm:%s", i, duration, hostPath, containerSpec, imageSpec, userSpec, permissionSpec)
			if e := ioutil.WriteFile(dotfileHostPath, []byte(""), 0664); e != nil {
				glog.Errorf("unable to create DFS volume initialized dotfile %s: %s", dotfileHostPath, e)
				return e
			}
			return nil
		}
		time.Sleep(time.Second)
		glog.Warningf("retrying due to error creating DFS volume %+v: %s", hostPath, string(output))
	}

	glog.Errorf("could not create DFS volume %+v: %s", hostPath, string(output))
	return err
}
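// For reference, a standalone sketch that renders the shell script the function above hands
// to `docker run --rm -v <hostPath>:/mnt/dfs <imageSpec> /bin/bash -c <script>`. The paths,
// image, user, and mode here are made-up examples, and only fmt from the standard library
// is used.
func exampleVolumeInitScript() {
	// Hypothetical inputs for illustration only.
	containerSpec, imageSpec, userSpec, permissionSpec := "/var/lib/mysql", "zenoss/mysql:latest", "mysql:mysql", "755"

	script := fmt.Sprintf(`
set -e
if [ ! -d "%s" ]; then
	echo "WARNING: DFS mount %s does not exist in image %s"
else
	cp -rp %s/. /mnt/dfs/
fi
chown %s /mnt/dfs
chmod %s /mnt/dfs
sync
`, containerSpec, containerSpec, imageSpec, containerSpec, userSpec, permissionSpec)

	// The copy and ownership changes happen inside the container, but land on the
	// host-side volume directory mounted at /mnt/dfs.
	fmt.Println(script)
}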
func TestServer(t *testing.T) {
	t.Skip() // the zookeeper part doesn't work in this test, but does work in real life
	zookeeper.EnsureZkFatjar()
	basePath := ""
	tc, err := zklib.StartTestCluster(1, nil, nil)
	if err != nil {
		t.Fatalf("could not start test zk cluster: %s", err)
	}
	defer os.RemoveAll(tc.Path)
	defer tc.Stop()
	time.Sleep(time.Second)

	servers := []string{fmt.Sprintf("127.0.0.1:%d", tc.Servers[0].Port)}

	dsnBytes, err := json.Marshal(zookeeper.DSN{Servers: servers, Timeout: time.Second * 15})
	if err != nil {
		t.Fatalf("unexpected error creating zk DSN: %s", err)
	}
	dsn := string(dsnBytes)

	zClient, err := client.New("zookeeper", dsn, basePath, nil)
	if err != nil {
		t.Fatal("unexpected error getting zk client")
	}
	zzk.InitializeLocalClient(zClient)

	defer func(orig func(nfs.Driver, string, string) error) {
		nfsMount = orig
	}(nfsMount)

	var local, remote string
	nfsMount = func(driver nfs.Driver, a, b string) error {
		glog.Infof("client is mounting %s to %s", a, b)
		remote = a
		local = b
		return nil
	}

	// creating a UUID in order to make a unique poolID
	// the poolID is somehow being saved on the filesystem (zookeeper config somewhere?)
	// making the poolID unique on every run will ensure it is stateless
	uuid, err := utils.NewUUID()
	if err != nil {
		t.Fatal("New UUID could not be created")
	}

	hostServer := host.New()
	hostServer.ID = "nodeID"
	hostServer.IPAddr = "192.168.1.50"
	hostServer.PoolID = uuid

	hostClient1 := host.New()
	hostClient1.ID = "nodeID_client1"
	hostClient1.IPAddr = "192.168.1.100"
	hostClient1.PoolID = uuid

	mockNfsDriver := &mockNfsDriverT{
		exportPath: "/exports",
		exportName: "serviced_var",
	}

	// TODO: this gets stuck at server.go:90 call to conn.CreateDir hangs
	s, err := NewServer(mockNfsDriver, hostServer, path.Join(mockNfsDriver.exportPath, mockNfsDriver.exportName))
	if err != nil {
		t.Fatalf("unexpected error creating Server: %s", err)
	}

	conn, err := zzk.GetLocalConnection("/")
	if err != nil {
		t.Fatalf("unexpected error getting connection: %s", err)
	}

	shutdown := make(chan interface{})
	defer close(shutdown)
	go s.Run(shutdown, conn)

	// give it some time
	time.Sleep(time.Second * 5)

	if !mockNfsDriver.syncCalled {
		t.Fatalf("sync() should have been called by now")
	}
	if len(mockNfsDriver.clients) != 0 {
		t.Fatalf("Expected number of clients: 0 --- Found: %v (%v)", len(mockNfsDriver.clients), mockNfsDriver.clients)
	}
	mockNfsDriver.syncCalled = false

	tmpVar, err := ioutil.TempDir("", "serviced_var")
	if err != nil {
		t.Fatalf("could not create tempdir: %s", err)
	}
	defer os.RemoveAll(tmpVar)

	c1, err := NewClient(hostClient1, tmpVar)
	if err != nil {
		t.Fatalf("could not create client: %s", err)
	}

	// give it some time
	time.Sleep(time.Second * 2)

	if !mockNfsDriver.syncCalled {
		t.Fatalf("sync() should have been called by now")
	}
	if len(mockNfsDriver.clients) != 1 {
		t.Fatalf("expecting 1 client, got %d", len(mockNfsDriver.clients))
	}
	if mockNfsDriver.clients[0] != hostClient1.IPAddr {
		t.Fatalf("expecting '%s', got '%s'", hostClient1.IPAddr, mockNfsDriver.clients[0])
	}

	shareName := fmt.Sprintf("%s:%s", hostServer.IPAddr, mockNfsDriver.ExportPath())
	if remote != shareName {
		t.Fatalf("remote should be %s, not %s", shareName, remote)
	}

	glog.Info("about to call c1.Close()")
	c1.Close()
}
func (d *daemon) startAgent() error {
	muxListener, err := createMuxListener()
	if err != nil {
		return err
	}
	mux, err := proxy.NewTCPMux(muxListener)
	if err != nil {
		return err
	}

	agentIP := options.OutboundIP
	if agentIP == "" {
		var err error
		agentIP, err = utils.GetIPAddress()
		if err != nil {
			glog.Fatalf("Failed to acquire ip address: %s", err)
		}
	}

	rpcPort := "0"
	parts := strings.Split(options.Listen, ":")
	if len(parts) > 1 {
		rpcPort = parts[1]
	}

	thisHost, err := host.Build(agentIP, rpcPort, "unknown")
	if err != nil {
		panic(err)
	}

	myHostID, err := utils.HostID()
	if err != nil {
		return fmt.Errorf("HostID failed: %v", err)
	} else if err := validation.ValidHostID(myHostID); err != nil {
		glog.Errorf("invalid hostid: %s", myHostID)
	}

	go func() {
		var poolID string
		for {
			poolID = func() string {
				glog.Infof("Trying to discover my pool...")
				var myHost *host.Host
				masterClient, err := master.NewClient(d.servicedEndpoint)
				if err != nil {
					glog.Errorf("master.NewClient failed (endpoint %+v) : %v", d.servicedEndpoint, err)
					return ""
				}
				defer masterClient.Close()
				myHost, err = masterClient.GetHost(myHostID)
				if err != nil {
					glog.Warningf("masterClient.GetHost %v failed: %v (has this host been added?)", myHostID, err)
					return ""
				}
				poolID = myHost.PoolID
				glog.Infof("My PoolID: %v", poolID)
				// send updated host info
				updatedHost, err := host.UpdateHostInfo(*myHost)
				if err != nil {
					glog.Infof("Could not send updated host information: %v", err)
					return poolID
				}
				err = masterClient.UpdateHost(updatedHost)
				if err != nil {
					glog.Warningf("Could not update host information: %v", err)
					return poolID
				}
				glog.V(2).Infof("Sent updated host info %#v", updatedHost)
				return poolID
			}()
			if poolID != "" {
				break
			}
			select {
			case <-d.shutdown:
				return
			case <-time.After(5 * time.Second):
				continue
			}
		}

		thisHost.PoolID = poolID
		basePoolPath := "/pools/" + poolID
		dsn := coordzk.NewDSN(options.Zookeepers, time.Second*15).String()
		glog.Infof("zookeeper dsn: %s", dsn)
		zClient, err := coordclient.New("zookeeper", dsn, basePoolPath, nil)
		if err != nil {
			glog.Errorf("failed create a new coordclient: %v", err)
		}
		zzk.InitializeLocalClient(zClient)

		poolBasedConn, err := zzk.GetLocalConnection(zzk.GeneratePoolPath(poolID))
		if err != nil {
			glog.Errorf("Error in getting a connection based on pool %v: %v", poolID, err)
		}

		if options.NFSClient != "0" {
			nfsClient, err := storage.NewClient(thisHost, path.Join(options.VarPath, "volumes"))
			if err != nil {
				glog.Fatalf("could not create an NFS client: %s", err)
			}

			go func() {
				<-d.shutdown
				glog.Infof("shutting down storage client")
				nfsClient.Close()
			}()

			// loop and log waiting for Storage Leader
			nfsDone := make(chan struct{})
			go func() {
				defer close(nfsDone)
				nfsClient.Wait()
			}()
			// wait indefinitely(?) for storage to work before starting
			glog.Info("Waiting for Storage Leader")
			nfsUp := false
			for !nfsUp {
				select {
				case <-nfsDone:
					nfsUp = true
					glog.Info("Found Storage Leader")
					break
				case <-time.After(time.Second * 30):
					glog.Info("Waiting for Storage Leader, will not be available for running services.")
					continue
				}
			}
		} else {
			glog.Info("NFS Client disabled")
		}

		agentOptions := node.AgentOptions{
			PoolID:               thisHost.PoolID,
			Master:               options.Endpoint,
			UIPort:               options.UIPort,
			RPCPort:              options.RPCPort,
			DockerDNS:            options.DockerDNS,
			VarPath:              options.VarPath,
			Mount:                options.Mount,
			FSType:               options.FSType,
			Zookeepers:           options.Zookeepers,
			Mux:                  mux,
			UseTLS:               options.TLS,
			DockerRegistry:       dockerRegistry,
			MaxContainerAge:      time.Duration(int(time.Second) * options.MaxContainerAge),
			VirtualAddressSubnet: options.VirtualAddressSubnet,
		}
		// creates a zClient that is not pool based!
		hostAgent, err := node.NewHostAgent(agentOptions)
		if err != nil {
			glog.Fatalf("could not create host agent: %s", err)
		}
		d.hostAgent = hostAgent

		d.waitGroup.Add(1)
		go func() {
			hostAgent.Start(d.shutdown)
			glog.Info("Host Agent has shutdown")
			d.waitGroup.Done()
		}()

		// register the API
		glog.V(0).Infoln("registering ControlPlaneAgent service")
		if err = d.rpcServer.RegisterName("ControlPlaneAgent", hostAgent); err != nil {
			glog.Fatalf("could not register ControlPlaneAgent RPC server: %v", err)
		}

		if options.ReportStats {
			statsdest := fmt.Sprintf("http://%s/api/metrics/store", options.HostStats)
			statsduration := time.Duration(options.StatsPeriod) * time.Second
			glog.V(1).Infoln("Starting container statistics reporter")
			statsReporter, err := stats.NewStatsReporter(statsdest, statsduration, poolBasedConn)
			if err != nil {
				glog.Errorf("Error kicking off stats reporter %v", err)
			} else {
				go func() {
					defer statsReporter.Close()
					<-d.shutdown
				}()
			}
		}
	}()

	glog.Infof("agent start staticips: %v [%d]", d.staticIPs, len(d.staticIPs))
	if err = d.rpcServer.RegisterName("Agent", agent.NewServer(d.staticIPs)); err != nil {
		glog.Fatalf("could not register Agent RPC server: %v", err)
	}
	if err != nil {
		glog.Fatalf("Could not start ControlPlane agent: %v", err)
	}

	// TODO: Integrate this server into the rpc server, or something.
	// Currently its only use is for command execution.
	go func() {
		sio := shell.NewProcessExecutorServer(options.Endpoint, dockerRegistry)
		http.ListenAndServe(":50000", sio)
	}()

	return nil
}
func (d *daemon) run() (err error) {
	if d.hostID, err = utils.HostID(); err != nil {
		glog.Fatalf("Could not get host ID: %s", err)
	} else if err := validation.ValidHostID(d.hostID); err != nil {
		glog.Errorf("invalid hostid: %s", d.hostID)
	}

	if currentDockerVersion, err := node.GetDockerVersion(); err != nil {
		glog.Fatalf("Could not get docker version: %s", err)
	} else if minDockerVersion.Compare(currentDockerVersion.Client) < 0 {
		glog.Fatalf("serviced requires docker >= %s", minDockerVersion)
	}

	if _, ok := volume.Registered(options.FSType); !ok {
		glog.Fatalf("no driver registered for %s", options.FSType)
	}

	d.startRPC()
	d.startDockerRegistryProxy()

	if options.Master {
		d.startISVCS()
		if err := d.startMaster(); err != nil {
			glog.Fatal(err)
		}
	}
	if options.Agent {
		if err := d.startAgent(); err != nil {
			glog.Fatal(err)
		}
	}

	signalC := make(chan os.Signal, 10)
	signal.Notify(signalC, syscall.SIGINT, syscall.SIGTERM, syscall.SIGHUP)
	sig := <-signalC
	glog.Info("Shutting down due to interrupt")
	close(d.shutdown)

	done := make(chan struct{})
	go func() {
		defer close(done)
		glog.Info("Stopping sub-processes")
		d.waitGroup.Wait()
		glog.Info("Sub-processes have stopped")
	}()

	select {
	case <-done:
		defer glog.Info("Shutdown")
	case <-time.After(60 * time.Second):
		defer glog.Infof("Timeout waiting for shutdown")
	}

	zzk.ShutdownConnections()

	if options.Master {
		switch sig {
		case syscall.SIGHUP:
			glog.Infof("Not shutting down isvcs")
			command := os.Args
			glog.Infof("Reloading by calling syscall.exec for command: %+v\n", command)
			syscall.Exec(command[0], command[0:], os.Environ())
		default:
			d.stopISVCS()
		}
	}

	return nil
}
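// run() waits for its sub-processes with a deadline: a goroutine closes `done` once the
// WaitGroup drains, and that is raced against a timer. A minimal standalone sketch of that
// wait-with-timeout helper (standard library only; the name is an assumption, not part of
// the daemon) is:
func waitWithTimeout(wg *sync.WaitGroup, timeout time.Duration) bool {
	done := make(chan struct{})
	go func() {
		defer close(done)
		wg.Wait()
	}()
	select {
	case <-done:
		return true // everything stopped in time
	case <-time.After(timeout):
		return false // gave up waiting
	}
}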