Example #1
0
func (s *TestSuite) TestApiDisconnect(t *C) {
	/**
	 * If using direct interface, Recv() should return error if API disconnects.
	 */

	ws, err := client.NewWebsocketClient(s.logger, s.api, "agent", nil)
	t.Assert(err, IsNil)

	ws.Connect()
	c := <-mock.ClientConnectChan
	<-ws.ConnectChan()

	// No error yet.
	got := test.WaitErr(ws.ErrorChan())
	t.Assert(len(got), Equals, 0)

	mock.DisconnectClient(c)

	/**
	 * I cannot provoke an error on websocket.Send(), only Receive().
	 * Perhaps errors (e.g. ws closed) are only reported on recv?
	 * This only affect the logger since it's ws send-only: it will
	 * need a goroutine blocking on Recieve() that, upon error, notifies
	 * the sending goroutine to reconnect.
	 */
	var data interface{}
	err = ws.Recv(data, 5)
	t.Assert(err, NotNil) // EOF due to disconnect.
}
Example #2
0
func (s *TestSuite) TestDialTimeout(t *C) {
	/**
	 * This test simulates a dial timeout by listening on a port that does nothing.
	 * The TCP connection completes, but the TLS handshake times out because the
	 * little goroutine below does nothing after net.Listen() (normally code would
	 * net.Accept() after listening).  To simulate a lower-level dial timeout would
	 * require a very low-level handling of the network socket: having the port open
	 * but not completing the TCP syn-syn+ack-ack handshake; this is too complicate,
	 * so breaking the TLS handshake is close enough.
	 */
	addr := "localhost:9443"
	url := "wss://" + addr + "/"
	links := map[string]string{"agent": url}
	api := mock.NewAPI("http://localhost", url, "apikey", "uuid", links)
	wss, err := client.NewWebsocketClient(s.logger, api, "agent", nil)
	t.Assert(err, IsNil)

	doneChan := make(chan bool, 1)
	go func() {
		l, err := net.Listen("tcp", addr)
		if err != nil {
			log.Fatal(err)
		}
		defer l.Close()
		<-doneChan
	}()
	time.Sleep(1 * time.Second)

	err = wss.ConnectOnce(2)
	t.Check(err, NotNil)

	doneChan <- true
}
Example #3
0
func (s *TestSuite) TestConnectBackoff(t *C) {
	/**
	 * Connect() should wait between attempts, using pct.Backoff (pct/backoff.go).
	 */

	ws, err := client.NewWebsocketClient(s.logger, s.api, "agent", nil)
	t.Assert(err, IsNil)

	ws.Connect()
	c := <-mock.ClientConnectChan
	<-ws.ConnectChan()
	defer ws.Disconnect()

	// 0s wait, connect, err="Lost connection",
	// 1s wait, connect, err="Lost connection",
	// 3s wait, connect, ok
	t0 := time.Now()
	for i := 0; i < 2; i++ {
		mock.DisconnectClient(c)
		ws.Connect()
		c = <-mock.ClientConnectChan
		<-ws.ConnectChan() // connect ack
	}
	d := time.Now().Sub(t0)
	if d < time.Duration(3*time.Second) {
		t.Errorf("Exponential backoff wait time between connect attempts: %s\n", d)
	}
}
Example #4
0
func (s *TestSuite) TestChannelsAfterReconnect(t *C) {
	/**
	 * Client send/recv chans should work after disconnect and reconnect.
	 */

	ws, err := client.NewWebsocketClient(s.logger, s.api, "agent", nil)
	t.Assert(err, IsNil)

	ws.Start()
	defer ws.Stop()
	defer ws.Disconnect()

	ws.Connect()
	c := <-mock.ClientConnectChan
	<-ws.ConnectChan() // connect ack

	// Send cmd and wait for reply to ensure we're fully connected.
	cmd := &proto.Cmd{
		User: "******",
		Ts:   time.Now(),
		Cmd:  "Status",
	}
	c.SendChan <- cmd
	got := test.WaitCmd(ws.RecvChan())
	t.Assert(len(got), Equals, 1)
	reply := cmd.Reply(nil, nil)
	ws.SendChan() <- reply
	data := test.WaitData(c.RecvChan)
	t.Assert(len(data), Equals, 1)

	// Disconnect client.
	mock.DisconnectClient(c)
	<-ws.ConnectChan() // disconnect ack

	// Reconnect client and send/recv again.
	ws.Connect()
	c = <-mock.ClientConnectChan
	<-ws.ConnectChan() // connect ack

	c.SendChan <- cmd
	got = test.WaitCmd(ws.RecvChan())
	t.Assert(len(got), Equals, 1)
	reply = cmd.Reply(nil, nil)
	ws.SendChan() <- reply
	data = test.WaitData(c.RecvChan)
	t.Assert(len(data), Equals, 1)
}
func (s *TestSuite) TestChannels(t *C) {
	/**
	 * Agent uses send/recv channels instead of "direct" interface.
	 */

	ws, err := client.NewWebsocketClient(s.logger, s.api, "agent")
	t.Assert(err, IsNil)

	// Start send/recv chans, but idle until successful Connect.
	ws.Start()
	defer ws.Stop()

	ws.Connect()
	c := <-mock.ClientConnectChan
	<-ws.ConnectChan()

	// API sends Cmd to client.
	cmd := &proto.Cmd{
		User: "******",
		Ts:   time.Now(),
		Cmd:  "Status",
	}
	c.SendChan <- cmd

	// If client's recvChan is working, it will receive the Cmd.
	got := test.WaitCmd(ws.RecvChan())
	t.Assert(len(got), Equals, 1)
	t.Assert(got[0], DeepEquals, *cmd)

	// Client sends Reply in response to Cmd.
	reply := cmd.Reply(nil, nil)
	ws.SendChan() <- reply

	// If client's sendChan is working, we/API will receive the Reply.
	data := test.WaitData(c.RecvChan)
	t.Assert(len(data), Equals, 1)

	// We're dealing with generic data again.
	m := data[0].(map[string]interface{})
	t.Assert(m["Cmd"], Equals, "Status")
	t.Assert(m["Error"], Equals, "")

	err = ws.Disconnect()
	t.Assert(err, IsNil)
}
Example #6
0
func (s *TestSuite) TestSendBytes(t *C) {
	ws, err := client.NewWebsocketClient(s.logger, s.api, "agent", nil)
	t.Assert(err, IsNil)

	ws.ConnectOnce(5)
	c := <-mock.ClientConnectChan

	data := []byte(`["Hello"]`)
	err = ws.SendBytes(data, 5)
	t.Assert(err, IsNil)

	// Recv what we just sent.
	got := test.WaitData(c.RecvChan)
	t.Assert(len(got), Equals, 1)
	gotData := got[0].([]interface{})
	t.Check(gotData[0].(string), Equals, "Hello")

	ws.DisconnectOnce()
}
func (s *TestSuite) TestErrorChan(t *C) {
	/**
	 * When client disconnects due to send or recv error,
	 * it should send the error on its ErrorChan().
	 */

	ws, err := client.NewWebsocketClient(s.logger, s.api, "agent")
	t.Assert(err, IsNil)

	ws.Start()
	defer ws.Stop()

	ws.Connect()
	c := <-mock.ClientConnectChan
	<-ws.ConnectChan()

	// No error yet.
	got := test.WaitErr(ws.ErrorChan())
	t.Assert(len(got), Equals, 0)

	// API sends Cmd to client.
	cmd := &proto.Cmd{
		User: "******",
		Ts:   time.Now(),
		Cmd:  "Status",
	}
	c.SendChan <- cmd

	// No error yet.
	got = test.WaitErr(ws.ErrorChan())
	t.Assert(len(got), Equals, 0)

	// Disconnect the client.
	mock.DisconnectClient(c)

	// Client should send error from disconnect.
	got = test.WaitErr(ws.ErrorChan())
	t.Assert(len(got), Equals, 1)
	t.Assert(got[0], NotNil)

	err = ws.Disconnect()
	t.Assert(err, IsNil)
}
Example #8
0
func (s *TestSuite) TestWssConnection(t *C) {
	/**
	 * This test ensures our slighly customized wss connectio handling works,
	 * i.e. that TLS works.  Only drawback is: client disables cert verification
	 * because the mock ws server uses a self-signed cert, but this only happens
	 * when the remote addr is localhost:8443, so it shouldn't affect real connections.
	 */
	ws, err := client.NewWebsocketClient(s.logger, s.apiWss, "agent", nil)
	t.Assert(err, IsNil)

	// Client sends state of connection (true=connected, false=disconnected)
	// on its ConnectChan.
	connected := false
	doneChan := make(chan bool)
	go func() {
		connected = <-ws.ConnectChan()
		doneChan <- true
	}()

	// Wait for connection in mock ws server.
	ws.Connect()
	c := <-mock.ClientConnectChanWss

	<-doneChan
	t.Check(connected, Equals, true)

	// Send a log entry.
	logEntry := &proto.LogEntry{
		Level:   2,
		Service: "qan",
		Msg:     "Hello",
	}
	err = ws.Send(logEntry, 5)
	t.Assert(err, IsNil)

	// Recv what we just sent.
	got := test.WaitData(c.RecvChanWss)
	t.Assert(len(got), Equals, 1)

	ws.Conn().Close()
}
Example #9
0
func (s *TestSuite) TestChannelsApiDisconnect(t *C) {
	/**
	 * If using chnanel interface, ErrorChan() should return error if API disconnects.
	 */

	ws, err := client.NewWebsocketClient(s.logger, s.api, "agent", nil)
	t.Assert(err, IsNil)

	var gotErr error
	doneChan := make(chan bool)
	go func() {
		gotErr = <-ws.ErrorChan()
		doneChan <- true
	}()

	ws.Start()
	defer ws.Stop()
	defer ws.Disconnect()

	ws.Connect()
	c := <-mock.ClientConnectChan
	<-ws.ConnectChan() // connect ack

	// No error yet.
	select {
	case <-doneChan:
		t.Error("No error yet")
	default:
	}

	mock.DisconnectClient(c)

	// Wait for error.
	select {
	case <-doneChan:
		t.Check(gotErr, NotNil) // EOF due to disconnect.
	case <-time.After(1 * time.Second):
		t.Error("Get error")
	}
}
Example #10
0
func (s *TestSuite) TestCloseTimeout(t *C) {
	// https://jira.percona.com/browse/PCT-1045

	ws, err := client.NewWebsocketClient(s.logger, s.api, "agent", nil)
	t.Assert(err, IsNil)

	connected := false
	doneChan := make(chan bool)
	go func() {
		connected = <-ws.ConnectChan()
		doneChan <- true
	}()

	// Wait for connection in mock ws server.
	ws.Connect()
	c := <-mock.ClientConnectChan

	<-doneChan
	t.Check(connected, Equals, true)

	// Send a log entry.
	logEntry := &proto.LogEntry{
		Level:   2,
		Service: "qan",
		Msg:     "Hello",
	}
	err = ws.Send(logEntry, 1)
	t.Assert(err, IsNil)

	// Recv what we just sent.
	got := test.WaitData(c.RecvChan)
	t.Assert(len(got), Equals, 1)

	// Wait 1s for that ^ 1s timeout to pass.
	time.Sleep(1400 * time.Millisecond)

	err = ws.Disconnect()
	t.Check(err, IsNil)
}
Example #11
0
func (s *TestSuite) TestSend(t *C) {
	/**
	 * LogRelay (logrelay/) uses "direct" interface, not send/recv chans.
	 */

	ws, err := client.NewWebsocketClient(s.logger, s.api, "agent", nil)
	t.Assert(err, IsNil)

	// Client sends state of connection (true=connected, false=disconnected)
	// on its ConnectChan.
	connected := false
	doneChan := make(chan bool)
	go func() {
		connected = <-ws.ConnectChan()
		doneChan <- true
	}()

	// Wait for connection in mock ws server.
	ws.Connect()
	c := <-mock.ClientConnectChan

	<-doneChan
	t.Check(connected, Equals, true)

	// Send a log entry.
	logEntry := &proto.LogEntry{
		Level:   2,
		Service: "qan",
		Msg:     "Hello",
	}
	err = ws.Send(logEntry, 5)
	t.Assert(err, IsNil)

	// Recv what we just sent.
	got := test.WaitData(c.RecvChan)
	t.Assert(len(got), Equals, 1)

	// We're dealing with generic data.
	m := got[0].(map[string]interface{})
	t.Check(m["Level"], Equals, float64(2))
	t.Check(m["Service"], Equals, "qan")
	t.Check(m["Msg"], Equals, "Hello")

	// Quick check that Conn() works.
	conn := ws.Conn()
	t.Check(conn, NotNil)

	// Status should report connected to the proper link.
	status := ws.Status()
	t.Check(status, DeepEquals, map[string]string{
		"ws":      "Connected " + URL,
		"ws-link": URL,
	})

	ws.Disconnect()

	select {
	case connected = <-ws.ConnectChan():
	case <-time.After(1 * time.Second):
		t.Error("No connected=false notify on Disconnect()")
	}

	// Status should report disconnected and still the proper link.
	status = ws.Status()
	t.Check(status, DeepEquals, map[string]string{
		"ws":      "Disconnected",
		"ws-link": URL,
	})
}
Example #12
0
func run() error {
	version := fmt.Sprintf("percona-agent %s rev %s", agent.VERSION, agent.REVISION)
	if flagVersion {
		fmt.Println(version)
		return nil
	}
	golog.Printf("Running %s pid %d\n", version, os.Getpid())

	if err := pct.Basedir.Init(flagBasedir); err != nil {
		return err
	}

	// Start-lock file is used to let agent1 self-update, create start-lock,
	// start updated agent2, exit cleanly, then agent2 starts.  agent1 may
	// not use a PID file, so this special file is required.
	if err := pct.WaitStartLock(); err != nil {
		return err
	}
	// NOTE: This must run last, and defer if LIFO, so it must be declared first.
	defer os.Remove(pct.Basedir.File("start-lock"))

	/**
	 * Agent config (require API key and agent UUID)
	 */

	if !pct.FileExists(pct.Basedir.ConfigFile("agent")) {
		return fmt.Errorf("Agent config file %s does not exist", pct.Basedir.ConfigFile("agent"))
	}

	bytes, err := agent.LoadConfig()
	if err != nil {
		return fmt.Errorf("Invalid agent config: %s\n", err)
	}
	agentConfig := &agent.Config{}
	if err := json.Unmarshal(bytes, agentConfig); err != nil {
		return fmt.Errorf("Error parsing "+pct.Basedir.ConfigFile("agent")+": ", err)
	}

	golog.Println("ApiHostname: " + agentConfig.ApiHostname)
	golog.Println("AgentUuid: " + agentConfig.AgentUuid)

	/**
	 * Ping and exit, maybe.
	 */

	if flagPing {
		t0 := time.Now()
		code, err := pct.Ping(agentConfig.ApiHostname, agentConfig.ApiKey)
		d := time.Now().Sub(t0)
		if err != nil || code != 200 {
			return fmt.Errorf("Ping FAIL (%d %d %s)", d, code, err)
		} else {
			golog.Printf("Ping OK (%s)", d)
			return nil
		}
	}

	/**
	 * PID file
	 */

	if flagPidFile != "" {
		pidFile := pct.NewPidFile()
		if err := pidFile.Set(flagPidFile); err != nil {
			golog.Fatalln(err)
		}
		defer pidFile.Remove()
	}

	/**
	 * REST API
	 */

	api, err := ConnectAPI(agentConfig)
	if err != nil {
		golog.Fatal(err)
	}

	/**
	 * Log relay
	 */

	logChan := make(chan *proto.LogEntry, log.BUFFER_SIZE*3)

	// Log websocket client, possibly disabled later.
	logClient, err := client.NewWebsocketClient(pct.NewLogger(logChan, "log-ws"), api, "log")
	if err != nil {
		golog.Fatalln(err)
	}
	logManager := log.NewManager(
		logClient,
		logChan,
	)
	if err := logManager.Start(); err != nil {
		return fmt.Errorf("Error starting logmanager: %s\n", err)
	}

	/**
	 * Instance manager
	 */

	itManager := instance.NewManager(
		pct.NewLogger(logChan, "instance-manager"),
		pct.Basedir.Dir("config"),
		api,
	)
	if err := itManager.Start(); err != nil {
		return fmt.Errorf("Error starting instance manager: %s\n", err)
	}

	/**
	 * Data spooler and sender
	 */

	hostname, _ := os.Hostname()

	dataClient, err := client.NewWebsocketClient(pct.NewLogger(logChan, "data-ws"), api, "data")
	if err != nil {
		golog.Fatalln(err)
	}
	dataManager := data.NewManager(
		pct.NewLogger(logChan, "data"),
		pct.Basedir.Dir("data"),
		hostname,
		dataClient,
	)
	if err := dataManager.Start(); err != nil {
		return fmt.Errorf("Error starting data manager: %s\n", err)
	}

	/**
	 * Collecct/report ticker (master clock)
	 */

	nowFunc := func() int64 { return time.Now().UTC().UnixNano() }
	clock := ticker.NewClock(&ticker.RealTickerFactory{}, nowFunc)

	/**
	 * Metric and system config monitors
	 */

	mmManager := mm.NewManager(
		pct.NewLogger(logChan, "mm"),
		mmMonitor.NewFactory(logChan, itManager.Repo()),
		clock,
		dataManager.Spooler(),
		itManager.Repo(),
	)
	if err := mmManager.Start(); err != nil {
		return fmt.Errorf("Error starting mm manager: %s\n", err)
	}

	sysconfigManager := sysconfig.NewManager(
		pct.NewLogger(logChan, "sysconfig"),
		sysconfigMonitor.NewFactory(logChan, itManager.Repo()),
		clock,
		dataManager.Spooler(),
		itManager.Repo(),
	)
	if err := sysconfigManager.Start(); err != nil {
		return fmt.Errorf("Error starting sysconfig manager: %s\n", err)
	}

	/**
	 * Query Analytics
	 */

	qanManager := qan.NewManager(
		pct.NewLogger(logChan, "qan"),
		&mysql.RealConnectionFactory{},
		clock,
		qan.NewFileIntervalIterFactory(logChan),
		qan.NewSlowLogWorkerFactory(logChan),
		dataManager.Spooler(),
		itManager.Repo(),
	)
	if err := qanManager.Start(); err != nil {
		return fmt.Errorf("Error starting qan manager: %s\n", err)
	}

	/**
	 * Signal handler
	 */

	// Generally the agent has a crash-only design, but QAN is so far the only service
	// which reconfigures MySQL: it enables the slow log, sets long_query_time, etc.
	// It's not terrible to leave slow log on, but it's nicer to turn it off.
	sigChan := make(chan os.Signal, 1)
	stopChan := make(chan error, 2)
	signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
	go func() {
		sig := <-sigChan
		golog.Printf("Caught %s signal, shutting down...\n", sig)
		stopChan <- qanManager.Stop()
	}()

	/**
	 * Agent
	 */

	cmdClient, err := client.NewWebsocketClient(pct.NewLogger(logChan, "agent-ws"), api, "cmd")
	if err != nil {
		golog.Fatal(err)
	}

	// The official list of services known to the agent.  Adding a new service
	// requires a manager, starting the manager as above, and adding the manager
	// to this map.
	services := map[string]pct.ServiceManager{
		"log":       logManager,
		"data":      dataManager,
		"qan":       qanManager,
		"mm":        mmManager,
		"instance":  itManager,
		"sysconfig": sysconfigManager,
	}

	agent := agent.NewAgent(
		agentConfig,
		pct.NewLogger(logChan, "agent"),
		api,
		cmdClient,
		services,
	)

	/**
	 * Run agent, wait for it to stop or signal.
	 */

	go func() {
		stopChan <- agent.Run()
	}()
	stopErr := <-stopChan // agent or signal
	golog.Println("Agent stopped, shutting down...")
	qanManager.Stop()           // see Signal handler ^
	time.Sleep(2 * time.Second) // wait for final replies and log entries
	return stopErr
}
Example #13
0
func run() error {
	version := fmt.Sprintf("percona-agent %s%s rev %s", agent.VERSION, agent.REL, agent.REVISION)
	if flagVersion {
		fmt.Println(version)
		return nil
	}
	golog.Printf("Running %s pid %d\n", version, os.Getpid())

	if err := pct.Basedir.Init(flagBasedir); err != nil {
		return err
	}

	// Start-lock file is used to let agent1 self-update, create start-lock,
	// start updated agent2, exit cleanly, then agent2 starts.  agent1 may
	// not use a PID file, so this special file is required.
	if err := pct.WaitStartLock(); err != nil {
		return err
	}
	// NOTE: This must run last, and defer if LIFO, so it must be declared first.
	defer os.Remove(pct.Basedir.File("start-lock"))

	/**
	 * Agent config (require API key and agent UUID)
	 */

	if !pct.FileExists(pct.Basedir.ConfigFile("agent")) {
		return fmt.Errorf("Agent config file %s does not exist", pct.Basedir.ConfigFile("agent"))
	}

	bytes, err := agent.LoadConfig()
	if err != nil {
		return fmt.Errorf("Invalid agent config: %s\n", err)
	}
	agentConfig := &agent.Config{}
	if err := json.Unmarshal(bytes, agentConfig); err != nil {
		return fmt.Errorf("Error parsing "+pct.Basedir.ConfigFile("agent")+": ", err)
	}

	golog.Println("ApiHostname: " + agentConfig.ApiHostname)
	golog.Println("AgentUuid: " + agentConfig.AgentUuid)

	/**
	 * Ping and exit, maybe.
	 */

	// Set for all connections to API.  X-Percona-API-Key is set automatically
	// using the pct.APIConnector.
	headers := map[string]string{
		"X-Percona-Agent-Version": agent.VERSION,
	}

	if flagPing {
		t0 := time.Now()
		code, err := pct.Ping(agentConfig.ApiHostname, agentConfig.ApiKey, headers)
		d := time.Now().Sub(t0)
		if err != nil || code != 200 {
			return fmt.Errorf("Ping FAIL (%d %d %s)", d, code, err)
		} else {
			golog.Printf("Ping OK (%s)", d)
			return nil
		}
	}

	/**
	 * PID file
	 */

	pidFilePath := agentConfig.PidFile
	if flagPidFile != "" {
		pidFilePath = flagPidFile
	}
	if pidFilePath != "" {
		pidFile := pct.NewPidFile()
		if err := pidFile.Set(pidFilePath); err != nil {
			golog.Fatalln(err)
		}
		defer pidFile.Remove()
	}

	/**
	 * REST API
	 */

	retry := -1 // unlimited
	if flagStatus {
		retry = 1
	}
	api, err := ConnectAPI(agentConfig, retry)
	if err != nil {
		golog.Fatal(err)
	}

	// Get agent status via API and exit.
	if flagStatus {
		code, bytes, err := api.Get(agentConfig.ApiKey, api.AgentLink("self")+"/status")
		if err != nil {
			return err
		}
		if code == 404 {
			return fmt.Errorf("Agent not found")
		}
		status := make(map[string]string)
		if err := json.Unmarshal(bytes, &status); err != nil {
			return err
		}
		golog.Println(status)
		return nil
	}

	/**
	 * Connection factory
	 */
	connFactory := &mysql.RealConnectionFactory{}

	/**
	 * Log relay
	 */

	logChan := make(chan *proto.LogEntry, log.BUFFER_SIZE*3)

	// Log websocket client, possibly disabled later.
	logClient, err := client.NewWebsocketClient(pct.NewLogger(logChan, "log-ws"), api, "log", headers)
	if err != nil {
		golog.Fatalln(err)
	}
	logManager := log.NewManager(
		logClient,
		logChan,
	)
	if err := logManager.Start(); err != nil {
		return fmt.Errorf("Error starting logmanager: %s\n", err)
	}

	/**
	 * MRMS (MySQL Restart Monitoring Service)
	 */
	mrm := mrmsMonitor.NewMonitor(
		pct.NewLogger(logChan, "mrms-monitor"),
		connFactory,
	)
	mrmsManager := mrms.NewManager(
		pct.NewLogger(logChan, "mrms-manager"),
		mrm,
	)
	if err := mrmsManager.Start(); err != nil {
		return fmt.Errorf("Error starting mrms manager: %s\n", err)
	}

	/**
	 * Instance manager
	 */
	itManager := instance.NewManager(
		pct.NewLogger(logChan, "instance-manager"),
		pct.Basedir.Dir("config"),
		api,
		mrm,
	)
	if err := itManager.Start(); err != nil {
		return fmt.Errorf("Error starting instance manager: %s\n", err)
	}

	/**
	 * Data spooler and sender
	 */

	hostname, _ := os.Hostname()

	dataClient, err := client.NewWebsocketClient(pct.NewLogger(logChan, "data-ws"), api, "data", headers)
	if err != nil {
		golog.Fatalln(err)
	}
	dataManager := data.NewManager(
		pct.NewLogger(logChan, "data"),
		pct.Basedir.Dir("data"),
		pct.Basedir.Dir("trash"),
		hostname,
		dataClient,
	)
	if err := dataManager.Start(); err != nil {
		return fmt.Errorf("Error starting data manager: %s\n", err)
	}

	/**
	 * Collecct/report ticker (master clock)
	 */

	nowFunc := func() int64 { return time.Now().UTC().UnixNano() }
	clock := ticker.NewClock(&ticker.RealTickerFactory{}, nowFunc)

	/**
	 * Metric and system config monitors
	 */

	mmManager := mm.NewManager(
		pct.NewLogger(logChan, "mm"),
		mmMonitor.NewFactory(logChan, itManager.Repo(), mrm),
		clock,
		dataManager.Spooler(),
		itManager.Repo(),
		mrm,
	)
	if err := mmManager.Start(); err != nil {
		return fmt.Errorf("Error starting mm manager: %s\n", err)
	}

	sysconfigManager := sysconfig.NewManager(
		pct.NewLogger(logChan, "sysconfig"),
		sysconfigMonitor.NewFactory(logChan, itManager.Repo()),
		clock,
		dataManager.Spooler(),
		itManager.Repo(),
	)
	if err := sysconfigManager.Start(); err != nil {
		return fmt.Errorf("Error starting sysconfig manager: %s\n", err)
	}

	/**
	 * Query service (real-time EXPLAIN, SHOW CREATE TABLE, etc.)
	 */

	queryManager := query.NewManager(
		pct.NewLogger(logChan, "query"),
		itManager.Repo(),
		&mysql.RealConnectionFactory{},
	)
	if err := queryManager.Start(); err != nil {
		return fmt.Errorf("Error starting query manager: %s\n", err)
	}

	/**
	 * Query Analytics
	 */

	qanManager := qan.NewManager(
		pct.NewLogger(logChan, "qan"),
		clock,
		itManager.Repo(),
		mrm,
		connFactory,
		qanFactory.NewRealAnalyzerFactory(
			logChan,
			qanFactory.NewRealIntervalIterFactory(logChan),
			slowlog.NewRealWorkerFactory(logChan),
			perfschema.NewRealWorkerFactory(logChan),
			dataManager.Spooler(),
			clock,
		),
	)

	if err := qanManager.Start(); err != nil {
		return fmt.Errorf("Error starting qan manager: %s\n", err)
	}

	/**
	 * Sysinfo
	 */
	sysinfoManager := sysinfo.NewManager(
		pct.NewLogger(logChan, "sysinfo"),
	)

	// MySQL Sysinfo
	mysqlSysinfoService := mysqlSysinfo.NewMySQL(
		pct.NewLogger(logChan, "sysinfo-mysql"),
		itManager.Repo(),
	)
	if err := sysinfoManager.RegisterService("MySQLSummary", mysqlSysinfoService); err != nil {
		return fmt.Errorf("Error registering Mysql Sysinfo service: %s\n", err)
	}

	// System Sysinfo
	systemSysinfoService := systemSysinfo.NewSystem(
		pct.NewLogger(logChan, "sysinfo-system"),
	)
	if err := sysinfoManager.RegisterService("SystemSummary", systemSysinfoService); err != nil {
		return fmt.Errorf("Error registering System Sysinfo service: %s\n", err)
	}

	// Start Sysinfo manager
	if err := sysinfoManager.Start(); err != nil {
		return fmt.Errorf("Error starting Sysinfo manager: %s\n", err)
	}

	/**
	 * Signal handler
	 */

	// Generally the agent has a crash-only design, but QAN is so far the only service
	// which reconfigures MySQL: it enables the slow log, sets long_query_time, etc.
	// It's not terrible to leave slow log on, but it's nicer to turn it off.
	sigChan := make(chan os.Signal, 1)
	stopChan := make(chan error, 2)
	signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
	go func() {
		sig := <-sigChan
		golog.Printf("Caught %s signal, shutting down...\n", sig)
		stopChan <- qanManager.Stop()
	}()

	/**
	 * Agent
	 */

	cmdClient, err := client.NewWebsocketClient(pct.NewLogger(logChan, "agent-ws"), api, "cmd", headers)
	if err != nil {
		golog.Fatal(err)
	}

	// The official list of services known to the agent.  Adding a new service
	// requires a manager, starting the manager as above, and adding the manager
	// to this map.
	services := map[string]pct.ServiceManager{
		"log":       logManager,
		"data":      dataManager,
		"qan":       qanManager,
		"mm":        mmManager,
		"instance":  itManager,
		"mrms":      mrmsManager,
		"sysconfig": sysconfigManager,
		"query":     queryManager,
		"sysinfo":   sysinfoManager,
	}

	// Set the global pct/cmd.Factory, used for the Restart cmd.
	pctCmd.Factory = &pctCmd.RealCmdFactory{}

	agentLogger := pct.NewLogger(logChan, "agent")

	agent := agent.NewAgent(
		agentConfig,
		agentLogger,
		api,
		cmdClient,
		services,
	)

	/**
	 * Run agent, wait for it to stop, signal, or crash.
	 */

	var stopErr error
	go func() {
		defer func() {
			if err := recover(); err != nil {
				errMsg := fmt.Sprintf("Agent crashed: %s", err)
				golog.Println(errMsg)
				agentLogger.Error(errMsg)
				stopChan <- fmt.Errorf("%s", errMsg)
			}
		}()
		stopChan <- agent.Run()
	}()

	// Wait for agent to stop, or for signals.
	agentRunning := true
	statusSigChan := make(chan os.Signal, 1)
	signal.Notify(statusSigChan, syscall.SIGUSR1) // kill -USER1 PID
	reconnectSigChan := make(chan os.Signal, 1)
	signal.Notify(reconnectSigChan, syscall.SIGHUP) // kill -HUP PID
	for agentRunning {
		select {
		case stopErr = <-stopChan: // agent or signal
			golog.Println("Agent stopped, shutting down...")
			agentLogger.Info("Agent stopped")
			agentRunning = false
		case <-statusSigChan:
			status := agent.AllStatus()
			golog.Printf("Status: %+v\n", status)
		case <-reconnectSigChan:
			u, _ := user.Current()
			cmd := &proto.Cmd{
				Ts:        time.Now().UTC(),
				User:      u.Username + " (SIGHUP)",
				AgentUuid: agentConfig.AgentUuid,
				Service:   "agent",
				Cmd:       "Reconnect",
			}
			agent.Handle(cmd)
		}
	}

	qanManager.Stop()           // see Signal handler ^
	time.Sleep(2 * time.Second) // wait for final replies and log entries
	return stopErr
}