func (s *ManagerTestSuite) TestGetConfig(t *C) { mockConnFactory := &mock.ConnectionFactory{Conn: s.nullmysql} m := qan.NewManager(s.logger, mockConnFactory, s.clock, s.iterFactory, s.workerFactory, s.spool, s.im) t.Assert(m, NotNil) config := &qan.Config{ ServiceInstance: s.mysqlInstance, Interval: 300, MaxSlowLogSize: 1000, MaxWorkers: 3, WorkerRunTime: 300, Start: []mysql.Query{ mysql.Query{Set: "SET GLOBAL slow_query_log=OFF"}, mysql.Query{Set: "SET GLOBAL long_query_time=0.456"}, mysql.Query{Set: "SET GLOBAL slow_query_log=ON"}, }, Stop: []mysql.Query{ mysql.Query{Set: "SET GLOBAL slow_query_log=OFF"}, mysql.Query{Set: "SET GLOBAL long_query_time=10"}, }, } qanConfig, _ := json.Marshal(config) cmd := &proto.Cmd{ Ts: time.Now(), Cmd: "StartService", Data: qanConfig, } reply := m.Handle(cmd) t.Assert(reply.Error, Equals, "") test.WaitStatusPrefix(1, m, "qan-log-parser", "Idle") s.nullmysql.Reset() cmd = &proto.Cmd{ Cmd: "GetConfig", Service: "qan", } reply = m.Handle(cmd) t.Assert(reply.Error, Equals, "") t.Assert(reply.Data, NotNil) gotConfig := []proto.AgentConfig{} if err := json.Unmarshal(reply.Data, &gotConfig); err != nil { t.Fatal(err) } expectConfig := []proto.AgentConfig{ { InternalService: "qan", Config: string(qanConfig), Running: true, }, } if same, diff := test.IsDeeply(gotConfig, expectConfig); !same { test.Dump(gotConfig) t.Error(diff) } // Stop manager reply = m.Handle(&proto.Cmd{Cmd: "StopService"}) t.Assert(reply.Error, Equals, "") }
func (s *ManagerTestSuite) TestStart(t *C) { mockConnFactory := &mock.ConnectionFactory{Conn: s.nullmysql} m := qan.NewManager(s.logger, mockConnFactory, s.clock, s.iterFactory, s.workerFactory, s.spool, s.im) t.Assert(m, NotNil) // Starting qan without a config does nothing but start the qan manager. err := m.Start() t.Check(err, IsNil) status := m.Status() t.Check(status["qan-log-parser"], Equals, "") t.Check(status["qan-last-interval"], Equals, "") t.Check(status["qan-next-interval"], Equals, "") // Write a qan config to disk. config := &qan.Config{ ServiceInstance: s.mysqlInstance, Interval: 300, MaxWorkers: 1, WorkerRunTime: 600, Start: []mysql.Query{ mysql.Query{Set: "SET GLOBAL slow_query_log=OFF"}, mysql.Query{Set: "SET GLOBAL long_query_time=0.456"}, mysql.Query{Set: "SET GLOBAL slow_query_log=ON"}, }, Stop: []mysql.Query{ mysql.Query{Set: "SET GLOBAL slow_query_log=OFF"}, mysql.Query{Set: "SET GLOBAL long_query_time=10"}, }, } err = pct.Basedir.WriteConfig("qan", config) t.Assert(err, IsNil) // qan.Start() should read and use config on disk. err = m.Start() t.Check(err, IsNil) if !test.WaitStatusPrefix(1, m, "qan-log-parser", "Idle") { t.Error("WaitStatusPrefix(qan-log-parser, Idle) failed") } status = m.Status() t.Check(status["qan-log-parser"], Equals, "Idle (0 of 1 running)") t.Check(status["qan-last-interval"], Equals, "") t.Check(status["qan-next-interval"], Not(Equals), "") // Stopping qan.Stop() should leave config file on disk. err = m.Stop() t.Assert(err, IsNil) t.Check(test.FileExists(pct.Basedir.ConfigFile("qan")), Equals, true) }
func (s *ManagerTestSuite) TestRecoverWorkerPanic(t *C) { // Create and start manager with mock workers. w1StopChan := make(chan bool) w1 := mock.NewQanWorker("qan-worker-1", w1StopChan, nil, nil, true) f := mock.NewQanWorkerFactory([]*mock.QanWorker{w1}) mockConnFactory := &mock.ConnectionFactory{Conn: s.nullmysql} m := qan.NewManager(s.logger, mockConnFactory, s.clock, s.iterFactory, f, s.spool, s.im) t.Assert(m, NotNil) config := &qan.Config{ ServiceInstance: s.mysqlInstance, MaxSlowLogSize: 1000, MaxWorkers: 2, Interval: 60, WorkerRunTime: 60, Start: []mysql.Query{ mysql.Query{Set: "SET GLOBAL slow_query_log=OFF"}, }, Stop: []mysql.Query{ mysql.Query{Set: "SET GLOBAL slow_query_log=OFF"}, }, } qanConfig, _ := json.Marshal(config) cmd := &proto.Cmd{ Ts: time.Now(), Cmd: "StartService", Data: qanConfig, } reply := m.Handle(cmd) t.Assert(reply.Error, Equals, "") test.WaitStatusPrefix(1, m, "qan-log-parser", "Idle") test.DrainLogChan(s.logChan) // Start mock worker. All it does is panic, much like fipar. now := time.Now() i1 := &qan.Interval{ Filename: "slow.log", StartOffset: 0, EndOffset: 100, StartTime: now, StopTime: now, } s.intervalChan <- i1 <-w1.Running() // wait for manager to run worker // For now, worker panic only results in error to log. var gotError *proto.LogEntry timeout := time.After(200 * time.Millisecond) GET_LOG: for { select { case l := <-s.logChan: if l.Level == 3 && strings.HasPrefix(l.Msg, "Lost interval 0 slow.log") { gotError = l break GET_LOG } case <-timeout: break GET_LOG } } t.Check(gotError, NotNil) // Stop manager reply = m.Handle(&proto.Cmd{Cmd: "StopService"}) t.Assert(reply.Error, Equals, "") }
func (s *ManagerTestSuite) TestWaitRemoveSlowLog(t *C) { // Same as TestRotateAndRemoveSlowLog, but we use mock workers so we can // test that slow log is not removed until previous workers are done. // Mock worker factory will return our mock workers when manager calls Make(). w1StopChan := make(chan bool) w1 := mock.NewQanWorker("qan-worker-1", w1StopChan, nil, nil, false) w2StopChan := make(chan bool) w2 := mock.NewQanWorker("qan-worker-2", w2StopChan, nil, nil, false) // Let's take this time to also test that MaxWorkers is enforced. w3 := mock.NewQanWorker("qan-worker-3", nil, nil, nil, false) f := mock.NewQanWorkerFactory([]*mock.QanWorker{w1, w2, w3}) // Clean up files that may interfere with test. Then copy the test log. slowlog := "slow006.log" files, _ := filepath.Glob("/tmp/" + slowlog + "-[0-9]*") for _, file := range files { os.Remove(file) } cp := exec.Command("cp", testlog.Sample+slowlog, "/tmp/"+slowlog) cp.Run() // Create and start manager with mock workers. mockConnFactory := &mock.ConnectionFactory{Conn: s.nullmysql} m := qan.NewManager(s.logger, mockConnFactory, s.clock, s.iterFactory, f, s.spool, s.im) if m == nil { t.Fatal("Create qan.Manager") } config := &qan.Config{ ServiceInstance: s.mysqlInstance, MaxSlowLogSize: 1000, RemoveOldSlowLogs: true, // done after w2 and w1 done MaxWorkers: 2, // w1 and w2 but not w3 Interval: 60, WorkerRunTime: 60, Start: []mysql.Query{ mysql.Query{Set: "SET GLOBAL slow_query_log=OFF"}, }, Stop: []mysql.Query{ mysql.Query{Set: "SET GLOBAL slow_query_log=OFF"}, }, } qanConfig, _ := json.Marshal(config) cmd := &proto.Cmd{ Ts: time.Now(), Cmd: "StartService", Data: qanConfig, } reply := m.Handle(cmd) t.Assert(reply.Error, Equals, "") test.WaitStatusPrefix(1, m, "qan-log-parser", "Idle") // Start first mock worker (w1) with interval 0 - 736. The worker's Run() // func won't return until we send true to its stop chan, so manager will // think worker is still running until then. now := time.Now() i1 := &qan.Interval{ Filename: "/tmp/" + slowlog, StartOffset: 0, EndOffset: 736, StartTime: now, StopTime: now, } s.intervalChan <- i1 <-w1.Running() // wait for manager to run worker // Start 2nd mock worker (w2) with interval 736 - 1833. Manager will rotate // but not remove original slow log because w1 is still running. i2 := &qan.Interval{ Filename: "/tmp/" + slowlog, StartOffset: 736, EndOffset: 1833, StartTime: now, StopTime: now, } s.intervalChan <- i2 <-w2.Running() test.WaitStatus(1, m, "qan-log-parser", "Idle (2 of 2 running)") /** * Worker status test */ // Workers should have status and QAN manager should report them all. status := m.Status() t.Check(status["qan-worker-1"], Equals, "ok") t.Check(status["qan-worker-2"], Equals, "ok") t.Check(status["qan-worker-3"], Equals, "") // not running due to MaxWorkers /** * Quick side test: qan.Config.MaxWorkers is enforced. */ test.DrainLogChan(s.logChan) s.intervalChan <- i2 logs := test.WaitLogChan(s.logChan, 3) test.WaitStatus(1, m, "qan-log-parser", "Idle (2 of 2 running)") gotWarning := false for _, log := range logs { if log.Level == proto.LOG_WARNING && strings.Contains(log.Msg, "All workers busy") { gotWarning = true break } } if !gotWarning { t.Error("Too many workers causes \"All workers busy\" warning") } // Original slow log should no longer exist; it was rotated away, but... if _, err := os.Stat("/tmp/" + slowlog); !os.IsNotExist(err) { t.Error("/tmp/" + slowlog + " no longer exists") } // ...old slow log should exist because w1 is still running. files, _ = filepath.Glob("/tmp/" + slowlog + "-[0-9]*") if len(files) != 1 { t.Errorf("w1 running so old slow log not removed, got %+v", files) } defer func() { for _, file := range files { os.Remove(file) } }() // Stop w2 which is holding "holding" the "lock" on removing the old // slog log (figuratively speaking; there are no real locks). Because // w1 is still running, manager should not remove the old log yet because // w1 could still be parsing it. w2StopChan <- true test.WaitStatus(1, m, "qan-log-parser", "Idle (1 of 2 running)") if _, err := os.Stat(files[0]); os.IsNotExist(err) { t.Errorf("w1 still running so old slow log not removed") } // Stop w1 and now, even though slow log was rotated for w2, manager // should remove old slow log. w1StopChan <- true test.WaitStatus(1, m, "qan-log-parser", "Idle (0 of 2 running)") if _, err := os.Stat(files[0]); !os.IsNotExist(err) { t.Errorf("w1 done running so old slow log removed") } // Stop manager reply = m.Handle(&proto.Cmd{Cmd: "StopService"}) t.Assert(reply.Error, Equals, "") }
func (s *ManagerTestSuite) TestRotateSlowLog(t *C) { // Same as TestRotateAndRemoveSlowLog, but with qan.Config.RemoveOldSlowLogs=false // and testing that Start and Stop queries were executed. slowlog := "slow006.log" files, _ := filepath.Glob("/tmp/" + slowlog + "-[0-9]*") for _, file := range files { os.Remove(file) } mockConnFactory := &mock.ConnectionFactory{Conn: s.nullmysql} m := qan.NewManager(s.logger, mockConnFactory, s.clock, s.iterFactory, s.workerFactory, s.spool, s.im) if m == nil { t.Fatal("Create qan.Manager") } config := &qan.Config{ ServiceInstance: s.mysqlInstance, Interval: 300, MaxSlowLogSize: 1000, RemoveOldSlowLogs: false, // <-- HERE ExampleQueries: false, MaxWorkers: 2, WorkerRunTime: 600, Start: []mysql.Query{ mysql.Query{Set: "SET GLOBAL slow_query_log=OFF"}, mysql.Query{Set: "SET GLOBAL long_query_time=0.456"}, mysql.Query{Set: "SET GLOBAL slow_query_log=ON"}, }, Stop: []mysql.Query{ mysql.Query{Set: "SET GLOBAL slow_query_log=OFF"}, mysql.Query{Set: "SET GLOBAL long_query_time=10"}, }, } qanConfig, _ := json.Marshal(config) cmd := &proto.Cmd{ Ts: time.Now(), Cmd: "StartService", Data: qanConfig, } reply := m.Handle(cmd) t.Assert(reply.Error, Equals, "") test.WaitStatusPrefix(1, m, "qan-log-parser", "Idle") s.nullmysql.Reset() cp := exec.Command("cp", testlog.Sample+slowlog, "/tmp/"+slowlog) cp.Run() // First interval: 0 - 736 now := time.Now() i1 := &qan.Interval{ Filename: "/tmp/" + slowlog, StartOffset: 0, EndOffset: 736, StartTime: now, StopTime: now, } s.intervalChan <- i1 resultData := <-s.dataChan report := *resultData.(*qan.Report) if report.Global.TotalQueries != 2 { t.Error("First interval has 2 queries, got ", report.Global.TotalQueries) } if report.Global.UniqueQueries != 1 { t.Error("First interval has 1 unique query, got ", report.Global.UniqueQueries) } // Second interval: 736 - 1833, but will actually go to end: 2200, if not // the next two test will fail. i2 := &qan.Interval{ Filename: "/tmp/" + slowlog, StartOffset: 736, EndOffset: 1833, StartTime: now, StopTime: now, } s.intervalChan <- i2 resultData = <-s.dataChan report = *resultData.(*qan.Report) if report.Global.TotalQueries != 4 { t.Error("Second interval has 2 queries, got ", report.Global.TotalQueries) } if report.Global.UniqueQueries != 2 { t.Error("Second interval has 2 unique queries, got ", report.Global.UniqueQueries) } test.WaitStatus(1, m, "qan-log-parser", "Idle (0 of 2 running)") // Original slow log should no longer exist; it was rotated away. if _, err := os.Stat("/tmp/" + slowlog); !os.IsNotExist(err) { t.Error("/tmp/" + slowlog + " no longer exists") } // The original slow log should NOT have been removed. files, _ = filepath.Glob("/tmp/" + slowlog + "-[0-9]*") if len(files) != 1 { t.Errorf("Old slow log not removed, got %+v", files) } defer func() { for _, file := range files { os.Remove(file) } }() expect := []mysql.Query{} for _, q := range config.Stop { expect = append(expect, q) } for _, q := range config.Start { expect = append(expect, q) } if same, diff := test.IsDeeply(s.nullmysql.GetSet(), expect); !same { t.Logf("%+v", s.nullmysql.GetSet()) t.Logf("%+v", expect) t.Error(diff) } // Stop manager reply = m.Handle(&proto.Cmd{Cmd: "StopService"}) t.Assert(reply.Error, Equals, "") }
func (s *ManagerTestSuite) TestRotateAndRemoveSlowLog(t *C) { // Clean up files that may interfere with test. slowlog := "slow006.log" files, _ := filepath.Glob("/tmp/" + slowlog + "-[0-9]*") for _, file := range files { os.Remove(file) } /** * slow006.log is 2200 bytes large. Rotation happens when manager * see interval.EndOffset >= MaxSlowLogSize. So we'll use these * intervals, * 0 - 736 * 736 - 1833 * 1833 - 2200 * and set MaxSlowLogSize=1000 which should make manager rotate the log * after the 2nd interval. When manager rotates log, it 1) renames log * to NAME-TS where NAME is the original name and TS is the current Unix * timestamp (UTC); and 2) it sets interval.StopOff = file size of NAME-TS * to finish parsing the log. Therefore, results for 2nd interval should * include our 3rd interval. -- Manager also calls Start and Stop so the * nullmysql conn should record the queries being set. */ // See TestStartService() for description of these startup tasks. mockConnFactory := &mock.ConnectionFactory{Conn: s.nullmysql} m := qan.NewManager(s.logger, mockConnFactory, s.clock, s.iterFactory, s.workerFactory, s.spool, s.im) if m == nil { t.Fatal("Create qan.Manager") } config := &qan.Config{ ServiceInstance: s.mysqlInstance, Interval: 300, MaxSlowLogSize: 1000, // <-- HERE RemoveOldSlowLogs: true, // <-- HERE too ExampleQueries: false, MaxWorkers: 2, WorkerRunTime: 600, Start: []mysql.Query{ mysql.Query{Set: "SET GLOBAL slow_query_log=OFF"}, mysql.Query{Set: "SET GLOBAL long_query_time=0.456"}, mysql.Query{Set: "SET GLOBAL slow_query_log=ON"}, }, Stop: []mysql.Query{ mysql.Query{Set: "SET GLOBAL slow_query_log=OFF"}, mysql.Query{Set: "SET GLOBAL long_query_time=10"}, }, } qanConfig, _ := json.Marshal(config) cmd := &proto.Cmd{ Ts: time.Now(), Cmd: "StartService", Data: qanConfig, } reply := m.Handle(cmd) t.Assert(reply.Error, Equals, "") test.WaitStatusPrefix(1, m, "qan-log-parser", "Idle") // Make copy of slow log because test will mv/rename it. cp := exec.Command("cp", testlog.Sample+slowlog, "/tmp/"+slowlog) cp.Run() // First interval: 0 - 736 now := time.Now() i1 := &qan.Interval{ Filename: "/tmp/" + slowlog, StartOffset: 0, EndOffset: 736, StartTime: now, StopTime: now, } s.intervalChan <- i1 resultData := <-s.dataChan report := *resultData.(*qan.Report) if report.Global.TotalQueries != 2 { t.Error("First interval has 2 queries, got ", report.Global.TotalQueries) } if report.Global.UniqueQueries != 1 { t.Error("First interval has 1 unique query, got ", report.Global.UniqueQueries) } // Second interval: 736 - 1833, but will actually go to end: 2200, if not // the next two test will fail. i2 := &qan.Interval{ Filename: "/tmp/" + slowlog, StartOffset: 736, EndOffset: 1833, StartTime: now, StopTime: now, } s.intervalChan <- i2 resultData = <-s.dataChan report = *resultData.(*qan.Report) if report.Global.TotalQueries != 4 { t.Error("Second interval has 2 queries, got ", report.Global.TotalQueries) } if report.Global.UniqueQueries != 2 { t.Error("Second interval has 2 unique queries, got ", report.Global.UniqueQueries) } test.WaitStatus(1, m, "qan-log-parser", "Idle (0 of 2 running)") // Original slow log should no longer exist; it was rotated away. if _, err := os.Stat("/tmp/" + slowlog); !os.IsNotExist(err) { t.Error("/tmp/" + slowlog + " no longer exists") } // The original slow log should have been renamed to slow006-TS, parsed, and removed. files, _ = filepath.Glob("/tmp/" + slowlog + "-[0-9]*") if len(files) != 0 { t.Errorf("Old slow log removed, got %+v", files) } defer func() { for _, file := range files { os.Remove(file) } }() // https://jira.percona.com/browse/PCT-466 // Old slow log removed but space not freed in filesystem pid := fmt.Sprintf("%d", os.Getpid()) out, err := exec.Command("lsof", "-p", pid).Output() if err != nil { t.Fatal(err) } if strings.Contains(string(out), "/tmp/"+slowlog+"-") { t.Logf("%s\n", string(out)) t.Error("Old slow log removed but not freed in filesystem (PCT-466)") } // Stop manager reply = m.Handle(&proto.Cmd{Cmd: "StopService"}) t.Assert(reply.Error, Equals, "") }