func main() {
	fmt.Print(banner)
	log.SetLevelByString("info")

	args, err := docopt.Parse(usage, nil, true, "codis proxy v0.1", true)
	if err != nil {
		log.Error(err)
	}

	// set config file
	if args["-c"] != nil {
		configFile = args["-c"].(string)
	}

	// set output log file
	if args["-L"] != nil {
		log.SetOutputByName(args["-L"].(string))
	}

	// set log level
	if args["--log-level"] != nil {
		log.SetLevelByString(args["--log-level"].(string))
	}

	// set cpu
	if args["--cpu"] != nil {
		cpus, err = strconv.Atoi(args["--cpu"].(string))
		if err != nil {
			log.Fatal(err)
		}
	}

	// set addr
	if args["--addr"] != nil {
		addr = args["--addr"].(string)
	}

	// set http addr
	if args["--http-addr"] != nil {
		httpAddr = args["--http-addr"].(string)
	}

	wd, _ := os.Getwd()
	log.Info("wd:", wd)
	log.CrashLog(wd + ".dump")

	router.CheckUlimit(1024)
	runtime.GOMAXPROCS(cpus)

	http.HandleFunc("/setloglevel", handleSetLogLevel)
	go http.ListenAndServe(httpAddr, nil)
	log.Info("running on ", addr)

	conf, err := router.LoadConf(configFile)
	if err != nil {
		log.Fatal(err)
	}
	s := router.NewServer(addr, httpAddr, conf)
	s.Run()
	log.Warning("exit")
}
func (agent *Agent) CheckTimeout() {
	log.Info("checktimeout loop for every 5 sec")
	agent.Lock.Lock()
	defer agent.Lock.Unlock()
	for _, task := range agent.Running {
		// only check running tasks
		if task.Status != StatusRunning {
			continue
		}
		// kill the task if its cron job has timed out
		log.Info("check timeout for task:", task.TaskId, task.Job.Name)
		if task.IsTimeout() {
			if task.Job.OnTimeout() == TriggerKill {
				agent.KillTask(task)
			} else {
				log.Warning("timeout but we just ignore this :", task.TaskId)
			}
			ts := &TaskStatus{
				TaskPtr:  task,
				Command:  nil,
				Status:   StatusTimeout,
				CreateAt: time.Now().Unix(),
				Err: fmt.Errorf("run task: %s jobname: %s timeout for %dsec",
					task.TaskId, task.Job.Name, time.Now().Unix()-task.ExecAt),
			}
			agent.JobStatusChan <- ts
		}
	}
}
func (d *Dispatcher) Run() {
	go d.slotsReloadLoop()
	for {
		select {
		case req, ok := <-d.reqCh:
			// dispatch req
			if !ok {
				log.Info("exit dispatch loop")
				return
			}
			var server string
			if req.readOnly {
				server = d.slotTable.ReadServer(req.slot)
			} else {
				server = d.slotTable.WriteServer(req.slot)
			}
			taskRunner, ok := d.taskRunners[server]
			if !ok {
				log.Info("create task runner", server)
				taskRunner = NewTaskRunner(server, d.connPool)
				d.taskRunners[server] = taskRunner
			}
			taskRunner.in <- req
		case info := <-d.slotInfoChan:
			d.handleSlotInfoChanged(info)
		}
	}
}
func (ps *ProxyServer) Init() {
	log.Info("Proxy Server Init ....")
	l, err := net.Listen("tcp4", "0.0.0.0:"+ps.Conf.Port) // net.Listen(network, laddr)
	if err != nil {
		log.Fatalf("Proxy Server listen on port %s failed: %v", ps.Conf.Port, err)
	}
	log.Info("Proxy Server Listen on port ", ps.Conf.Port)
	ps.Listen = l
}
// experimental simple auto rebalance :)
func Rebalance(zkConn zkhelper.Conn, delay int) error {
	targetQuota, err := getQuotaMap(zkConn)
	if err != nil {
		return errors.Trace(err)
	}
	livingNodes, err := getLivingNodeInfos(zkConn)
	if err != nil {
		return errors.Trace(err)
	}
	log.Info("start rebalance")
	for _, node := range livingNodes {
		for len(node.CurSlots) > targetQuota[node.GroupId] {
			for _, dest := range livingNodes {
				if dest.GroupId != node.GroupId && len(dest.CurSlots) < targetQuota[dest.GroupId] {
					slot := node.CurSlots[len(node.CurSlots)-1]
					// create a migration task
					t := NewMigrateTask(MigrateTaskInfo{
						Delay:      delay,
						FromSlot:   slot,
						ToSlot:     slot,
						NewGroupId: dest.GroupId,
						Status:     MIGRATE_TASK_MIGRATING,
						CreateAt:   strconv.FormatInt(time.Now().Unix(), 10),
					})
					u, err := uuid.NewV4()
					if err != nil {
						return errors.Trace(err)
					}
					t.Id = u.String()

					if ok, err := preMigrateCheck(t); ok {
						// do migrate
						err := t.run()
						if err != nil {
							log.Warning(err)
							return errors.Trace(err)
						}
					} else {
						log.Warning(err)
						return errors.Trace(err)
					}
					node.CurSlots = node.CurSlots[0 : len(node.CurSlots)-1]
					dest.CurSlots = append(dest.CurSlots, slot)
				}
			}
		}
	}
	log.Info("rebalance finish")
	return nil
}
func (c *Conn) Run() {
	defer func() {
		r := recover()
		if err, ok := r.(error); ok {
			const size = 4096
			buf := make([]byte, size)
			buf = buf[:runtime.Stack(buf, false)]
			log.Errorf("lastCmd %s, %v, %s", c.lastCmd, err, buf)
		}
		c.Close()
	}()

	for {
		c.alloc.Reset()
		data, err := c.readPacket()
		if err != nil {
			if err.Error() != io.EOF.Error() {
				log.Info(err)
			}
			return
		}

		if err := c.dispatch(data); err != nil {
			log.Errorf("dispatch error %s, %s", errors.ErrorStack(err), c)
			if err != mysql.ErrBadConn { //todo: fix this
				c.writeError(err)
			}
		}

		c.pkg.Sequence = 0
	}
}
func (agent *Agent) CheckReady() {
	agent.Lock.Lock()
	defer agent.Lock.Unlock()
	for id, job := range agent.Jobs {
		if _, ok := agent.Ready[id]; ok {
			log.Warning("cron job already in ready queue: ", id, job.Name)
			continue
		}
		if !job.NeedSchedule() || !job.IsValid() {
			continue
		}
		now := time.Now().Unix()
		task := &Task{
			JobId:  job.Id,
			TaskId: fmt.Sprintf("%d-%d", now, job.Id),
			Job:    job,
			Status: StatusReady,
			ExecAt: 0,
		}
		log.Info("add job to ready task queue: ", job.Id, job.Name)
		agent.Ready[job.Id] = task
	}
}
func GerritHandler(w http.ResponseWriter, r *http.Request) {
	// read the request body.
	body, err := ioutil.ReadAll(r.Body)
	if err != nil {
		w.WriteHeader(http.StatusBadRequest)
		w.Write([]byte(err.Error()))
		return
	}
	js, err := simplejson.NewJson(body)
	if err != nil {
		w.WriteHeader(http.StatusBadRequest)
		w.Write([]byte(err.Error()))
		return
	}
	project, err := js.Get("refUpdate").Get("project").String()
	if err != nil {
		w.WriteHeader(http.StatusBadRequest)
		w.Write([]byte(err.Error()))
		return
	}
	branch, err := js.Get("refUpdate").Get("refName").String()
	if err != nil {
		w.WriteHeader(http.StatusBadRequest)
		w.Write([]byte(err.Error()))
		return
	}
	logging.Info(fmt.Sprintf("project:%s, branch:%s, timeout:%d", project, branch, config.DefaultTimeout))
	// build the image.
	go buildMain(project, branch, config.DefaultTimeout)
}
func ForceRemoveDeadFence(zkConn zkhelper.Conn, productName string) error {
	proxies, err := ProxyList(zkConn, productName, func(p *ProxyInfo) bool {
		return p.State == PROXY_STATE_ONLINE
	})
	if err != nil {
		return errors.Trace(err)
	}
	fenceProxies, err := GetFenceProxyMap(zkConn, productName)
	if err != nil {
		return errors.Trace(err)
	}
	// remove online proxies' fences
	for _, proxy := range proxies {
		delete(fenceProxies, proxy.Addr)
	}
	// delete dead fences in zookeeper
	path := GetProxyFencePath(productName)
	for remainFence := range fenceProxies {
		fencePath := filepath.Join(path, remainFence)
		log.Info("removing fence: ", fencePath)
		if err := zkhelper.DeleteRecursive(zkConn, fencePath, -1); err != nil {
			return errors.Trace(err)
		}
	}
	return nil
}
func main() {
	flag.Parse()
	log.Info("flag parse: ", *db, *port)
	runtime.GOMAXPROCS(runtime.NumCPU() * 2)
	LogVerbose(*verbose)

	cfg := &agent.AgentConf{
		DBtype:   *dbtype,
		MySQLdb:  *db,
		HttpPort: *port,
		WorkDir:  *work_dir,
		QuitTime: *quit_time,
	}
	agent := agent.NewAgent(cfg)
	quit := agent.QuitChan
	go agent.Run()

	// handle quit signal, we should quit after all TASK FINISHED
	sc := make(chan os.Signal, 1)
	signal.Notify(sc,
		os.Kill,
		os.Interrupt,
		syscall.SIGHUP,
		syscall.SIGINT,
		syscall.SIGTERM,
		syscall.SIGQUIT)
	<-sc
	log.Warning("main receive quit signal...")
	close(quit)
	agent.Clean()
}
func (ta *Table) AddColumn(name string, columnType string, collation string, defval mysql.Value, extra string) {
	index := len(ta.Columns)
	name = strings.ToLower(name)
	ta.Columns = append(ta.Columns, TableColumn{Name: name})
	columnType = strings.ToLower(columnType)
	endPos := strings.Index(columnType, "(") // handle something like: int(11)
	if endPos > 0 {
		ta.Columns[index].SqlType = str2mysqlType(strings.TrimSpace(columnType[:endPos]))
	} else {
		ta.Columns[index].SqlType = str2mysqlType(strings.TrimSpace(columnType))
	}
	ta.Columns[index].Collation = collation
	if strings.Contains(columnType, "unsigned") {
		ta.Columns[index].IsUnsigned = true
	}
	log.Info(name, ta.Columns[index].SqlType, columnType)
	if extra == "auto_increment" {
		ta.Columns[index].IsAuto = true
		// Ignore default value, if any
		return
	}
	if defval == nil {
		return
	}
	ta.Columns[index].Default = defval
}
func (t *Task) genLogFile() {
	defer func() {
		if e := recover(); e != nil {
			log.Warning("genLogFile fatal:", e)
		}
	}()
	d := time.Now().Format("20060102")
	filename := fmt.Sprintf("%s/DCMS-%s/%d-%s-%s.log", t.Job.Dcms.Conf.WorkDir, d, t.Job.Id, t.Job.Name, t.TaskId)
	log.Info("generate logfile :", filename)
	logdir := fmt.Sprintf("%s/DCMS-%s", t.Job.Dcms.Conf.WorkDir, d)
	if err := os.MkdirAll(logdir, os.ModePerm); err != nil {
		log.Warningf("in run exec goroutine, mkdir logdir %s failed: %v", logdir, err)
	}
	if f, err := os.OpenFile(filename, os.O_RDWR|os.O_CREATE|os.O_TRUNC, os.ModePerm); err != nil {
		log.Warning("in genLogFile os.OpenFile create failed: ", err)
		t.logfile = nil
		t.LogFilename = ""
	} else {
		t.logfile = f
		t.LogFilename = filename
	}
}
func releaseDashboardNode() {
	zkPath := fmt.Sprintf("/zk/codis/db_%s/dashboard", globalEnv.ProductName())
	if exists, _, _ := safeZkConn.Exists(zkPath); exists {
		log.Info("removing dashboard node")
		safeZkConn.Delete(zkPath, 0)
	}
}
func (s *Server) handleConn(c net.Conn) {
	log.Info("new connection", c.RemoteAddr())

	s.counter.Add("connections", 1)
	client := &session{
		Conn:     c,
		r:        bufio.NewReader(c),
		CreateAt: time.Now(),
	}
	var err error

	defer func() {
		if err != nil { //todo: fix this ugly error check
			if GetOriginError(err.(*errors.Err)).Error() != io.EOF.Error() {
				log.Warningf("close connection %v, %+v, %v", c.RemoteAddr(), client, errors.ErrorStack(err))
			} else {
				log.Infof("close connection %v, %+v", c.RemoteAddr(), client)
			}
		} else {
			log.Infof("close connection %v, %+v", c.RemoteAddr(), client)
		}

		c.Close()
		s.counter.Add("connections", -1)
	}()

	for {
		err = s.redisTunnel(client)
		if err != nil {
			return
		}
		client.Ops++
	}
}
func (s *Server) waitOnline() {
	s.mu.Lock()
	defer s.mu.Unlock()
	for {
		pi, err := s.top.GetProxyInfo(s.pi.Id)
		if err != nil {
			log.Fatal(errors.ErrorStack(err))
		}

		if pi.State == models.PROXY_STATE_MARK_OFFLINE {
			s.handleMarkOffline()
		}

		if pi.State == models.PROXY_STATE_ONLINE {
			s.pi.State = pi.State
			println("good, we are on line", s.pi.Id)
			log.Info("we are online", s.pi.Id)
			_, err := s.top.WatchNode(path.Join(models.GetProxyPath(s.top.ProductName), s.pi.Id), s.evtbus)
			if err != nil {
				log.Fatal(errors.ErrorStack(err))
			}
			return
		}

		println("wait to be online ", s.pi.Id)
		log.Warning(s.pi.Id, "wait to be online")

		time.Sleep(3 * time.Second)
	}
}
func registerConfigNode() error {
	zkPath := fmt.Sprintf("/zk/codis/db_%s/living-codis-config", productName)
	hostname, err := os.Hostname()
	if err != nil {
		return errors.Trace(err)
	}
	pid := os.Getpid()

	content := fmt.Sprintf(`{"hostname": "%v", "pid": %v}`, hostname, pid)
	nodeName := fmt.Sprintf("%v-%v", hostname, pid)

	zkhelper.CreateRecursive(zkConn, zkPath, "", 0, zkhelper.DefaultDirACLs())
	pathCreated, err := zkConn.Create(path.Join(zkPath, nodeName), []byte(content), zk.FlagEphemeral, zkhelper.DefaultDirACLs())
	if err != nil {
		return errors.Trace(err)
	}
	log.Info("living node created:", pathCreated)

	livingNode = pathCreated

	return nil
}
func (s *Server) responseAction(seq int64) {
	log.Info("send response", seq)
	err := s.top.DoResponse(int(seq), &s.pi)
	if err != nil {
		log.Error(errors.ErrorStack(err))
	}
}
func (self *ServerGroup) RemoveServer(zkConn zkhelper.Conn, addr string) error {
	zkPath := fmt.Sprintf("/zk/codis/db_%s/servers/group_%d/%s", self.ProductName, self.Id, addr)
	data, _, err := zkConn.Get(zkPath)
	if err != nil {
		return errors.Trace(err)
	}
	var s Server
	err = json.Unmarshal(data, &s)
	if err != nil {
		return errors.Trace(err)
	}
	log.Info(s)
	if s.Type == SERVER_TYPE_MASTER {
		return errors.New("cannot remove master, use promote first")
	}

	err = zkConn.Delete(zkPath, -1)
	if err != nil {
		return errors.Trace(err)
	}

	// update server list
	for i := 0; i < len(self.Servers); i++ {
		if self.Servers[i].Addr == s.Addr {
			self.Servers = append(self.Servers[:i], self.Servers[i+1:]...)
			break
		}
	}

	// removing a slave does not need proxy confirmation
	err = NewAction(zkConn, self.ProductName, ACTION_TYPE_SERVER_GROUP_CHANGED, self, "", false)
	return errors.Trace(err)
}
func TestProxyOfflineInWaitActionReceiver(t *testing.T) {
	log.Info("test proxy offline when waiting action response")
	fakeZkConn := zkhelper.NewConn()

	for i := 1; i <= 4; i++ {
		CreateProxyInfo(fakeZkConn, productName, &ProxyInfo{
			Id:    strconv.Itoa(i),
			State: PROXY_STATE_ONLINE,
		})
		go waitForProxyMarkOffline(fakeZkConn, strconv.Itoa(i))
	}

	lst, _ := ProxyList(fakeZkConn, productName, nil)
	if len(lst) != 4 {
		t.Error("create proxy info error")
	}

	go func() {
		time.Sleep(500 * time.Millisecond)
		actionPath := path.Join(GetActionResponsePath(productName), fakeZkConn.Seq2Str(1))
		// create a test response for proxy 4 only, so proxies 1, 2 and 3 will time out
		fakeZkConn.Create(path.Join(actionPath, "4"), nil, 0, zkhelper.DefaultFileACLs())
	}()

	err := NewActionWithTimeout(fakeZkConn, productName, ACTION_TYPE_SLOT_CHANGED, nil, "desc", true, 3*1000)
	if err != nil && err.Error() != ErrReceiverTimeout.Error() {
		t.Error(errors.ErrorStack(err))
	}

	for i := 1; i <= 3; i++ {
		if info, _ := GetProxyInfo(fakeZkConn, productName, strconv.Itoa(i)); info.State != PROXY_STATE_OFFLINE {
			t.Error("shutdown offline proxy error")
		}
	}
}
func uint_testSetup() {
	log.Info("setup suite: uint_test")
	if isTblExists(`tbl_uint_test`) {
		mustExec(MysqlDB, `DROP TABLE tbl_uint_test;`)
	}
	mustExec(MysqlDB, `CREATE TABLE tbl_uint_test (id INT NOT NULL AUTO_INCREMENT, PRIMARY KEY(id), data INT UNSIGNED);`)
	reloadConfig()
}
func handleSetLogLevel(w http.ResponseWriter, r *http.Request) {
	r.ParseForm()
	level := r.Form.Get("level")
	log.SetLevelByString(level)
	log.Info("set log level to ", level)
	w.Header().Set("Content-Type", "text/html")
	w.Write([]byte("OK"))
}
func pkey_string_testSetup() {
	log.Info("setup suite: pkey_string_test")
	if isTblExists(`tbl_pkey_string_test`) {
		mustExec(MysqlDB, `DROP TABLE tbl_pkey_string_test;`)
	}
	mustExec(MysqlDB, `CREATE TABLE tbl_pkey_string_test (id VARCHAR(20), UNIQUE KEY(id), data VARCHAR(20));`)
	reloadConfig()
}
func multi_pkey_testSetup() {
	log.Info("setup suite: multi_pkey_test")
	if isTblExists(`tbl_multi_pkey_test`) {
		mustExec(MysqlDB, `DROP TABLE tbl_multi_pkey_test;`)
	}
	mustExec(MysqlDB, `CREATE TABLE tbl_multi_pkey_test (id1 VARCHAR(20), id2 VARCHAR(20), UNIQUE KEY(id1, id2), data INT);`)
	reloadConfig()
}
func string_testSetup() {
	log.Info("setup suite: string_test")
	if isTblExists(`tbl_string_test`) {
		mustExec(MysqlDB, `DROP TABLE tbl_string_test;`)
	}
	mustExec(MysqlDB, `CREATE TABLE tbl_string_test (id INT NOT NULL AUTO_INCREMENT, PRIMARY KEY(id), data TEXT);`)
	reloadConfig()
}
func SetProxyStatus(zkConn zkhelper.Conn, productName string, proxyName string, status string) error {
	p, err := GetProxyInfo(zkConn, productName, proxyName)
	if err != nil {
		return errors.Trace(err)
	}

	if status != PROXY_STATE_ONLINE && status != PROXY_STATE_MARK_OFFLINE && status != PROXY_STATE_OFFLINE {
		return errors.Errorf("%v, %s", ErrUnknownProxyStatus, status)
	}

	p.State = status
	b, _ := json.Marshal(p)

	_, err = zkConn.Set(path.Join(GetProxyPath(productName), proxyName), b, -1)
	if err != nil {
		return errors.Trace(err)
	}

	if status == PROXY_STATE_MARK_OFFLINE {
		// wait for the proxy down
		for {
			_, _, c, err := zkConn.GetW(path.Join(GetProxyPath(productName), proxyName))
			if zkhelper.ZkErrorEqual(err, zk.ErrNoNode) {
				return nil
			} else if err != nil {
				return errors.Trace(err)
			}
			<-c
			info, err := GetProxyInfo(zkConn, productName, proxyName)
			log.Info("mark_offline, check proxy status:", proxyName, info, err)
			if zkhelper.ZkErrorEqual(err, zk.ErrNoNode) {
				log.Info("shutdown proxy successful")
				return nil
			} else if err != nil {
				return errors.Trace(err)
			}
			if info.State == PROXY_STATE_OFFLINE {
				log.Info("proxy:", proxyName, "offline success!")
				return nil
			}
		}
	}
	return nil
}
func MigrateSingleSlot(zkConn zkhelper.Conn, slotId, fromGroup, toGroup int, delay int, stopChan <-chan struct{}) error {
	groupFrom, err := models.GetGroup(zkConn, productName, fromGroup)
	if err != nil {
		return err
	}
	groupTo, err := models.GetGroup(zkConn, productName, toGroup)
	if err != nil {
		return err
	}

	fromMaster, err := groupFrom.Master(zkConn)
	if err != nil {
		return err
	}

	toMaster, err := groupTo.Master(zkConn)
	if err != nil {
		return err
	}

	if fromMaster == nil || toMaster == nil {
		return ErrGroupMasterNotFound
	}

	c, err := redis.Dial("tcp", fromMaster.Addr)
	if err != nil {
		return err
	}
	defer c.Close()

	_, remain, err := sendRedisMigrateCmd(c, slotId, toMaster.Addr)
	if err != nil {
		return err
	}

	for remain > 0 {
		if delay > 0 {
			time.Sleep(time.Duration(delay) * time.Millisecond)
		}
		if stopChan != nil {
			select {
			case <-stopChan:
				return ErrStopMigrateByUser
			default:
			}
		}
		_, remain, err = sendRedisMigrateCmd(c, slotId, toMaster.Addr)
		if err != nil {
			return err
		}
		if remain%500 == 0 && remain > 0 {
			log.Info("remain:", remain)
		}
	}

	return nil
}
// request "CLUSTER SLOTS" to retrieve the cluster topology // try each start up nodes until the first success one func (d *Dispatcher) reloadTopology() (slotInfos []*SlotInfo, err error) { log.Info("reload slot table") indexes := rand.Perm(len(d.startupNodes)) for _, index := range indexes { if slotInfos, err = d.doReload(d.startupNodes[index]); err == nil { break } } return }
func (tr *taskRunner) cleanupOutgoingTasks(err error) {
	for e := tr.tasks.Front(); e != nil; {
		req := e.Value.(*PipelineRequest)
		log.Info("clean up", req)
		req.backQ <- &PipelineResponse{ctx: req, resp: nil, err: err}
		next := e.Next()
		tr.tasks.Remove(e)
		e = next
	}
}
func TestRedisEnqueue(t *testing.T) {
	jq := NewJq("test_queue1", RedisQueueManagerFactory(RedisQueueFactory), MockWorkerFunc)
	go jq.DispatchForever()
	jq.Submit([]byte("hello"), func(ret []byte) {
		log.Info("i am from redis", string(ret))
		if !bytes.Equal(ret, []byte("world")) {
			t.Error("error")
		}
	}, nil, true)
}
func (m *MigrateManager) loop() error {
	for {
		m.lck.RLock()
		ele := m.pendingTasks.Front()
		m.lck.RUnlock()
		if ele == nil {
			time.Sleep(500 * time.Millisecond)
			continue
		}

		// get pending task, and run
		m.lck.Lock()
		m.pendingTasks.Remove(ele)
		m.lck.Unlock()

		t := ele.Value.(*MigrateTask)
		t.zkConn = m.zkConn
		t.productName = m.productName
		m.runningTask = t

		if m.preCheck != nil {
			log.Info("start migration pre-check")
			if ok, err := m.preCheck(t); !ok {
				if err != nil {
					log.Error(err)
				}
				log.Error("migration pre-check error", t)
				continue
			}
			log.Info("migration pre-check done")
		}

		// do migrate
		err := t.run()
		if err != nil {
			log.Error(err)
		}

		// reset running task
		m.lck.Lock()
		m.runningTask = nil
		m.lck.Unlock()
	}
}