Exemple #1
0
// dispatchAction decodes the action node stored at actionPath and runs it
// in a separate vtaction subprocess. agent.actionMutex is held for the
// whole call.
//
// A node that cannot be decoded is logged and skipped: nil is returned.
// If the subprocess fails, its error is returned; a non-nil return signals
// that event processing should stop.
func (agent *ActionAgent) dispatchAction(actionPath, data string) error {
	agent.actionMutex.Lock()
	defer agent.actionMutex.Unlock()

	log.Infof("action dispatch %v", actionPath)
	node, decodeErr := actionnode.ActionNodeFromJson(data, actionPath)
	if decodeErr != nil {
		log.Errorf("action decode failed: %v %v", actionPath, decodeErr)
		return nil
	}

	// argv[0] is the vtaction binary, followed by the action-specific flags
	// and then the flags each subsystem wants forwarded to subprocesses.
	argv := []string{
		agent.vtActionBinFile,
		"-action", node.Action,
		"-action-node", actionPath,
		"-action-guid", node.ActionGuid,
	}
	forwarded := [][]string{
		logutil.GetSubprocessFlags(),
		topo.GetSubprocessFlags(),
		dbconfigs.GetSubprocessFlags(),
		mysqlctl.GetSubprocessFlags(),
	}
	for _, flags := range forwarded {
		argv = append(argv, flags...)
	}
	log.Infof("action launch %v", argv)

	output, runErr := exec.Command(argv[0], argv[1:]...).CombinedOutput()
	if runErr == nil {
		log.Infof("Agent action completed %v %s", actionPath, output)
		isApplySchema := node.Action == actionnode.TABLET_ACTION_APPLY_SCHEMA
		agent.afterAction(actionPath, isApplySchema)
		return nil
	}

	log.Errorf("agent action failed: %v %v\n%s", actionPath, runErr, output)
	// If the action failed, preserve single execution path semantics.
	return runErr
}
// startFakeTabletActionLoop will start the action loop for a fake tablet,
// using mysqlDaemon as the backing mysqld.
//
// The loop runs in a background goroutine. For every action delivered by
// wr.ts.ActionEventLoop the callback decodes the node, executes it
// in-process through a TabletActor, and then re-checks the tablet's MySQL
// port. A HandleAction failure is only logged. A node that cannot be
// decoded marks the test as failed and the decode error is returned to the
// event loop.
func startFakeTabletActionLoop(t *testing.T, wr *Wrangler, tabletAlias topo.TabletAlias, mysqlDaemon mysqlctl.MysqlDaemon, done chan struct{}) {
	go func() {
		f := func(actionPath, data string) error {
			actionNode, err := actionnode.ActionNodeFromJson(data, actionPath)
			if err != nil {
				// This callback runs outside the test goroutine, where
				// t.Fatalf (FailNow) is not allowed: it would only end this
				// goroutine. Record the failure and hand the error back.
				t.Errorf("ActionNodeFromJson failed: %v\n%v", err, data)
				return err
			}
			ta := tabletmanager.NewTabletActor(nil, mysqlDaemon, wr.ts, tabletAlias)
			if err := ta.HandleAction(actionPath, actionNode.Action, actionNode.ActionGuid, false); err != nil {
				// action may just fail for any good reason
				t.Logf("HandleAction failed for %v: %v", actionNode.Action, err)
			}

			// this part would also be done by the agent
			tablet, err := wr.ts.GetTablet(tabletAlias)
			if err != nil {
				t.Logf("Cannot get tablet: %v", err)
			} else {
				updatedTablet := tabletmanager.CheckTabletMysqlPort(wr.ts, mysqlDaemon, tablet)
				if updatedTablet != nil {
					t.Logf("Updated tablet record")
				}
			}
			return nil
		}
		wr.ts.ActionEventLoop(tabletAlias, f, done)
	}()
}
Exemple #3
0
// getActions reads every child of actionPath from ZooKeeper, fetching the
// children concurrently, and returns the decoded action nodes ordered by
// child name.
//
// Only a failure to list the children is returned as an error. A child
// that cannot be read or decoded is logged as a warning and left out of
// the result. A child that no longer exists when it is read (ZNONODE) is
// skipped without a warning.
func getActions(wr *wrangler.Wrangler, zconn zk.Conn, actionPath string) ([]*actionnode.ActionNode, error) {
	actions, _, err := zconn.Children(actionPath)
	if err != nil {
		return nil, fmt.Errorf("getActions failed: %v %v", actionPath, err)
	}
	sort.Strings(actions)

	// Each goroutine writes only its own slot, so no mutex is needed and
	// the sorted order of the children is preserved in the result.
	fetched := make([]*actionnode.ActionNode, len(actions))
	var wg sync.WaitGroup
	for i, action := range actions {
		wg.Add(1)
		go func(i int, action string) {
			defer wg.Done()
			actionNodePath := path.Join(actionPath, action)
			data, _, err := zconn.Get(actionNodePath)
			if err != nil {
				// The node may have been removed between Children and Get;
				// that is expected and not worth a warning.
				if !zookeeper.IsError(err, zookeeper.ZNONODE) {
					wr.Logger().Warningf("getActions: %v %v", actionNodePath, err)
				}
				return
			}
			actionNode, err := actionnode.ActionNodeFromJson(data, actionNodePath)
			if err != nil {
				wr.Logger().Warningf("getActions: %v %v", actionNodePath, err)
				return
			}
			fetched[i] = actionNode
		}(i, action)
	}
	wg.Wait()

	// Drop the slots of children that were skipped.
	nodes := make([]*actionnode.ActionNode, 0, len(fetched))
	for _, node := range fetched {
		if node != nil {
			nodes = append(nodes, node)
		}
	}
	return nodes, nil
}
Exemple #4
0
// StartActionLoop will start the action loop for a fake tablet,
// using ft.FakeMysqlDaemon as the backing mysqld.
//
// It fails the test immediately if ft.Done is already set, i.e. a loop was
// started before. Otherwise it creates ft.Done and runs the topo server's
// ActionEventLoop in a background goroutine. Every delivered action is
// decoded, executed in-process through a TabletActor, and followed by a
// MySQL port check on the tablet record. A HandleAction failure is only
// logged. A node that cannot be decoded marks the test as failed and the
// decode error is returned to the event loop.
func (ft *FakeTablet) StartActionLoop(t *testing.T, wr *wrangler.Wrangler) {
	if ft.Done != nil {
		t.Fatalf("ActionLoop for %v is already running", ft.Tablet.Alias)
	}
	ft.Done = make(chan struct{}, 1)
	go func() {
		wr.TopoServer().ActionEventLoop(ft.Tablet.Alias, func(actionPath, data string) error {
			actionNode, err := actionnode.ActionNodeFromJson(data, actionPath)
			if err != nil {
				// This callback runs outside the test goroutine, where
				// t.Fatalf (FailNow) is not allowed: it would only end this
				// goroutine. Record the failure and hand the error back.
				t.Errorf("ActionNodeFromJson failed: %v\n%v", err, data)
				return err
			}
			ta := actor.NewTabletActor(nil, ft.FakeMysqlDaemon, wr.TopoServer(), ft.Tablet.Alias)
			if err := ta.HandleAction(actionPath, actionNode.Action, actionNode.ActionGuid, false); err != nil {
				// action may just fail for any good reason
				t.Logf("HandleAction failed for %v: %v", actionNode.Action, err)
			}

			// this part would also be done by the agent
			tablet, err := wr.TopoServer().GetTablet(ft.Tablet.Alias)
			if err != nil {
				t.Logf("Cannot get tablet: %v", err)
			} else {
				updatedTablet := actor.CheckTabletMysqlPort(wr.TopoServer(), ft.FakeMysqlDaemon, tablet)
				if updatedTablet != nil {
					t.Logf("Updated tablet record")
				}
			}
			return nil
		}, ft.Done)
	}()
}
Exemple #5
0
// startFakeTabletActionLoop launches, in a background goroutine, the
// action event loop for the given fake tablet.
//
// Every action delivered by fix.Topo.ActionEventLoop is decoded and run
// in-process by a TabletActor backed by tablet.mysql. A HandleAction
// failure is only logged, and the handler always reports nil to the loop.
func (fix *Fixture) startFakeTabletActionLoop(tablet *tabletPack) {
	handle := func(actionPath, data string) error {
		node, decodeErr := actionnode.ActionNodeFromJson(data, actionPath)
		if decodeErr != nil {
			// NOTE(review): this runs on the event-loop goroutine; if
			// fix.Fatalf forwards to testing.T.Fatalf it is being called
			// off the test goroutine - confirm against Fixture.
			fix.Fatalf("ActionNodeFromJson failed: %v\n%v", decodeErr, data)
		}

		tabletActor := actor.NewTabletActor(nil, tablet.mysql, fix.Topo, tablet.Alias)
		runErr := tabletActor.HandleAction(actionPath, node.Action, node.ActionGuid, false)
		if runErr != nil {
			// action may just fail for any good reason
			fix.Logf("HandleAction failed for %v: %v", node.Action, runErr)
		}
		return nil
	}

	go func() {
		fix.Topo.ActionEventLoop(tablet.Alias, handle, fix.done)
	}()
}
Exemple #6
0
// staleActions asks zkts for the serialized action nodes under
// zkActionPath that actionnode.ActionNodeIsStale reports as stale for the
// given maxStaleness, and decodes each of them into an ActionNode.
//
// The first lookup or decode error aborts the call and is returned as is.
func staleActions(zkts *zktopo.Server, zkActionPath string, maxStaleness time.Duration) ([]*actionnode.ActionNode, error) {
	// Serialized form of every stale node.
	rawNodes, err := zkts.StaleActions(zkActionPath, maxStaleness, actionnode.ActionNodeIsStale)
	if err != nil {
		return nil, err
	}

	// Decode them one by one, keeping the order they were returned in.
	decoded := make([]*actionnode.ActionNode, 0, len(rawNodes))
	for _, raw := range rawNodes {
		node, decodeErr := actionnode.ActionNodeFromJson(raw, "")
		if decodeErr != nil {
			return nil, decodeErr
		}
		decoded = append(decoded, node)
	}
	return decoded, nil
}
Exemple #7
0
// WaitForCompletion waits on ts for the tablet action at actionPath and
// returns the Reply field of the resulting action node.
//
// A waitTime of zero or less is replaced by 24 hours. An error is returned
// when the wait itself fails, when the returned data cannot be decoded
// into an action node, or when the decoded node carries a non-empty Error.
func WaitForCompletion(ts topo.Server, actionPath string, waitTime time.Duration) (interface{}, error) {
	// No usable duration given: block for a sufficiently long time instead.
	timeout := waitTime
	if timeout <= 0 {
		timeout = 24 * time.Hour
	}

	raw, err := ts.WaitForTabletAction(actionPath, timeout, interrupted)
	if err != nil {
		return nil, err
	}

	// Decode the node, then surface any failure recorded on the action.
	node, parseErr := actionnode.ActionNodeFromJson(raw, "")
	if parseErr != nil {
		return nil, fmt.Errorf("action data error: %v %v %#v", actionPath, parseErr, raw)
	}
	if node.Error != "" {
		return nil, fmt.Errorf("action failed: %v %v", actionPath, node.Error)
	}
	return node.Reply, nil
}
Exemple #8
0
// HandleAction reads the action node stored at actionPath, claims it for
// this process, runs it through ta.dispatchAction, stores the outcome and
// unblocks the action in the topo server.
//
// action and actionGuid must match the values stored in the node,
// otherwise a TabletActorError is returned. forceRerun only matters when
// the node is marked as running but the recorded pid is gone: if false,
// the action is recorded as failed and unblocked; if true, the action is
// claimed and run again.
//
// The returned error is the action's own error, or the error from reading,
// decoding, claiming, storing or unblocking the node.
//
// This function should be protected from unforeseen panics, as
// dispatchAction will catch everything. The rest of the code in this
// function should not panic.
func (ta *TabletActor) HandleAction(actionPath, action, actionGuid string, forceRerun bool) error {
	tabletAlias, data, version, err := ta.ts.ReadTabletActionPath(actionPath)
	if err != nil {
		// Without this check the read error would be overwritten by the
		// decode below and reported as an unmarshaling problem.
		log.Errorf("HandleAction failed reading %v: %v", actionPath, err)
		return err
	}
	ta.tabletAlias = tabletAlias
	actionNode, err := actionnode.ActionNodeFromJson(data, actionPath)
	if err != nil {
		log.Errorf("HandleAction failed unmarshaling %v: %v", actionPath, err)
		return err
	}

	switch actionNode.State {
	case actionnode.ACTION_STATE_RUNNING:
		// see if the process is still running, and if so, wait for it
		proc, _ := os.FindProcess(actionNode.Pid)
		// Signal 0 delivers nothing; ESRCH means no such process exists.
		if proc.Signal(syscall.Signal(0)) == syscall.ESRCH {
			// process is dead, either clean up or re-run
			if !forceRerun {
				actionErr := fmt.Errorf("Previous vtaction process died")
				if err := StoreActionResponse(ta.ts, actionNode, actionPath, actionErr); err != nil {
					log.Errorf("Dead process detector failed to update actionNode: %v", err)
					return actionErr
				}
				if err := ta.ts.UnblockTabletAction(actionPath); err != nil {
					log.Errorf("Dead process detector failed unblocking: %v", err)
				}
				return actionErr
			}
			// forceRerun: fall out of the switch and claim the action again.
		} else {
			log.Warningf("HandleAction waiting for running action: %v", actionPath)
			_, err := initiator.WaitForCompletion(ta.ts, actionPath, 0)
			return err
		}
	case actionnode.ACTION_STATE_FAILED:
		// this happens only in a couple cases:
		// - vtaction was killed by a signal and we caught it
		// - vtaction died unexpectedly, and the next vtaction run detected it
		//
		// The stored message is data, not a format string, so it must not
		// be passed as the format argument.
		return fmt.Errorf("%v", actionNode.Error)
	case actionnode.ACTION_STATE_DONE:
		// this is bad
		return fmt.Errorf("Unexpected finished ActionNode in action queue: %v", actionPath)
	}

	// Claim the action by this process.
	actionNode.State = actionnode.ACTION_STATE_RUNNING
	actionNode.Pid = os.Getpid()
	newData := actionNode.ToJson()
	err = ta.ts.UpdateTabletAction(actionPath, newData, version)
	if err != nil {
		if err == topo.ErrBadVersion {
			// The action is scheduled by another
			// actor. Most likely the tablet restarted
			// during an action. Just wait for completion.
			log.Warningf("HandleAction waiting for scheduled action: %v", actionPath)
			_, err = initiator.WaitForCompletion(ta.ts, actionPath, 0)
		}
		return err
	}

	// signal handler after we've signed up for the action: on SIGTERM or
	// SIGINT, record the interruption in the action node and exit.
	c := make(chan os.Signal, 2)
	signal.Notify(c, syscall.SIGTERM, syscall.SIGINT)
	go func() {
		for sig := range c {
			err := StoreActionResponse(ta.ts, actionNode, actionPath, fmt.Errorf("vtaction interrupted by signal: %v", sig))
			if err != nil {
				log.Errorf("Signal handler failed to update actionNode: %v", err)
				os.Exit(-2)
			}
			os.Exit(-1)
		}
	}()

	log.Infof("HandleAction: %v %v", actionPath, data)
	// validate actions, but don't write this back into topo.Server
	if actionNode.Action != action || actionNode.ActionGuid != actionGuid {
		log.Errorf("HandleAction validation failed %v: (%v,%v) (%v,%v)",
			actionPath, actionNode.Action, action, actionNode.ActionGuid, actionGuid)
		return TabletActorError("invalid action initiation: " + action + " " + actionGuid)
	}
	actionErr := ta.dispatchAction(actionNode)
	if err := StoreActionResponse(ta.ts, actionNode, actionPath, actionErr); err != nil {
		return err
	}

	// unblock in topo.Server on completion
	if err := ta.ts.UnblockTabletAction(actionPath); err != nil {
		log.Errorf("HandleAction failed unblocking: %v", err)
		return err
	}
	return actionErr
}