Example #1
func EnableUpdateStreamService(tabletType string, dbcfgs dbconfigs.DBConfigs) {
	defer logError()
	UpdateStreamRpcService.actionLock.Lock()
	defer UpdateStreamRpcService.actionLock.Unlock()

	if !dbcfgsCorrect(tabletType, dbcfgs) {
		relog.Warning("missing/incomplete db configs file, cannot enable update stream service")
		return
	}

	if UpdateStreamRpcService.mycnf.BinLogPath == "" {
		relog.Warning("Update stream service requires binlogs enabled")
		return
	}

	if UpdateStreamRpcService.isServiceEnabled() {
		relog.Warning("Update stream service is already enabled")
		return
	}

	UpdateStreamRpcService.setState(ENABLED)

	UpdateStreamRpcService.mysqld = NewMysqld(UpdateStreamRpcService.mycnf, dbcfgs.Dba, dbcfgs.Repl)
	UpdateStreamRpcService.dbname = dbcfgs.App.Dbname
	relog.Info("dbcfgs.App.Dbname %v DbName %v", dbcfgs.App.Dbname, UpdateStreamRpcService.dbname)
	relog.Info("mycnf.BinLogPath %v mycnf.RelayLogPath %v", UpdateStreamRpcService.mycnf.BinLogPath, UpdateStreamRpcService.mycnf.RelayLogPath)
	UpdateStreamRpcService.tabletType = tabletType
	UpdateStreamRpcService.binlogPrefix = UpdateStreamRpcService.mycnf.BinLogPath
	UpdateStreamRpcService.logsDir = path.Dir(UpdateStreamRpcService.binlogPrefix)

	relog.Info("Update Stream enabled, logsDir %v", UpdateStreamRpcService.logsDir)
}
Example #2
func (zkts *Server) ActionEventLoop(tabletAlias topo.TabletAlias, dispatchAction func(actionPath, data string) error, done chan struct{}) {
	for {
		// Process any pending actions when we startup, before we start listening
		// for events.
		watch, err := zkts.handleActionQueue(tabletAlias, dispatchAction)
		if err != nil {
			relog.Warning("action queue failed: %v", err)
			time.Sleep(5 * time.Second)
			continue
		}

		// FIXME(msolomon) Add a skewing timer here to guarantee we wakeup
		// periodically even if events are missed?
		select {
		case event := <-watch:
			if !event.Ok() {
				// NOTE(msolomon) The zk meta conn will reconnect automatically, or
				// error out. At this point, there isn't much to do.
				relog.Warning("zookeeper not OK: %v", event)
				time.Sleep(5 * time.Second)
			}
			// Otherwise, just handle the queue above.
		case <-done:
			return
		}
	}
}
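A minimal sketch of how this loop might be driven from an agent's startup path; the goroutine, the done channel, and the call site are assumptions for illustration, not taken from the source:

done := make(chan struct{})
go zkts.ActionEventLoop(tabletAlias, agent.dispatchAction, done)
// ... later, during shutdown, closing the channel makes the loop return:
close(done)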
Example #3
func StartRowCacheInvalidation() {
	if !shouldInvalidatorRun() {
		relog.Warning("Row-cache invalidator not being enabled, criteria not met")
		CacheInvalidationProcessor.stopRowCacheInvalidation()
		return
	}

	if CacheInvalidationProcessor.isServiceEnabled() {
		relog.Warning("Row-cache invalidator service is already enabled")
		return
	}

	CacheInvalidationProcessor.stateLock.Lock()
	if shouldInvalidatorRun() {
		CacheInvalidationProcessor.setState(ENABLED)
		CacheInvalidationProcessor.stateLock.Unlock()
	} else {
		CacheInvalidationProcessor.setState(DISABLED)
		CacheInvalidationProcessor.stateLock.Unlock()
		return
	}
	relog.Info("Starting RowCacheInvalidation Service")

	CacheInvalidationProcessor.runInvalidationLoop()
}
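The sequence above is a double-checked guard: the unlocked checks are a cheap fast path, and the decision is re-validated under stateLock before the state flips, since conditions can change between the first check and acquiring the lock. A minimal sketch of the same pattern in isolation (all names are illustrative, assuming the standard sync package):

func enableOnce(mu *sync.Mutex, shouldRun func() bool, setEnabled func(bool)) bool {
	if !shouldRun() { // fast path, no lock held
		return false
	}
	mu.Lock()
	defer mu.Unlock()
	if !shouldRun() { // re-validate: the answer may have changed
		setEnabled(false)
		return false
	}
	setEnabled(true)
	return true
}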
Example #4
func getActions(zconn zk.Conn, actionPath string) ([]*tm.ActionNode, error) {
	actions, _, err := zconn.Children(actionPath)
	if err != nil {
		return nil, fmt.Errorf("getActions failed: %v %v", actionPath, err)
	}
	sort.Strings(actions)
	wg := sync.WaitGroup{}
	mu := sync.Mutex{}
	nodes := make([]*tm.ActionNode, 0, len(actions))
	for _, action := range actions {
		wg.Add(1)
		go func(action string) {
			defer wg.Done()
			actionNodePath := path.Join(actionPath, action)
			data, _, err := zconn.Get(actionNodePath)
			if err != nil {
				// a ZNONODE error means the action was consumed
				// between the Children() call and this Get();
				// skip it silently, warn on anything else
				if !zookeeper.IsError(err, zookeeper.ZNONODE) {
					relog.Warning("getActions: %v %v", actionNodePath, err)
				}
				return
			}
			actionNode, err := tm.ActionNodeFromJson(data, actionNodePath)
			if err != nil {
				relog.Warning("getActions: %v %v", actionNodePath, err)
				return
			}
			mu.Lock()
			nodes = append(nodes, actionNode)
			mu.Unlock()
		}(action)
	}
	wg.Wait()

	return nodes, nil
}
Example #5
func (rowCache *InvalidationProcessor) stopCache(reason string) {
	relog.Warning("Stopping rowcache invalidation, reason: '%v'", reason)
	rowCache.stopRowCacheInvalidation()
	if IsCachePoolAvailable() {
		relog.Warning("Disallowing Query Service as row-cache invalidator cannot run")
		DisallowQueries(false)
	}
}
Example #6
// refreshSomeValues refreshes up to maxToRefresh data entries and maxToRefresh children entries that are due
func (zkc *ZkCache) refreshSomeValues(zconn zk.Conn, maxToRefresh int) {
	// build a list of a few values we want to refresh
	refreshThreshold := time.Now().Add(-10 * time.Minute)

	// range will randomize the traversal order, so we will not always try
	// the same entries in the same order
	dataEntries := make([]*zkCacheEntry, 0, maxToRefresh)
	childrenEntries := make([]*zkCacheEntry, 0, maxToRefresh)
	zkc.mutex.Lock()
	for _, entry := range zkc.Cache {
		shouldBeDataRefreshed, shouldBeChildrenRefreshed := entry.checkForRefresh(refreshThreshold)
		if shouldBeDataRefreshed {
			dataEntries = append(dataEntries, entry)
		}
		if shouldBeChildrenRefreshed {
			childrenEntries = append(childrenEntries, entry)
		}

		// check if we have enough work to do
		if len(dataEntries) == maxToRefresh || len(childrenEntries) == maxToRefresh {
			break
		}
	}
	zkc.mutex.Unlock()

	// now refresh the values
	for _, entry := range dataEntries {
		data, stat, watch, err := zconn.GetW(entry.node.Path)
		if err == nil {
			zkStat := &zk.ZkStat{}
			zkStat.FromZookeeperStat(stat)
			entry.updateData(data, zkStat, watch)
		} else if zookeeper.IsError(err, zookeeper.ZCLOSING) {
			// connection is closing, no point in asking for more
			relog.Warning("failed to refresh cache: %v (and stopping refresh)", err.Error())
			return
		} else {
			// individual failure
			relog.Warning("failed to refresh cache: %v", err.Error())
		}
	}

	for _, entry := range childrenEntries {
		children, stat, watch, err := zconn.ChildrenW(entry.node.Path)
		if err == nil {
			zkStat := &zk.ZkStat{}
			zkStat.FromZookeeperStat(stat)
			entry.updateChildren(children, zkStat, watch)
		} else if zookeeper.IsError(err, zookeeper.ZCLOSING) {
			// connection is closing, no point in asking for more
			relog.Warning("failed to refresh cache: %v (and stopping refresh)", err.Error())
			return
		} else {
			// individual failure
			relog.Warning("failed to refresh cache: %v", err.Error())
		}
	}
}
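A hedged sketch of a caller for this method: refresh a bounded batch on a fixed cadence so a large cache is worked through incrementally. The ticker period, batch size, and done channel are illustrative assumptions:

ticker := time.NewTicker(30 * time.Second)
defer ticker.Stop()
for {
	select {
	case <-ticker.C:
		zkc.refreshSomeValues(zconn, 10) // at most 10 data and 10 children refreshes per pass
	case <-done:
		return
	}
}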
Example #7
func (zkts *Server) WaitForTabletAction(actionPath string, waitTime time.Duration, interrupted chan struct{}) (string, error) {
	timer := time.NewTimer(waitTime)
	defer timer.Stop()

	// see if the file exists, or set a watch
	// the loop is to resist zk disconnects while we're waiting
	actionLogPath := strings.Replace(actionPath, "/action/", "/actionlog/", 1)
wait:
	for {
		var retryDelay <-chan time.Time
		stat, watch, err := zkts.zconn.ExistsW(actionLogPath)
		if err != nil {
			delay := 5*time.Second + time.Duration(rand.Int63n(55e9))
			relog.Warning("unexpected zk error, delay retry %v: %v", delay, err)
			// No one likes a thundering herd.
			retryDelay = time.After(delay)
		} else if stat != nil {
			// file exists, go on
			break wait
		}

		// if the file doesn't exist yet, wait for creation event.
		// On any other event we'll retry the ExistsW
		select {
		case actionEvent := <-watch:
			if actionEvent.Type == zookeeper.EVENT_CREATED {
				break wait
			} else {
				// Log unexpected events. Reconnects are
				// handled by zk.Conn, so calling ExistsW again
				// will handle a disconnect.
				relog.Warning("unexpected zk event: %v", actionEvent)
			}
		case <-retryDelay:
			continue wait
		case <-timer.C:
			return "", topo.ErrTimeout
		case <-interrupted:
			return "", topo.ErrInterrupted
		}
	}

	// the node exists, read it
	data, _, err := zkts.zconn.Get(actionLogPath)
	if err != nil {
		return "", fmt.Errorf("action err: %v %v", actionLogPath, err)
	}

	return data, nil
}
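The retry delay above is a jittered backoff: rand.Int63n(55e9) is up to 55 seconds expressed in nanoseconds, so waiters retry somewhere between 5s and 60s rather than in lockstep. The same computation with the units spelled out:

delay := 5*time.Second + time.Duration(rand.Int63n(int64(55*time.Second)))
retryDelay = time.After(delay)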
Example #8
func (qe *QueryEngine) compareRow(logStats *sqlQueryStats, plan *CompiledPlan, cacheRow []sqltypes.Value, pk []sqltypes.Value) (dbrow []sqltypes.Value) {
	rowsAreEqual := func(row1, row2 []sqltypes.Value) bool {
		if len(row1) != len(row2) {
			return false
		}
		for i := 0; i < len(row1); i++ {
			if row1[i].IsNull() && row2[i].IsNull() {
				continue
			}
			if (row1[i].IsNull() && !row2[i].IsNull()) || (!row1[i].IsNull() && row2[i].IsNull()) || row1[i].String() != row2[i].String() {
				return false
			}
		}
		return true
	}
	reloadFromCache := func(pk []sqltypes.Value) (newRow []sqltypes.Value) {
		keys := make([]string, 1)
		keys[0] = buildKey(pk)
		rcresults := plan.TableInfo.Cache.Get(keys)
		if len(rcresults) == 0 {
			return nil
		}
		return rcresults[keys[0]].Row
	}

	resultFromdb := qe.qFetch(logStats, plan, plan.OuterQuery, pk)
	if len(resultFromdb.Rows) == 0 {
		// Reload from cache for verification
		if reloadFromCache(pk) == nil {
			return nil
		}
		relog.Warning("unexpected number of rows for %v", pk)
		errorStats.Add("Mismatch", 1)
		return nil
	}
	dbrow = resultFromdb.Rows[0]
	if !rowsAreEqual(cacheRow, dbrow) {
		// Reload from cache for verification
		newRow := reloadFromCache(pk)
		if newRow == nil {
			return
		}
		if !rowsAreEqual(newRow, dbrow) {
			relog.Warning("query: %v", plan.FullQuery)
			relog.Warning("mismatch for: %v, cache: %v, db: %v", pk, newRow, dbrow)
			errorStats.Add("Mismatch", 1)
		}
	}
	return dbrow
}
Example #9
// lockForAction creates the action node in zookeeper, waits for the
// queue lock, and displays a helpful error message if it can't get it
func (zkts *Server) lockForAction(actionDir, contents string, timeout time.Duration, interrupted chan struct{}) (string, error) {
	// create the action path
	actionPath, err := zkts.zconn.Create(actionDir, contents, zookeeper.SEQUENCE, zookeeper.WorldACL(zookeeper.PERM_ALL))
	if err != nil {
		return "", err
	}

	err = zk.ObtainQueueLock(zkts.zconn, actionPath, timeout, interrupted)
	if err != nil {
		var errToReturn error
		switch err {
		case zk.ErrInterrupted:
			errToReturn = topo.ErrInterrupted
		case zk.ErrTimeout:
			errToReturn = topo.ErrTimeout
		default:
			errToReturn = fmt.Errorf("failed to obtain action lock: %v %v", actionPath, err)
		}

		// Regardless of the reason, try to cleanup.
		relog.Warning("Failed to obtain action lock: %v", err)
		zkts.zconn.Delete(actionPath, -1)

		// Show the other actions in the directory
		dir := path.Dir(actionPath)
		children, _, err := zkts.zconn.Children(dir)
		if err != nil {
			relog.Warning("Failed to get children of %v: %v", dir, err)
			return "", errToReturn
		}

		if len(children) == 0 {
			relog.Warning("No other action running, you may just try again now.")
			return "", errToReturn
		}

		childPath := path.Join(dir, children[0])
		data, _, err := zkts.zconn.Get(childPath)
		if err != nil {
			relog.Warning("Failed to get first action node %v (may have just ended): %v", childPath, err)
			return "", errToReturn
		}

		relog.Warning("------ Most likely blocking action: %v\n%v", childPath, data)
		return "", errToReturn
	}

	return actionPath, nil
}
Example #10
func (node *Node) execAnalyzeDelete(getTable TableGetter) (plan *ExecPlan) {
	// Default plan
	plan = &ExecPlan{PlanId: PLAN_PASS_DML, FullQuery: node.GenerateFullQuery()}

	tableName := string(node.At(DELETE_TABLE_OFFSET).Value)
	tableInfo := plan.setTableInfo(tableName, getTable)

	if len(tableInfo.Indexes) == 0 || tableInfo.Indexes[0].Name != "PRIMARY" {
		relog.Warning("no primary key for table %s", tableName)
		plan.Reason = REASON_TABLE_NOINDEX
		return plan
	}

	plan.PlanId = PLAN_DML_SUBQUERY
	plan.OuterQuery = node.GenerateDeleteOuterQuery(tableInfo.Indexes[0])
	plan.Subquery = node.GenerateDeleteSubquery(tableInfo)

	conditions := node.At(DELETE_WHERE_OFFSET).execAnalyzeWhere()
	if conditions == nil {
		plan.Reason = REASON_WHERE
		return plan
	}

	if pkValues := getPKValues(conditions, tableInfo.Indexes[0]); pkValues != nil {
		plan.PlanId = PLAN_DML_PK
		plan.OuterQuery = plan.FullQuery
		plan.PKValues = pkValues
		return plan
	}

	return plan
}
Example #11
// FmtBindVariables returns the map of bind variables as JSON. For
// values that are strings or byte slices it only reports their type
// and length.
func (stats *sqlQueryStats) FmtBindVariables(full bool) string {
	var out map[string]interface{}
	if full {
		out = stats.BindVariables
	} else {
		// NOTE(szopa): I am getting rid of potentially large bind
		// variables.
		out = make(map[string]interface{})
		for k, v := range stats.BindVariables {
			switch val := v.(type) {
			case string:
				out[k] = fmt.Sprintf("string %v", len(val))
			case []byte:
				out[k] = fmt.Sprintf("bytes %v", len(val))
			default:
				out[k] = v
			}
		}
	}
	b, err := json.Marshal(out)
	if err != nil {
		relog.Warning("could not marshal %q", stats.BindVariables)
		return ""
	}
	return string(b)
}
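On the non-full branch, out must be assigned with = rather than declared with :=; a short variable declaration inside the else block would create a new inner variable and leave the outer out nil for json.Marshal. A standalone illustration of the shadowing pitfall:

var out map[string]interface{}
if true {
	out := make(map[string]interface{}) // := declares a new inner out; the outer one stays nil
	out["k"] = "v"
}
b, _ := json.Marshal(out) // marshals the outer, still-nil map
fmt.Println(string(b))    // prints: null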
Example #12
func (updateStream *UpdateStream) ServeUpdateStream(req *UpdateStreamRequest, sendReply SendUpdateStreamResponse) error {
	defer func() {
		if x := recover(); x != nil {
			// Send the error to the client.
			// panic(x)
			SendError(sendReply, x.(error), nil)
		}
	}()

	if !updateStream.isServiceEnabled() {
		relog.Warning("Unable to serve client request: Update stream service is not enabled yet")
		return fmt.Errorf("Update stream service is not enabled yet")
	}

	if !IsStartPositionValid(&req.StartPosition) {
		return fmt.Errorf("Invalid start position, cannot serve the stream")
	}
	relog.Info("ServeUpdateStream starting @ %v", req.StartPosition.String())

	startCoordinates := &req.StartPosition.Position
	blp := NewBlp(startCoordinates, updateStream)

	// locate the relay filename and position based on the masterPosition map
	if !IsMasterPositionValid(startCoordinates) {
		return fmt.Errorf("Invalid start position %v", req.StartPosition)
	}

	updateStream.actionLock.Lock()
	updateStream.stateWaitGroup.Add(1)
	updateStream.actionLock.Unlock()
	defer updateStream.clientDone()

	blp.StreamBinlog(sendReply, updateStream.binlogPrefix)
	return nil
}
Example #13
// A non-nil return signals that event processing should stop.
func (agent *ActionAgent) dispatchAction(actionPath, data string) error {
	relog.Info("action dispatch %v", actionPath)
	actionNode, err := ActionNodeFromJson(data, actionPath)
	if err != nil {
		relog.Error("action decode failed: %v %v", actionPath, err)
		return nil
	}

	logfile := flag.Lookup("logfile").Value.String()
	if !strings.HasPrefix(logfile, "/dev") {
		logfile = path.Join(path.Dir(logfile), "vtaction.log")
	}
	cmd := []string{
		agent.vtActionBinFile,
		"-action", actionNode.Action,
		"-action-node", actionPath,
		"-action-guid", actionNode.ActionGuid,
		"-mycnf-file", agent.MycnfFile,
		"-logfile", logfile,
	}
	cmd = append(cmd, agent.ts.GetSubprocessFlags()...)
	if agent.DbConfigsFile != "" {
		cmd = append(cmd, "-db-configs-file", agent.DbConfigsFile)
	}
	if agent.DbCredentialsFile != "" {
		cmd = append(cmd, "-db-credentials-file", agent.DbCredentialsFile)
	}
	relog.Info("action launch %v", cmd)
	vtActionCmd := exec.Command(cmd[0], cmd[1:]...)

	stdOut, vtActionErr := vtActionCmd.CombinedOutput()
	if vtActionErr != nil {
		relog.Error("agent action failed: %v %v\n%s", actionPath, vtActionErr, stdOut)
		// If the action failed, preserve single execution path semantics.
		return vtActionErr
	}

	relog.Info("agent action completed %v %s", actionPath, stdOut)

	// Save the old tablet so callbacks can have a better idea of the precise
	// nature of the transition.
	oldTablet := agent.Tablet().Tablet

	// Actions should have side effects on the tablet, so reload the data.
	if err := agent.readTablet(); err != nil {
		relog.Warning("failed rereading tablet after action - services may be inconsistent: %v %v", actionPath, err)
	} else {
		agent.runChangeCallbacks(oldTablet, actionPath)
	}

	// Maybe invalidate the schema.
	// This adds a dependency between tabletmanager and tabletserver,
	// so it's not ideal. But I (alainjobart) think it's better
	// to have up to date schema in vtocc.
	if actionNode.Action == TABLET_ACTION_APPLY_SCHEMA {
		tabletserver.ReloadSchema()
	}

	return nil
}
Example #14
func getInsertPKValues(pkColumnNumbers []int, rowList *Node, tableInfo *schema.Table) (pkValues []interface{}) {
	pkValues = make([]interface{}, len(pkColumnNumbers))
	for index, columnNumber := range pkColumnNumbers {
		if columnNumber == -1 {
			pkValues[index] = tableInfo.GetPKColumn(index).Default
			continue
		}
		values := make([]interface{}, rowList.Len())
		for j := 0; j < rowList.Len(); j++ {
			if columnNumber >= rowList.At(j).At(0).Len() { // NODE_LIST->'('->NODE_LIST
				panic(NewParserError("Column count doesn't match value count"))
			}
			node := rowList.At(j).At(0).At(columnNumber) // NODE_LIST->'('->NODE_LIST->Value
			value := node.execAnalyzeValue()
			if value == nil {
				relog.Warning("insert is too complex %v", node)
				return nil
			}
			values[j] = asInterface(value)
		}
		if len(values) == 1 {
			pkValues[index] = values[0]
		} else {
			pkValues[index] = values
		}
	}
	return pkValues
}
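For intuition, a hypothetical shape check (table, columns, and values invented for illustration): with PRIMARY KEY (id), a multi-row insert yields one entry per PK column holding the per-row values, while a single-row insert collapses to the bare value:

// INSERT INTO t (id, val) VALUES (1, 'a'), (2, 'b')  =>  pkValues == []interface{}{[]interface{}{1, 2}}
// INSERT INTO t (id, val) VALUES (1, 'a')            =>  pkValues == []interface{}{1}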
Example #15
func (mysqld *Mysqld) SnapshotSourceEnd(slaveStartRequired, readOnly, deleteSnapshot bool) error {
	if deleteSnapshot {
		// clean out our files
		relog.Info("removing snapshot links: %v", mysqld.SnapshotDir)
		if err := os.RemoveAll(mysqld.SnapshotDir); err != nil {
			relog.Warning("failed to remove old snapshot: %v", err)
			return err
		}
	}

	// Try to restart mysqld
	if err := Start(mysqld, MysqlWaitTime); err != nil {
		return err
	}

	// Restore original mysqld state that we saved above.
	if slaveStartRequired {
		if err := mysqld.StartSlave(); err != nil {
			return err
		}

		// this should be quick, but we might as well just wait
		if err := mysqld.WaitForSlaveStart(SlaveStartDeadline); err != nil {
			return err
		}
	}

	// And set read-only mode
	if err := mysqld.SetReadOnly(readOnly); err != nil {
		return err
	}

	return nil
}
Example #16
// If error is not nil, the results in the map may be incomplete.
func GetTabletMap(ts topo.Server, tabletAliases []topo.TabletAlias) (map[topo.TabletAlias]*topo.TabletInfo, error) {
	wg := sync.WaitGroup{}
	mutex := sync.Mutex{}

	tabletMap := make(map[topo.TabletAlias]*topo.TabletInfo)
	var someError error

	for _, tabletAlias := range tabletAliases {
		wg.Add(1)
		go func(tabletAlias topo.TabletAlias) {
			defer wg.Done()
			tabletInfo, err := ts.GetTablet(tabletAlias)
			mutex.Lock()
			if err != nil {
				relog.Warning("%v: %v", tabletAlias, err)
				// There can be data races removing nodes - ignore them for now.
				if err != topo.ErrNoNode {
					someError = err
				}
			} else {
				tabletMap[tabletAlias] = tabletInfo
			}
			mutex.Unlock()
		}(tabletAlias)
	}
	wg.Wait()
	return tabletMap, someError
}
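A minimal usage sketch (the alias list is assumed to come from elsewhere): even on error, the returned map holds whatever lookups succeeded, so the caller decides whether a partial result is acceptable:

tabletMap, err := GetTabletMap(ts, aliases)
if err != nil {
	relog.Warning("tablet map may be incomplete: %v", err)
}
for alias, ti := range tabletMap {
	relog.Info("tablet %v has type %v", alias, ti.Type)
}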
Example #17
func getPKValues(conditions []*Node, pkIndex *schema.Index) (pkValues []interface{}) {
	if pkIndex.Name != "PRIMARY" {
		relog.Warning("Table has no primary key")
		return nil
	}
	pkIndexScore := NewIndexScore(pkIndex)
	pkValues = make([]interface{}, len(pkIndexScore.ColumnMatch))
	for _, condition := range conditions {
		if condition.Type != '=' && condition.Type != IN {
			return nil
		}
		index := pkIndexScore.FindMatch(string(condition.At(0).Value))
		if index == -1 {
			return nil
		}
		switch condition.Type {
		case '=':
			pkValues[index] = asInterface(condition.At(1))
		case IN:
			pkValues[index], _ = condition.At(1).At(0).parseList()
		}
	}
	if pkIndexScore.GetScore() == PERFECT_SCORE {
		return pkValues
	}
	return nil
}
Example #18
// Return a sorted list of tablets.
func GetAllTablets(ts topo.Server, cell string) ([]*topo.TabletInfo, error) {
	aliases, err := ts.GetTabletsByCell(cell)
	if err != nil {
		return nil, err
	}
	sort.Sort(topo.TabletAliasList(aliases))

	tabletMap, err := GetTabletMap(ts, aliases)
	if err != nil {
		// we got an error other than topo.ErrNoNode
		return nil, err
	}
	tablets := make([]*topo.TabletInfo, 0, len(aliases))
	for _, tabletAlias := range aliases {
		tabletInfo, ok := tabletMap[tabletAlias]
		if !ok {
			// tablet disappeared on us (GetTabletMap ignores
			// topo.ErrNoNode), just echo a warning
			relog.Warning("failed to load tablet %v", tabletAlias)
		} else {
			tablets = append(tablets, tabletInfo)
		}
	}

	return tablets, nil
}
Example #19
// CopyKeyspaces will create the keyspaces in the destination topo
func CopyKeyspaces(fromTS, toTS topo.Server) {
	keyspaces, err := fromTS.GetKeyspaces()
	if err != nil {
		relog.Fatal("fromTS.GetKeyspaces failed: %v", err)
	}

	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	for _, keyspace := range keyspaces {
		wg.Add(1)
		go func(keyspace string) {
			defer wg.Done()
			if err := toTS.CreateKeyspace(keyspace); err != nil {
				if err == topo.ErrNodeExists {
					relog.Warning("keyspace %v already exists", keyspace)
				} else {
					rec.RecordError(err)
				}
			}
		}(keyspace)
	}
	wg.Wait()
	if rec.HasErrors() {
		relog.Fatal("copyKeyspaces failed: %v", rec.Error())
	}
}
Example #20
func copyBufN(dst io.Writer, src io.Reader, totalLen int64, buf []byte) (written int64, err error) {
	for written < totalLen {
		toBeRead := int64(len(buf))
		if diffLen := totalLen - written; diffLen < toBeRead {
			toBeRead = diffLen
		}
		nr, er := src.Read(buf[0:toBeRead])
		if nr > 0 {
			nw, ew := dst.Write(buf[0:nr])
			if nw > 0 {
				written += int64(nw)
			}
			if ew != nil {
				err = ew
				break
			}
			if nr != nw {
				relog.Warning("Short write to dst")
				err = io.ErrShortWrite
				break
			}
		}
		if er != nil {
			err = er
			break
		}
	}
	return written, err
}
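A hypothetical call site, copying exactly 1 MiB with a 32 KiB scratch buffer (both sizes are illustrative):

buf := make([]byte, 32*1024)
written, err := copyBufN(dst, src, 1<<20, buf)
if err != nil {
	relog.Warning("copy stopped at %v of %v bytes: %v", written, 1<<20, err)
}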
Example #21
func (vtc *VtConn) Exec(query string, bindVars map[string]interface{}) (db.Result, error) {
	attempt := 0
	for {
		result, err := vtc.Conn.Exec(query, bindVars)
		if err == nil {
			vtc.timeFailed = zeroTime
			return result, nil
		}

		errType, err := vtc.handleErr(err)
		if errType != ErrTypeRetry {
			return nil, err
		}
		for {
			attempt++
			if attempt > vtc.maxAttempts {
				return nil, fmt.Errorf("vt: max recovery attempts exceeded: %v", err)
			}
			vtc.Close()
			time.Sleep(vtc.reconnectDelay)
			if err := vtc.dial(); err == nil {
				break
			}
			relog.Warning("vt: error dialing on exec %v", vtc.Conn.dbi.Host)
		}
	}

	panic("unreachable")
}
Example #22
func (vtc *VtConn) handleErr(err error) (int, error) {
	now := time.Now()
	if vtc.timeFailed.IsZero() {
		vtc.timeFailed = now
	} else if now.Sub(vtc.timeFailed) > vtc.recoveryTimeout() {
		vtc.Close()
		return ErrTypeFatal, fmt.Errorf("vt: max recovery time exceeded: %v", err)
	}

	errType := ErrTypeApp
	if tabletErr, ok := err.(TabletError); ok {
		msg := strings.ToLower(tabletErr.err.Error())
		if strings.HasPrefix(msg, "fatal") {
			errType = ErrTypeFatal
		} else if strings.HasPrefix(msg, "retry") {
			errType = ErrTypeRetry
		}
	} else if netErr, ok := err.(net.Error); ok && netErr.Temporary() {
		errType = ErrTypeRetry
	}

	if errType == ErrTypeRetry && vtc.TransactionId != 0 {
		errType = ErrTypeApp
		err = fmt.Errorf("vt: cannot retry within a transaction: %v", err)
		time.Sleep(vtc.reconnectDelay)
		vtc.Close()
		dialErr := vtc.dial()
		relog.Warning("vt: redial error %v", dialErr)
	}

	return errType, err
}
Example #23
func (vtc *VtConn) Begin() (db.Tx, error) {
	attempt := 0
	for {
		tx, err := vtc.Conn.Begin()
		if err == nil {
			vtc.timeFailed = zeroTime
			return tx, nil
		}

		errType, err := vtc.handleErr(err)
		if errType != ErrTypeRetry {
			return nil, err
		}
		for {
			attempt++
			if attempt > vtc.maxAttempts {
				return nil, fmt.Errorf("vt: max recovery attempts exceeded: %v", err)
			}
			vtc.Close()
			time.Sleep(vtc.reconnectDelay)
			if err := vtc.dial(); err == nil {
				break
			}
			relog.Warning("vt: error dialing on begin %v", vtc.Conn.dbi.Host)
		}
	}
	panic("unreachable")
}
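Exec and Begin share the same retry skeleton, differing only in the operation and the log message. A sketch of how it could be factored out; withRetry and its signature are assumptions for illustration, not part of the source:

func (vtc *VtConn) withRetry(label string, op func() error) error {
	attempt := 0
	for {
		err := op()
		if err == nil {
			vtc.timeFailed = zeroTime
			return nil
		}
		errType, err := vtc.handleErr(err)
		if errType != ErrTypeRetry {
			return err
		}
		for {
			attempt++
			if attempt > vtc.maxAttempts {
				return fmt.Errorf("vt: max recovery attempts exceeded: %v", err)
			}
			vtc.Close()
			time.Sleep(vtc.reconnectDelay)
			if err := vtc.dial(); err == nil {
				break
			}
			relog.Warning("vt: error dialing on %v %v", label, vtc.Conn.dbi.Host)
		}
	}
}

Exec would then shrink to a closure that captures its result and returns it after withRetry succeeds, and Begin likewise.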
Example #24
// Return a list of corresponding replication positions.
// Handles masters and slaves, but it's up to the caller to guarantee
// all tablets are in the same shard.
func (wr *Wrangler) tabletReplicationPositions(tablets []*topo.TabletInfo) ([]*mysqlctl.ReplicationPosition, error) {
	relog.Debug("tabletReplicationPositions %v", tablets)
	calls := make([]*rpcContext, len(tablets))
	wg := sync.WaitGroup{}

	f := func(idx int) {
		defer wg.Done()
		ti := tablets[idx]
		ctx := &rpcContext{tablet: ti}
		calls[idx] = ctx

		var actionPath string
		if ti.Type == topo.TYPE_MASTER {
			actionPath, ctx.err = wr.ai.MasterPosition(ti.Alias())
		} else if ti.IsSlaveType() {
			actionPath, ctx.err = wr.ai.SlavePosition(ti.Alias())
		}

		if ctx.err != nil {
			return
		}

		var result interface{}
		if result, ctx.err = wr.ai.WaitForCompletionReply(actionPath, wr.actionTimeout()); ctx.err != nil {
			return
		}
		ctx.position = result.(*mysqlctl.ReplicationPosition)
	}

	for i, tablet := range tablets {
		// Don't scan tablets that won't return something useful. Otherwise, you'll
		// end up waiting for a timeout.
		if tablet.Type == topo.TYPE_MASTER || tablet.IsSlaveType() {
			wg.Add(1)
			go f(i)
		} else {
			relog.Info("tabletReplicationPositions: skipping tablet %v type %v", tablet.Alias(), tablet.Type)
		}
	}
	wg.Wait()

	someErrors := false
	positions := make([]*mysqlctl.ReplicationPosition, len(tablets))
	for i, ctx := range calls {
		if ctx == nil {
			continue
		}
		if ctx.err != nil {
			relog.Warning("could not get replication position for tablet %v %v", ctx.tablet.Alias(), ctx.err)
			someErrors = true
		} else {
			positions[i] = ctx.position
		}
	}
	if someErrors {
		return positions, fmt.Errorf("partial position map, some errors")
	}
	return positions, nil
}
Example #25
func (agent *ActionAgent) RegisterQueryService(mysqld *mysqlctl.Mysqld) {
	if TabletManagerRpcService != nil {
		relog.Warning("RPC service already up %v", TabletManagerRpcService)
		return
	}
	TabletManagerRpcService = &TabletManager{agent, mysqld}
	rpcwrap.RegisterAuthenticated(TabletManagerRpcService)
}
Example #26
func (rowCache *InvalidationProcessor) updateErrCounters(err *InvalidationError) {
	relog.Error(err.Error())
	if errorStats == nil {
		relog.Warning("errorStats is not initialized")
		return
	}
	errorStats.Add(err.errType, 1)
}
Example #27
func (tm *TabletManager) wrapErr(context *rpcproto.Context, name string, args interface{}, reply interface{}, err error) error {
	if err != nil {
		relog.Warning("TabletManager.%v(%v)(from %v) error: %v", name, args, context.RemoteAddr, err.Error())
		return fmt.Errorf("%v (on %v)", err, tm.agent.tabletAlias)
	}
	relog.Info("TabletManager.%v(%v)(from %v): %v", name, args, context.RemoteAddr, reply)
	return nil
}
Example #28
func ActionNodeIsStale(data string) bool {
	actionNode, err := ActionNodeFromJson(data, "")
	if err != nil {
		relog.Warning("bad action data: %v %#v", err, data)
		return false
	}

	return actionNode.State != ACTION_STATE_RUNNING
}
Example #29
func RegisterQueryService(config Config) {
	if SqlQueryRpcService != nil {
		relog.Warning("RPC service already up %v", SqlQueryRpcService)
		return
	}
	SqlQueryRpcService = NewSqlQuery(config)
	proto.RegisterAuthenticated(SqlQueryRpcService)
	http.HandleFunc("/debug/health", healthCheck)
}
Example #30
// custom function to serve files, with If-Modified-Since and gzip support for larger files
func sendFile(rw http.ResponseWriter, req *http.Request, path string) {
	relog.Info("serve %v %v", req.URL.Path, path)
	file, err := os.Open(path)
	if err != nil {
		http.NotFound(rw, req)
		return
	}
	defer file.Close()

	fileinfo, err := file.Stat()
	if err != nil {
		http.NotFound(rw, req)
		return
	}

	// for directories, or for files smaller than 1k, use library
	if fileinfo.Mode().IsDir() || fileinfo.Size() < 1024 {
		http.ServeFile(rw, req, path)
		return
	}

	// supports If-Modified-Since header
	if t, err := time.Parse(http.TimeFormat, req.Header.Get("If-Modified-Since")); err == nil && fileinfo.ModTime().Before(t.Add(1*time.Second)) {
		rw.WriteHeader(http.StatusNotModified)
		return
	}

	// support Accept-Encoding header
	var writer io.Writer = rw
	var reader io.Reader = file
	if !strings.HasSuffix(path, ".gz") {
		ae := req.Header.Get("Accept-Encoding")

		if strings.Contains(ae, "gzip") {
			gz, err := cgzip.NewWriterLevel(rw, cgzip.Z_BEST_SPEED)
			if err != nil {
				http.Error(rw, err.Error(), http.StatusInternalServerError)
				return
			}
			rw.Header().Set("Content-Encoding", "gzip")
			defer gz.Close()
			writer = gz
		}
	}

	// add content-length if we know it
	if writer == rw && reader == file {
		rw.Header().Set("Content-Length", fmt.Sprintf("%v", fileinfo.Size()))
	}

	// and just copy content out
	rw.Header().Set("Last-Modified", fileinfo.ModTime().UTC().Format(http.TimeFormat))
	rw.WriteHeader(http.StatusOK)
	if _, err := io.Copy(writer, reader); err != nil {
		relog.Warning("transfer failed %v: %v", path, err)
	}
}
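A hedged sketch of wiring this handler into the default mux; the URL prefix and document root are invented for illustration, and path.Clean guards against path traversal in the request path:

http.HandleFunc("/static/", func(rw http.ResponseWriter, req *http.Request) {
	name := strings.TrimPrefix(req.URL.Path, "/static/")
	sendFile(rw, req, path.Join("/var/www/static", path.Clean("/"+name)))
})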