Example #1
0
// NewServer creates a new EtcdServer from the supplied configuration. The
// configuration is considered static for the lifetime of the EtcdServer.
//
// The data directory is first inspected with version.DetectDataDir and passed
// through upgradeDataDir. The raft node is then started in one of three ways,
// selected by whether a WAL already exists in cfg.WALDir() and by
// cfg.NewCluster:
//
//  1. no WAL and !cfg.NewCluster: join an existing cluster whose membership is
//     fetched from the configured peers;
//  2. no WAL and cfg.NewCluster: bootstrap a new cluster, optionally resolving
//     the membership through discovery when cfg.ShouldDiscover() is true;
//  3. WAL present: restart from the on-disk snapshot (if any) and WAL.
//
// A non-nil error is returned when migration, configuration verification,
// peer/discovery lookup or snapshot loading fails. A failure to recover the
// store from a loaded snapshot is reported through plog.Panicf instead.
func NewServer(cfg *ServerConfig) (*EtcdServer, error) {
	st := store.New(StoreClusterPrefix, StoreKeysPrefix)
	var w *wal.WAL
	var n raft.Node
	var s *raft.MemoryStorage
	var id types.ID
	var cl *cluster

	// Run the migrations.
	dataVer, err := version.DetectDataDir(cfg.DataDir)
	if err != nil {
		return nil, err
	}
	if err := upgradeDataDir(cfg.DataDir, cfg.Name, dataVer); err != nil {
		return nil, err
	}

	haveWAL := wal.Exist(cfg.WALDir())
	ss := snap.New(cfg.SnapDir())

	// remotes is only populated when joining an existing cluster; it stays nil
	// (and the AddRemote loop below is a no-op) in the other two cases.
	var remotes []*Member
	switch {
	case !haveWAL && !cfg.NewCluster:
		// Join an existing cluster.
		if err := cfg.VerifyJoinExisting(); err != nil {
			return nil, err
		}
		cl, err = newClusterFromURLsMap(cfg.InitialClusterToken, cfg.InitialPeerURLsMap)
		if err != nil {
			return nil, err
		}
		existingCluster, err := GetClusterFromRemotePeers(getRemotePeerURLs(cl, cfg.Name), cfg.Transport)
		if err != nil {
			return nil, fmt.Errorf("cannot fetch cluster info from peer urls: %v", err)
		}
		if err := ValidateClusterAndAssignIDs(cl, existingCluster); err != nil {
			return nil, fmt.Errorf("error validating peerURLs %s: %v", existingCluster, err)
		}
		if !isCompatibleWithCluster(cl, cl.MemberByName(cfg.Name).ID, cfg.Transport) {
			return nil, fmt.Errorf("incomptible with current running cluster")
		}

		remotes = existingCluster.Members()
		cl.SetID(existingCluster.id)
		cl.SetStore(st)
		cfg.Print()
		id, n, s, w = startNode(cfg, cl, nil)
	case !haveWAL && cfg.NewCluster:
		// Bootstrap a brand-new cluster.
		if err := cfg.VerifyBootstrap(); err != nil {
			return nil, err
		}
		cl, err = newClusterFromURLsMap(cfg.InitialClusterToken, cfg.InitialPeerURLsMap)
		if err != nil {
			return nil, err
		}
		m := cl.MemberByName(cfg.Name)
		if isMemberBootstrapped(cl, cfg.Name, cfg.Transport) {
			return nil, fmt.Errorf("member %s has already been bootstrapped", m.ID)
		}
		if cfg.ShouldDiscover() {
			// Replace the statically configured cluster with the one returned
			// by the discovery service.
			str, err := discovery.JoinCluster(cfg.DiscoveryURL, cfg.DiscoveryProxy, m.ID, cfg.InitialPeerURLsMap.String())
			if err != nil {
				return nil, err
			}
			urlsmap, err := types.NewURLsMap(str)
			if err != nil {
				return nil, err
			}
			if checkDuplicateURL(urlsmap) {
				return nil, fmt.Errorf("discovery cluster %s has duplicate url", urlsmap)
			}
			if cl, err = newClusterFromURLsMap(cfg.InitialClusterToken, urlsmap); err != nil {
				return nil, err
			}
		}
		cl.SetStore(st)
		cfg.PrintWithInitial()
		id, n, s, w = startNode(cfg, cl, cl.MemberIDs())
	case haveWAL:
		// Restart from existing on-disk state.
		if err := fileutil.IsDirWriteable(cfg.DataDir); err != nil {
			return nil, fmt.Errorf("cannot write to data directory: %v", err)
		}

		if err := fileutil.IsDirWriteable(cfg.MemberDir()); err != nil {
			return nil, fmt.Errorf("cannot write to member directory: %v", err)
		}

		if cfg.ShouldDiscover() {
			plog.Warningf("discovery token ignored since a cluster has already been initialized. Valid log found at %q", cfg.WALDir())
		}
		// A missing snapshot is not an error: snapshot simply stays nil.
		snapshot, err := ss.Load()
		if err != nil && err != snap.ErrNoSnapshot {
			return nil, err
		}
		if snapshot != nil {
			if err := st.Recovery(snapshot.Data); err != nil {
				plog.Panicf("recovered store from snapshot error: %v", err)
			}
			plog.Infof("recovered store from snapshot at index %d", snapshot.Metadata.Index)
		}
		cfg.Print()
		if !cfg.ForceNewCluster {
			id, cl, n, s, w = restartNode(cfg, snapshot)
		} else {
			id, cl, n, s, w = restartAsStandaloneNode(cfg, snapshot)
		}
		cl.SetStore(st)
		cl.Recover()
		// Report the cluster only once cl has been assigned and recovered.
		// Logging it before the restart call would always format a nil
		// *cluster, because cl is not set anywhere earlier in this branch.
		if snapshot != nil {
			plog.Infof("loaded cluster information from store: %s", cl)
		}
	default:
		return nil, fmt.Errorf("unsupported bootstrap config")
	}

	sstats := &stats.ServerStats{
		Name: cfg.Name,
		ID:   id.String(),
	}
	sstats.Initialize()
	lstats := stats.NewLeaderStats(id.String())

	srv := &EtcdServer{
		cfg:       cfg,
		snapCount: cfg.SnapCount,
		errorc:    make(chan error, 1),
		store:     st,
		r: raftNode{
			Node:        n,
			ticker:      time.Tick(time.Duration(cfg.TickMs) * time.Millisecond),
			raftStorage: s,
			storage:     NewStorage(w, ss),
		},
		id:            id,
		attributes:    Attributes{Name: cfg.Name, ClientURLs: cfg.ClientURLs.StringSlice()},
		cluster:       cl,
		stats:         sstats,
		lstats:        lstats,
		SyncTicker:    time.Tick(500 * time.Millisecond),
		reqIDGen:      idutil.NewGenerator(uint8(id), time.Now()),
		forceVersionC: make(chan struct{}),
	}

	// TODO: move transport initialization near the definition of remote
	tr := rafthttp.NewTransporter(cfg.Transport, id, cl.ID(), srv, srv.errorc, sstats, lstats)
	// add all remotes into transport
	for _, m := range remotes {
		if m.ID != id {
			tr.AddRemote(m.ID, m.PeerURLs)
		}
	}
	// Every other member of the local cluster view is registered as a peer.
	for _, m := range cl.Members() {
		if m.ID != id {
			tr.AddPeer(m.ID, m.PeerURLs)
		}
	}
	srv.r.transport = tr
	return srv, nil
}
Example #2
0
// Migrate4To2 converts the v0.4 data found under dataDir (latest snapshot,
// config file and log file) into the v2 on-disk layout: a WAL created in
// walDir2(dataDir) and, when a v0.4 snapshot exists, a snapshot saved in
// snapDir2(dataDir).
//
// name is passed to GuessNodeID together with the node IDs found in the log,
// the snapshot and the config to determine this node's ID.
//
// An error is returned when any v0.4 file cannot be decoded, when the node ID
// cannot be determined, when the v0.4 log contains no entries, or when writing
// the new WAL or snapshot fails.
func Migrate4To2(dataDir string, name string) error {
	// prep new directories
	sd2 := snapDir2(dataDir)
	if err := os.MkdirAll(sd2, 0700); err != nil {
		return fmt.Errorf("failed creating snapshot directory %s: %v", sd2, err)
	}

	// read v0.4 data
	snap4, err := DecodeLatestSnapshot4FromDir(snapDir4(dataDir))
	if err != nil {
		return err
	}

	cfg4, err := DecodeConfig4FromFile(cfgFile4(dataDir))
	if err != nil {
		return err
	}

	ents4, err := DecodeLog4FromFile(logFile4(dataDir))
	if err != nil {
		return err
	}

	nodeIDs := ents4.NodeIDs()
	nodeID := GuessNodeID(nodeIDs, snap4, cfg4, name)

	// A zero node ID is treated as "could not be determined".
	if nodeID == 0 {
		return fmt.Errorf("Couldn't figure out the node ID from the log or flags, cannot convert")
	}

	// The migrated WAL is always stamped with the same fixed cluster ID.
	metadata := pbutil.MustMarshal(&pb.Metadata{NodeID: nodeID, ClusterID: 0x04add5})
	wd2 := walDir2(dataDir)
	w, err := wal.Create(wd2, metadata)
	if err != nil {
		return fmt.Errorf("failed initializing wal at %s: %v", wd2, err)
	}
	defer w.Close()

	// transform v0.4 data
	var snap2 *raftpb.Snapshot
	if snap4 == nil {
		log.Printf("No snapshot found")
	} else {
		log.Printf("Found snapshot: lastIndex=%d", snap4.LastIndex)

		snap2 = snap4.Snapshot2()
	}

	st2 := cfg4.HardState2()

	// If we've got the most recent snapshot, we can use its committed index. Still likely less than the current actual index, but worth it for the replay.
	if snap2 != nil && st2.Commit < snap2.Metadata.Index {
		st2.Commit = snap2.Metadata.Index
	}

	ents2, err := Entries4To2(ents4)
	if err != nil {
		return err
	}

	// The first and last entries are indexed below, so an empty log must be
	// rejected here instead of panicking with an index-out-of-range.
	ents2Len := len(ents2)
	if ents2Len == 0 {
		return fmt.Errorf("no log entries found in %s, cannot convert", logFile4(dataDir))
	}
	log.Printf("Found %d log entries: firstIndex=%d lastIndex=%d", ents2Len, ents2[0].Index, ents2[ents2Len-1].Index)

	// set the state term to the biggest term we have ever seen,
	// so term of future entries will not be the same with term of old ones.
	st2.Term = ents2[ents2Len-1].Term

	// explicitly prepend an empty entry as the WAL code expects it
	ents2 = append(make([]raftpb.Entry, 1), ents2...)

	if err = w.Save(st2, ents2); err != nil {
		return err
	}
	log.Printf("Log migration successful")

	// migrate snapshot (if necessary) and logs
	// walsnap keeps its zero value when there is no snapshot; it is still
	// recorded in the WAL below.
	var walsnap walpb.Snapshot
	if snap2 != nil {
		walsnap.Index, walsnap.Term = snap2.Metadata.Index, snap2.Metadata.Term
		ss := snap.New(sd2)
		if err := ss.SaveSnap(*snap2); err != nil {
			return err
		}
		log.Printf("Snapshot migration successful")
	}
	if err = w.SaveSnapshot(walsnap); err != nil {
		return err
	}

	return nil
}