// NewSynchronizer // - connect // - get optime func NewSynchronizer(config Config) *Synchronizer { p := new(Synchronizer) p.config = config if s, err := mgo.DialWithTimeout(p.config.From, time.Second*3); err == nil { p.srcSession = s p.srcSession.SetSocketTimeout(0) p.srcSession.SetSyncTimeout(0) p.srcSession.SetMode(mgo.Strong, false) // always read from primary p.srcSession.SetCursorTimeout(0) log.Printf("connected to %s\n", p.config.From) } else { log.Println(err, p.config.From) return nil } if s, err := mgo.DialWithTimeout(p.config.To, time.Second*3); err == nil { p.dstSession = s p.dstSession.SetSocketTimeout(0) p.dstSession.SetSyncTimeout(0) p.dstSession.SetSafe(&mgo.Safe{W: 1}) p.dstSession.SetMode(mgo.Eventual, false) log.Printf("connected to %s\n", p.config.To) } else { log.Println(err, p.config.To) return nil } if p.config.StartOptime > 0 { p.optime = bson.MongoTimestamp(int64(p.config.StartOptime) << 32) } else { if optime, err := utils.GetOptime(p.srcSession); err == nil { p.optime = optime } else { log.Println(err) return nil } } log.Printf("optime is %v %v\n", utils.GetTimestampFromOptime(p.optime), utils.GetTimeFromOptime(p.optime)) return p }
// dispatch oplog to workers func (p *OplogReplayer) Run() error { log.Printf("locating oplog at %v\n", utils.GetTimestampFromOptime(p.optime)) Begin: iter := p.srcSession.DB("local").C("oplog.rs").Find(bson.M{"ts": bson.M{"$gte": p.optime}}).Tail(-1) n := 0 oplog_valid := false if inc := int64(p.optime) << 32 >> 32; inc == 0 { oplog_valid = true log.Println("start optime specified by user, skip verification") } for { var oplog bson.M if iter.Next(&oplog) { if !oplog_valid { if oplog["ts"] != p.optime { log.Fatalf("oplog is stale, except %v, current %v\n", utils.GetTimestampFromOptime(p.optime), utils.GetTimestampFromOptime(oplog["ts"].(bson.MongoTimestamp))) } oplog_valid = true log.Println("oplog is OK") continue } // **COMMAND** should excute until all previous operations done to guatantee sequence // worker-0 is the master goroutine, all commands will be sent to it // INSERT/UPDATE/DELETE hash to different workers switch oplog["op"] { case "c": // wait for all previous operations done for { ready := true for i := 0; i < p.nWorkers; i++ { if p.workers[i].Qsize() > 0 { ready = false break } } if ready { break } else { time.Sleep(time.Millisecond * 10) // sleep 10ms } } p.workers[0].Push(oplog) // wait for command done for { if p.workers[0].Qsize() == 0 { break } else { time.Sleep(time.Millisecond * 10) // sleep 10ms } } case "i": fallthrough case "u": fallthrough case "d": oid, err := utils.GetObjectIdFromOplog(oplog) if err != nil { log.Fatalln("FATAL GetObjectIdFromOplog", err) continue } bytes, err := bson.Marshal(bson.M{"_id": oid}) if err != nil { log.Fatalln("FATAL oid to bytes", err) continue } wid := crc32.ChecksumIEEE(bytes) % uint32(p.nWorkers) p.workers[wid].Push(oplog) } n++ if n%1000 == 0 { // get optime of the lastest oplog has been replayed var optime bson.MongoTimestamp = 0 //optime = (1 << 63) - 1 for i := 0; i < p.nWorkers; i++ { ts := p.workers[i].Optime() if optime < ts { optime = ts } } p.optime = optime // set current optime log.Printf("\t%d replayed, %d secs delay, sync to %v %v", n, time.Now().Unix()-utils.GetTimeFromOptime(p.optime).Unix(), utils.GetTimeFromOptime(p.optime), utils.GetTimestampFromOptime(p.optime)) } } else { if err := iter.Err(); err != nil { log.Println("tail oplog failed:", err) switch err.Error() { case "EOF": p.srcSession = utils.Reconnect(p.src) p.srcSession.SetSocketTimeout(0) // locating oplog may be slow goto Begin case "invalid cursor": goto Begin default: log.Fatalln("unknown error:", err) } } time.Sleep(time.Millisecond * 100) } } if err := iter.Close(); err != nil { log.Fatalln("kill cursor failed:", err) } return nil }