Beispiel #1
0
// NewSynchronizer connects to the source (config.From) and destination
// (config.To) servers and determines the optime to start syncing from.
//
// The start optime is config.StartOptime shifted into the upper 32 bits when
// it is positive; otherwise it is read from the source via utils.GetOptime.
//
// It returns nil (after logging the cause) if either connection or the optime
// lookup fails. Sessions opened before the failure are closed so that a
// failed construction does not leak connections.
func NewSynchronizer(config Config) *Synchronizer {
	p := new(Synchronizer)
	p.config = config

	src, err := mgo.DialWithTimeout(p.config.From, time.Second*3)
	if err != nil {
		log.Println(err, p.config.From)
		return nil
	}
	p.srcSession = src
	p.srcSession.SetSocketTimeout(0)
	p.srcSession.SetSyncTimeout(0)
	p.srcSession.SetMode(mgo.Strong, false) // always read from primary
	p.srcSession.SetCursorTimeout(0)
	log.Printf("connected to %s\n", p.config.From)

	dst, err := mgo.DialWithTimeout(p.config.To, time.Second*3)
	if err != nil {
		log.Println(err, p.config.To)
		p.srcSession.Close() // do not leak the source connection
		return nil
	}
	p.dstSession = dst
	p.dstSession.SetSocketTimeout(0)
	p.dstSession.SetSyncTimeout(0)
	p.dstSession.SetSafe(&mgo.Safe{W: 1})
	p.dstSession.SetMode(mgo.Eventual, false)
	log.Printf("connected to %s\n", p.config.To)

	if p.config.StartOptime > 0 {
		// The configured value occupies the upper 32 bits; the lower 32 bits
		// are left at zero.
		p.optime = bson.MongoTimestamp(int64(p.config.StartOptime) << 32)
	} else {
		optime, err := utils.GetOptime(p.srcSession)
		if err != nil {
			log.Println(err)
			// Nothing will ever use these sessions; release both.
			p.dstSession.Close()
			p.srcSession.Close()
			return nil
		}
		p.optime = optime
	}
	log.Printf("optime is %v %v\n", utils.GetTimestampFromOptime(p.optime), utils.GetTimeFromOptime(p.optime))
	return p
}
// Run tails local.oplog.rs on the source session starting at p.optime and
// dispatches every entry to the workers.
//
// Dispatch rules:
//   - "c" (command): wait until every worker queue is empty, push the entry to
//     worker 0, then wait until worker 0 has drained, so commands execute in
//     order relative to all earlier operations.
//   - "i"/"u"/"d": hash the document's _id (CRC32 of its BSON encoding) to
//     pick a worker, so operations on the same document stay on one worker.
//   - anything else is counted but not dispatched.
//
// Every 1000 entries p.optime is refreshed from the workers and progress is
// logged. On "EOF" the source session is re-established and tailing restarts;
// on "invalid cursor" tailing restarts on the same session. Any other iterator
// error is fatal.
//
// The loop never exits normally, so the declared error result is never
// produced; the function only leaves via log.Fatal*.
func (p *OplogReplayer) Run() error {
	log.Printf("locating oplog at %v\n", utils.GetTimestampFromOptime(p.optime))
Begin:
	iter := p.srcSession.DB("local").C("oplog.rs").Find(bson.M{"ts": bson.M{"$gte": p.optime}}).Tail(-1)
	n := 0
	oplogValid := false

	// A zero low 32-bit part is taken to mean the optime was supplied by the
	// user rather than read from an actual oplog entry, so there is no exact
	// entry to match against.
	if inc := int64(p.optime) << 32 >> 32; inc == 0 {
		oplogValid = true
		log.Println("start optime specified by user, skip verification")
	}

	for {
		var oplog bson.M
		if !iter.Next(&oplog) {
			err := iter.Err()
			if err == nil {
				// Nothing available and no error: back off briefly and poll again.
				time.Sleep(time.Millisecond * 100)
				continue
			}
			log.Println("tail oplog failed:", err)
			switch err.Error() {
			case "EOF":
				// Release the dead iterator before replacing it; otherwise each
				// retry abandons a cursor. The iterator is already in an error
				// state (logged above), so the result of Close is not useful.
				_ = iter.Close()
				// NOTE(review): the previous p.srcSession is replaced without
				// being closed here — confirm whether utils.Reconnect or
				// another owner releases it.
				p.srcSession = utils.Reconnect(p.src)
				p.srcSession.SetSocketTimeout(0) // locating oplog may be slow
				goto Begin
			case "invalid cursor":
				// Same reasoning as above: drop the stale iterator before re-tailing.
				_ = iter.Close()
				goto Begin
			default:
				log.Fatalln("unknown error:", err)
			}
		}

		if !oplogValid {
			// The first entry returned must be exactly the one we resume from;
			// otherwise the oplog has rolled over past our position. The
			// comma-ok assertion keeps a missing or oddly-typed "ts" from
			// panicking before the fatal message is printed (p.optime is
			// non-zero on this path, so a zero ts never matches).
			ts, _ := oplog["ts"].(bson.MongoTimestamp)
			if ts != p.optime {
				log.Fatalf("oplog is stale, except %v, current %v\n",
					utils.GetTimestampFromOptime(p.optime),
					utils.GetTimestampFromOptime(ts))
			}
			oplogValid = true
			log.Println("oplog is OK")
			// The matching entry itself is skipped, not dispatched.
			continue
		}

		// A **COMMAND** must not execute until all previous operations are
		// done, to guarantee ordering.
		// worker-0 is the master goroutine; all commands are sent to it.
		// INSERT/UPDATE/DELETE are hashed to different workers.
		switch oplog["op"] {
		case "c":
			// wait for all previous operations to finish
			for {
				ready := true
				for i := 0; i < p.nWorkers; i++ {
					if p.workers[i].Qsize() > 0 {
						ready = false
						break
					}
				}
				if ready {
					break
				}
				time.Sleep(time.Millisecond * 10) // sleep 10ms
			}
			p.workers[0].Push(oplog)
			// wait for the command itself to finish
			for p.workers[0].Qsize() != 0 {
				time.Sleep(time.Millisecond * 10) // sleep 10ms
			}
		case "i", "u", "d":
			oid, err := utils.GetObjectIdFromOplog(oplog)
			if err != nil {
				log.Fatalln("FATAL GetObjectIdFromOplog", err)
			}
			key, err := bson.Marshal(bson.M{"_id": oid})
			if err != nil {
				log.Fatalln("FATAL oid to bytes", err)
			}
			wid := crc32.ChecksumIEEE(key) % uint32(p.nWorkers)
			p.workers[wid].Push(oplog)
		}

		n++
		if n%1000 == 0 {
			// get the optime of the latest oplog entry that has been replayed
			// NOTE(review): this takes the maximum across workers; a slower
			// worker may still be behind that point, so restarting the tail
			// from it could skip that worker's pending entries — confirm this
			// is intended (the commented-out initialiser below suggests a
			// minimum was once considered).
			var optime bson.MongoTimestamp = 0
			//optime = (1 << 63) - 1
			for i := 0; i < p.nWorkers; i++ {
				ts := p.workers[i].Optime()
				if optime < ts {
					optime = ts
				}
			}
			p.optime = optime // set current optime
			log.Printf("\t%d replayed, %d secs delay, sync to %v %v",
				n,
				time.Now().Unix()-utils.GetTimeFromOptime(p.optime).Unix(),
				utils.GetTimeFromOptime(p.optime),
				utils.GetTimestampFromOptime(p.optime))
		}
	}
}