// dumpQueryToWriter takes an mgo Query, its intent, and a writer, performs the query, // and writes the raw bson results to the writer. Returns a final count of documents // dumped, and any errors that occured. func (dump *MongoDump) dumpQueryToWriter( query *mgo.Query, intent *intents.Intent) (int64, error) { var total int var err error if len(dump.query) == 0 { total, err = query.Count() if err != nil { return int64(0), fmt.Errorf("error reading from db: %v", err) } log.Logvf(log.DebugLow, "counted %v %v in %v", total, docPlural(int64(total)), intent.Namespace()) } else { log.Logvf(log.DebugLow, "not counting query on %v", intent.Namespace()) } dumpProgressor := progress.NewCounter(int64(total)) if dump.ProgressManager != nil { dump.ProgressManager.Attach(intent.Namespace(), dumpProgressor) defer dump.ProgressManager.Detach(intent.Namespace()) } err = dump.dumpIterToWriter(query.Iter(), intent.BSONFile, dumpProgressor) _, dumpCount := dumpProgressor.Progress() return dumpCount, err }
// dumpQueryToWriter takes an mgo Query, its intent, and a writer, performs the query, // and writes the raw bson results to the writer. Returns a final count of documents // dumped, and any errors that occured. func (dump *MongoDump) dumpQueryToWriter( query *mgo.Query, intent *intents.Intent) (int64, error) { total, err := query.Count() if err != nil { return int64(0), fmt.Errorf("error reading from db: %v", err) } log.Logf(log.Info, "\tcounted %v %v in %v", total, docPlural(int64(total)), intent.Namespace()) dumpProgressor := progress.NewCounter(int64(total)) bar := &progress.Bar{ Name: intent.Namespace(), Watching: dumpProgressor, BarLength: progressBarLength, } if dump.ProgressManager != nil { dump.ProgressManager.Attach(bar) defer dump.ProgressManager.Detach(bar) } err = dump.dumpIterToWriter(query.Iter(), intent.BSONFile, dumpProgressor) _, dumpCount := dumpProgressor.Progress() return dumpCount, err }
// dumpQueryToIntent takes an mgo Query, its intent, and a writer, performs the query, // and writes the raw bson results to the writer. Returns a final count of documents // dumped, and any errors that occured. func (dump *MongoDump) dumpQueryToIntent( query *mgo.Query, intent *intents.Intent, buffer resettableOutputBuffer) (dumpCount int64, err error) { // restore of views from archives require an empty collection as the trigger to create the view // so, we open here before the early return if IsView so that we write an empty collection to the archive err = intent.BSONFile.Open() if err != nil { return 0, err } defer func() { closeErr := intent.BSONFile.Close() if err == nil && closeErr != nil { err = fmt.Errorf("error writing data for collection `%v` to disk: %v", intent.Namespace(), closeErr) } }() // don't dump any data for views being dumped as views if intent.IsView() && !dump.OutputOptions.ViewsAsCollections { return 0, nil } var total int if len(dump.query) == 0 { total, err = query.Count() if err != nil { return int64(0), fmt.Errorf("error reading from db: %v", err) } log.Logvf(log.DebugLow, "counted %v %v in %v", total, docPlural(int64(total)), intent.Namespace()) } else { log.Logvf(log.DebugLow, "not counting query on %v", intent.Namespace()) } dumpProgressor := progress.NewCounter(int64(total)) if dump.ProgressManager != nil { dump.ProgressManager.Attach(intent.Namespace(), dumpProgressor) defer dump.ProgressManager.Detach(intent.Namespace()) } var f io.Writer f = intent.BSONFile if buffer != nil { buffer.Reset(f) f = buffer defer func() { closeErr := buffer.Close() if err == nil && closeErr != nil { err = fmt.Errorf("error writing data for collection `%v` to disk: %v", intent.Namespace(), closeErr) } }() } err = dump.dumpIterToWriter(query.Iter(), f, dumpProgressor) dumpCount, _ = dumpProgressor.Progress() if err != nil { err = fmt.Errorf("error writing data for collection `%v` to disk: %v", intent.Namespace(), err) } return }
// dumpQueryToWriter takes an mgo Query, its intent, and a writer, performs the query, // and writes the raw bson results to the writer. func (dump *MongoDump) dumpQueryToWriter( query *mgo.Query, intent *intents.Intent) (err error) { total, err := query.Count() if err != nil { return fmt.Errorf("error reading from db: %v", err) } log.Logf(log.Info, "\t%v documents", total) dumpProgressor := progress.NewCounter(int64(total)) bar := &progress.Bar{ Name: intent.Namespace(), Watching: dumpProgressor, BarLength: progressBarLength, } dump.progressManager.Attach(bar) defer dump.progressManager.Detach(bar) return dump.dumpIterToWriter(query.Iter(), intent.BSONFile, dumpProgressor) }
// Internal function that handles exporting to the given writer. Used primarily // for testing, because it bypasses writing to the file system. func (exp *MongoExport) exportInternal(out io.Writer) (int64, error) { max, err := exp.getCount() if err != nil { return 0, err } progressManager := progress.NewProgressBarManager(log.Writer(0), progressBarWaitTime) progressManager.Start() defer progressManager.Stop() watchProgressor := progress.NewCounter(int64(max)) bar := &progress.Bar{ Name: fmt.Sprintf("%v.%v", exp.ToolOptions.Namespace.DB, exp.ToolOptions.Namespace.Collection), Watching: watchProgressor, BarLength: progressBarLength, } progressManager.Attach(bar) defer progressManager.Detach(bar) exportOutput, err := exp.getExportOutput(out) if err != nil { return 0, err } cursor, session, err := exp.getCursor() if err != nil { return 0, err } defer session.Close() defer cursor.Close() connURL := exp.ToolOptions.Host if connURL == "" { connURL = util.DefaultHost } if exp.ToolOptions.Port != "" { connURL = connURL + ":" + exp.ToolOptions.Port } log.Logf(log.Always, "connected to: %v", connURL) // Write headers err = exportOutput.WriteHeader() if err != nil { return 0, err } var result bson.M docsCount := int64(0) // Write document content for cursor.Next(&result) { err := exportOutput.ExportDocument(result) if err != nil { return docsCount, err } docsCount++ if docsCount%watchProgressorUpdateFrequency == 0 { watchProgressor.Set(docsCount) } } watchProgressor.Set(docsCount) if err := cursor.Err(); err != nil { return docsCount, err } // Write footers err = exportOutput.WriteFooter() if err != nil { return docsCount, err } exportOutput.Flush() return docsCount, nil }
// RestoreCollectionToDB pipes the given BSON data into the database. // Returns the number of documents restored and any errors that occured. func (restore *MongoRestore) RestoreCollectionToDB(dbName, colName string, bsonSource *db.DecodedBSONSource, file PosReader, fileSize int64) (int64, error) { var termErr error session, err := restore.SessionProvider.GetSession() if err != nil { return int64(0), fmt.Errorf("error establishing connection: %v", err) } session.SetSafe(restore.safety) defer session.Close() collection := session.DB(dbName).C(colName) documentCount := int64(0) watchProgressor := progress.NewCounter(fileSize) if restore.ProgressManager != nil { name := fmt.Sprintf("%v.%v", dbName, colName) restore.ProgressManager.Attach(name, watchProgressor) defer restore.ProgressManager.Detach(name) } maxInsertWorkers := restore.OutputOptions.NumInsertionWorkers if restore.OutputOptions.MaintainInsertionOrder { maxInsertWorkers = 1 } docChan := make(chan bson.Raw, insertBufferFactor) resultChan := make(chan error, maxInsertWorkers) // stream documents for this collection on docChan go func() { doc := bson.Raw{} for bsonSource.Next(&doc) { select { case <-restore.termChan: log.Logvf(log.Always, "terminating read on %v.%v", dbName, colName) termErr = util.ErrTerminated close(docChan) return default: rawBytes := make([]byte, len(doc.Data)) copy(rawBytes, doc.Data) docChan <- bson.Raw{Data: rawBytes} documentCount++ } } close(docChan) }() log.Logvf(log.DebugLow, "using %v insertion workers", maxInsertWorkers) for i := 0; i < maxInsertWorkers; i++ { go func() { // get a session copy for each insert worker s := session.Copy() defer s.Close() coll := collection.With(s) bulk := db.NewBufferedBulkInserter( coll, restore.OutputOptions.BulkBufferSize, !restore.OutputOptions.StopOnError) for rawDoc := range docChan { if restore.objCheck { err := bson.Unmarshal(rawDoc.Data, &bson.D{}) if err != nil { resultChan <- fmt.Errorf("invalid object: %v", err) return } } if err := bulk.Insert(rawDoc); err != nil { if db.IsConnectionError(err) || restore.OutputOptions.StopOnError { // Propagate this error, since it's either a fatal connection error // or the user has turned on --stopOnError resultChan <- err } else { // Otherwise just log the error but don't propagate it. log.Logvf(log.Always, "error: %v", err) } } watchProgressor.Set(file.Pos()) } err := bulk.Flush() if err != nil { if !db.IsConnectionError(err) && !restore.OutputOptions.StopOnError { // Suppress this error since it's not a severe connection error and // the user has not specified --stopOnError log.Logvf(log.Always, "error: %v", err) err = nil } } resultChan <- err return }() // sleep to prevent all threads from inserting at the same time at start time.Sleep(time.Duration(i) * 10 * time.Millisecond) } // wait until all insert jobs finish for done := 0; done < maxInsertWorkers; done++ { err := <-resultChan if err != nil { return int64(0), fmt.Errorf("insertion error: %v", err) } } // final error check if err = bsonSource.Err(); err != nil { return int64(0), fmt.Errorf("reading bson input: %v", err) } return documentCount, termErr }
// DumpIntent dumps the specified database's collection. func (dump *MongoDump) DumpIntent(intent *intents.Intent) error { session, err := dump.sessionProvider.GetSession() if err != nil { return err } defer session.Close() // in mgo, setting prefetch = 1.0 causes the driver to make requests for // more results as soon as results are returned. This effectively // duplicates the behavior of an exhaust cursor. session.SetPrefetch(1.0) err = intent.BSONFile.Open() if err != nil { return err } defer intent.BSONFile.Close() var findQuery *mgo.Query switch { case len(dump.query) > 0: findQuery = session.DB(intent.DB).C(intent.C).Find(dump.query) case dump.InputOptions.TableScan: // ---forceTablesScan runs the query without snapshot enabled findQuery = session.DB(intent.DB).C(intent.C).Find(nil) default: findQuery = session.DB(intent.DB).C(intent.C).Find(nil).Snapshot() } var dumpCount int64 if dump.OutputOptions.Out == "-" { log.Logvf(log.Always, "writing %v to stdout", intent.Namespace()) dumpCount, err = dump.dumpQueryToWriter(findQuery, intent) if err == nil { // on success, print the document count log.Logvf(log.Always, "dumped %v %v", dumpCount, docPlural(dumpCount)) } return err } // set where the intent will be written to if dump.OutputOptions.Archive != "" { if dump.OutputOptions.Archive == "-" { intent.Location = "archive on stdout" } else { intent.Location = fmt.Sprintf("archive '%v'", dump.OutputOptions.Archive) } } if !dump.OutputOptions.Repair { log.Logvf(log.Always, "writing %v to %v", intent.Namespace(), intent.Location) if dumpCount, err = dump.dumpQueryToWriter(findQuery, intent); err != nil { return err } } else { // handle repairs as a special case, since we cannot count them log.Logvf(log.Always, "writing repair of %v to %v", intent.Namespace(), intent.Location) repairIter := session.DB(intent.DB).C(intent.C).Repair() repairCounter := progress.NewCounter(1) // this counter is ignored if err := dump.dumpIterToWriter(repairIter, intent.BSONFile, repairCounter); err != nil { return fmt.Errorf("repair error: %v", err) } _, repairCount := repairCounter.Progress() log.Logvf(log.Always, "\trepair cursor found %v %v in %v", repairCount, docPlural(repairCount), intent.Namespace()) } log.Logvf(log.Always, "done dumping %v (%v %v)", intent.Namespace(), dumpCount, docPlural(dumpCount)) return nil }
// RestoreOplog attempts to restore a MongoDB oplog. func (restore *MongoRestore) RestoreOplog() error { log.Logv(log.Always, "replaying oplog") intent := restore.manager.Oplog() if intent == nil { // this should not be reached log.Logv(log.Always, "no oplog file provided, skipping oplog application") return nil } if err := intent.BSONFile.Open(); err != nil { return err } defer intent.BSONFile.Close() // NewBufferlessBSONSource reads each bson document into its own buffer // because bson.Unmarshal currently can't unmarshal binary types without // them referencing the source buffer bsonSource := db.NewDecodedBSONSource(db.NewBufferlessBSONSource(intent.BSONFile)) defer bsonSource.Close() rawOplogEntry := &bson.Raw{} var totalOps int64 var entrySize int oplogProgressor := progress.NewCounter(intent.BSONSize) if restore.ProgressManager != nil { restore.ProgressManager.Attach("oplog", oplogProgressor) defer restore.ProgressManager.Detach("oplog") } session, err := restore.SessionProvider.GetSession() if err != nil { return fmt.Errorf("error establishing connection: %v", err) } defer session.Close() for bsonSource.Next(rawOplogEntry) { entrySize = len(rawOplogEntry.Data) entryAsOplog := db.Oplog{} err = bson.Unmarshal(rawOplogEntry.Data, &entryAsOplog) if err != nil { return fmt.Errorf("error reading oplog: %v", err) } if entryAsOplog.Operation == "n" { //skip no-ops continue } if !restore.TimestampBeforeLimit(entryAsOplog.Timestamp) { log.Logvf( log.DebugLow, "timestamp %v is not below limit of %v; ending oplog restoration", entryAsOplog.Timestamp, restore.oplogLimit, ) break } totalOps++ oplogProgressor.Inc(int64(entrySize)) err = restore.ApplyOps(session, []interface{}{entryAsOplog}) if err != nil { return fmt.Errorf("error applying oplog: %v", err) } } log.Logvf(log.Info, "applied %v ops", totalOps) return nil }
// RestoreOplog attempts to restore a MongoDB oplog. func (restore *MongoRestore) RestoreOplog() error { log.Log(log.Always, "replaying oplog") intent := restore.manager.Oplog() if intent == nil { // this should not be reached log.Log(log.Always, "no oplog.bson file in root of the dump directory, skipping oplog application") return nil } if err := intent.BSONFile.Open(); err != nil { return err } defer intent.BSONFile.Close() bsonSource := db.NewDecodedBSONSource(db.NewBSONSource(intent.BSONFile)) defer bsonSource.Close() entryArray := make([]interface{}, 0, 1024) rawOplogEntry := &bson.Raw{} var totalOps int64 var entrySize, bufferedBytes int oplogProgressor := progress.NewCounter(intent.BSONSize) bar := progress.Bar{ Name: "oplog", Watching: oplogProgressor, WaitTime: 3 * time.Second, Writer: log.Writer(0), BarLength: progressBarLength, IsBytes: true, } bar.Start() defer bar.Stop() session, err := restore.SessionProvider.GetSession() if err != nil { return fmt.Errorf("error establishing connection: %v", err) } defer session.Close() // To restore the oplog, we iterate over the oplog entries, // filling up a buffer. Once the buffer reaches max document size, // apply the current buffered ops and reset the buffer. for bsonSource.Next(rawOplogEntry) { entrySize = len(rawOplogEntry.Data) if bufferedBytes+entrySize > oplogMaxCommandSize { err = restore.ApplyOps(session, entryArray) if err != nil { return fmt.Errorf("error applying oplog: %v", err) } entryArray = make([]interface{}, 0, 1024) bufferedBytes = 0 } entryAsOplog := db.Oplog{} err = bson.Unmarshal(rawOplogEntry.Data, &entryAsOplog) if err != nil { return fmt.Errorf("error reading oplog: %v", err) } if entryAsOplog.Operation == "n" { //skip no-ops continue } if !restore.TimestampBeforeLimit(entryAsOplog.Timestamp) { log.Logf( log.DebugLow, "timestamp %v is not below limit of %v; ending oplog restoration", entryAsOplog.Timestamp, restore.oplogLimit, ) break } totalOps++ bufferedBytes += entrySize oplogProgressor.Inc(int64(entrySize)) entryArray = append(entryArray, entryAsOplog) } // finally, flush the remaining entries if len(entryArray) > 0 { err = restore.ApplyOps(session, entryArray) if err != nil { return fmt.Errorf("error applying oplog: %v", err) } } log.Logf(log.Info, "applied %v ops", totalOps) return nil }
// RestoreOplog attempts to restore a MongoDB oplog. func (restore *MongoRestore) RestoreOplog() error { log.Logv(log.Always, "replaying oplog") intent := restore.manager.Oplog() if intent == nil { // this should not be reached log.Logv(log.Always, "no oplog file provided, skipping oplog application") return nil } if err := intent.BSONFile.Open(); err != nil { return err } defer intent.BSONFile.Close() // NewBufferlessBSONSource reads each bson document into its own buffer // because bson.Unmarshal currently can't unmarshal binary types without // them referencing the source buffer bsonSource := db.NewDecodedBSONSource(db.NewBufferlessBSONSource(intent.BSONFile)) defer bsonSource.Close() entryArray := make([]interface{}, 0, 1024) rawOplogEntry := &bson.Raw{} var totalOps int64 var entrySize, bufferedBytes int oplogProgressor := progress.NewCounter(intent.BSONSize) if restore.ProgressManager != nil { restore.ProgressManager.Attach("oplog", oplogProgressor) defer restore.ProgressManager.Detach("oplog") } session, err := restore.SessionProvider.GetSession() if err != nil { return fmt.Errorf("error establishing connection: %v", err) } defer session.Close() // To restore the oplog, we iterate over the oplog entries, // filling up a buffer. Once the buffer reaches max document size, // apply the current buffered ops and reset the buffer. for bsonSource.Next(rawOplogEntry) { entrySize = len(rawOplogEntry.Data) if bufferedBytes+entrySize > oplogMaxCommandSize { err = restore.ApplyOps(session, entryArray) if err != nil { return fmt.Errorf("error applying oplog: %v", err) } entryArray = make([]interface{}, 0, 1024) bufferedBytes = 0 } entryAsOplog := db.Oplog{} err = bson.Unmarshal(rawOplogEntry.Data, &entryAsOplog) if err != nil { return fmt.Errorf("error reading oplog: %v", err) } if entryAsOplog.Operation == "n" { //skip no-ops continue } if !restore.TimestampBeforeLimit(entryAsOplog.Timestamp) { log.Logvf( log.DebugLow, "timestamp %v is not below limit of %v; ending oplog restoration", entryAsOplog.Timestamp, restore.oplogLimit, ) break } totalOps++ bufferedBytes += entrySize oplogProgressor.Inc(int64(entrySize)) entryArray = append(entryArray, entryAsOplog) } // finally, flush the remaining entries if len(entryArray) > 0 { err = restore.ApplyOps(session, entryArray) if err != nil { return fmt.Errorf("error applying oplog: %v", err) } } log.Logvf(log.Info, "applied %v ops", totalOps) return nil }