// BuildWriteConcern takes a string and a NodeType indicating the type of node the write concern
// is intended to be used against, and converts the write concern string argument into an
// mgo.Safe object that's usable on sessions for that node type.
func BuildWriteConcern(writeConcern string, nodeType NodeType) (*mgo.Safe, error) {
	sessionSafety, err := constructWCObject(writeConcern)
	if err != nil {
		return nil, err
	}
	if sessionSafety == nil {
		log.Logf(log.DebugLow, "using unacknowledged write concern")
		return nil, nil
	}

	// for standalone mongods, set the default write concern to 1
	if nodeType == Standalone {
		log.Logf(log.DebugLow, "standalone server: setting write concern %v to 1", w)
		sessionSafety.W = 1
		sessionSafety.WMode = ""
	}

	var writeConcernStr interface{}
	if sessionSafety.WMode != "" {
		writeConcernStr = sessionSafety.WMode
	} else {
		writeConcernStr = sessionSafety.W
	}
	log.Logf(log.Info, "using write concern: %v='%v', %v=%v, %v=%v, %v=%v",
		w, writeConcernStr,
		j, sessionSafety.J,
		fSync, sessionSafety.FSync,
		wTimeout, sessionSafety.WTimeout,
	)
	return sessionSafety, nil
}
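// A minimal usage sketch (not part of the original source): applying the
// result of BuildWriteConcern to an mgo session. The "majority" string and
// the surrounding function are illustrative assumptions; Standalone is one of
// the NodeType constants referenced above, and a nil *mgo.Safe signals an
// unacknowledged write concern.
func exampleApplyWriteConcern(session *mgo.Session) error {
	safety, err := BuildWriteConcern("majority", Standalone)
	if err != nil {
		return err
	}
	session.SetSafe(safety) // nil disables write acknowledgement
	return nil
}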
// handleBSONInsteadOfDirectory updates -d and -c settings based on
// the path to the BSON file passed to mongorestore. This is only
// applicable if the target path points to a .bson file.
//
// As an example, when the user passes 'dump/mydb/col.bson', this method
// will infer that 'mydb' is the database and 'col' is the collection name.
func (restore *MongoRestore) handleBSONInsteadOfDirectory(path string) error {
	// we know we have been given a non-directory, so we should handle it
	// like a bson file and infer as much as we can
	if restore.ToolOptions.Collection == "" {
		// if the user did not set -c, use the file name for the collection
		newCollectionName, fileType := restore.getInfoFromFilename(path)
		if fileType != BSONFileType {
			return fmt.Errorf("file %v does not have .bson extension", path)
		}
		restore.ToolOptions.Collection = newCollectionName
		log.Logf(log.DebugLow, "inferred collection '%v' from file", restore.ToolOptions.Collection)
	}
	if restore.ToolOptions.DB == "" {
		// if the user did not set -d, use the directory containing the target
		// file as the db name (as it would be in a dump directory). If
		// we cannot determine the directory name, use "test"
		dirForFile := filepath.Base(filepath.Dir(path))
		if dirForFile == "." || dirForFile == ".." {
			dirForFile = "test"
		}
		restore.ToolOptions.DB = dirForFile
		log.Logf(log.DebugLow, "inferred db '%v' from the file's directory", restore.ToolOptions.DB)
	}
	return nil
}
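// A hedged, standalone sketch of the same inference for reference. It assumes
// only the standard path/filepath and strings packages; the helper itself is
// hypothetical and not part of mongorestore, but the "test" fallback mirrors
// the method above.
func inferDBAndCollection(path string) (db, collection string) {
	collection = strings.TrimSuffix(filepath.Base(path), ".bson") // "dump/mydb/col.bson" -> "col"
	db = filepath.Base(filepath.Dir(path))                        // "dump/mydb/col.bson" -> "mydb"
	if db == "." || db == ".." {
		db = "test"
	}
	return db, collection
}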
// dumpQueryToWriter takes an mgo Query and its intent, performs the query,
// and writes the raw bson results to the intent's BSON file. Returns a final
// count of documents dumped, and any errors that occurred.
func (dump *MongoDump) dumpQueryToWriter(
	query *mgo.Query, intent *intents.Intent) (int64, error) {
	var total int
	var err error
	if len(dump.query) == 0 {
		total, err = query.Count()
		if err != nil {
			return int64(0), fmt.Errorf("error reading from db: %v", err)
		}
		log.Logf(log.DebugLow, "counted %v %v in %v", total, docPlural(int64(total)), intent.Namespace())
	} else {
		log.Logf(log.DebugLow, "not counting query on %v", intent.Namespace())
	}

	dumpProgressor := progress.NewCounter(int64(total))
	bar := &progress.Bar{
		Name:      intent.Namespace(),
		Watching:  dumpProgressor,
		BarLength: progressBarLength,
	}
	dump.progressManager.Attach(bar)
	defer dump.progressManager.Detach(bar)

	err = dump.dumpIterToWriter(query.Iter(), intent.BSONFile, dumpProgressor)
	_, dumpCount := dumpProgressor.Progress()

	return dumpCount, err
}
// determineOplogCollectionName uses a command to infer
// the name of the oplog collection in the connected db
func (dump *MongoDump) determineOplogCollectionName() error {
	session := dump.SessionProvider.GetSession()
	masterDoc := bson.M{}
	err := session.Run("isMaster", &masterDoc)
	if err != nil {
		return fmt.Errorf("error running command: %v", err)
	}
	if _, ok := masterDoc["hosts"]; ok {
		log.Logf(2, "determined cluster to be a replica set")
		log.Logf(3, "oplog located in local.oplog.rs")
		dump.oplogCollection = "oplog.rs"
		return nil
	}
	if isMaster := masterDoc["ismaster"]; util.IsFalsy(isMaster) {
		log.Logf(1, "mongodump is not connected to a master")
		return fmt.Errorf("not connected to master")
	}

	// TODO stop assuming master/slave, be smarter and check if it is really
	// master/slave...though to be fair legacy mongodump doesn't do this either...
	log.Logf(2, "not connected to a replica set, assuming master/slave")
	log.Logf(3, "oplog located in local.oplog.$main")
	dump.oplogCollection = "oplog.$main"
	return nil
}
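// For reference, a hedged sketch of the isMaster reply shapes the branches
// above distinguish (field values are illustrative, not exhaustive):
//
//	replica set member: bson.M{"ismaster": true, "hosts": []string{"rs1:27017", "rs2:27017"}}
//	master/slave:       bson.M{"ismaster": true} // no "hosts" key present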
// Dump handles some final options checking and executes MongoDump
func (dump *MongoDump) Dump() error {
	err := dump.ValidateOptions()
	if err != nil {
		return fmt.Errorf("bad option: %v", err)
	}
	if dump.InputOptions.Query != "" {
		// TODO: check for extended json support...
		// gonna need to do some exploring later on, since I'm 95% sure
		// this is undefined in the current tools
		err = json.Unmarshal([]byte(dump.InputOptions.Query), &dump.query)
		if err != nil {
			return fmt.Errorf("error parsing query: %v", err)
		}
	}
	if dump.OutputOptions.Out == "-" {
		dump.useStdout = true
	}

	if dump.OutputOptions.DumpDBUsersAndRoles {
		// first make sure this is possible with the connected database
		dump.authVersion, err = auth.GetAuthVersion(dump.SessionProvider.GetSession())
		if err != nil {
			return fmt.Errorf("error getting auth schema version for dumpDbUsersAndRoles: %v", err)
		}
		log.Logf(2, "using auth schema version %v", dump.authVersion)
		if dump.authVersion != 3 {
			return fmt.Errorf("backing up users and roles is only supported for "+
				"deployments with auth schema version 3; found: %v", dump.authVersion)
		}
	}

	// switch on what kind of execution to do
	switch {
	case dump.ToolOptions.DB == "" && dump.ToolOptions.Collection == "":
		err = dump.DumpEverything()
	case dump.ToolOptions.DB != "" && dump.ToolOptions.Collection == "":
		err = dump.DumpDatabase(dump.ToolOptions.DB)
	case dump.ToolOptions.DB != "" && dump.ToolOptions.Collection != "":
		err = dump.DumpCollection(dump.ToolOptions.DB, dump.ToolOptions.Collection)
	}

	if dump.OutputOptions.DumpDBUsersAndRoles {
		log.Logf(0, "dumping users and roles for %v", dump.ToolOptions.DB)
		if dump.ToolOptions.DB == "admin" {
			log.Logf(0, "skipping users/roles dump, already dumped admin database")
		} else {
			err = dump.DumpUsersAndRolesForDB(dump.ToolOptions.DB)
			if err != nil {
				return fmt.Errorf("error dumping users and roles: %v", err)
			}
		}
	}

	log.Logf(1, "done")
	return err
}
// CreateIntentForCollection builds an intent for the given database and collection name
// along with a path to a .bson collection file. It searches the file's parent directory
// for a matching metadata file.
//
// This method is not called by CreateIntentsForDB,
// it is only used in the case where --db and --collection flags are set.
func (restore *MongoRestore) CreateIntentForCollection(db string, collection string, dir archive.DirLike) error {
	log.Logf(log.DebugLow, "reading collection %v for database %v from %v",
		collection, db, dir.Path())

	// first make sure the bson file exists and is valid
	_, err := dir.Stat()
	if err != nil {
		return err
	}
	if dir.IsDir() {
		return fmt.Errorf("file %v is a directory, not a bson file", dir.Path())
	}

	baseName, fileType := restore.getInfoFromFilename(dir.Name())
	if fileType != BSONFileType {
		return fmt.Errorf("file %v does not have .bson extension", dir.Path())
	}

	// then create its intent
	intent := &intents.Intent{
		DB:       db,
		C:        collection,
		Size:     dir.Size(),
		Location: dir.Path(),
	}
	intent.BSONFile = &realBSONFile{path: dir.Path(), intent: intent, gzip: restore.InputOptions.Gzip}

	// finally, check if it has a .metadata.json file in its folder
	log.Logf(log.DebugLow, "scanning directory %v for metadata", dir.Name())
	entries, err := dir.Parent().ReadDir()
	if err != nil {
		// try and carry on if we can
		log.Logf(log.Info, "error attempting to locate metadata for file: %v", err)
		log.Log(log.Info, "restoring collection without metadata")
		restore.manager.Put(intent)
		return nil
	}
	metadataName := baseName + ".metadata.json"
	if restore.InputOptions.Gzip {
		metadataName += ".gz"
	}
	for _, entry := range entries {
		if entry.Name() == metadataName {
			metadataPath := entry.Path()
			log.Logf(log.Info, "found metadata for collection at %v", metadataPath)
			intent.MetadataLocation = metadataPath
			intent.MetadataFile = &realMetadataFile{path: metadataPath, intent: intent, gzip: restore.InputOptions.Gzip}
			break
		}
	}
	if intent.MetadataFile == nil {
		log.Log(log.Info, "restoring collection without metadata")
	}

	restore.manager.Put(intent)
	return nil
}
// Run creates and runs a parser with the Demultiplexer as a consumer
func (demux *Demultiplexer) Run() error {
	parser := Parser{In: demux.In}
	err := parser.ReadAllBlocks(demux)
	if len(demux.outs) > 0 {
		log.Logf(log.Always, "demux finishing when there are still outs (%v)", len(demux.outs))
	}
	log.Logf(log.DebugLow, "demux finishing (err:%v)", err)
	return err
}
// RestoreIntents iterates through all of the intents stored in the IntentManager, and restores them.
func (restore *MongoRestore) RestoreIntents() error {
	// start up the progress bar manager
	restore.progressManager = progress.NewProgressBarManager(log.Writer(0), progressBarWaitTime)
	restore.progressManager.Start()
	defer restore.progressManager.Stop()

	log.Logf(log.DebugLow, "restoring up to %v collections in parallel", restore.OutputOptions.NumParallelCollections)

	if restore.OutputOptions.NumParallelCollections > 0 {
		resultChan := make(chan error)

		// start a goroutine for each job thread
		for i := 0; i < restore.OutputOptions.NumParallelCollections; i++ {
			go func(id int) {
				log.Logf(log.DebugHigh, "starting restore routine with id=%v", id)
				for {
					intent := restore.manager.Pop()
					if intent == nil {
						log.Logf(log.DebugHigh, "ending restore routine with id=%v, no more work to do", id)
						resultChan <- nil // done
						return
					}
					err := restore.RestoreIntent(intent)
					if err != nil {
						resultChan <- fmt.Errorf("%v: %v", intent.Namespace(), err)
						return
					}
					restore.manager.Finish(intent)
				}
			}(i)
		}

		// wait until all goroutines are done or one of them errors out
		for i := 0; i < restore.OutputOptions.NumParallelCollections; i++ {
			if err := <-resultChan; err != nil {
				return err
			}
		}
		return nil
	}

	// single-threaded
	for {
		intent := restore.manager.Pop()
		if intent == nil {
			return nil
		}
		err := restore.RestoreIntent(intent)
		if err != nil {
			return fmt.Errorf("%v: %v", intent.Namespace(), err)
		}
		restore.manager.Finish(intent)
	}
}
// validateReaderFields is a helper to validate fields for input readers
func validateReaderFields(fields []string) error {
	if err := validateFields(fields); err != nil {
		return err
	}
	if len(fields) == 1 {
		log.Logf(log.Info, "using field: %v", fields[0])
	} else {
		log.Logf(log.Info, "using fields: %v", strings.Join(fields, ","))
	}
	return nil
}
// handle logic for 'put' command.
func (mf *MongoFiles) handlePut(gfs *mgo.GridFS) (output string, err error) {
	localFileName := mf.getLocalFileName(nil)

	// check if --replace flag turned on
	if mf.StorageOptions.Replace {
		err := gfs.Remove(mf.FileName)
		if err != nil {
			return "", err
		}
		output = fmt.Sprintf("removed all instances of '%v' from GridFS\n", mf.FileName)
	}

	var localFile io.ReadCloser

	if localFileName == "-" {
		localFile = os.Stdin
	} else {
		localFile, err = os.Open(localFileName)
		if err != nil {
			return "", fmt.Errorf("error while opening local file '%v' : %v\n", localFileName, err)
		}
		defer localFile.Close()
		log.Logf(log.DebugLow, "creating GridFS file '%v' from local file '%v'", mf.FileName, localFileName)
	}

	gFile, err := gfs.Create(mf.FileName)
	if err != nil {
		return "", fmt.Errorf("error while creating '%v' in GridFS: %v\n", mf.FileName, err)
	}
	defer func() {
		// GridFS files flush a buffer on Close(), so it's important we
		// capture any errors that occur as this function exits and
		// overwrite the error if earlier writes executed successfully
		if closeErr := gFile.Close(); err == nil && closeErr != nil {
			log.Logf(log.DebugHigh, "error occurred while closing GridFS file handler")
			err = fmt.Errorf("error while storing '%v' into GridFS: %v\n", localFileName, closeErr)
		}
	}()

	// set optional mime type
	if mf.StorageOptions.ContentType != "" {
		gFile.SetContentType(mf.StorageOptions.ContentType)
	}

	n, err := io.Copy(gFile, localFile)
	if err != nil {
		return "", fmt.Errorf("error while storing '%v' into GridFS: %v\n", localFileName, err)
	}
	log.Logf(log.DebugLow, "copied %v bytes to server", n)

	output += fmt.Sprintf("added file: %v\n", gFile.Name())
	return output, nil
}
// DumpCollection dumps the specified database's collection
func (dump *MongoDump) DumpCollection(db, c string) error {
	session := dump.SessionProvider.GetSession()

	// in mgo, setting prefetch = 1.0 causes the driver to make requests for
	// more results as soon as results are returned. This effectively
	// duplicates the behavior of an exhaust cursor.
	session.SetPrefetch(1.0)

	var findQuery *mgo.Query
	switch {
	case len(dump.query) > 0:
		findQuery = session.DB(db).C(c).Find(dump.query)
	case dump.InputOptions.TableScan:
		// --forceTableScan runs the query without snapshot enabled
		findQuery = session.DB(db).C(c).Find(nil)
	default:
		findQuery = session.DB(db).C(c).Find(nil).Snapshot()
	}

	if dump.useStdout {
		log.Logf(0, "writing %v.%v to stdout", db, c)
		return dump.dumpQueryToWriter(findQuery, os.Stdout)
	}

	dbFolder := filepath.Join(dump.OutputOptions.Out, db)
	err := os.MkdirAll(dbFolder, 0755)
	if err != nil {
		return fmt.Errorf("error creating directory `%v`: %v", dbFolder, err)
	}

	outFilepath := filepath.Join(dbFolder, fmt.Sprintf("%v.bson", c))
	out, err := os.Create(outFilepath)
	if err != nil {
		return fmt.Errorf("error creating bson file `%v`: %v", outFilepath, err)
	}
	defer out.Close()

	log.Logf(0, "writing %v.%v to %v", db, c, outFilepath)
	err = dump.dumpQueryToWriter(findQuery, out)
	if err != nil {
		return err
	}

	metadataFilepath := filepath.Join(dbFolder, fmt.Sprintf("%v.metadata.json", c))
	metaOut, err := os.Create(metadataFilepath)
	if err != nil {
		return fmt.Errorf("error creating metadata.json file `%v`: %v", metadataFilepath, err)
	}
	defer metaOut.Close()

	log.Logf(0, "writing %v.%v metadata to %v", db, c, metadataFilepath)
	return dump.dumpMetadataToWriter(db, c, metaOut)
}
// Convert implements the Converter interface for JSON input. It converts a
// JSONConverter struct to a BSON document.
func (c JSONConverter) Convert() (bson.D, error) {
	document, err := json.UnmarshalBsonD(c.data)
	if err != nil {
		return nil, fmt.Errorf("error unmarshaling bytes on document #%v: %v", c.index, err)
	}
	log.Logf(log.DebugHigh, "got line: %v", document)

	bsonD, err := bsonutil.GetExtendedBsonD(document)
	if err != nil {
		return nil, fmt.Errorf("error getting extended BSON for document #%v: %v", c.index, err)
	}
	log.Logf(log.DebugHigh, "got extended line: %#v", bsonD)
	return bsonD, nil
}
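// Hypothetical usage sketch (not from the original source): converting a
// single line of extended-JSON input from within the same package. The
// literal JSON value is an assumption; data and index are the struct fields
// Convert reads above.
func exampleConvertLine() (bson.D, error) {
	conv := JSONConverter{
		data:  []byte(`{"_id": {"$oid": "53e68a2b60a45d23b2e2a73f"}, "name": "example"}`),
		index: 0,
	}
	return conv.Convert()
}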
// HeaderBSON is part of the ParserConsumer interface and receives headers from parser.
// Its main role is to implement opens and EOFs of the embedded stream.
func (demux *Demultiplexer) HeaderBSON(buf []byte) error {
	colHeader := NamespaceHeader{}
	err := bson.Unmarshal(buf, &colHeader)
	if err != nil {
		return newWrappedError("header bson doesn't unmarshal as a collection header", err)
	}
	log.Logf(log.DebugHigh, "demux namespaceHeader: %v", colHeader)
	if colHeader.Collection == "" {
		return newError("collection header is missing a Collection")
	}
	demux.currentNamespace = colHeader.Database + "." + colHeader.Collection
	if _, ok := demux.outs[demux.currentNamespace]; !ok {
		if demux.NamespaceChan != nil {
			demux.NamespaceChan <- demux.currentNamespace
			err := <-demux.NamespaceErrorChan
			if err == io.EOF {
				// if the Prioritizer sends us back an io.EOF then it's telling us that
				// it's finishing and doesn't need any more namespace announcements.
				close(demux.NamespaceChan)
				demux.NamespaceChan = nil
				return nil
			}
			if err != nil {
				return newWrappedError("failed arranging a consumer for new namespace", err)
			}
		}
	}
	if colHeader.EOF {
		crc := int64(demux.hashes[demux.currentNamespace].Sum64())
		length := int64(demux.lengths[demux.currentNamespace])
		if crc != colHeader.CRC {
			return fmt.Errorf("CRC mismatch for namespace %v, %v!=%v",
				demux.currentNamespace,
				crc,
				colHeader.CRC,
			)
		}
		log.Logf(log.DebugHigh,
			"demux checksum for namespace %v is correct (%v), %v bytes",
			demux.currentNamespace, crc, length)
		demux.outs[demux.currentNamespace].Close()
		delete(demux.outs, demux.currentNamespace)
		delete(demux.hashes, demux.currentNamespace)
		delete(demux.lengths, demux.currentNamespace)
		// in case we get a BSONBody with this block,
		// we want to ensure that that causes an error
		demux.currentNamespace = ""
	}
	return nil
}
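// The header struct consumed above, reconstructed from the fields HeaderBSON
// touches; the bson tags here are assumptions and may not match the real
// archive format exactly:
//
//	type NamespaceHeader struct {
//		Database   string `bson:"db"`
//		Collection string `bson:"collection"`
//		EOF        bool   `bson:"EOF"`
//		CRC        int64  `bson:"crc"`
//	}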
func (dump *MongoDump) createIntentFromOptions(dbName string, ci *collectionInfo) error {
	if dump.shouldSkipCollection(ci.Name) {
		log.Logf(log.DebugLow, "skipping dump of %v.%v, it is excluded", dbName, ci.Name)
		return nil
	}
	intent, err := dump.NewIntent(dbName, ci.Name)
	if err != nil {
		return err
	}
	intent.Options = ci.Options
	dump.manager.Put(intent)
	log.Logf(log.DebugLow, "enqueued collection '%v'", intent.Namespace())
	return nil
}
// checkOplogTimestampExists checks to make sure the oplog hasn't rolled over
// since mongodump started. It does this by checking the oldest oplog entry
// still in the database and making sure it happened at or before the timestamp
// captured at the start of the dump.
func (dump *MongoDump) checkOplogTimestampExists(ts bson.MongoTimestamp) (bool, error) {
	oldestOplogEntry := db.Oplog{}
	err := dump.sessionProvider.FindOne("local", dump.oplogCollection, 0, nil,
		[]string{"+$natural"}, &oldestOplogEntry, 0)
	if err != nil {
		return false, fmt.Errorf("unable to read entry from oplog: %v", err)
	}

	log.Logf(log.DebugHigh, "oldest oplog entry has timestamp %v", oldestOplogEntry.Timestamp)
	if oldestOplogEntry.Timestamp > ts {
		log.Logf(log.Info, "oldest oplog entry of timestamp %v is newer than %v",
			oldestOplogEntry.Timestamp, ts)
		return false, nil
	}
	return true, nil
}
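// Hedged usage sketch: guarding against oplog rollover after the dump
// completes. The startTS variable, the wrapper function, and the error text
// are illustrative assumptions.
func exampleVerifyOplog(dump *MongoDump, startTS bson.MongoTimestamp) error {
	exists, err := dump.checkOplogTimestampExists(startTS)
	if err != nil {
		return err
	}
	if !exists {
		return fmt.Errorf("oplog rolled over during the dump; rerun mongodump")
	}
	return nil
}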
// Run executes the mongotop program.
func (mt *MongoTop) Run() error {
	connURL := mt.Options.Host
	if connURL == "" {
		connURL = "127.0.0.1"
	}
	if mt.Options.Port != "" {
		connURL = connURL + ":" + mt.Options.Port
	}

	hasData := false
	numPrinted := 0

	for {
		if mt.OutputOptions.RowCount > 0 && numPrinted > mt.OutputOptions.RowCount {
			return nil
		}
		numPrinted++
		diff, err := mt.runDiff()
		if err != nil {
			// If this is the first time trying to poll the server and it fails,
			// just stop now instead of trying over and over.
			if !hasData {
				return err
			}
			log.Logf(log.Always, "Error: %v\n", err)
			time.Sleep(mt.Sleeptime)
		}

		// if this is the first time and the connection is successful, print
		// the connection message
		if !hasData && !mt.OutputOptions.Json {
			log.Logf(log.Always, "connected to: %v\n", connURL)
		}

		hasData = true

		if diff != nil {
			if mt.OutputOptions.Json {
				fmt.Println(diff.JSON())
			} else {
				fmt.Println(diff.Grid())
			}
		}
		time.Sleep(mt.Sleeptime)
	}
}
func GetCollections(database *mgo.Database, name string) (*mgo.Iter, bool, error) {
	var cmdResult struct {
		Cursor struct {
			FirstBatch []bson.Raw "firstBatch"
			NS         string
			Id         int64
		}
	}

	command := bson.D{{"listCollections", 1}, {"cursor", bson.M{}}}
	if len(name) > 0 {
		command = bson.D{{"listCollections", 1}, {"filter", bson.M{"name": name}}, {"cursor", bson.M{}}}
	}

	err := database.Run(command, &cmdResult)
	switch {
	case err == nil:
		ns := strings.SplitN(cmdResult.Cursor.NS, ".", 2)
		if len(ns) < 2 {
			return nil, false, fmt.Errorf("server returned invalid cursor.ns `%v` on listCollections for `%v`: %v",
				cmdResult.Cursor.NS, database.Name, err)
		}
		return database.Session.DB(ns[0]).C(ns[1]).NewIter(database.Session, cmdResult.Cursor.FirstBatch, cmdResult.Cursor.Id, nil), false, nil
	case IsNoCmd(err):
		log.Logf(log.DebugLow, "No support for listCollections command, falling back to querying system.namespaces")
		iter, err := getCollectionsPre28(database, name)
		return iter, true, err
	default:
		return nil, false, fmt.Errorf("error running `listCollections`. Database: `%v` Err: %v", database.Name, err)
	}
}
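// Hedged usage sketch: draining the collection-info iterator returned by
// GetCollections. Decoding each document into a struct with only a "name"
// field is an assumption about the listCollections reply shape; the boolean
// result (legacy system.namespaces fallback) is discarded here.
func exampleListCollectionNames(database *mgo.Database) ([]string, error) {
	iter, _, err := GetCollections(database, "")
	if err != nil {
		return nil, err
	}
	var names []string
	var doc struct {
		Name string "name"
	}
	for iter.Next(&doc) {
		names = append(names, doc.Name)
	}
	return names, iter.Close()
}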
// Init performs preliminary setup operations for MongoDump.
func (dump *MongoDump) Init() error {
	err := dump.ValidateOptions()
	if err != nil {
		return fmt.Errorf("bad option: %v", err)
	}
	dump.sessionProvider, err = db.NewSessionProvider(*dump.ToolOptions)
	if err != nil {
		return fmt.Errorf("can't create session: %v", err)
	}

	// allow secondary reads for the isMongos check
	dump.sessionProvider.SetFlags(db.Monotonic)
	dump.isMongos, err = dump.sessionProvider.IsMongos()
	if err != nil {
		return err
	}

	// ensure we allow secondary reads on mongods and disable TCP timeouts
	flags := db.DisableSocketTimeout
	if dump.isMongos {
		log.Logf(log.Info, "connecting to mongos; secondary reads disabled")
	} else {
		flags |= db.Monotonic
	}
	dump.sessionProvider.SetFlags(flags)

	// return a helpful error message for mongos --repair
	if dump.OutputOptions.Repair && dump.isMongos {
		return fmt.Errorf("--repair flag cannot be used on a mongos")
	}

	dump.manager = intents.NewIntentManager()
	dump.progressManager = progress.NewProgressBarManager(log.Writer(0), progressBarWaitTime)
	return nil
}
// CreateIntentForOplog creates an intent for a file that we want to treat as an oplog.
func (restore *MongoRestore) CreateIntentForOplog() error {
	target, err := newActualPath(restore.InputOptions.OplogFile)
	if err != nil {
		return err
	}
	db := ""
	collection := "oplog"

	log.Logf(log.DebugLow, "reading oplog from %v", target.Path())

	if target.IsDir() {
		return fmt.Errorf("file %v is a directory, not a bson file", target.Path())
	}

	// Then create its intent.
	intent := &intents.Intent{
		DB:       db,
		C:        collection,
		Size:     target.Size(),
		Location: target.Path(),
	}
	intent.BSONFile = &realBSONFile{path: target.Path(), intent: intent, gzip: restore.InputOptions.Gzip}
	restore.manager.PutOplogIntent(intent, "oplogFile")
	return nil
}
// GetIndexes returns an iterator to the raw index info for a collection by
// using the listIndexes command if available, or by falling back to querying
// against system.indexes (pre-3.0 systems).
func GetIndexes(coll *mgo.Collection) (*mgo.Iter, error) {
	var cmdResult struct {
		Cursor struct {
			FirstBatch []bson.Raw "firstBatch"
			NS         string
			Id         int64
		}
	}

	err := coll.Database.Run(bson.D{{"listIndexes", coll.Name}, {"cursor", bson.M{}}}, &cmdResult)
	switch {
	case err == nil:
		ns := strings.SplitN(cmdResult.Cursor.NS, ".", 2)
		if len(ns) < 2 {
			return nil, fmt.Errorf("server returned invalid cursor.ns `%v` on listIndexes for `%v`: %v",
				cmdResult.Cursor.NS, coll.FullName, err)
		}
		ses := coll.Database.Session
		return ses.DB(ns[0]).C(ns[1]).NewIter(ses, cmdResult.Cursor.FirstBatch, cmdResult.Cursor.Id, nil), nil
	case IsNoCmd(err):
		log.Logf(log.DebugLow, "No support for listIndexes command, falling back to querying system.indexes")
		return getIndexesPre28(coll)
	default:
		return nil, fmt.Errorf("error running `listIndexes`. Collection: `%v` Err: %v", coll.FullName, err)
	}
}
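// Hedged usage sketch: collecting raw index documents from the iterator
// returned by GetIndexes. The helper itself is hypothetical.
func exampleCollectIndexes(coll *mgo.Collection) ([]bson.D, error) {
	iter, err := GetIndexes(coll)
	if err != nil {
		return nil, err
	}
	var indexes []bson.D
	var index bson.D
	for iter.Next(&index) {
		indexes = append(indexes, index)
		index = nil // avoid aliasing the document we just stored
	}
	return indexes, iter.Close()
}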
// Init performs preliminary setup operations for MongoDump.
func (dump *MongoDump) Init() error {
	err := dump.ValidateOptions()
	if err != nil {
		return fmt.Errorf("bad option: %v", err)
	}
	if dump.stdout == nil {
		dump.stdout = os.Stdout
	}
	dump.sessionProvider, err = db.NewSessionProvider(*dump.ToolOptions)
	if err != nil {
		return fmt.Errorf("can't create session: %v", err)
	}

	// temporarily allow secondary reads for the isMongos check
	dump.sessionProvider.SetReadPreference(mgo.Nearest)
	dump.isMongos, err = dump.sessionProvider.IsMongos()
	if err != nil {
		return err
	}

	if dump.isMongos && dump.OutputOptions.Oplog {
		return fmt.Errorf("can't use --oplog option when dumping from a mongos")
	}

	var mode mgo.Mode
	if dump.ToolOptions.ReplicaSetName != "" || dump.isMongos {
		mode = mgo.Primary
	} else {
		mode = mgo.Nearest
	}
	var tags bson.D

	if dump.InputOptions.ReadPreference != "" {
		mode, tags, err = db.ParseReadPreference(dump.InputOptions.ReadPreference)
		if err != nil {
			return fmt.Errorf("error parsing --readPreference : %v", err)
		}
		if len(tags) > 0 {
			dump.sessionProvider.SetTags(tags)
		}
	}

	// warn if we are trying to dump from a secondary in a sharded cluster
	if dump.isMongos && mode != mgo.Primary {
		log.Logf(log.Always, db.WarningNonPrimaryMongosConnection)
	}

	dump.sessionProvider.SetReadPreference(mode)
	dump.sessionProvider.SetTags(tags)
	dump.sessionProvider.SetFlags(db.DisableSocketTimeout)

	// return a helpful error message for mongos --repair
	if dump.OutputOptions.Repair && dump.isMongos {
		return fmt.Errorf("--repair flag cannot be used on a mongos")
	}

	dump.manager = intents.NewIntentManager()
	dump.progressManager = progress.NewProgressBarManager(log.Writer(0), progressBarWaitTime)
	return nil
}
// Watch spawns a goroutine to continuously collect and process stats for
// a single node on a regular interval. At each interval, the goroutine polls
// the node with the 'discover' channel and reports the resulting stat line
// to the cluster monitor.
func (node *NodeMonitor) Watch(sleep time.Duration, discover chan string, cluster ClusterMonitor) {
	go func() {
		cycle := uint64(0)
		for {
			sampleDiff := int64(sleep / time.Second)
			log.Logf(log.DebugHigh, "polling server: %v", node.host)
			statLine := node.Poll(discover, node.All, cycle%10 == 1, sampleDiff)

			if statLine != nil {
				log.Logf(log.DebugHigh, "successfully got statline from host: %v", node.host)
				cluster.Update(*statLine)
			}
			time.Sleep(sleep)
			cycle++
		}
	}()
}
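// Hedged usage sketch: starting a watch loop that polls every 5 seconds.
// The wrapper function and the channel capacity are assumptions.
func exampleStartWatch(node *NodeMonitor, cluster ClusterMonitor) {
	discover := make(chan string, 128) // receives newly discovered host:port strings
	node.Watch(5*time.Second, discover, cluster)
}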
// dumpQueryToWriter takes an mgo Query and its intent, performs the query,
// and writes the raw bson results to the intent's BSON file. Returns a final
// count of documents dumped, and any errors that occurred.
func (dump *MongoDump) dumpQueryToWriter(
	query *mgo.Query, intent *intents.Intent) (int64, error) {
	total, err := query.Count()
	if err != nil {
		return int64(0), fmt.Errorf("error reading from db: %v", err)
	}
	log.Logf(log.Info, "\tcounted %v %v in %v", total, docPlural(int64(total)), intent.Namespace())

	dumpProgressor := progress.NewCounter(int64(total))
	bar := &progress.Bar{
		Name:      intent.Namespace(),
		Watching:  dumpProgressor,
		BarLength: progressBarLength,
	}
	if dump.ProgressManager != nil {
		dump.ProgressManager.Attach(bar)
		defer dump.ProgressManager.Detach(bar)
	}

	err = dump.dumpIterToWriter(query.Iter(), intent.BSONFile, dumpProgressor)
	_, dumpCount := dumpProgressor.Progress()

	return dumpCount, err
}
// JSON iterates through the BSON file and for each document it finds,
// recursively descends into objects and arrays and prints the human readable
// JSON representation.
// It returns the number of documents processed and a non-nil error if one is
// encountered before the end of the file is reached.
func (bd *BSONDump) JSON() (int, error) {
	numFound := 0

	if bd.BSONSource == nil {
		panic("Tried to call JSON() before opening file")
	}

	decodedStream := db.NewDecodedBSONSource(bd.BSONSource)

	var result bson.Raw
	for decodedStream.Next(&result) {
		if err := printJSON(&result, bd.Out, bd.BSONDumpOptions.Pretty); err != nil {
			log.Logf(log.Always, "unable to dump document %v: %v", numFound+1, err)

			// if objcheck is turned on, stop now. otherwise keep on dumpin'
			if bd.BSONDumpOptions.ObjCheck {
				return numFound, err
			}
		} else {
			_, err := bd.Out.Write([]byte("\n"))
			if err != nil {
				return numFound, err
			}
		}
		numFound++
	}
	if err := decodedStream.Err(); err != nil {
		return numFound, err
	}
	return numFound, nil
}
// DumpIntents iterates through the previously-created intents and
// dumps all of the found collections.
func (dump *MongoDump) DumpIntents() error {
	resultChan := make(chan error)

	var jobs int
	if dump.ToolOptions != nil && dump.ToolOptions.HiddenOptions != nil {
		jobs = dump.ToolOptions.HiddenOptions.MaxProcs
	}
	jobs = util.MaxInt(jobs, 1)

	if jobs > 1 {
		dump.manager.Finalize(intents.LongestTaskFirst)
	} else {
		dump.manager.Finalize(intents.Legacy)
	}

	log.Logf(log.Info, "dumping with %v job threads", jobs)

	// start a goroutine for each job thread
	for i := 0; i < jobs; i++ {
		go func(id int) {
			log.Logf(log.DebugHigh, "starting dump routine with id=%v", id)
			for {
				intent := dump.manager.Pop()
				if intent == nil {
					log.Logf(log.DebugHigh, "ending dump routine with id=%v, no more work to do", id)
					resultChan <- nil
					return
				}
				err := dump.DumpIntent(intent)
				if err != nil {
					resultChan <- err
					return
				}
				dump.manager.Finish(intent)
			}
		}(i)
	}

	// wait until all goroutines are done or one of them errors out
	for i := 0; i < jobs; i++ {
		if err := <-resultChan; err != nil {
			return err
		}
	}
	return nil
}
// DumpIntents iterates through the previously-created intents and
// dumps all of the found collections.
func (dump *MongoDump) DumpIntents() error {
	resultChan := make(chan error)

	jobs := dump.OutputOptions.NumParallelCollections
	if numIntents := len(dump.manager.Intents()); jobs > numIntents {
		jobs = numIntents
	}

	if jobs > 1 {
		dump.manager.Finalize(intents.LongestTaskFirst)
	} else {
		dump.manager.Finalize(intents.Legacy)
	}

	log.Logf(log.Info, "dumping up to %v collections in parallel", jobs)

	// start a goroutine for each job thread
	for i := 0; i < jobs; i++ {
		go func(id int) {
			log.Logf(log.DebugHigh, "starting dump routine with id=%v", id)
			for {
				intent := dump.manager.Pop()
				if intent == nil {
					log.Logf(log.DebugHigh, "ending dump routine with id=%v, no more work to do", id)
					resultChan <- nil
					return
				}
				err := dump.DumpIntent(intent)
				if err != nil {
					resultChan <- err
					return
				}
				dump.manager.Finish(intent)
			}
		}(i)
	}

	// wait until all goroutines are done or one of them errors out
	for i := 0; i < jobs; i++ {
		if err := <-resultChan; err != nil {
			return err
		}
	}
	return nil
}
// runInsertionWorker is a helper to InsertDocuments - it reads documents off
// the read channel and prepares them in batches for insertion into the database.
func (imp *MongoImport) runInsertionWorker(readDocs chan bson.D) (err error) {
	session, err := imp.SessionProvider.GetSession()
	if err != nil {
		return fmt.Errorf("error connecting to mongod: %v", err)
	}
	defer session.Close()
	if err = imp.configureSession(session); err != nil {
		return fmt.Errorf("error configuring session: %v", err)
	}
	collection := session.DB(imp.ToolOptions.DB).C(imp.ToolOptions.Collection)
	ignoreBlanks := imp.IngestOptions.IgnoreBlanks && imp.InputOptions.Type != JSON

	var documentBytes []byte
	var documents []bson.Raw
	numMessageBytes := 0

readLoop:
	for {
		select {
		case document, alive := <-readDocs:
			if !alive {
				break readLoop
			}
			// the mgo driver doesn't currently respect the maxBatchSize
			// limit so we self impose a limit by using maxMessageSizeBytes
			// and send documents over the wire when we hit the batch size
			// or when we're at/over the maximum message size threshold
			if len(documents) == imp.ToolOptions.BulkBufferSize || numMessageBytes >= maxMessageSizeBytes {
				if err = imp.insert(documents, collection); err != nil {
					return err
				}
				documents = documents[:0]
				numMessageBytes = 0
			}
			// ignore blank fields if specified
			if ignoreBlanks {
				document = removeBlankFields(document)
			}
			if documentBytes, err = bson.Marshal(document); err != nil {
				return err
			}
			if len(documentBytes) > maxBSONSize {
				log.Logf(log.Always, "warning: attempting to insert document with size %v (exceeds %v limit)",
					text.FormatByteAmount(int64(len(documentBytes))), text.FormatByteAmount(maxBSONSize))
			}
			numMessageBytes += len(documentBytes)
			documents = append(documents, bson.Raw{3, documentBytes})
		case <-imp.Dying():
			return nil
		}
	}

	// ingest any documents left in slice
	if len(documents) != 0 {
		return imp.insert(documents, collection)
	}
	return nil
}
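// Hedged illustration of the flush rule used above: a batch ships when it
// reaches the configured document count or the message-size ceiling. This
// helper is hypothetical; bulkBufferSize stands in for
// imp.ToolOptions.BulkBufferSize, and maxMessageSizeBytes is the package
// constant referenced in the worker.
func shouldFlushBatch(numDocs, numMessageBytes, bulkBufferSize int) bool {
	return numDocs == bulkBufferSize || numMessageBytes >= maxMessageSizeBytes
}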
// checkOplogTimestampExists checks to make sure the oplog hasn't rolled over
// since mongodump started. It does this by checking the oldest oplog entry
// still in the database and making sure it happened at or before the timestamp
// captured at the start of the dump.
func (dump *MongoDump) checkOplogTimestampExists(ts bson.MongoTimestamp) (bool, error) {
	session := dump.SessionProvider.GetSession()
	oldestOplogEntry := Oplog{}
	collection := session.DB("local").C(dump.oplogCollection)
	err := collection.Find(bson.M{}).Sort("$natural").Limit(1).One(&oldestOplogEntry)
	if err != nil {
		return false, fmt.Errorf("unable to read entry from oplog: %v", err)
	}
	log.Logf(3, "oldest oplog entry has timestamp %v", oldestOplogEntry.Timestamp)
	if oldestOplogEntry.Timestamp > ts {
		log.Logf(1, "oldest oplog entry of timestamp %v is newer than %v",
			oldestOplogEntry.Timestamp, ts)
		return false, nil
	}
	return true, nil
}
// getSourceReader returns an io.Reader to read from the input source. Also
// returns the size of the source in bytes (or 0 for stdin, whose size is
// undefined), which can be used to track read progress.
func (imp *MongoImport) getSourceReader() (io.ReadCloser, int64, error) {
	if imp.InputOptions.File != "" {
		file, err := os.Open(util.ToUniversalPath(imp.InputOptions.File))
		if err != nil {
			return nil, -1, err
		}
		fileStat, err := file.Stat()
		if err != nil {
			return nil, -1, err
		}
		log.Logf(log.Info, "filesize: %v bytes", fileStat.Size())
		return file, int64(fileStat.Size()), err
	}

	log.Logf(log.Info, "reading from stdin")

	// Stdin has undefined max size, so return 0
	return os.Stdin, 0, nil
}
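// Hedged usage sketch: wiring the returned size into the progress types seen
// elsewhere in this section. The bar wiring and name are assumptions about
// how a caller might report progress, not a claim about mongoimport's actual
// code.
func exampleTrackImportProgress(imp *MongoImport) (io.ReadCloser, *progress.Bar, error) {
	reader, size, err := imp.getSourceReader()
	if err != nil {
		return nil, nil, err
	}
	bar := &progress.Bar{
		Name:      "import",
		Watching:  progress.NewCounter(size), // 0 for stdin means no meaningful total
		BarLength: progressBarLength,
	}
	return reader, bar, nil
}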
// AddMetadata adds a metadata data structure to a prelude and does the required bookkeeping.
func (prelude *Prelude) AddMetadata(cm *CollectionMetadata) {
	prelude.NamespaceMetadatas = append(prelude.NamespaceMetadatas, cm)
	if prelude.NamespaceMetadatasByDB == nil {
		prelude.NamespaceMetadatasByDB = make(map[string][]*CollectionMetadata)
	}
	_, ok := prelude.NamespaceMetadatasByDB[cm.Database]
	if !ok {
		prelude.DBS = append(prelude.DBS, cm.Database)
	}
	prelude.NamespaceMetadatasByDB[cm.Database] = append(prelude.NamespaceMetadatasByDB[cm.Database], cm)
	log.Logf(log.Info, "archive prelude %v.%v", cm.Database, cm.Collection)
}
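// Hedged usage sketch: registering archive metadata for two collections in
// one database. Only the Database and Collection fields are confirmed by the
// method above; the database and collection names are illustrative.
func examplePreludeBookkeeping(prelude *Prelude) {
	prelude.AddMetadata(&CollectionMetadata{Database: "mydb", Collection: "users"})
	prelude.AddMetadata(&CollectionMetadata{Database: "mydb", Collection: "orders"})
	// prelude.DBS now contains "mydb" exactly once
}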