// runInsertionWorker is a helper to InsertDocuments - it reads document off // the read channel and prepares then in batches for insertion into the databas func (imp *MongoImport) runInsertionWorker(readDocs chan bson.D) (err error) { session, err := imp.SessionProvider.GetSession() if err != nil { return fmt.Errorf("error connecting to mongod: %v", err) } defer session.Close() if err = imp.configureSession(session); err != nil { return fmt.Errorf("error configuring session: %v", err) } collection := session.DB(imp.ToolOptions.DB).C(imp.ToolOptions.Collection) var inserter flushInserter if imp.IngestOptions.Mode == modeInsert { inserter = db.NewBufferedBulkInserter(collection, imp.IngestOptions.BulkBufferSize, !imp.IngestOptions.StopOnError) if !imp.IngestOptions.MaintainInsertionOrder { inserter.(*db.BufferedBulkInserter).Unordered() } } else { inserter = imp.newUpserter(collection) } readLoop: for { select { case document, alive := <-readDocs: if !alive { break readLoop } err = filterIngestError(imp.IngestOptions.StopOnError, inserter.Insert(document)) if err != nil { return err } atomic.AddUint64(&imp.insertionCount, 1) case <-imp.Dying(): return nil } } err = inserter.Flush() // TOOLS-349 correct import count for bulk operations if bulkError, ok := err.(*mgo.BulkError); ok { failedDocs := make(map[int]bool) // index of failures for _, failure := range bulkError.Cases() { failedDocs[failure.Index] = true } numFailures := len(failedDocs) if numFailures > 0 { log.Logvf(log.Always, "num failures: %d", numFailures) atomic.AddUint64(&imp.insertionCount, ^uint64(numFailures-1)) } } return filterIngestError(imp.IngestOptions.StopOnError, err) }
// RestoreCollectionToDB pipes the given BSON data into the database.
// Returns the number of documents restored and any errors that occurred.
// If the tool is terminated mid-restore, the count read so far is returned
// along with util.ErrTerminated.
func (restore *MongoRestore) RestoreCollectionToDB(dbName, colName string,
	bsonSource *db.DecodedBSONSource, file PosReader, fileSize int64) (int64, error) {

	var termErr error
	session, err := restore.SessionProvider.GetSession()
	if err != nil {
		return int64(0), fmt.Errorf("error establishing connection: %v", err)
	}
	session.SetSafe(restore.safety)
	defer session.Close()

	collection := session.DB(dbName).C(colName)

	// documentCount is only ever written by the reader goroutine below; the
	// channel close / result-channel receives order those writes before the
	// final read at return.
	documentCount := int64(0)
	watchProgressor := progress.NewCounter(fileSize)
	if restore.ProgressManager != nil {
		name := fmt.Sprintf("%v.%v", dbName, colName)
		restore.ProgressManager.Attach(name, watchProgressor)
		defer restore.ProgressManager.Detach(name)
	}

	maxInsertWorkers := restore.OutputOptions.NumInsertionWorkers
	// A single worker is required to preserve insertion order.
	if restore.OutputOptions.MaintainInsertionOrder {
		maxInsertWorkers = 1
	}

	docChan := make(chan bson.Raw, insertBufferFactor)
	// Buffered so every worker can report its result without blocking.
	resultChan := make(chan error, maxInsertWorkers)

	// stream documents for this collection on docChan
	go func() {
		doc := bson.Raw{}
		for bsonSource.Next(&doc) {
			select {
			case <-restore.termChan:
				// Termination requested: stop reading and signal the
				// workers by closing docChan.
				log.Logvf(log.Always, "terminating read on %v.%v", dbName, colName)
				termErr = util.ErrTerminated
				close(docChan)
				return
			default:
				// Copy the raw bytes before sending — presumably Next
				// reuses doc's backing buffer on each call (TODO confirm
				// against DecodedBSONSource).
				rawBytes := make([]byte, len(doc.Data))
				copy(rawBytes, doc.Data)
				docChan <- bson.Raw{Data: rawBytes}
				documentCount++
			}
		}
		close(docChan)
	}()

	log.Logvf(log.DebugLow, "using %v insertion workers", maxInsertWorkers)

	for i := 0; i < maxInsertWorkers; i++ {
		go func() {
			// get a session copy for each insert worker
			s := session.Copy()
			defer s.Close()

			coll := collection.With(s)
			bulk := db.NewBufferedBulkInserter(
				coll, restore.OutputOptions.BulkBufferSize, !restore.OutputOptions.StopOnError)
			for rawDoc := range docChan {
				if restore.objCheck {
					// Validate the document parses as BSON before inserting.
					err := bson.Unmarshal(rawDoc.Data, &bson.D{})
					if err != nil {
						resultChan <- fmt.Errorf("invalid object: %v", err)
						return
					}
				}
				if err := bulk.Insert(rawDoc); err != nil {
					if db.IsConnectionError(err) || restore.OutputOptions.StopOnError {
						// Propagate this error, since it's either a fatal connection error
						// or the user has turned on --stopOnError
						resultChan <- err
					} else {
						// Otherwise just log the error but don't propagate it.
						log.Logvf(log.Always, "error: %v", err)
					}
				}
				// Report progress based on how far we've read from the file.
				watchProgressor.Set(file.Pos())
			}
			// Flush any buffered documents remaining after docChan closed.
			err := bulk.Flush()
			if err != nil {
				if !db.IsConnectionError(err) && !restore.OutputOptions.StopOnError {
					// Suppress this error since it's not a severe connection error and
					// the user has not specified --stopOnError
					log.Logvf(log.Always, "error: %v", err)
					err = nil
				}
			}
			// Every worker sends exactly one result (possibly nil) on exit.
			resultChan <- err
			return
		}()

		// sleep to prevent all threads from inserting at the same time at start
		time.Sleep(time.Duration(i) * 10 * time.Millisecond)
	}

	// wait until all insert jobs finish
	for done := 0; done < maxInsertWorkers; done++ {
		err := <-resultChan
		if err != nil {
			return int64(0), fmt.Errorf("insertion error: %v", err)
		}
	}
	// final error check
	if err = bsonSource.Err(); err != nil {
		return int64(0), fmt.Errorf("reading bson input: %v", err)
	}
	return documentCount, termErr
}