func (s *Storage) StatBlobs(dest chan<- blob.SizedRef, blobs []blob.Ref) error {
	// TODO: use cache
	var grp syncutil.Group
	gate := syncutil.NewGate(20) // arbitrary cap
	for i := range blobs {
		br := blobs[i]
		gate.Start()
		grp.Go(func() error {
			defer gate.Done()
			fi, err := s.b.GetFileInfoByName(s.dirPrefix + br.String())
			if err == b2.FileNotFoundError {
				return nil
			}
			if err != nil {
				return err
			}
			if br.HashName() == "sha1" && fi.ContentSHA1 != br.Digest() {
				return errors.New("b2: remote ContentSHA1 mismatch")
			}
			size := fi.ContentLength
			if size > constants.MaxBlobSize {
				return fmt.Errorf("blob %s stat size too large (%d)", br, size)
			}
			dest <- blob.SizedRef{Ref: br, Size: uint32(size)}
			return nil
		})
	}
	return grp.Err()
}
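// A minimal, self-contained sketch (not taken from the snippets in this file)
// of the bounded-concurrency pattern they all share: a go4.org/syncutil.Gate
// caps how many goroutines run at once, and a syncutil.Group collects the
// first error after all of them finish. The processRef helper and the cap of
// 10 are hypothetical placeholders.
package main

import (
	"fmt"

	"go4.org/syncutil"
)

func processAll(refs []string) error {
	var grp syncutil.Group
	gate := syncutil.NewGate(10) // at most 10 concurrent workers; arbitrary cap
	for _, ref := range refs {
		ref := ref // capture loop variable for the closure (pre-Go 1.22 semantics)
		gate.Start() // blocks the producer loop while the gate is full
		grp.Go(func() error {
			defer gate.Done() // free the slot when this worker returns
			return processRef(ref)
		})
	}
	return grp.Err() // waits for all workers; returns the first non-nil error
}

func processRef(ref string) error {
	fmt.Println("processing", ref)
	return nil
}

func main() {
	if err := processAll([]string{"a", "b", "c"}); err != nil {
		fmt.Println("error:", err)
	}
}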
func (sh *SyncHandler) runFullValidation() {
	var wg sync.WaitGroup

	sh.mu.Lock()
	shards := sh.vshards
	wg.Add(len(shards))
	sh.mu.Unlock()

	sh.logf("full validation beginning with %d shards", len(shards))

	const maxShardWorkers = 30 // arbitrary
	gate := syncutil.NewGate(maxShardWorkers)

	for _, pfx := range shards {
		pfx := pfx
		gate.Start()
		go func() {
			defer wg.Done() // must be deferred, or wg.Wait() can return before validation finishes
			defer gate.Done()
			sh.validateShardPrefix(pfx)
		}()
	}
	wg.Wait()
	sh.logf("Validation complete")
}
func (s *storage) RemoveBlobs(blobs []blob.Ref) error {
	// Plan:
	//  -- delete from small (if it's there)
	//  -- if in big, update the meta index to note that it's there, but deleted.
	//  -- fetch big's zip file (constructed from a ReaderAt that is all dummy zeros +
	//     the zip's TOC only, relying on big being a SubFetcher, and keeping info in
	//     the meta about the offset of the TOC+total size of each big's zip)
	//  -- iterate over the zip's blobs (at some point). If all are marked deleted, actually RemoveBlob
	//     on big to delete the full zip and then delete all the meta rows.
	var (
		mu       sync.Mutex
		unpacked []blob.Ref
		packed   []blob.Ref
		large    = map[blob.Ref]bool{} // the large blobs that packed are in
	)
	var grp syncutil.Group
	delGate := syncutil.NewGate(removeLookups)
	for _, br := range blobs {
		br := br
		delGate.Start()
		grp.Go(func() error {
			defer delGate.Done()
			m, err := s.getMetaRow(br)
			if err != nil {
				return err
			}
			mu.Lock()
			defer mu.Unlock()
			if m.isPacked() {
				packed = append(packed, br)
				large[m.largeRef] = true
			} else {
				unpacked = append(unpacked, br)
			}
			return nil
		})
	}
	if err := grp.Err(); err != nil {
		return err
	}
	if len(unpacked) > 0 {
		grp.Go(func() error {
			return s.small.RemoveBlobs(unpacked)
		})
	}
	if len(packed) > 0 {
		grp.Go(func() error {
			bm := s.meta.BeginBatch()
			now := time.Now()
			for zipRef := range large {
				bm.Set("d:"+zipRef.String(), fmt.Sprint(now.Unix()))
			}
			for _, br := range packed {
				bm.Delete("b:" + br.String())
			}
			return s.meta.CommitBatch(bm)
		})
	}
	return grp.Err()
}
func (s *Storage) StatBlobs(dest chan<- blob.SizedRef, blobs []blob.Ref) error {
	// TODO: use cache
	// TODO(mpl): use context from caller, once one is available (issue 733)
	ctx := context.TODO()
	var grp syncutil.Group
	gate := syncutil.NewGate(20) // arbitrary cap
	for i := range blobs {
		br := blobs[i]
		gate.Start()
		grp.Go(func() error {
			defer gate.Done()
			attrs, err := s.client.Bucket(s.bucket).Object(s.dirPrefix + br.String()).Attrs(ctx)
			if err == storage.ErrObjectNotExist {
				return nil
			}
			if err != nil {
				return err
			}
			size := attrs.Size
			if size > constants.MaxBlobSize {
				return fmt.Errorf("blob %s stat size too large (%d)", br, size)
			}
			dest <- blob.SizedRef{Ref: br, Size: uint32(size)}
			return nil
		})
	}
	return grp.Err()
}
func (s *Storage) RemoveBlobs(blobs []blob.Ref) error {
	if s.cache != nil {
		s.cache.RemoveBlobs(blobs)
	}
	gate := syncutil.NewGate(50) // arbitrary
	var grp syncutil.Group
	for i := range blobs {
		gate.Start()
		br := blobs[i]
		grp.Go(func() error {
			defer gate.Done()
			fi, err := s.b.GetFileInfoByName(s.dirPrefix + br.String())
			if err != nil {
				return err
			}
			if fi == nil {
				return nil
			}
			if br.HashName() == "sha1" && fi.ContentSHA1 != br.Digest() {
				return errors.New("b2: remote ContentSHA1 mismatch")
			}
			return s.cl.DeleteFile(fi.ID, fi.Name)
		})
	}
	return grp.Err()
}
func (s *Storage) StatBlobs(dest chan<- blob.SizedRef, blobs []blob.Ref) error {
	// TODO: use cache
	var grp syncutil.Group
	gate := syncutil.NewGate(20) // arbitrary cap
	for i := range blobs {
		br := blobs[i]
		gate.Start()
		grp.Go(func() error {
			defer gate.Done()
			size, exists, err := s.client.StatObject(
				&googlestorage.Object{Bucket: s.bucket, Key: s.dirPrefix + br.String()})
			if err != nil {
				return err
			}
			if !exists {
				return nil
			}
			if size > constants.MaxBlobSize {
				return fmt.Errorf("blob %s stat size too large (%d)", br, size)
			}
			dest <- blob.SizedRef{Ref: br, Size: uint32(size)}
			return nil
		})
	}
	return grp.Err()
}
// Readdir implements the Directory interface.
func (dr *DirReader) Readdir(n int) (entries []DirectoryEntry, err error) {
	sts, err := dr.StaticSet()
	if err != nil {
		return nil, fmt.Errorf("schema/dirreader: can't get StaticSet: %v", err)
	}
	up := dr.current + n
	if n <= 0 {
		dr.current = 0
		up = len(sts)
	} else {
		if n > (len(sts) - dr.current) {
			err = io.EOF
			up = len(sts)
		}
	}

	// TODO(bradfitz): push down information to the fetcher
	// (e.g. cachingfetcher -> remote client http) that we're
	// going to load a bunch, so the HTTP client (if not using
	// SPDY) can do discovery and see if the server supports a
	// batch handler, then get them all in one round-trip, rather
	// than attacking the server with hundreds of parallel TLS
	// setups.

	type res struct {
		ent DirectoryEntry
		err error
	}
	var cs []chan res

	// Kick off all directory entry loads.
	gate := syncutil.NewGate(20) // Limit IO concurrency
	for _, entRef := range sts[dr.current:up] {
		c := make(chan res, 1)
		cs = append(cs, c)
		gate.Start()
		go func(entRef blob.Ref) {
			defer gate.Done()
			entry, err := NewDirectoryEntryFromBlobRef(dr.fetcher, entRef)
			c <- res{entry, err}
		}(entRef)
	}

	for _, c := range cs {
		res := <-c
		if res.err != nil {
			return nil, fmt.Errorf("schema/dirreader: can't create dirEntry: %v", res.err)
		}
		entries = append(entries, res.ent)
	}
	return entries, nil
}
func getTestClient(t *testing.T) {
	accessKey := os.Getenv("AWS_ACCESS_KEY_ID")
	secret := os.Getenv("AWS_ACCESS_KEY_SECRET")
	if accessKey != "" && secret != "" {
		tc = &Client{
			Auth:      &Auth{AccessKey: accessKey, SecretAccessKey: secret},
			Transport: http.DefaultTransport,
			PutGate:   syncutil.NewGate(5),
		}
		return
	}
	t.Logf("no AWS_ACCESS_KEY_ID or AWS_ACCESS_KEY_SECRET set in environment; trying against local fakes3 instead.")
	var ip string
	containerID, ip = dockertest.SetupFakeS3Container(t)
	hostname := ip + ":4567"
	tc = &Client{
		Auth:      &Auth{AccessKey: "foo", SecretAccessKey: "bar", Hostname: hostname},
		Transport: http.DefaultTransport,
		PutGate:   syncutil.NewGate(5),
		NoSSL:     true,
	}
}
// NewService builds a new Service. Zero timeout or maxProcs means no limit.
func NewService(th Thumbnailer, timeout time.Duration, maxProcs int) *Service {
	var g *syncutil.Gate
	if maxProcs > 0 {
		g = syncutil.NewGate(maxProcs)
	}
	return &Service{
		thumbnailer: th,
		timeout:     timeout,
		gate:        g,
	}
}
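// A minimal sketch (an assumption about usage, not code from the thumbnail
// package) of how an optional gate like the one above is typically honored at
// the call site: a nil *syncutil.Gate means "no limit", so it must be
// nil-checked before Start/Done. runGated and job are hypothetical names;
// go4.org/syncutil is assumed, as in the snippets around it.
func runGated(g *syncutil.Gate, job func()) {
	if g != nil {
		g.Start()      // blocks until a slot in the gate is free
		defer g.Done() // release the slot when job returns
	}
	job()
}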
func newKeyValueFromConfig(cfg jsonconfig.Obj) (sorted.KeyValue, error) {
	if !compiled {
		return nil, ErrNotCompiled
	}
	file := cfg.RequiredString("file")
	if err := cfg.Validate(); err != nil {
		return nil, err
	}

	fi, err := os.Stat(file)
	if os.IsNotExist(err) || (err == nil && fi.Size() == 0) {
		if err := initDB(file); err != nil {
			return nil, fmt.Errorf("could not initialize sqlite DB at %s: %v", file, err)
		}
	}
	db, err := sql.Open("sqlite3", file)
	if err != nil {
		return nil, err
	}
	kv := &keyValue{
		file: file,
		db:   db,
		KeyValue: &sqlkv.KeyValue{
			DB:   db,
			Gate: syncutil.NewGate(1),
		},
	}
	version, err := kv.SchemaVersion()
	if err != nil {
		return nil, fmt.Errorf("error getting schema version (need to init database with 'camtool dbinit %s'?): %v", file, err)
	}
	if err := kv.ping(); err != nil {
		return nil, err
	}
	if version != requiredSchemaVersion {
		if env.IsDev() {
			// Good signal that we're using the devcam server, so help out
			// the user with a more useful tip:
			return nil, fmt.Errorf("database schema version is %d; expect %d (run \"devcam server --wipe\" to wipe both your blobs and re-populate the database schema)", version, requiredSchemaVersion)
		}
		return nil, fmt.Errorf("database schema version is %d; expect %d (need to re-init/upgrade database?)", version, requiredSchemaVersion)
	}
	return kv, nil
}
func (im imp) Run(ctx *importer.RunContext) (err error) {
	log.Printf("pinboard: Running importer.")
	r := &run{
		RunContext: ctx,
		im:         im,
		postGate:   syncutil.NewGate(3),
		nextCursor: time.Now().Format(timeFormat),
		nextAfter:  time.Now(),
		lastPause:  pauseInterval,
	}
	_, err = r.importPosts()
	log.Printf("pinboard: Importer returned %v.", err)
	return
}
func getTestClient(t *testing.T) bool {
	accessKey := os.Getenv("AWS_ACCESS_KEY_ID")
	secret := os.Getenv("AWS_ACCESS_KEY_SECRET")
	if accessKey == "" || secret == "" {
		t.Logf("Skipping test; no AWS_ACCESS_KEY_ID or AWS_ACCESS_KEY_SECRET set in environment")
		return false
	}
	tc = &Client{
		Auth:      &Auth{AccessKey: accessKey, SecretAccessKey: secret},
		Transport: http.DefaultTransport,
		PutGate:   syncutil.NewGate(5),
	}
	return true
}
func (s *Storage) RemoveBlobs(blobs []blob.Ref) error {
	if s.cache != nil {
		s.cache.RemoveBlobs(blobs)
	}
	gate := syncutil.NewGate(50) // arbitrary
	var grp syncutil.Group
	for i := range blobs {
		gate.Start()
		br := blobs[i]
		grp.Go(func() error {
			defer gate.Done()
			return s.client.DeleteObject(&googlestorage.Object{Bucket: s.bucket, Key: s.dirPrefix + br.String()})
		})
	}
	return grp.Err()
}
// New returns a new local disk storage implementation at the provided
// root directory, which must already exist.
func New(root string) (*DiskStorage, error) {
	// Local disk.
	fi, err := os.Stat(root)
	if os.IsNotExist(err) {
		// As a special case, we auto-create the "packed" directory for blobpacked.
		if filepath.Base(root) == "packed" {
			if err := os.Mkdir(root, 0700); err != nil {
				return nil, fmt.Errorf("failed to mkdir packed directory: %v", err)
			}
			fi, err = os.Stat(root)
		} else {
			return nil, fmt.Errorf("Storage root %q doesn't exist", root)
		}
	}
	if err != nil {
		return nil, fmt.Errorf("Failed to stat directory %q: %v", root, err)
	}
	if !fi.IsDir() {
		return nil, fmt.Errorf("Storage root %q exists but is not a directory.", root)
	}
	ds := &DiskStorage{
		root:      root,
		dirLockMu: new(sync.RWMutex),
		gen:       local.NewGenerationer(root),
	}
	if err := ds.migrate3to2(); err != nil {
		return nil, fmt.Errorf("Error updating localdisk format: %v", err)
	}
	if _, _, err := ds.StorageGeneration(); err != nil {
		return nil, fmt.Errorf("Error initializing generation for %q: %v", root, err)
	}
	ul, err := osutil.MaxFD()
	if err != nil {
		if err == osutil.ErrNotSupported {
			// Do not set the gate on Windows, since we don't know the ulimit.
			return ds, nil
		}
		return nil, err
	}
	if ul < minFDLimit {
		return nil, fmt.Errorf("The max number of open file descriptors on your system (ulimit -n) is too low. Please fix it with 'ulimit -S -n X' with X being at least %d.", recommendedFDLimit)
	}
	// Set the gate to 80% of the ulimit, to leave a bit of room for other file ops happening in Camlistore.
	// TODO(mpl): make this used and enforced Camlistore-wide. Issue #837.
	ds.tmpFileGate = syncutil.NewGate(int(ul * 80 / 100))
	return ds, nil
}
func (imp) Run(ctx *importer.RunContext) error {
	clientId, secret, err := ctx.Credentials()
	if err != nil {
		return err
	}
	acctNode := ctx.AccountNode()
	ocfg := baseOAuthConfig
	ocfg.ClientId, ocfg.ClientSecret = clientId, secret
	token := decodeToken(acctNode.Attr(acctAttrOAuthToken))
	transport := &oauth.Transport{
		Config:    &ocfg,
		Token:     &token,
		Transport: notOAuthTransport(ctxutil.Client(ctx)),
	}
	ctx.Context = context.WithValue(ctx.Context, ctxutil.HTTPClient, transport.Client())

	root := ctx.RootNode()
	if root.Attr(nodeattr.Title) == "" {
		if err := root.SetAttr(nodeattr.Title,
			fmt.Sprintf("%s %s - Google/Picasa Photos",
				acctNode.Attr(importer.AcctAttrGivenName),
				acctNode.Attr(importer.AcctAttrFamilyName))); err != nil {
			return err
		}
	}

	r := &run{
		RunContext:  ctx,
		incremental: !forceFullImport && acctNode.Attr(importer.AcctAttrCompletedVersion) == runCompleteVersion,
		photoGate:   syncutil.NewGate(3),
	}
	if err := r.importAlbums(); err != nil {
		return err
	}

	r.mu.Lock()
	anyErr := r.anyErr
	r.mu.Unlock()
	if !anyErr {
		if err := acctNode.SetAttrs(importer.AcctAttrCompletedVersion, runCompleteVersion); err != nil {
			return err
		}
	}
	return nil
}
func newClient(server string, mode auth.AuthMode, opts ...ClientOption) *Client {
	c := &Client{
		server:    server,
		haveCache: noHaveCache{},
		log:       log.New(os.Stderr, "", log.Ldate|log.Ltime),
		authMode:  mode,
	}
	for _, v := range opts {
		v.modifyClient(c)
	}
	if c.httpClient == nil {
		c.httpClient = &http.Client{
			Transport: c.transportForConfig(c.transportConfig),
		}
	}
	c.httpGate = syncutil.NewGate(httpGateSize(c.httpClient.Transport))
	return c
}
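// A sketch (an assumption about call-site usage, not the client package's
// actual code) of how a gate sized from the transport, like httpGate above,
// can cap in-flight requests so the client stays within the transport's
// connection budget. doGatedRequest is a hypothetical helper; a fuller
// version might hold the gate slot until the response body is closed.
func doGatedRequest(gate *syncutil.Gate, hc *http.Client, req *http.Request) (*http.Response, error) {
	gate.Start()      // wait for a free slot
	defer gate.Done() // release it once the request round-trip returns
	return hc.Do(req)
}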
func (imp) Run(rctx *importer.RunContext) error {
	clientID, secret, err := rctx.Credentials()
	if err != nil {
		return err
	}
	acctNode := rctx.AccountNode()

	ocfg := &oauth2.Config{
		Endpoint:     google.Endpoint,
		ClientID:     clientID,
		ClientSecret: secret,
		Scopes:       []string{scopeURL},
	}

	token := decodeToken(acctNode.Attr(acctAttrOAuthToken))
	baseCtx := rctx.Context()
	ctx := context.WithValue(baseCtx, ctxutil.HTTPClient, ocfg.Client(baseCtx, token))

	root := rctx.RootNode()
	if root.Attr(nodeattr.Title) == "" {
		if err := root.SetAttr(
			nodeattr.Title,
			fmt.Sprintf("%s - Google Photos", acctNode.Attr(importer.AcctAttrName)),
		); err != nil {
			return err
		}
	}

	r := &run{
		RunContext:  rctx,
		incremental: !forceFullImport && acctNode.Attr(importer.AcctAttrCompletedVersion) == runCompleteVersion,
		photoGate:   syncutil.NewGate(3),
	}
	if err := r.importAlbums(ctx); err != nil {
		return err
	}

	if err := acctNode.SetAttrs(importer.AcctAttrCompletedVersion, runCompleteVersion); err != nil {
		return err
	}
	return nil
}
func (fr *FileReader) loadAllChunksSync() {
	gate := syncutil.NewGate(20) // num readahead chunk loads at a time
	fr.ForeachChunk(func(_ []blob.Ref, p BytesPart) error {
		if !p.BlobRef.Valid() {
			return nil
		}
		gate.Start()
		go func(br blob.Ref) {
			defer gate.Done()
			rc, _, err := fr.fetcher.Fetch(br)
			if err == nil {
				defer rc.Close()
				var b [1]byte
				rc.Read(b[:]) // fault in the blob
			}
		}(p.BlobRef)
		return nil
	})
}
func TestPackTwoIdenticalfiles(t *testing.T) {
	const fileSize = 1 << 20
	fileContents := randBytes(fileSize)
	testPack(t,
		func(sto blobserver.Storage) (err error) {
			if _, err = schema.WriteFileFromReader(sto, "a.txt", bytes.NewReader(fileContents)); err != nil {
				return
			}
			if _, err = schema.WriteFileFromReader(sto, "b.txt", bytes.NewReader(fileContents)); err != nil {
				return
			}
			return
		},
		func(pt *packTest) { pt.sto.packGate = syncutil.NewGate(1) }, // one pack at a time
		wantNumLargeBlobs(1),
		wantNumSmallBlobs(1), // just the "b.txt" file schema blob
		okayWithoutMeta("sha1-cb4399f6b3b31ace417e1ec9326f9818bb3f8387"),
	)
}
func newUploader() *Uploader {
	var cc *client.Client
	var httpStats *httputil.StatsTransport
	if d := *flagBlobDir; d != "" {
		ss, err := dir.New(d)
		if err != nil && d == "discard" {
			ss = discardStorage{}
			err = nil
		}
		if err != nil {
			log.Fatalf("Error using dir %s as storage: %v", d, err)
		}
		cc = client.NewStorageClient(ss)
	} else {
		var proxy func(*http.Request) (*url.URL, error)
		if flagProxyLocal {
			proxy = proxyFromEnvironment
		}
		cc = client.NewOrFail(client.OptionTransportConfig(&client.TransportConfig{
			Proxy:   proxy,
			Verbose: *flagHTTP,
		}))
		httpStats = cc.HTTPStats()
	}
	if *cmdmain.FlagVerbose {
		cc.SetLogger(log.New(cmdmain.Stderr, "", log.LstdFlags))
	} else {
		cc.SetLogger(nil)
	}

	pwd, err := os.Getwd()
	if err != nil {
		log.Fatalf("os.Getwd: %v", err)
	}

	return &Uploader{
		Client: cc,
		stats:  httpStats,
		pwd:    pwd,
		fdGate: syncutil.NewGate(100), // gate things that waste fds, assuming a low system limit
	}
}
func (s *storage) StatBlobs(dest chan<- blob.SizedRef, blobs []blob.Ref) error {
	if len(blobs) == 0 {
		return nil
	}
	var (
		grp        syncutil.Group
		trySmallMu sync.Mutex
		trySmall   []blob.Ref
	)
	statGate := syncutil.NewGate(50) // arbitrary
	for _, br := range blobs {
		br := br
		statGate.Start()
		grp.Go(func() error {
			defer statGate.Done()
			m, err := s.getMetaRow(br)
			if err != nil {
				return err
			}
			if m.exists {
				dest <- blob.SizedRef{Ref: br, Size: m.size}
			} else {
				trySmallMu.Lock()
				trySmall = append(trySmall, br)
				trySmallMu.Unlock()
			}
			return nil
		})
	}
	if err := grp.Err(); err != nil {
		return err
	}
	if len(trySmall) == 0 {
		return nil
	}
	return s.small.StatBlobs(dest, trySmall)
}
func (s *Storage) RemoveBlobs(blobs []blob.Ref) error {
	if s.cache != nil {
		s.cache.RemoveBlobs(blobs)
	}
	// TODO(mpl): use context from caller, once one is available (issue 733)
	ctx := context.TODO()
	gate := syncutil.NewGate(50) // arbitrary
	var grp syncutil.Group
	for i := range blobs {
		gate.Start()
		br := blobs[i]
		grp.Go(func() error {
			defer gate.Done()
			err := s.client.Bucket(s.bucket).Object(s.dirPrefix + br.String()).Delete(ctx)
			if err == storage.ErrObjectNotExist {
				return nil
			}
			return err
		})
	}
	return grp.Err()
}
func (s *storage) zipPartsInUse(br blob.Ref) ([]blob.Ref, error) {
	var (
		mu    sync.Mutex
		inUse []blob.Ref
	)
	var grp syncutil.Group
	gate := syncutil.NewGate(20) // arbitrary constant
	err := s.foreachZipBlob(br, func(bap BlobAndPos) error {
		gate.Start()
		grp.Go(func() error {
			defer gate.Done()
			mr, err := s.getMetaRow(bap.Ref)
			if err != nil {
				return err
			}
			if mr.isPacked() {
				mu.Lock()
				inUse = append(inUse, mr.largeRef)
				mu.Unlock()
			}
			return nil
		})
		return nil
	})
	if os.IsNotExist(err) {
		// An already-deleted blob from large isn't considered
		// to be in-use.
		return nil, nil
	}
	if err != nil {
		return nil, err
	}
	if err := grp.Err(); err != nil {
		return nil, err
	}
	return inUse, nil
}
func (r *run) importTweetsFromZip(userID string, zr *zip.Reader) error {
	log.Printf("Processing zip file with %d files", len(zr.File))

	tweetsNode, err := r.getTopLevelNode("tweets")
	if err != nil {
		return err
	}

	var (
		gate = syncutil.NewGate(tweetsAtOnce)
		grp  syncutil.Group
	)
	total := 0
	for _, zf := range zr.File {
		if !(strings.HasPrefix(zf.Name, "data/js/tweets/2") && strings.HasSuffix(zf.Name, ".js")) {
			continue
		}
		tweets, err := tweetsFromZipFile(zf)
		if err != nil {
			return fmt.Errorf("error reading tweets from %s: %v", zf.Name, err)
		}

		for i := range tweets {
			total++
			tweet := tweets[i]
			gate.Start()
			grp.Go(func() error {
				defer gate.Done()
				_, err := r.importTweet(tweetsNode, tweet, false)
				return err
			})
		}
	}
	err = grp.Err()
	log.Printf("zip import of tweets: %d total, err = %v", total, err)
	return err
}
// src: non-nil source
// dest: non-nil destination
// thirdLeg: optional third-leg client. if not nil, anything on src
// but not on dest will instead be copied to thirdLeg, instead of
// directly to dest. (sneakernet mode, copying to a portable drive
// and transporting thirdLeg to dest)
func (c *syncCmd) doPass(src, dest, thirdLeg blobserver.Storage) (stats SyncStats, retErr error) {
	var statsMu sync.Mutex // guards stats return value

	srcBlobs := make(chan blob.SizedRef, 100)
	destBlobs := make(chan blob.SizedRef, 100)
	srcErr := make(chan error, 1)
	destErr := make(chan error, 1)

	ctx := context.TODO()
	enumCtx, cancel := context.WithCancel(ctx) // used for all (2 or 3) enumerates
	defer cancel()
	enumerate := func(errc chan<- error, sto blobserver.Storage, blobc chan<- blob.SizedRef) {
		err := enumerateAllBlobs(enumCtx, sto, blobc)
		if err != nil {
			cancel()
		}
		errc <- err
	}
	go enumerate(srcErr, src, srcBlobs)
	checkSourceError := func() {
		if err := <-srcErr; err != nil && err != context.Canceled {
			retErr = fmt.Errorf("Enumerate error from source: %v", err)
		}
	}
	if c.dest == "stdout" {
		for sb := range srcBlobs {
			fmt.Fprintf(cmdmain.Stdout, "%s %d\n", sb.Ref, sb.Size)
		}
		checkSourceError()
		return
	}
	if c.wipe {
		// TODO(mpl): dest is a client. make it send a "wipe" request?
		// upon reception its server then wipes itself if it is a wiper.
		log.Print("Index wiping not yet supported.")
	}
	go enumerate(destErr, dest, destBlobs)
	checkDestError := func() {
		if err := <-destErr; err != nil && err != context.Canceled {
			retErr = fmt.Errorf("Enumerate error from destination: %v", err)
		}
	}

	destNotHaveBlobs := make(chan blob.SizedRef)

	readSrcBlobs := srcBlobs
	if c.verbose {
		readSrcBlobs = loggingBlobRefChannel(srcBlobs)
	}

	mismatches := []blob.Ref{}
	logErrorf := func(format string, args ...interface{}) {
		log.Printf(format, args...)
		statsMu.Lock()
		stats.ErrorCount++
		statsMu.Unlock()
	}
	onMismatch := func(br blob.Ref) {
		// TODO(bradfitz): check both sides and repair, carefully. For now, fail.
		logErrorf("WARNING: blobref %v has differing sizes on source and dest", br)
		mismatches = append(mismatches, br)
	}

	go blobserver.ListMissingDestinationBlobs(destNotHaveBlobs, onMismatch, readSrcBlobs, destBlobs)

	// Handle three-legged mode if thirdLeg is provided.
	checkThirdError := func() {} // default nop
	syncBlobs := destNotHaveBlobs
	firstHopDest := dest
	if thirdLeg != nil {
		thirdBlobs := make(chan blob.SizedRef, 100)
		thirdErr := make(chan error, 1)
		go enumerate(thirdErr, thirdLeg, thirdBlobs)
		checkThirdError = func() {
			if err := <-thirdErr; err != nil && err != context.Canceled {
				retErr = fmt.Errorf("Enumerate error from third leg: %v", err)
			}
		}
		thirdNeedBlobs := make(chan blob.SizedRef)
		go blobserver.ListMissingDestinationBlobs(thirdNeedBlobs, onMismatch, destNotHaveBlobs, thirdBlobs)
		syncBlobs = thirdNeedBlobs
		firstHopDest = thirdLeg
	}

	var gate = syncutil.NewGate(c.concurrency)
	var wg sync.WaitGroup
	for sb := range syncBlobs {
		sb := sb
		gate.Start()
		wg.Add(1)
		go func() {
			defer wg.Done()
			defer gate.Done()
			fmt.Fprintf(cmdmain.Stdout, "Destination needs blob: %s\n", sb)

			blobReader, size, err := src.Fetch(sb.Ref)
			if err != nil {
				logErrorf("Error fetching %s: %v", sb.Ref, err)
				return
			}
			if size != sb.Size {
				logErrorf("Source blobserver's enumerate size of %d for blob %s doesn't match its Get size of %d",
					sb.Size, sb.Ref, size)
				return
			}
			_, err = blobserver.Receive(firstHopDest, sb.Ref, blobReader)
			if err != nil {
				logErrorf("Upload of %s to destination blobserver failed: %v", sb.Ref, err)
				return
			}
			statsMu.Lock()
			stats.BlobsCopied++
			stats.BytesCopied += int64(size)
			statsMu.Unlock()

			if c.removeSrc {
				if err := src.RemoveBlobs([]blob.Ref{sb.Ref}); err != nil {
					logErrorf("Failed to delete %s from source: %v", sb.Ref, err)
				}
			}
		}()
	}
	wg.Wait()

	checkSourceError()
	checkDestError()
	checkThirdError()
	if retErr == nil && stats.ErrorCount > 0 {
		retErr = fmt.Errorf("%d errors during sync", stats.ErrorCount)
	}
	return stats, retErr
}
func (s *storage) init() {
	s.packGate = syncutil.NewGate(10)
}
limitations under the License.
*/

package localdisk

import (
	"os"

	"camlistore.org/pkg/blob"
	"go4.org/syncutil"
)

const maxParallelStats = 20

var statGate = syncutil.NewGate(maxParallelStats)

func (ds *DiskStorage) StatBlobs(dest chan<- blob.SizedRef, blobs []blob.Ref) error {
	if len(blobs) == 0 {
		return nil
	}

	statSend := func(ref blob.Ref) error {
		fi, err := os.Stat(ds.blobPath(ref))
		switch {
		case err == nil && fi.Mode().IsRegular():
			dest <- blob.SizedRef{Ref: ref, Size: u32(fi.Size())}
			return nil
		case err != nil && !os.IsNotExist(err):
			return err
		}
func newKeyValueFromJSONConfig(cfg jsonconfig.Obj) (sorted.KeyValue, error) {
	var (
		user     = cfg.RequiredString("user")
		database = cfg.RequiredString("database")
		host     = cfg.OptionalString("host", "")
		password = cfg.OptionalString("password", "")
	)
	if err := cfg.Validate(); err != nil {
		return nil, err
	}
	if !validDatabaseName(database) {
		return nil, fmt.Errorf("%q looks like an invalid database name", database)
	}
	var err error
	if host != "" {
		host, err = maybeRemapCloudSQL(host)
		if err != nil {
			return nil, err
		}
		if !strings.Contains(host, ":") {
			host += ":3306"
		}
		host = "tcp(" + host + ")"
	}
	// The DSN does NOT have a database name in it so it's
	// cacheable and can be shared between different queues & the
	// index, all sharing the same database server, cutting down
	// number of TCP connections required. We add the database
	// name in queries instead.
	dsn := fmt.Sprintf("%s:%s@%s/", user, password, host)
	db, err := openOrCachedDB(dsn)
	if err != nil {
		return nil, err
	}

	if err := CreateDB(db, database); err != nil {
		return nil, err
	}
	if err := createTables(db, database); err != nil {
		return nil, err
	}

	kv := &keyValue{
		dsn: dsn,
		db:  db,
		KeyValue: &sqlkv.KeyValue{
			DB:          db,
			TablePrefix: database + ".",
			Gate:        syncutil.NewGate(20), // arbitrary limit. TODO: configurable, automatically-learned?
		},
	}
	if err := kv.ping(); err != nil {
		return nil, fmt.Errorf("MySQL db unreachable: %v", err)
	}
	version, err := kv.SchemaVersion()
	if err != nil {
		return nil, fmt.Errorf("error getting current database schema version: %v", err)
	}
	if version == 0 {
		// Newly created table case
		if _, err := db.Exec(fmt.Sprintf(`REPLACE INTO %s.meta VALUES ('version', ?)`, database), requiredSchemaVersion); err != nil {
			return nil, fmt.Errorf("error setting schema version: %v", err)
		}
		return kv, nil
	}
	if version != requiredSchemaVersion {
		if version == 20 && requiredSchemaVersion == 21 {
			fmt.Fprintf(os.Stderr, fixSchema20to21)
		}
		if env.IsDev() {
			// Good signal that we're using the devcam server, so help out
			// the user with a more useful tip:
			return nil, fmt.Errorf("database schema version is %d; expect %d (run \"devcam server --wipe\" to wipe both your blobs and re-populate the database schema)", version, requiredSchemaVersion)
		}
		return nil, fmt.Errorf("database schema version is %d; expect %d (need to re-init/upgrade database?)", version, requiredSchemaVersion)
	}
	return kv, nil
}
func newFromConfig(_ blobserver.Loader, config jsonconfig.Obj) (blobserver.Storage, error) {
	hostname := config.OptionalString("hostname", "s3.amazonaws.com")
	cacheSize := config.OptionalInt64("cacheSize", 32<<20)
	client := &s3.Client{
		Auth: &s3.Auth{
			AccessKey:       config.RequiredString("aws_access_key"),
			SecretAccessKey: config.RequiredString("aws_secret_access_key"),
			Hostname:        hostname,
		},
		PutGate: syncutil.NewGate(maxParallelHTTP),
	}
	bucket := config.RequiredString("bucket")
	var dirPrefix string
	if parts := strings.SplitN(bucket, "/", 2); len(parts) > 1 {
		dirPrefix = parts[1]
		bucket = parts[0]
	}
	if dirPrefix != "" && !strings.HasSuffix(dirPrefix, "/") {
		dirPrefix += "/"
	}
	sto := &s3Storage{
		s3Client:  client,
		bucket:    bucket,
		dirPrefix: dirPrefix,
		hostname:  hostname,
	}
	skipStartupCheck := config.OptionalBool("skipStartupCheck", false)
	if err := config.Validate(); err != nil {
		return nil, err
	}
	if cacheSize != 0 {
		sto.cache = memory.NewCache(cacheSize)
	}
	if !skipStartupCheck {
		_, err := client.ListBucket(sto.bucket, "", 1)
		if serr, ok := err.(*s3.Error); ok {
			if serr.AmazonCode == "NoSuchBucket" {
				return nil, fmt.Errorf("Bucket %q doesn't exist.", sto.bucket)
			}

			// This code appears when the hostname has dots in it:
			if serr.AmazonCode == "PermanentRedirect" {
				loc, lerr := client.BucketLocation(sto.bucket)
				if lerr != nil {
					return nil, fmt.Errorf("Wrong server for bucket %q; and error determining bucket's location: %v", sto.bucket, lerr)
				}
				client.Auth.Hostname = loc
				_, err = client.ListBucket(sto.bucket, "", 1)
				if err == nil {
					log.Printf("Warning: s3 server should be %q, not %q. Change config file to avoid start-up latency.", client.Auth.Hostname, hostname)
				}
			}

			// This path occurs when the user set the
			// wrong server, or didn't set one at all, but
			// the bucket doesn't have dots in it:
			if serr.UseEndpoint != "" {
				// UseEndpoint will be e.g. "brads3test-ca.s3-us-west-1.amazonaws.com"
				// But we only want the "s3-us-west-1.amazonaws.com" part.
				client.Auth.Hostname = strings.TrimPrefix(serr.UseEndpoint, sto.bucket+".")
				_, err = client.ListBucket(sto.bucket, "", 1)
				if err == nil {
					log.Printf("Warning: s3 server should be %q, not %q. Change config file to avoid start-up latency.", client.Auth.Hostname, hostname)
				}
			}
		}
		if err != nil {
			return nil, fmt.Errorf("Error listing bucket %s: %v", sto.bucket, err)
		}
	}
	return sto, nil
}
func (r *run) importTweets(userID string) error {
	maxId := ""
	continueRequests := true

	tweetsNode, err := r.getTopLevelNode("tweets")
	if err != nil {
		return err
	}

	numTweets := 0
	sawTweet := map[string]bool{}

	// If attrs is changed, so should the expected responses accordingly for the
	// RoundTripper of MakeTestData (testdata.go).
	attrs := []string{
		"user_id", userID,
		"count", strconv.Itoa(tweetRequestLimit),
	}
	for continueRequests {
		select {
		case <-r.Context().Done():
			r.errorf("Twitter importer: interrupted")
			return r.Context().Err()
		default:
		}

		var resp []*apiTweetItem
		var err error
		if maxId == "" {
			log.Printf("Fetching tweets for userid %s", userID)
			err = r.doAPI(&resp, userTimeLineAPIPath, attrs...)
		} else {
			log.Printf("Fetching tweets for userid %s with max ID %s", userID, maxId)
			err = r.doAPI(&resp, userTimeLineAPIPath, append(attrs, "max_id", maxId)...)
		}
		if err != nil {
			return err
		}

		var (
			newThisBatch = 0
			allDupMu     sync.Mutex
			allDups      = true
			gate         = syncutil.NewGate(tweetsAtOnce)
			grp          syncutil.Group
		)
		for i := range resp {
			tweet := resp[i]

			// Dup-suppression.
			if sawTweet[tweet.Id] {
				continue
			}
			sawTweet[tweet.Id] = true
			newThisBatch++
			maxId = tweet.Id
			gate.Start()
			grp.Go(func() error {
				defer gate.Done()
				dup, err := r.importTweet(tweetsNode, tweet, true)
				if !dup {
					allDupMu.Lock()
					allDups = false
					allDupMu.Unlock()
				}
				if err != nil {
					r.errorf("Twitter importer: error importing tweet %s %v", tweet.Id, err)
				}
				return err
			})
		}
		if err := grp.Err(); err != nil {
			return err
		}
		numTweets += newThisBatch
		log.Printf("Imported %d tweets this batch; %d total.", newThisBatch, numTweets)
		if r.incremental && allDups {
			log.Printf("twitter incremental import found end batch")
			break
		}
		continueRequests = newThisBatch > 0
	}
	log.Printf("Successfully did full run of importing %d tweets", numTweets)
	return nil
}