func newUploader() *Uploader {
	cc := client.NewOrFail()
	if !*cmdmain.FlagVerbose {
		cc.SetLogger(nil)
	}

	proxy := http.ProxyFromEnvironment
	if flagProxyLocal {
		proxy = proxyFromEnvironment
	}
	tr := cc.TransportForConfig(
		&client.TransportConfig{
			Proxy:   proxy,
			Verbose: *flagHTTP,
		})
	httpStats, _ := tr.(*httputil.StatsTransport)
	cc.SetHTTPClient(&http.Client{Transport: tr})

	pwd, err := os.Getwd()
	if err != nil {
		log.Fatalf("os.Getwd: %v", err)
	}

	return &Uploader{
		Client:    cc,
		transport: httpStats,
		pwd:       pwd,
		fdGate:    syncutil.NewGate(100), // gate things that waste fds, assuming a low system limit
	}
}
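// Every snippet in this listing pairs gate.Start() with a (usually deferred)
// gate.Done() to cap fan-out. For orientation only: a gate behaves like a
// counting semaphore. The sketch below is a hypothetical buffered-channel
// equivalent, not the actual camlistore.org/pkg/syncutil implementation, and
// the lowercase name "gate" is made up for illustration.

type gate chan struct{}

func newGate(max int) gate { return make(gate, max) }

// Start blocks while max holders already have a slot.
func (g gate) Start() { g <- struct{}{} }

// Done releases a slot so another Start can proceed.
func (g gate) Done() { <-g }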
func (sh *SyncHandler) runFullValidation() {
	var wg sync.WaitGroup

	sh.mu.Lock()
	shards := sh.vshards
	wg.Add(len(shards))
	sh.mu.Unlock()

	sh.logf("full validation beginning with %d shards", len(shards))

	const maxShardWorkers = 30 // arbitrary
	gate := syncutil.NewGate(maxShardWorkers)

	for _, pfx := range shards {
		pfx := pfx
		gate.Start()
		go func() {
			defer wg.Done()
			defer gate.Done()
			sh.validateShardPrefix(pfx)
		}()
	}
	wg.Wait()
	sh.logf("Validation complete")
}
func (s *Storage) StatBlobs(dest chan<- blob.SizedRef, blobs []blob.Ref) error {
	// TODO: use cache
	var grp syncutil.Group
	gate := syncutil.NewGate(20) // arbitrary cap
	for i := range blobs {
		br := blobs[i]
		gate.Start()
		grp.Go(func() error {
			defer gate.Done()
			size, exists, err := s.client.StatObject(
				&googlestorage.Object{Bucket: s.bucket, Key: s.dirPrefix + br.String()})
			if err != nil {
				return err
			}
			if !exists {
				return nil
			}
			if size > constants.MaxBlobSize {
				return fmt.Errorf("blob %s stat size too large (%d)", br, size)
			}
			dest <- blob.SizedRef{Ref: br, Size: uint32(size)}
			return nil
		})
	}
	return grp.Err()
}
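// The gated StatBlobs implementations in these snippets all send results on
// dest from worker goroutines, so a caller has to drain dest concurrently or
// the workers (and the gate slots they hold) block. A hedged usage sketch:
// the helper name statAll and its wiring are made up, assuming Camlistore's
// blobserver.BlobStatter interface and blob.SizedRef type.

func statAll(s blobserver.BlobStatter, refs []blob.Ref) ([]blob.SizedRef, error) {
	dest := make(chan blob.SizedRef)
	done := make(chan struct{})
	var got []blob.SizedRef
	go func() {
		defer close(done)
		for sb := range dest {
			got = append(got, sb) // collect sizes as the workers report them
		}
	}()
	err := s.StatBlobs(dest, refs)
	close(dest) // StatBlobs has returned, so no more sends can happen
	<-done
	return got, err
}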
func (s *storage) RemoveBlobs(blobs []blob.Ref) error {
	// Plan:
	//  -- delete from small (if it's there)
	//  -- if in big, update the meta index to note that it's there, but deleted.
	//  -- fetch big's zip file (constructed from a ReaderAt that is all dummy zeros +
	//     the zip's TOC only, relying on big being a SubFetcher, and keeping info in
	//     the meta about the offset of the TOC+total size of each big's zip)
	//  -- iterate over the zip's blobs (at some point). If all are marked deleted, actually RemoveBlob
	//     on big to delete the full zip and then delete all the meta rows.
	var (
		mu       sync.Mutex
		unpacked []blob.Ref
		packed   []blob.Ref
		large    = map[blob.Ref]bool{} // the large blobs that packed are in
	)
	var grp syncutil.Group
	delGate := syncutil.NewGate(removeLookups)
	for _, br := range blobs {
		br := br
		delGate.Start()
		grp.Go(func() error {
			defer delGate.Done()
			m, err := s.getMetaRow(br)
			if err != nil {
				return err
			}
			mu.Lock()
			defer mu.Unlock()
			if m.isPacked() {
				packed = append(packed, br)
				large[m.largeRef] = true
			} else {
				unpacked = append(unpacked, br)
			}
			return nil
		})
	}
	if err := grp.Err(); err != nil {
		return err
	}
	if len(unpacked) > 0 {
		grp.Go(func() error {
			return s.small.RemoveBlobs(unpacked)
		})
	}
	if len(packed) > 0 {
		grp.Go(func() error {
			bm := s.meta.BeginBatch()
			now := time.Now()
			for zipRef := range large {
				bm.Set("d:"+zipRef.String(), fmt.Sprint(now.Unix()))
			}
			for _, br := range packed {
				bm.Delete("b:" + br.String())
			}
			return s.meta.CommitBatch(bm)
		})
	}
	return grp.Err()
}
func iterItems(itemch chan<- imageFile, errch chan<- error,
	filter filterFunc, client *http.Client, username string) {

	defer close(itemch)
	albums, err := picago.GetAlbums(client, username)
	if err != nil {
		errch <- err
		return
	}

	gate := syncutil.NewGate(parallelAlbumRoutines)
	for _, album := range albums {
		photos, err := picago.GetPhotos(client, username, album.ID)
		if err != nil {
			select {
			case errch <- err:
			default:
				return
			}
			continue
		}
		gate.Start()
		go func(albumName, albumTitle string) {
			defer gate.Done()
			for _, photo := range photos {
				img := imageFile{
					albumTitle: albumTitle,
					albumName:  albumName,
					fileName:   photo.Filename(),
					ID:         photo.ID,
				}
				ok, err := filter(img)
				if err != nil {
					errch <- err
					return
				}
				if !ok {
					continue
				}

				img.r, err = picago.DownloadPhoto(client, photo.URL)
				if err != nil {
					select {
					case errch <- fmt.Errorf("Get(%s): %v", photo.URL, err):
					default:
						return
					}
					continue
				}
				itemch <- img
			}
		}(album.Name, album.Title)
	}
}
// NewService builds a new Service. Zero timeout or maxProcs means no limit.
func NewService(th Thumbnailer, timeout time.Duration, maxProcs int) *Service {
	var g *syncutil.Gate
	if maxProcs > 0 {
		g = syncutil.NewGate(maxProcs)
	}
	return &Service{
		thumbnailer: th,
		timeout:     timeout,
		gate:        g,
	}
}
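// NewService leaves gate nil when maxProcs is zero, so whatever generates a
// thumbnail has to treat the gate as optional. A minimal sketch of that
// acquire/release pattern, assuming the Service fields above; the method name
// acquire is hypothetical, not the thumbnail service's real API.

func (s *Service) acquire() (release func()) {
	if s.gate == nil {
		return func() {} // no concurrency limit configured
	}
	s.gate.Start()
	return s.gate.Done
}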
func (im imp) Run(ctx *importer.RunContext) (err error) {
	log.Printf("pinboard: Running importer.")
	r := &run{
		RunContext: ctx,
		im:         im,
		postGate:   syncutil.NewGate(3),
		nextCursor: time.Now().Format(timeFormat),
		nextAfter:  time.Now(),
		lastPause:  pauseInterval,
	}
	_, err = r.importPosts()
	log.Printf("pinboard: Importer returned %v.", err)
	return
}
func newFromParams(server string, mode auth.AuthMode) *Client {
	httpClient := &http.Client{
		Transport: &http.Transport{
			MaxIdleConnsPerHost: maxParallelHTTP,
		},
	}
	return &Client{
		server:     server,
		httpClient: httpClient,
		httpGate:   syncutil.NewGate(maxParallelHTTP),
		haveCache:  noHaveCache{},
		log:        log.New(os.Stderr, "", log.Ldate|log.Ltime),
		authMode:   mode,
	}
}
func (s *Storage) RemoveBlobs(blobs []blob.Ref) error {
	if s.cache != nil {
		s.cache.RemoveBlobs(blobs)
	}
	gate := syncutil.NewGate(50) // arbitrary
	var grp syncutil.Group
	for i := range blobs {
		gate.Start()
		br := blobs[i]
		grp.Go(func() error {
			defer gate.Done()
			return s.client.DeleteObject(&googlestorage.Object{Bucket: s.bucket, Key: s.dirPrefix + br.String()})
		})
	}
	return grp.Err()
}
func (imp) Run(ctx *importer.RunContext) error {
	clientId, secret, err := ctx.Credentials()
	if err != nil {
		return err
	}
	acctNode := ctx.AccountNode()
	ocfg := baseOAuthConfig
	ocfg.ClientId, ocfg.ClientSecret = clientId, secret
	token := decodeToken(acctNode.Attr(acctAttrOAuthToken))
	transport := &oauth.Transport{
		Config:    &ocfg,
		Token:     &token,
		Transport: notOAuthTransport(ctx.HTTPClient()),
	}
	ctx.Context = ctx.Context.New(context.WithHTTPClient(transport.Client()))

	root := ctx.RootNode()
	if root.Attr(nodeattr.Title) == "" {
		if err := root.SetAttr(nodeattr.Title,
			fmt.Sprintf("%s %s - Google/Picasa Photos",
				acctNode.Attr(importer.AcctAttrGivenName),
				acctNode.Attr(importer.AcctAttrFamilyName))); err != nil {
			return err
		}
	}

	r := &run{
		RunContext:  ctx,
		incremental: !forceFullImport && acctNode.Attr(importer.AcctAttrCompletedVersion) == runCompleteVersion,
		photoGate:   syncutil.NewGate(3),
	}
	if err := r.importAlbums(); err != nil {
		return err
	}

	r.mu.Lock()
	anyErr := r.anyErr
	r.mu.Unlock()

	if !anyErr {
		if err := acctNode.SetAttrs(importer.AcctAttrCompletedVersion, runCompleteVersion); err != nil {
			return err
		}
	}

	return nil
}
func newUploader() *Uploader {
	var cc *client.Client
	var httpStats *httputil.StatsTransport
	if d := *flagBlobDir; d != "" {
		ss, err := dir.New(d)
		if err != nil && d == "discard" {
			ss = discardStorage{}
			err = nil
		}
		if err != nil {
			log.Fatalf("Error using dir %s as storage: %v", d, err)
		}
		cc = client.NewStorageClient(ss)
	} else {
		cc = client.NewOrFail()
		proxy := http.ProxyFromEnvironment
		if flagProxyLocal {
			proxy = proxyFromEnvironment
		}
		tr := cc.TransportForConfig(
			&client.TransportConfig{
				Proxy:   proxy,
				Verbose: *flagHTTP,
			})
		httpStats, _ = tr.(*httputil.StatsTransport)
		cc.SetHTTPClient(&http.Client{Transport: tr})
	}
	if *cmdmain.FlagVerbose {
		cc.SetLogger(log.New(cmdmain.Stderr, "", log.LstdFlags))
	} else {
		cc.SetLogger(nil)
	}

	pwd, err := os.Getwd()
	if err != nil {
		log.Fatalf("os.Getwd: %v", err)
	}

	return &Uploader{
		Client:    cc,
		transport: httpStats,
		pwd:       pwd,
		fdGate:    syncutil.NewGate(100), // gate things that waste fds, assuming a low system limit
	}
}
func (fr *FileReader) loadAllChunksSync() {
	gate := syncutil.NewGate(20) // num readahead chunk loads at a time
	fr.ForeachChunk(func(_ []blob.Ref, p BytesPart) error {
		if !p.BlobRef.Valid() {
			return nil
		}
		gate.Start()
		go func(br blob.Ref) {
			defer gate.Done()
			rc, _, err := fr.fetcher.Fetch(br)
			if err == nil {
				defer rc.Close()
				var b [1]byte
				rc.Read(b[:]) // fault in the blob
			}
		}(p.BlobRef)
		return nil
	})
}
func TestPackTwoIdenticalfiles(t *testing.T) {
	const fileSize = 1 << 20
	fileContents := randBytes(fileSize)
	testPack(t,
		func(sto blobserver.Storage) (err error) {
			if _, err = schema.WriteFileFromReader(sto, "a.txt", bytes.NewReader(fileContents)); err != nil {
				return
			}
			if _, err = schema.WriteFileFromReader(sto, "b.txt", bytes.NewReader(fileContents)); err != nil {
				return
			}
			return
		},
		func(pt *packTest) { pt.sto.packGate = syncutil.NewGate(1) }, // one pack at a time
		wantNumLargeBlobs(1),
		wantNumSmallBlobs(1), // just the "b.txt" file schema blob
		okayWithoutMeta("sha1-cb4399f6b3b31ace417e1ec9326f9818bb3f8387"),
	)
}
func (s *storage) StatBlobs(dest chan<- blob.SizedRef, blobs []blob.Ref) error {
	if len(blobs) == 0 {
		return nil
	}

	var (
		grp        syncutil.Group
		trySmallMu sync.Mutex
		trySmall   []blob.Ref
	)
	statGate := syncutil.NewGate(50) // arbitrary
	for _, br := range blobs {
		br := br
		statGate.Start()
		grp.Go(func() error {
			defer statGate.Done()
			m, err := s.getMetaRow(br)
			if err != nil {
				return err
			}
			if m.exists {
				dest <- blob.SizedRef{Ref: br, Size: m.size}
			} else {
				trySmallMu.Lock()
				trySmall = append(trySmall, br)
				trySmallMu.Unlock()
			}
			return nil
		})
	}
	if err := grp.Err(); err != nil {
		return err
	}
	if len(trySmall) == 0 {
		return nil
	}
	return s.small.StatBlobs(dest, trySmall)
}
func (ds *DiskStorage) StatBlobs(dest chan<- blob.SizedRef, blobs []blob.Ref) error {
	if len(blobs) == 0 {
		return nil
	}

	statSend := func(ref blob.Ref) error {
		fi, err := os.Stat(ds.blobPath(ds.partition, ref))
		switch {
		case err == nil && fi.Mode().IsRegular():
			dest <- blob.SizedRef{Ref: ref, Size: fi.Size()}
			return nil
		case err != nil && !os.IsNotExist(err):
			return err
		}
		return nil
	}

	if len(blobs) == 1 {
		return statSend(blobs[0])
	}

	errc := make(chan error, len(blobs))
	gt := syncutil.NewGate(maxParallelStats)
	for _, ref := range blobs {
		gt.Start()
		go func(ref blob.Ref) {
			defer gt.Done()
			errc <- statSend(ref)
		}(ref)
	}
	for _ = range blobs {
		if err := <-errc; err != nil {
			return err
		}
	}
	return nil
}
// New returns a new Camlistore Client.
//
// The provided server is either "host:port" (assumed http, not https) or a
// URL prefix, with or without a path, or a server alias from the client
// configuration file. A server alias should not be confused with a hostname,
// therefore it cannot contain any colon or period.
//
// Errors are not returned until subsequent operations.
func New(server string) *Client {
	if !isURLOrHostPort(server) {
		configOnce.Do(parseConfig)
		serverConf, ok := config.Servers[server]
		if !ok {
			log.Fatalf("%q looks like a server alias, but no such alias found in config at %v", server, osutil.UserClientConfigPath())
		}
		server = serverConf.Server
	}
	httpClient := &http.Client{
		Transport: &http.Transport{
			MaxIdleConnsPerHost: maxParallelHTTP,
		},
	}
	return &Client{
		server:     server,
		httpClient: httpClient,
		httpGate:   syncutil.NewGate(maxParallelHTTP),
		haveCache:  noHaveCache{},
		log:        log.New(os.Stderr, "", log.Ldate|log.Ltime),
		authMode:   auth.None{},
	}
}
func (r *run) importTweetsFromZip(userID string, zr *zip.Reader) error {
	log.Printf("Processing zip file with %d files", len(zr.File))

	tweetsNode, err := r.getTopLevelNode("tweets")
	if err != nil {
		return err
	}

	var (
		gate = syncutil.NewGate(tweetsAtOnce)
		grp  syncutil.Group
	)
	total := 0
	for _, zf := range zr.File {
		if !(strings.HasPrefix(zf.Name, "data/js/tweets/2") && strings.HasSuffix(zf.Name, ".js")) {
			continue
		}
		tweets, err := tweetsFromZipFile(zf)
		if err != nil {
			return fmt.Errorf("error reading tweets from %s: %v", zf.Name, err)
		}

		for i := range tweets {
			total++
			tweet := tweets[i]
			gate.Start()
			grp.Go(func() error {
				defer gate.Done()
				_, err := r.importTweet(tweetsNode, tweet, false)
				return err
			})
		}
	}
	err = grp.Err()
	log.Printf("zip import of tweets: %d total, err = %v", total, err)
	return err
}
func (s *storage) zipPartsInUse(br blob.Ref) ([]blob.Ref, error) {
	var (
		mu    sync.Mutex
		inUse []blob.Ref
	)
	var grp syncutil.Group
	gate := syncutil.NewGate(20) // arbitrary constant
	err := s.foreachZipBlob(br, func(bap BlobAndPos) error {
		gate.Start()
		grp.Go(func() error {
			defer gate.Done()
			mr, err := s.getMetaRow(bap.Ref)
			if err != nil {
				return err
			}
			if mr.isPacked() {
				mu.Lock()
				inUse = append(inUse, mr.largeRef)
				mu.Unlock()
			}
			return nil
		})
		return nil
	})
	if os.IsNotExist(err) {
		// An already-deleted blob from large isn't considered
		// to be in-use.
		return nil, nil
	}
	if err != nil {
		return nil, err
	}
	if err := grp.Err(); err != nil {
		return nil, err
	}
	return inUse, nil
}
		return
	}
	return format
}

// These gates control the max concurrency of slurping raw images
// (e.g. JPEG bytes) to RAM, and then decoding and resizing them,
// respectively. We allow more concurrency for the former because
// it's slower and less memory-intensive. The actual resizing takes
// much more CPU and RAM.
// TODO: these numbers were just guesses and not based on any
// data. Measure? Make these configurable? Automatically tuned
// somehow? Based on memory usage/availability?
var (
	scaleImageGateSlurp  = syncutil.NewGate(5)
	scaleImageGateResize = syncutil.NewGate(2)
)

type formatAndImage struct {
	format string
	image  []byte
}

func (ih *ImageHandler) scaleImage(fileRef blob.Ref) (*formatAndImage, error) {
	fr, err := schema.NewFileReader(ih.storageSeekFetcher(), fileRef)
	if err != nil {
		return nil, err
	}
	defer fr.Close()
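// scaleImage is truncated above, but the two gates suggest the shape of the
// rest of it: slurp the encoded bytes under scaleImageGateSlurp, then decode
// and resize under the tighter scaleImageGateResize. A hedged sketch of that
// ordering; slurpBytes and decodeAndResize are invented names for illustration,
// not functions from the handler:
//
//	scaleImageGateSlurp.Start()
//	raw, err := slurpBytes(fr) // read the encoded image into RAM
//	scaleImageGateSlurp.Done()
//	if err != nil {
//		return nil, err
//	}
//
//	scaleImageGateResize.Start()
//	defer scaleImageGateResize.Done()
//	return decodeAndResize(raw) // the CPU- and RAM-heavy step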
func writeFileChunks(bs blobserver.StatReceiver, file *Builder, r io.Reader) (n int64, spans []span, outerr error) {
	src := &noteEOFReader{r: r}
	bufr := bufio.NewReaderSize(src, bufioReaderSize)
	spans = []span{} // the tree of spans, cut on interesting rollsum boundaries
	rs := rollsum.New()
	var last int64
	var buf bytes.Buffer
	blobSize := 0 // of the next blob being built, should be same as buf.Len()

	const chunksInFlight = 32 // at ~64 KB chunks, this is ~2MB memory per file
	gatec := syncutil.NewGate(chunksInFlight)
	firsterrc := make(chan error, 1)

	// uploadLastSpan runs in the same goroutine as the loop below and is responsible for
	// starting uploading the contents of the buf. It returns false if there's been
	// an error and the loop below should be stopped.
	uploadLastSpan := func() bool {
		chunk := buf.String()
		buf.Reset()
		br := blob.SHA1FromString(chunk)
		spans[len(spans)-1].br = br
		select {
		case outerr = <-firsterrc:
			return false
		default:
			// No error seen so far, continue.
		}
		gatec.Start()
		go func() {
			defer gatec.Done()
			if _, err := uploadString(bs, br, chunk); err != nil {
				select {
				case firsterrc <- err:
				default:
				}
			}
		}()
		return true
	}

	for {
		c, err := bufr.ReadByte()
		if err == io.EOF {
			if n != last {
				spans = append(spans, span{from: last, to: n})
				if !uploadLastSpan() {
					return
				}
			}
			break
		}
		if err != nil {
			return 0, nil, err
		}

		buf.WriteByte(c)
		n++
		blobSize++
		rs.Roll(c)

		var bits int
		onRollSplit := rs.OnSplit()
		switch {
		case blobSize == maxBlobSize:
			bits = 20 // arbitrary node weight; 1<<20 == 1MB
		case src.sawEOF:
			// Don't split. End is coming soon enough.
			continue
		case onRollSplit && n > firstChunkSize && blobSize > tooSmallThreshold:
			bits = rs.Bits()
		case n == firstChunkSize:
			bits = 18 // 1 << 18 == 256KB
		default:
			// Don't split.
			continue
		}
		blobSize = 0

		// Take any spans from the end of the spans slice that
		// have a smaller 'bits' score and make them children
		// of this node.
		var children []span
		childrenFrom := len(spans)
		for childrenFrom > 0 && spans[childrenFrom-1].bits < bits {
			childrenFrom--
		}
		if nCopy := len(spans) - childrenFrom; nCopy > 0 {
			children = make([]span, nCopy)
			copy(children, spans[childrenFrom:])
			spans = spans[:childrenFrom]
		}

		spans = append(spans, span{from: last, to: n, bits: bits, children: children})
		last = n
		if !uploadLastSpan() {
			return
		}
	}

	// outerr, if non-nil, was already set by uploadLastSpan inside the loop above.
	if outerr != nil {
		return 0, nil, outerr
	}

	// Wait for all uploads to finish, one way or another, and then
	// see if any generated errors.
	// Once this loop is done, we own all the tokens in gatec, so nobody
	// else can have one outstanding.
	for i := 0; i < chunksInFlight; i++ {
		gatec.Start()
	}
	select {
	case err := <-firsterrc:
		return 0, nil, err
	default:
	}

	return n, spans, nil
}
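// The tail of writeFileChunks waits for stragglers by re-acquiring every slot
// in gatec: once all chunksInFlight Start calls succeed, no upload goroutine
// can still be running. Pulled out as a standalone helper, the idiom is just
// the loop below; drainGate is a hypothetical name, not part of syncutil.

// drainGate blocks until it holds all n slots of g, i.e. until every goroutine
// that paired a Start with a deferred Done has finished.
func drainGate(g *syncutil.Gate, n int) {
	for i := 0; i < n; i++ {
		g.Start()
	}
}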
func (r *run) importTweets(userID string) error {
	maxId := ""
	continueRequests := true

	tweetsNode, err := r.getTopLevelNode("tweets")
	if err != nil {
		return err
	}

	numTweets := 0
	sawTweet := map[string]bool{}

	// If attrs changes, the expected responses for the RoundTripper of
	// MakeTestData (testdata.go) should change accordingly.
	attrs := []string{
		"user_id", userID,
		"count", strconv.Itoa(tweetRequestLimit),
	}
	for continueRequests {
		if r.Context.IsCanceled() {
			r.errorf("Twitter importer: interrupted")
			return context.ErrCanceled
		}

		var resp []*apiTweetItem
		var err error
		if maxId == "" {
			log.Printf("Fetching tweets for userid %s", userID)
			err = r.doAPI(&resp, userTimeLineAPIPath, attrs...)
		} else {
			log.Printf("Fetching tweets for userid %s with max ID %s", userID, maxId)
			err = r.doAPI(&resp, userTimeLineAPIPath,
				append(attrs, "max_id", maxId)...)
		}
		if err != nil {
			return err
		}

		var (
			newThisBatch = 0
			allDupMu     sync.Mutex
			allDups      = true
			gate         = syncutil.NewGate(tweetsAtOnce)
			grp          syncutil.Group
		)
		for i := range resp {
			tweet := resp[i]

			// Dup-suppression.
			if sawTweet[tweet.Id] {
				continue
			}
			sawTweet[tweet.Id] = true
			newThisBatch++
			maxId = tweet.Id
			gate.Start()
			grp.Go(func() error {
				defer gate.Done()
				dup, err := r.importTweet(tweetsNode, tweet, true)
				if !dup {
					allDupMu.Lock()
					allDups = false
					allDupMu.Unlock()
				}
				if err != nil {
					r.errorf("Twitter importer: error importing tweet %s %v", tweet.Id, err)
				}
				return err
			})
		}
		if err := grp.Err(); err != nil {
			return err
		}
		numTweets += newThisBatch
		log.Printf("Imported %d tweets this batch; %d total.", newThisBatch, numTweets)
		if r.incremental && allDups {
			log.Printf("twitter incremental import found end batch")
			break
		}
		continueRequests = newThisBatch > 0
	}
	log.Printf("Successfully did full run of importing %d tweets", numTweets)
	return nil
}
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package swift

import (
	"io"
	"log"

	"camlistore.org/pkg/blob"
	"camlistore.org/pkg/syncutil"
)

var receiveGate = syncutil.NewGate(5) // arbitrary

func (sto *swiftStorage) ReceiveBlob(b blob.Ref, source io.Reader) (sr blob.SizedRef, err error) {
	log.Println("[SWIFT] ReceiveBlob")
	receiveGate.Start()
	defer receiveGate.Done()
	obj, err := sto.client.ObjectCreate(sto.container, b.String(), true, "", "", nil)
	if err != nil {
		return sr, err
	}
	size, err := io.Copy(obj, source)
	if err != nil {
		return sr, err
	}
	return blob.SizedRef{Ref: b, Size: uint32(size)}, nil
}
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package s3

import (
	"fmt"
	"os"

	"camlistore.org/pkg/blob"
	"camlistore.org/pkg/syncutil"
)

var statGate = syncutil.NewGate(20) // arbitrary

func (sto *s3Storage) StatBlobs(dest chan<- blob.SizedRef, blobs []blob.Ref) error {
	var wg syncutil.Group
	for _, br := range blobs {
		br := br
		statGate.Start()
		wg.Go(func() error {
			defer statGate.Done()
			size, err := sto.s3Client.Stat(br.String(), sto.bucket)
			if err == nil {
				dest <- blob.SizedRef{Ref: br, Size: uint32(size)}
				return nil
			}
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package mongo

import (
	"camlistore.org/pkg/blob"
	"camlistore.org/pkg/syncutil"

	"camlistore.org/third_party/labix.org/v2/mgo"
	"camlistore.org/third_party/labix.org/v2/mgo/bson"
)

var removeGate = syncutil.NewGate(100) // arbitrary

func (m *mongoStorage) RemoveBlobs(blobs []blob.Ref) error {
	var wg syncutil.Group
	for _, blob := range blobs {
		blob := blob
		removeGate.Start()
		wg.Go(func() error {
			defer removeGate.Done()
			err := m.c.Remove(bson.M{"key": blob.String()})
			if err == mgo.ErrNotFound {
				return nil
			}
			return err
		})
func (s *storage) init() {
	s.packGate = syncutil.NewGate(10)
}
See the License for the specific language governing permissions and
limitations under the License.
*/

package localdisk

import (
	"os"

	"camlistore.org/pkg/blob"
	"camlistore.org/pkg/syncutil"
)

const maxParallelStats = 20

var statGate = syncutil.NewGate(maxParallelStats)

func (ds *DiskStorage) StatBlobs(dest chan<- blob.SizedRef, blobs []blob.Ref) error {
	if len(blobs) == 0 {
		return nil
	}

	statSend := func(ref blob.Ref) error {
		fi, err := os.Stat(ds.blobPath(ds.partition, ref))
		switch {
		case err == nil && fi.Mode().IsRegular():
			dest <- blob.SizedRef{Ref: ref, Size: fi.Size()}
			return nil
		case err != nil && !os.IsNotExist(err):
			return err
		}
	}
	rsc := struct {
		io.ReadSeeker
		io.Closer
	}{
		rs,
		types.NopCloser,
	}
	return rsc, meta.size, nil
}

func (s *storage) filename(file int) string {
	return filepath.Join(s.root, fmt.Sprintf("pack-%05d.blobs", file))
}

var removeGate = syncutil.NewGate(20) // arbitrary

// RemoveBlobs removes the blobs from index and pads data with zero bytes
func (s *storage) RemoveBlobs(blobs []blob.Ref) error {
	batch := s.index.BeginBatch()
	var wg syncutil.Group
	for _, br := range blobs {
		br := br
		removeGate.Start()
		batch.Delete(br.String())
		wg.Go(func() error {
			defer removeGate.Done()
			if err := s.delete(br); err != nil {
				return err
			}
			return nil