Example 1
func (s *Storage) StatBlobs(dest chan<- blob.SizedRef, blobs []blob.Ref) error {
	// TODO: use cache
	var grp syncutil.Group
	gate := syncutil.NewGate(20) // arbitrary cap
	for i := range blobs {
		br := blobs[i]
		gate.Start()
		grp.Go(func() error {
			defer gate.Done()
			fi, err := s.b.GetFileInfoByName(s.dirPrefix + br.String())
			if err == b2.FileNotFoundError {
				return nil
			}
			if err != nil {
				return err
			}
			if br.HashName() == "sha1" && fi.ContentSHA1 != br.Digest() {
				return errors.New("b2: remote ContentSHA1 mismatch")
			}
			size := fi.ContentLength
			if size > constants.MaxBlobSize {
				return fmt.Errorf("blob %s stat size too large (%d)", br, size)
			}
			dest <- blob.SizedRef{Ref: br, Size: uint32(size)}
			return nil
		})
	}
	return grp.Err()
}
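
Nearly every example on this page combines the same two go4.org/syncutil pieces: a Gate that caps how many goroutines run at once, and a Group that collects their errors. The following is a minimal, self-contained sketch of that pattern, not code from Camlistore; the item type and check function are placeholders.

package main

import (
	"fmt"

	"go4.org/syncutil"
)

// checkAll runs check on every item, with at most 5 checks in flight.
// It returns the first error any check produced, after all finish.
func checkAll(items []string, check func(string) error) error {
	gate := syncutil.NewGate(5) // arbitrary concurrency cap
	var grp syncutil.Group
	for _, it := range items {
		it := it     // capture the loop variable for the closure
		gate.Start() // blocks while 5 checks are already running
		grp.Go(func() error {
			defer gate.Done() // release the slot when done
			return check(it)
		})
	}
	return grp.Err() // waits for all goroutines; first non-nil error
}

func main() {
	err := checkAll([]string{"a", "b", "c"}, func(s string) error {
		fmt.Println("checking", s)
		return nil
	})
	fmt.Println("err =", err)
}

Note that gate.Start() is called by the loop itself, not inside the goroutine: once the gate is full, the loop blocks, which is what bounds both the concurrency and the number of pending closures.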
Example 2
func (sh *SyncHandler) runFullValidation() {
	var wg sync.WaitGroup

	sh.mu.Lock()
	shards := sh.vshards
	wg.Add(len(shards))
	sh.mu.Unlock()

	sh.logf("full validation beginning with %d shards", len(shards))

	const maxShardWorkers = 30 // arbitrary
	gate := syncutil.NewGate(maxShardWorkers)

	for _, pfx := range shards {
		pfx := pfx
		gate.Start()
		go func() {
			defer wg.Done()
			defer gate.Done()
			sh.validateShardPrefix(pfx)
		}()
	}
	wg.Wait()
	sh.logf("Validation complete")
}
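
When the workers report no errors, the gate pairs with a plain sync.WaitGroup instead of a syncutil.Group, as in runFullValidation above (note that wg.Done must be deferred, or Wait can return before the work does). A minimal sketch of that variant, with a placeholder work function:

package main

import (
	"fmt"
	"sync"

	"go4.org/syncutil"
)

func runAll(prefixes []string, work func(string)) {
	var wg sync.WaitGroup
	wg.Add(len(prefixes))       // count every job before any can finish
	gate := syncutil.NewGate(3) // at most 3 workers at a time
	for _, pfx := range prefixes {
		pfx := pfx
		gate.Start()
		go func() {
			defer wg.Done() // deferred: Wait must outlast the work
			defer gate.Done()
			work(pfx)
		}()
	}
	wg.Wait()
}

func main() {
	runAll([]string{"00", "01", "02", "03"}, func(p string) {
		fmt.Println("validated shard", p)
	})
}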
Example 3
func (s *storage) RemoveBlobs(blobs []blob.Ref) error {
	// Plan:
	//  -- delete from small (if it's there)
	//  -- if in big, update the meta index to note that it's there, but deleted.
	//  -- fetch big's zip file (constructed from a ReaderAt that is all dummy zeros +
	//     the zip's TOC only, relying on big being a SubFetcher, and keeping info in
	//     the meta about the offset of the TOC+total size of each big's zip)
	//  -- iterate over the zip's blobs (at some point). If all are marked deleted, actually RemoveBlob
	//     on big to delete the full zip and then delete all the meta rows.
	var (
		mu       sync.Mutex
		unpacked []blob.Ref
		packed   []blob.Ref
		large    = map[blob.Ref]bool{} // the large blobs that packed are in
	)
	var grp syncutil.Group
	delGate := syncutil.NewGate(removeLookups)
	for _, br := range blobs {
		br := br
		delGate.Start()
		grp.Go(func() error {
			defer delGate.Done()
			m, err := s.getMetaRow(br)
			if err != nil {
				return err
			}
			mu.Lock()
			defer mu.Unlock()
			if m.isPacked() {
				packed = append(packed, br)
				large[m.largeRef] = true
			} else {
				unpacked = append(unpacked, br)
			}
			return nil
		})
	}
	if err := grp.Err(); err != nil {
		return err
	}
	if len(unpacked) > 0 {
		grp.Go(func() error {
			return s.small.RemoveBlobs(unpacked)
		})
	}
	if len(packed) > 0 {
		grp.Go(func() error {
			bm := s.meta.BeginBatch()
			now := time.Now()
			for zipRef := range large {
				bm.Set("d:"+zipRef.String(), fmt.Sprint(now.Unix()))
			}
			for _, br := range packed {
				bm.Delete("b:" + br.String())
			}
			return s.meta.CommitBatch(bm)
		})
	}
	return grp.Err()
}
Example 4
func (s *Storage) StatBlobs(dest chan<- blob.SizedRef, blobs []blob.Ref) error {
	// TODO: use cache
	// TODO(mpl): use context from caller, once one is available (issue 733)
	ctx := context.TODO()
	var grp syncutil.Group
	gate := syncutil.NewGate(20) // arbitrary cap
	for i := range blobs {
		br := blobs[i]
		gate.Start()
		grp.Go(func() error {
			defer gate.Done()
			attrs, err := s.client.Bucket(s.bucket).Object(s.dirPrefix + br.String()).Attrs(ctx)
			if err == storage.ErrObjectNotExist {
				return nil
			}
			if err != nil {
				return err
			}
			size := attrs.Size
			if size > constants.MaxBlobSize {
				return fmt.Errorf("blob %s stat size too large (%d)", br, size)
			}
			dest <- blob.SizedRef{Ref: br, Size: uint32(size)}
			return nil
		})
	}
	return grp.Err()
}
Example 5
func (s *Storage) RemoveBlobs(blobs []blob.Ref) error {
	if s.cache != nil {
		s.cache.RemoveBlobs(blobs)
	}
	gate := syncutil.NewGate(50) // arbitrary
	var grp syncutil.Group
	for i := range blobs {
		gate.Start()
		br := blobs[i]
		grp.Go(func() error {
			defer gate.Done()
			fi, err := s.b.GetFileInfoByName(s.dirPrefix + br.String())
			if err != nil {
				return err
			}
			if fi == nil {
				return nil
			}
			if br.HashName() == "sha1" && fi.ContentSHA1 != br.Digest() {
				return errors.New("b2: remote ContentSHA1 mismatch")
			}
			return s.cl.DeleteFile(fi.ID, fi.Name)
		})
	}
	return grp.Err()
}
Example 6
func (s *Storage) StatBlobs(dest chan<- blob.SizedRef, blobs []blob.Ref) error {
	// TODO: use cache
	var grp syncutil.Group
	gate := syncutil.NewGate(20) // arbitrary cap
	for i := range blobs {
		br := blobs[i]
		gate.Start()
		grp.Go(func() error {
			defer gate.Done()
			size, exists, err := s.client.StatObject(
				&googlestorage.Object{Bucket: s.bucket, Key: s.dirPrefix + br.String()})
			if err != nil {
				return err
			}
			if !exists {
				return nil
			}
			if size > constants.MaxBlobSize {
				return fmt.Errorf("blob %s stat size too large (%d)", br, size)
			}
			dest <- blob.SizedRef{Ref: br, Size: uint32(size)}
			return nil
		})
	}
	return grp.Err()
}
Example 7
// Readdir implements the Directory interface.
func (dr *DirReader) Readdir(n int) (entries []DirectoryEntry, err error) {
	sts, err := dr.StaticSet()
	if err != nil {
		return nil, fmt.Errorf("schema/dirreader: can't get StaticSet: %v", err)
	}
	up := dr.current + n
	if n <= 0 {
		dr.current = 0
		up = len(sts)
	} else if n > (len(sts) - dr.current) {
		err = io.EOF
		up = len(sts)
	}

	// TODO(bradfitz): push down information to the fetcher
	// (e.g. cachingfetcher -> remote client http) that we're
	// going to load a bunch, so the HTTP client (if not using
	// SPDY) can do discovery and see if the server supports a
	// batch handler, then get them all in one round-trip, rather
	// than attacking the server with hundreds of parallel TLS
	// setups.

	type res struct {
		ent DirectoryEntry
		err error
	}
	var cs []chan res

	// Kick off all directory entry loads.
	gate := syncutil.NewGate(20) // Limit IO concurrency
	for _, entRef := range sts[dr.current:up] {
		c := make(chan res, 1)
		cs = append(cs, c)
		gate.Start()
		go func(entRef blob.Ref) {
			defer gate.Done()
			entry, err := NewDirectoryEntryFromBlobRef(dr.fetcher, entRef)
			c <- res{entry, err}
		}(entRef)
	}

	for _, c := range cs {
		res := <-c
		if res.err != nil {
			return nil, fmt.Errorf("schema/dirreader: can't create dirEntry: %v", res.err)
		}
		entries = append(entries, res.ent)
	}
	return entries, err
}
Example 8
func getTestClient(t *testing.T) {
	accessKey := os.Getenv("AWS_ACCESS_KEY_ID")
	secret := os.Getenv("AWS_ACCESS_KEY_SECRET")
	if accessKey != "" && secret != "" {
		tc = &Client{
			Auth:      &Auth{AccessKey: accessKey, SecretAccessKey: secret},
			Transport: http.DefaultTransport,
			PutGate:   syncutil.NewGate(5),
		}
		return
	}
	t.Logf("no AWS_ACCESS_KEY_ID or AWS_ACCESS_KEY_SECRET set in environment; trying against local fakes3 instead.")
	var ip string
	containerID, ip = dockertest.SetupFakeS3Container(t)
	hostname := ip + ":4567"
	tc = &Client{
		Auth:      &Auth{AccessKey: "foo", SecretAccessKey: "bar", Hostname: hostname},
		Transport: http.DefaultTransport,
		PutGate:   syncutil.NewGate(5),
		NoSSL:     true,
	}
}
Example 9
// NewService builds a new Service. Zero timeout or maxProcs means no limit.
func NewService(th Thumbnailer, timeout time.Duration, maxProcs int) *Service {

	var g *syncutil.Gate
	if maxProcs > 0 {
		g = syncutil.NewGate(maxProcs)
	}

	return &Service{
		thumbnailer: th,
		timeout:     timeout,
		gate:        g,
	}
}
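
Since NewService leaves the gate nil when maxProcs is zero, whatever code uses it must treat the gate as optional. A hedged sketch of that nil-check pattern; the type and method below are illustrative, not the actual Service implementation:

package main

import (
	"fmt"

	"go4.org/syncutil"
)

type service struct {
	gate *syncutil.Gate // nil means no concurrency limit
}

// do runs fn, acquiring a gate slot first only if a gate is configured.
func (s *service) do(fn func()) {
	if s.gate != nil {
		s.gate.Start()
		defer s.gate.Done()
	}
	fn()
}

func main() {
	limited := &service{gate: syncutil.NewGate(2)} // maxProcs > 0
	limited.do(func() { fmt.Println("gated work") })

	unlimited := &service{} // zero maxProcs: nil gate, no limit
	unlimited.do(func() { fmt.Println("ungated work") })
}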
Example 10
func newKeyValueFromConfig(cfg jsonconfig.Obj) (sorted.KeyValue, error) {
	if !compiled {
		return nil, ErrNotCompiled
	}

	file := cfg.RequiredString("file")
	if err := cfg.Validate(); err != nil {
		return nil, err
	}

	fi, err := os.Stat(file)
	if os.IsNotExist(err) || (err == nil && fi.Size() == 0) {
		if err := initDB(file); err != nil {
			return nil, fmt.Errorf("could not initialize sqlite DB at %s: %v", file, err)
		}
	}
	db, err := sql.Open("sqlite3", file)
	if err != nil {
		return nil, err
	}
	kv := &keyValue{
		file: file,
		db:   db,
		KeyValue: &sqlkv.KeyValue{
			DB:   db,
			Gate: syncutil.NewGate(1),
		},
	}

	version, err := kv.SchemaVersion()
	if err != nil {
		return nil, fmt.Errorf("error getting schema version (need to init database with 'camtool dbinit %s'?): %v", file, err)
	}

	if err := kv.ping(); err != nil {
		return nil, err
	}

	if version != requiredSchemaVersion {
		if env.IsDev() {
			// Good signal that we're using the devcam server, so help out
			// the user with a more useful tip:
			return nil, fmt.Errorf("database schema version is %d; expect %d (run \"devcam server --wipe\" to wipe both your blobs and re-populate the database schema)", version, requiredSchemaVersion)
		}
		return nil, fmt.Errorf("database schema version is %d; expect %d (need to re-init/upgrade database?)",
			version, requiredSchemaVersion)
	}

	return kv, nil
}
Example 11
func (im imp) Run(ctx *importer.RunContext) (err error) {
	log.Printf("pinboard: Running importer.")
	r := &run{
		RunContext: ctx,
		im:         im,
		postGate:   syncutil.NewGate(3),
		nextCursor: time.Now().Format(timeFormat),
		nextAfter:  time.Now(),
		lastPause:  pauseInterval,
	}
	_, err = r.importPosts()
	log.Printf("pinboard: Importer returned %v.", err)
	return
}
Example 12
func getTestClient(t *testing.T) bool {
	accessKey := os.Getenv("AWS_ACCESS_KEY_ID")
	secret := os.Getenv("AWS_ACCESS_KEY_SECRET")
	if accessKey == "" || secret == "" {
		t.Logf("Skipping test; no AWS_ACCESS_KEY_ID or AWS_ACCESS_KEY_SECRET set in environment")
		return false
	}
	tc = &Client{
		Auth:      &Auth{AccessKey: accessKey, SecretAccessKey: secret},
		Transport: http.DefaultTransport,
		PutGate:   syncutil.NewGate(5),
	}
	return true
}
Example 13
func (s *Storage) RemoveBlobs(blobs []blob.Ref) error {
	if s.cache != nil {
		s.cache.RemoveBlobs(blobs)
	}
	gate := syncutil.NewGate(50) // arbitrary
	var grp syncutil.Group
	for i := range blobs {
		gate.Start()
		br := blobs[i]
		grp.Go(func() error {
			defer gate.Done()
			return s.client.DeleteObject(&googlestorage.Object{Bucket: s.bucket, Key: s.dirPrefix + br.String()})
		})
	}
	return grp.Err()
}
Example 14
// New returns a new local disk storage implementation at the provided
// root directory, which must already exist.
func New(root string) (*DiskStorage, error) {
	// Local disk.
	fi, err := os.Stat(root)
	if os.IsNotExist(err) {
		// As a special case, we auto-create the "packed" directory for subpacked.
		if filepath.Base(root) == "packed" {
			if err := os.Mkdir(root, 0700); err != nil {
				return nil, fmt.Errorf("failed to mkdir packed directory: %v", err)
			}
			fi, err = os.Stat(root)
		} else {
			return nil, fmt.Errorf("Storage root %q doesn't exist", root)
		}
	}
	if err != nil {
		return nil, fmt.Errorf("Failed to stat directory %q: %v", root, err)
	}
	if !fi.IsDir() {
		return nil, fmt.Errorf("Storage root %q exists but is not a directory.", root)
	}
	ds := &DiskStorage{
		root:      root,
		dirLockMu: new(sync.RWMutex),
		gen:       local.NewGenerationer(root),
	}
	if err := ds.migrate3to2(); err != nil {
		return nil, fmt.Errorf("Error updating localdisk format: %v", err)
	}
	if _, _, err := ds.StorageGeneration(); err != nil {
		return nil, fmt.Errorf("Error initialization generation for %q: %v", root, err)
	}
	ul, err := osutil.MaxFD()
	if err != nil {
		if err == osutil.ErrNotSupported {
			// Do not set the gate on Windows, since we don't know the ulimit.
			return ds, nil
		}
		return nil, err
	}
	if ul < minFDLimit {
		return nil, fmt.Errorf("The max number of open file descriptors on your system (ulimit -n) is too low. Please fix it with 'ulimit -S -n X' with X being at least %d.", recommendedFDLimit)
	}
	// Setting the gate to 80% of the ulimit, to leave a bit of room for other file ops happening in Camlistore.
	// TODO(mpl): make this used and enforced Camlistore-wide. Issue #837.
	ds.tmpFileGate = syncutil.NewGate(int(ul * 80 / 100))
	return ds, nil
}
Example 15
func (imp) Run(ctx *importer.RunContext) error {
	clientId, secret, err := ctx.Credentials()
	if err != nil {
		return err
	}
	acctNode := ctx.AccountNode()
	ocfg := baseOAuthConfig
	ocfg.ClientId, ocfg.ClientSecret = clientId, secret
	token := decodeToken(acctNode.Attr(acctAttrOAuthToken))
	transport := &oauth.Transport{
		Config:    &ocfg,
		Token:     &token,
		Transport: notOAuthTransport(ctxutil.Client(ctx)),
	}
	ctx.Context = context.WithValue(ctx.Context, ctxutil.HTTPClient, transport.Client())

	root := ctx.RootNode()
	if root.Attr(nodeattr.Title) == "" {
		if err := root.SetAttr(nodeattr.Title,
			fmt.Sprintf("%s %s - Google/Picasa Photos",
				acctNode.Attr(importer.AcctAttrGivenName),
				acctNode.Attr(importer.AcctAttrFamilyName))); err != nil {
			return err
		}
	}

	r := &run{
		RunContext:  ctx,
		incremental: !forceFullImport && acctNode.Attr(importer.AcctAttrCompletedVersion) == runCompleteVersion,
		photoGate:   syncutil.NewGate(3),
	}
	if err := r.importAlbums(); err != nil {
		return err
	}

	r.mu.Lock()
	anyErr := r.anyErr
	r.mu.Unlock()
	if !anyErr {
		if err := acctNode.SetAttrs(importer.AcctAttrCompletedVersion, runCompleteVersion); err != nil {
			return err
		}
	}

	return nil
}
Example 16
func newClient(server string, mode auth.AuthMode, opts ...ClientOption) *Client {
	c := &Client{
		server:    server,
		haveCache: noHaveCache{},
		log:       log.New(os.Stderr, "", log.Ldate|log.Ltime),
		authMode:  mode,
	}
	for _, v := range opts {
		v.modifyClient(c)
	}
	if c.httpClient == nil {
		c.httpClient = &http.Client{
			Transport: c.transportForConfig(c.transportConfig),
		}
	}
	c.httpGate = syncutil.NewGate(httpGateSize(c.httpClient.Transport))
	return c
}
Example 17
func (imp) Run(rctx *importer.RunContext) error {
	clientID, secret, err := rctx.Credentials()
	if err != nil {
		return err
	}
	acctNode := rctx.AccountNode()

	ocfg := &oauth2.Config{
		Endpoint:     google.Endpoint,
		ClientID:     clientID,
		ClientSecret: secret,
		Scopes:       []string{scopeURL},
	}

	token := decodeToken(acctNode.Attr(acctAttrOAuthToken))
	baseCtx := rctx.Context()
	ctx := context.WithValue(baseCtx, ctxutil.HTTPClient, ocfg.Client(baseCtx, token))

	root := rctx.RootNode()
	if root.Attr(nodeattr.Title) == "" {
		if err := root.SetAttr(
			nodeattr.Title,
			fmt.Sprintf("%s - Google Photos", acctNode.Attr(importer.AcctAttrName)),
		); err != nil {
			return err
		}
	}

	r := &run{
		RunContext:  rctx,
		incremental: !forceFullImport && acctNode.Attr(importer.AcctAttrCompletedVersion) == runCompleteVersion,
		photoGate:   syncutil.NewGate(3),
	}
	if err := r.importAlbums(ctx); err != nil {
		return err
	}

	if err := acctNode.SetAttrs(importer.AcctAttrCompletedVersion, runCompleteVersion); err != nil {
		return err
	}

	return nil
}
Example 18
func (fr *FileReader) loadAllChunksSync() {
	gate := syncutil.NewGate(20) // num readahead chunk loads at a time
	fr.ForeachChunk(func(_ []blob.Ref, p BytesPart) error {
		if !p.BlobRef.Valid() {
			return nil
		}
		gate.Start()
		go func(br blob.Ref) {
			defer gate.Done()
			rc, _, err := fr.fetcher.Fetch(br)
			if err == nil {
				defer rc.Close()
				var b [1]byte
				rc.Read(b[:]) // fault in the blob
			}
		}(p.BlobRef)
		return nil
	})
}
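
loadAllChunksSync returns as soon as the last readahead goroutine is launched: the gate only bounds how many fetches run at once, and nothing waits for them to finish, since they exist only to warm the fetcher. If a caller did need to wait, one option is to drain the gate by reacquiring every slot, since a gate is a counting semaphore. A sketch of that trick, separate from the code above:

package main

import (
	"fmt"

	"go4.org/syncutil"
)

func main() {
	const slots = 4
	gate := syncutil.NewGate(slots)
	for i := 0; i < 8; i++ {
		i := i
		gate.Start()
		go func() {
			defer gate.Done()
			fmt.Println("prefetched chunk", i)
		}()
	}
	// Drain the gate: once this loop holds all 4 slots, every worker
	// must have called Done, so this acts as a wait-for-all barrier.
	for i := 0; i < slots; i++ {
		gate.Start()
	}
	fmt.Println("all prefetches finished")
}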
Example 19
func TestPackTwoIdenticalfiles(t *testing.T) {
	const fileSize = 1 << 20
	fileContents := randBytes(fileSize)
	testPack(t,
		func(sto blobserver.Storage) (err error) {
			if _, err = schema.WriteFileFromReader(sto, "a.txt", bytes.NewReader(fileContents)); err != nil {
				return
			}
			if _, err = schema.WriteFileFromReader(sto, "b.txt", bytes.NewReader(fileContents)); err != nil {
				return
			}
			return
		},
		func(pt *packTest) { pt.sto.packGate = syncutil.NewGate(1) }, // one pack at a time
		wantNumLargeBlobs(1),
		wantNumSmallBlobs(1), // just the "b.txt" file schema blob
		okayWithoutMeta("sha1-cb4399f6b3b31ace417e1ec9326f9818bb3f8387"),
	)
}
Example 20
func newUploader() *Uploader {
	var cc *client.Client
	var httpStats *httputil.StatsTransport
	if d := *flagBlobDir; d != "" {
		ss, err := dir.New(d)
		if err != nil && d == "discard" {
			ss = discardStorage{}
			err = nil
		}
		if err != nil {
			log.Fatalf("Error using dir %s as storage: %v", d, err)
		}
		cc = client.NewStorageClient(ss)
	} else {
		var proxy func(*http.Request) (*url.URL, error)
		if flagProxyLocal {
			proxy = proxyFromEnvironment
		}
		cc = client.NewOrFail(client.OptionTransportConfig(&client.TransportConfig{
			Proxy:   proxy,
			Verbose: *flagHTTP,
		}))
		httpStats = cc.HTTPStats()
	}
	if *cmdmain.FlagVerbose {
		cc.SetLogger(log.New(cmdmain.Stderr, "", log.LstdFlags))
	} else {
		cc.SetLogger(nil)
	}

	pwd, err := os.Getwd()
	if err != nil {
		log.Fatalf("os.Getwd: %v", err)
	}

	return &Uploader{
		Client: cc,
		stats:  httpStats,
		pwd:    pwd,
		fdGate: syncutil.NewGate(100), // gate things that waste fds, assuming a low system limit
	}
}
Example 21
func (s *storage) StatBlobs(dest chan<- blob.SizedRef, blobs []blob.Ref) error {
	if len(blobs) == 0 {
		return nil
	}

	var (
		grp        syncutil.Group
		trySmallMu sync.Mutex
		trySmall   []blob.Ref
	)
	statGate := syncutil.NewGate(50) // arbitrary
	for _, br := range blobs {
		br := br
		statGate.Start()
		grp.Go(func() error {
			defer statGate.Done()
			m, err := s.getMetaRow(br)
			if err != nil {
				return err
			}
			if m.exists {
				dest <- blob.SizedRef{Ref: br, Size: m.size}
			} else {
				trySmallMu.Lock()
				trySmall = append(trySmall, br)
				trySmallMu.Unlock()
			}
			return nil
		})
	}
	if err := grp.Err(); err != nil {
		return err
	}
	if len(trySmall) == 0 {
		return nil
	}
	return s.small.StatBlobs(dest, trySmall)
}
Example 22
func (s *Storage) RemoveBlobs(blobs []blob.Ref) error {
	if s.cache != nil {
		s.cache.RemoveBlobs(blobs)
	}
	// TODO(mpl): use context from caller, once one is available (issue 733)
	ctx := context.TODO()
	gate := syncutil.NewGate(50) // arbitrary
	var grp syncutil.Group
	for i := range blobs {
		gate.Start()
		br := blobs[i]
		grp.Go(func() error {
			defer gate.Done()
			err := s.client.Bucket(s.bucket).Object(s.dirPrefix + br.String()).Delete(ctx)
			if err == storage.ErrObjectNotExist {
				return nil
			}
			return err
		})
	}
	return grp.Err()
}
Example 23
func (s *storage) zipPartsInUse(br blob.Ref) ([]blob.Ref, error) {
	var (
		mu    sync.Mutex
		inUse []blob.Ref
	)
	var grp syncutil.Group
	gate := syncutil.NewGate(20) // arbitrary constant
	err := s.foreachZipBlob(br, func(bap BlobAndPos) error {
		gate.Start()
		grp.Go(func() error {
			defer gate.Done()
			mr, err := s.getMetaRow(bap.Ref)
			if err != nil {
				return err
			}
			if mr.isPacked() {
				mu.Lock()
				inUse = append(inUse, mr.largeRef)
				mu.Unlock()
			}
			return nil
		})
		return nil
	})
	if os.IsNotExist(err) {
		// An already-deleted blob from large isn't considered
		// to be in-use.
		return nil, nil
	}
	if err != nil {
		return nil, err
	}
	if err := grp.Err(); err != nil {
		return nil, err
	}
	return inUse, nil
}
Example 24
func (r *run) importTweetsFromZip(userID string, zr *zip.Reader) error {
	log.Printf("Processing zip file with %d files", len(zr.File))

	tweetsNode, err := r.getTopLevelNode("tweets")
	if err != nil {
		return err
	}

	var (
		gate = syncutil.NewGate(tweetsAtOnce)
		grp  syncutil.Group
	)
	total := 0
	for _, zf := range zr.File {
		if !(strings.HasPrefix(zf.Name, "data/js/tweets/2") && strings.HasSuffix(zf.Name, ".js")) {
			continue
		}
		tweets, err := tweetsFromZipFile(zf)
		if err != nil {
			return fmt.Errorf("error reading tweets from %s: %v", zf.Name, err)
		}

		for i := range tweets {
			total++
			tweet := tweets[i]
			gate.Start()
			grp.Go(func() error {
				defer gate.Done()
				_, err := r.importTweet(tweetsNode, tweet, false)
				return err
			})
		}
	}
	err = grp.Err()
	log.Printf("zip import of tweets: %d total, err = %v", total, err)
	return err
}
Example 25
// src: non-nil source
// dest: non-nil destination
// thirdLeg: optional third-leg client. If not nil, anything on src
//     but not on dest will be copied to thirdLeg instead of directly
//     to dest (sneakernet mode: copying to a portable drive and
//     transporting thirdLeg to dest).
func (c *syncCmd) doPass(src, dest, thirdLeg blobserver.Storage) (stats SyncStats, retErr error) {
	var statsMu sync.Mutex // guards stats return value

	srcBlobs := make(chan blob.SizedRef, 100)
	destBlobs := make(chan blob.SizedRef, 100)
	srcErr := make(chan error, 1)
	destErr := make(chan error, 1)

	ctx := context.TODO()
	enumCtx, cancel := context.WithCancel(ctx) // used for all (2 or 3) enumerates
	defer cancel()
	enumerate := func(errc chan<- error, sto blobserver.Storage, blobc chan<- blob.SizedRef) {
		err := enumerateAllBlobs(enumCtx, sto, blobc)
		if err != nil {
			cancel()
		}
		errc <- err
	}

	go enumerate(srcErr, src, srcBlobs)
	checkSourceError := func() {
		if err := <-srcErr; err != nil && err != context.Canceled {
			retErr = fmt.Errorf("Enumerate error from source: %v", err)
		}
	}

	if c.dest == "stdout" {
		for sb := range srcBlobs {
			fmt.Fprintf(cmdmain.Stdout, "%s %d\n", sb.Ref, sb.Size)
		}
		checkSourceError()
		return
	}

	if c.wipe {
		// TODO(mpl): dest is a client. make it send a "wipe" request?
		// upon reception its server then wipes itself if it is a wiper.
		log.Print("Index wiping not yet supported.")
	}

	go enumerate(destErr, dest, destBlobs)
	checkDestError := func() {
		if err := <-destErr; err != nil && err != context.Canceled {
			retErr = fmt.Errorf("Enumerate error from destination: %v", err)
		}
	}

	destNotHaveBlobs := make(chan blob.SizedRef)

	readSrcBlobs := srcBlobs
	if c.verbose {
		readSrcBlobs = loggingBlobRefChannel(srcBlobs)
	}

	mismatches := []blob.Ref{}

	logErrorf := func(format string, args ...interface{}) {
		log.Printf(format, args...)
		statsMu.Lock()
		stats.ErrorCount++
		statsMu.Unlock()
	}

	onMismatch := func(br blob.Ref) {
		// TODO(bradfitz): check both sides and repair, carefully.  For now, fail.
		logErrorf("WARNING: blobref %v has differing sizes on source and dest", br)
		mismatches = append(mismatches, br)
	}

	go blobserver.ListMissingDestinationBlobs(destNotHaveBlobs, onMismatch, readSrcBlobs, destBlobs)

	// Handle three-legged mode if thirdLeg is provided.
	checkThirdError := func() {} // default nop
	syncBlobs := destNotHaveBlobs
	firstHopDest := dest
	if thirdLeg != nil {
		thirdBlobs := make(chan blob.SizedRef, 100)
		thirdErr := make(chan error, 1)
		go enumerate(thirdErr, thirdLeg, thirdBlobs)
		checkThirdError = func() {
			if err := <-thirdErr; err != nil && err != context.Canceled {
				retErr = fmt.Errorf("Enumerate error from third leg: %v", err)
			}
		}
		thirdNeedBlobs := make(chan blob.SizedRef)
		go blobserver.ListMissingDestinationBlobs(thirdNeedBlobs, onMismatch, destNotHaveBlobs, thirdBlobs)
		syncBlobs = thirdNeedBlobs
		firstHopDest = thirdLeg
	}

	var gate = syncutil.NewGate(c.concurrency)
	var wg sync.WaitGroup

	for sb := range syncBlobs {
		sb := sb
		gate.Start()
		wg.Add(1)
		go func() {
			defer wg.Done()
			defer gate.Done()
			fmt.Fprintf(cmdmain.Stdout, "Destination needs blob: %s\n", sb)
			blobReader, size, err := src.Fetch(sb.Ref)

			if err != nil {
				logErrorf("Error fetching %s: %v", sb.Ref, err)
				return
			}
			if size != sb.Size {
				logErrorf("Source blobserver's enumerate size of %d for blob %s doesn't match its Get size of %d",
					sb.Size, sb.Ref, size)
				return
			}

			_, err = blobserver.Receive(firstHopDest, sb.Ref, blobReader)
			if err != nil {
				logErrorf("Upload of %s to destination blobserver failed: %v", sb.Ref, err)
				return
			}
			statsMu.Lock()
			stats.BlobsCopied++
			stats.BytesCopied += int64(size)
			statsMu.Unlock()

			if c.removeSrc {
				if err := src.RemoveBlobs([]blob.Ref{sb.Ref}); err != nil {
					logErrorf("Failed to delete %s from source: %v", sb.Ref, err)
				}
			}
		}()
	}
	wg.Wait()

	checkSourceError()
	checkDestError()
	checkThirdError()
	if retErr == nil && stats.ErrorCount > 0 {
		retErr = fmt.Errorf("%d errors during sync", stats.ErrorCount)
	}
	return stats, retErr
}
Example 26
func (s *storage) init() {
	s.packGate = syncutil.NewGate(10)
}
Example 27

package localdisk

import (
	"os"

	"camlistore.org/pkg/blob"

	"go4.org/syncutil"
)

const maxParallelStats = 20

var statGate = syncutil.NewGate(maxParallelStats)

func (ds *DiskStorage) StatBlobs(dest chan<- blob.SizedRef, blobs []blob.Ref) error {
	if len(blobs) == 0 {
		return nil
	}

	statSend := func(ref blob.Ref) error {
		fi, err := os.Stat(ds.blobPath(ref))
		switch {
		case err == nil && fi.Mode().IsRegular():
			dest <- blob.SizedRef{Ref: ref, Size: u32(fi.Size())}
			return nil
		case err != nil && !os.IsNotExist(err):
			return err
		}
		return nil
	}

	var grp syncutil.Group
	for _, ref := range blobs {
		ref := ref
		statGate.Start()
		grp.Go(func() error {
			defer statGate.Done()
			return statSend(ref)
		})
	}
	return grp.Err()
}
Example 28
func newKeyValueFromJSONConfig(cfg jsonconfig.Obj) (sorted.KeyValue, error) {
	var (
		user     = cfg.RequiredString("user")
		database = cfg.RequiredString("database")
		host     = cfg.OptionalString("host", "")
		password = cfg.OptionalString("password", "")
	)
	if err := cfg.Validate(); err != nil {
		return nil, err
	}
	if !validDatabaseName(database) {
		return nil, fmt.Errorf("%q looks like an invalid database name", database)
	}
	var err error
	if host != "" {
		host, err = maybeRemapCloudSQL(host)
		if err != nil {
			return nil, err
		}
		if !strings.Contains(host, ":") {
			host += ":3306"
		}
		host = "tcp(" + host + ")"
	}
	// The DSN does NOT have a database name in it so it's
	// cacheable and can be shared between different queues & the
	// index, all sharing the same database server, cutting down
	// number of TCP connections required. We add the database
	// name in queries instead.
	dsn := fmt.Sprintf("%s:%s@%s/", user, password, host)

	db, err := openOrCachedDB(dsn)
	if err != nil {
		return nil, err
	}

	if err := CreateDB(db, database); err != nil {
		return nil, err
	}
	if err := createTables(db, database); err != nil {
		return nil, err
	}

	kv := &keyValue{
		dsn: dsn,
		db:  db,
		KeyValue: &sqlkv.KeyValue{
			DB:          db,
			TablePrefix: database + ".",
			Gate:        syncutil.NewGate(20), // arbitrary limit. TODO: configurable, automatically-learned?
		},
	}
	if err := kv.ping(); err != nil {
		return nil, fmt.Errorf("MySQL db unreachable: %v", err)
	}

	version, err := kv.SchemaVersion()
	if err != nil {
		return nil, fmt.Errorf("error getting current database schema version: %v", err)
	}
	if version == 0 {
		// Newly created table case
		if _, err := db.Exec(fmt.Sprintf(`REPLACE INTO %s.meta VALUES ('version', ?)`, database), requiredSchemaVersion); err != nil {
			return nil, fmt.Errorf("error setting schema version: %v", err)
		}
		return kv, nil
	}
	if version != requiredSchemaVersion {
		if version == 20 && requiredSchemaVersion == 21 {
			fmt.Fprint(os.Stderr, fixSchema20to21)
		}
		if env.IsDev() {
			// Good signal that we're using the devcam server, so help out
			// the user with a more useful tip:
			return nil, fmt.Errorf("database schema version is %d; expect %d (run \"devcam server --wipe\" to wipe both your blobs and re-populate the database schema)", version, requiredSchemaVersion)
		}
		return nil, fmt.Errorf("database schema version is %d; expect %d (need to re-init/upgrade database?)",
			version, requiredSchemaVersion)
	}

	return kv, nil
}
Example 29
func newFromConfig(_ blobserver.Loader, config jsonconfig.Obj) (blobserver.Storage, error) {
	hostname := config.OptionalString("hostname", "s3.amazonaws.com")
	cacheSize := config.OptionalInt64("cacheSize", 32<<20)
	client := &s3.Client{
		Auth: &s3.Auth{
			AccessKey:       config.RequiredString("aws_access_key"),
			SecretAccessKey: config.RequiredString("aws_secret_access_key"),
			Hostname:        hostname,
		},
		PutGate: syncutil.NewGate(maxParallelHTTP),
	}
	bucket := config.RequiredString("bucket")
	var dirPrefix string
	if parts := strings.SplitN(bucket, "/", 2); len(parts) > 1 {
		dirPrefix = parts[1]
		bucket = parts[0]
	}
	if dirPrefix != "" && !strings.HasSuffix(dirPrefix, "/") {
		dirPrefix += "/"
	}
	sto := &s3Storage{
		s3Client:  client,
		bucket:    bucket,
		dirPrefix: dirPrefix,
		hostname:  hostname,
	}
	skipStartupCheck := config.OptionalBool("skipStartupCheck", false)
	if err := config.Validate(); err != nil {
		return nil, err
	}
	if cacheSize != 0 {
		sto.cache = memory.NewCache(cacheSize)
	}
	if !skipStartupCheck {
		_, err := client.ListBucket(sto.bucket, "", 1)
		if serr, ok := err.(*s3.Error); ok {
			if serr.AmazonCode == "NoSuchBucket" {
				return nil, fmt.Errorf("Bucket %q doesn't exist.", sto.bucket)
			}

			// This code appears when the hostname has dots in it:
			if serr.AmazonCode == "PermanentRedirect" {
				loc, lerr := client.BucketLocation(sto.bucket)
				if lerr != nil {
					return nil, fmt.Errorf("Wrong server for bucket %q; and error determining bucket's location: %v", sto.bucket, lerr)
				}
				client.Auth.Hostname = loc
				_, err = client.ListBucket(sto.bucket, "", 1)
				if err == nil {
					log.Printf("Warning: s3 server should be %q, not %q. Change config file to avoid start-up latency.", client.Auth.Hostname, hostname)
				}
			}

			// This path occurs when the user set the
			// wrong server, or didn't set one at all, but
			// the bucket doesn't have dots in it:
			if serr.UseEndpoint != "" {
				// UseEndpoint will be e.g. "brads3test-ca.s3-us-west-1.amazonaws.com"
				// But we only want the "s3-us-west-1.amazonaws.com" part.
				client.Auth.Hostname = strings.TrimPrefix(serr.UseEndpoint, sto.bucket+".")
				_, err = client.ListBucket(sto.bucket, "", 1)
				if err == nil {
					log.Printf("Warning: s3 server should be %q, not %q. Change config file to avoid start-up latency.", client.Auth.Hostname, hostname)
				}
			}
		}
		if err != nil {
			return nil, fmt.Errorf("Error listing bucket %s: %v", sto.bucket, err)
		}
	}
	return sto, nil
}
Example 30
func (r *run) importTweets(userID string) error {
	maxId := ""
	continueRequests := true

	tweetsNode, err := r.getTopLevelNode("tweets")
	if err != nil {
		return err
	}

	numTweets := 0
	sawTweet := map[string]bool{}

	// If attrs is changed, so should the expected responses accordingly for the
	// RoundTripper of MakeTestData (testdata.go).
	attrs := []string{
		"user_id", userID,
		"count", strconv.Itoa(tweetRequestLimit),
	}
	for continueRequests {
		select {
		case <-r.Context().Done():
			r.errorf("Twitter importer: interrupted")
			return r.Context().Err()
		default:
		}

		var resp []*apiTweetItem
		var err error
		if maxId == "" {
			log.Printf("Fetching tweets for userid %s", userID)
			err = r.doAPI(&resp, userTimeLineAPIPath, attrs...)
		} else {
			log.Printf("Fetching tweets for userid %s with max ID %s", userID, maxId)
			err = r.doAPI(&resp, userTimeLineAPIPath,
				append(attrs, "max_id", maxId)...)
		}
		if err != nil {
			return err
		}

		var (
			newThisBatch = 0
			allDupMu     sync.Mutex
			allDups      = true
			gate         = syncutil.NewGate(tweetsAtOnce)
			grp          syncutil.Group
		)
		for i := range resp {
			tweet := resp[i]

			// Dup-suppression.
			if sawTweet[tweet.Id] {
				continue
			}
			sawTweet[tweet.Id] = true
			newThisBatch++
			maxId = tweet.Id

			gate.Start()
			grp.Go(func() error {
				defer gate.Done()
				dup, err := r.importTweet(tweetsNode, tweet, true)
				if !dup {
					allDupMu.Lock()
					allDups = false
					allDupMu.Unlock()
				}
				if err != nil {
					r.errorf("Twitter importer: error importing tweet %s %v", tweet.Id, err)
				}
				return err
			})
		}
		if err := grp.Err(); err != nil {
			return err
		}
		numTweets += newThisBatch
		log.Printf("Imported %d tweets this batch; %d total.", newThisBatch, numTweets)
		if r.incremental && allDups {
			log.Printf("twitter incremental import found end batch")
			break
		}
		continueRequests = newThisBatch > 0
	}
	log.Printf("Successfully did full run of importing %d tweets", numTweets)
	return nil
}