// run drives the tree upload: it stats the tree rooted at t.base,
// feeds each statted node through the stat-cache check, uploads whatever
// isn't already cached, and periodically dumps progress stats until the
// root node's put result is available.
func (t *TreeUpload) run() {
	defer close(t.donec)

	// Kick off scanning all files, eventually learning the root
	// node (which references all its children).
	var root *node // nil until received and set in loop below.
	rootc := make(chan *node, 1)
	if !t.rootless {
		go func() {
			n, err := t.statPath(t.base, nil)
			if err != nil {
				log.Fatalf("Error scanning files under %s: %v", t.base, err)
			}
			close(t.stattedc)
			rootc <- n
		}()
	}

	var lastStat, lastUpload string
	dumpStats := func() {
		if client.AndroidOutput() {
			printAndroidCamputStatus(t)
			return
		}
		statStatus := ""
		if root == nil {
			statStatus = fmt.Sprintf("last stat: %s", lastStat)
		}
		blobStats := t.up.Stats()
		log.Printf("FILES: Total: %+v Skipped: %+v Uploaded: %+v %s BLOBS: %s Digested: %d last upload: %s",
			t.total, t.skipped, t.uploaded,
			statStatus,
			blobStats.String(),
			atomic.LoadInt64(&atomicDigestOps),
			lastUpload)
	}

	// Channels for stats & progress bars. These are never closed:
	uploadedc := make(chan *node) // at least tried to upload; server might have had blob
	skippedc := make(chan *node)  // didn't even hit blobserver; trusted our stat cache

	uploadsdonec := make(chan bool)
	var upload chan<- *node
	withPermanode := t.up.fileOpts.wantFilePermanode()
	if t.DiskUsageMode {
		// Disk-usage mode only prints per-directory sizes (in KB); nothing is uploaded.
		upload = NewNodeWorker(1, func(n *node, ok bool) {
			if !ok {
				uploadsdonec <- true
				return
			}
			if n.fi.IsDir() {
				fmt.Printf("%d\t%s\n", n.SumBytes()>>10, n.fullPath)
			}
		})
	} else {
		upload = NewNodeWorker(uploadWorkers, func(n *node, ok bool) {
			if !ok {
				log.Printf("done with all uploads.")
				uploadsdonec <- true
				return
			}
			put, err := t.up.uploadNode(n)
			if err != nil {
				log.Fatalf("Error uploading %s: %v", n.fullPath, err)
			}
			n.SetPutResult(put, nil)
			if c := t.up.statCache; c != nil && !n.fi.IsDir() {
				c.AddCachedPutResult(
					t.up.pwd, n.fullPath, n.fi, put, withPermanode)
			}
			uploadedc <- n
		})
	}

	checkStatCache := NewNodeWorker(10, func(n *node, ok bool) {
		if !ok {
			if t.up.statCache != nil {
				log.Printf("done checking stat cache")
			}
			close(upload)
			return
		}
		if t.DiskUsageMode || t.up.statCache == nil {
			upload <- n
			return
		}
		if !n.fi.IsDir() {
			cachedRes, err := t.up.statCache.CachedPutResult(
				t.up.pwd, n.fullPath, n.fi, withPermanode)
			if err == nil {
				n.SetPutResult(cachedRes, nil)
				cachelog.Printf("Cache HIT on %q -> %v", n.fullPath, cachedRes)
				client.NoteFileUploaded(n.fullPath, false)
				skippedc <- n
				return
			}
		}
		upload <- n
	})

	ticker := time.NewTicker(500 * time.Millisecond)
	defer ticker.Stop()

	stattedc := t.stattedc
Loop:
	for {
		select {
		case <-uploadsdonec:
			break Loop
		case n := <-rootc:
			root = n
		case n := <-uploadedc:
			t.uploaded.incr(n)
			lastUpload = n.fullPath
		case n := <-skippedc:
			t.skipped.incr(n)
		case n, ok := <-stattedc:
			if !ok {
				log.Printf("done statting:")
				dumpStats()
				close(checkStatCache)
				stattedc = nil
				continue
			}
			lastStat = n.fullPath
			t.total.incr(n)
			checkStatCache <- n
		case <-ticker.C:
			dumpStats()
		}
	}

	log.Printf("tree upload finished. final stats:")
	dumpStats()

	if root == nil {
		panic("unexpected nil root node")
	}
	var err error
	log.Printf("Waiting on root node %q", root.fullPath)
	t.finalPutRes, err = root.PutResult()
	log.Printf("Waited on root node %q: %v", root.fullPath, t.finalPutRes)
	if err != nil {
		t.err = err
	}
}
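// uploadNodeRegularFile uploads a regular file's contents and its "file"
// schema blob. For files over a size threshold it first tries to reuse an
// identical file already on the server (matched by whole-file digest), then
// handles the vivify fast path, and otherwise writes the full file map.
// If a file permanode was requested, it also uploads the permanode and its
// claims, keyed by the file's content digest.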
func (up *Uploader) uploadNodeRegularFile(n *node) (*client.PutResult, error) {
	filebb := schema.NewCommonFileMap(n.fullPath, n.fi)
	filebb.SetType("file")
	file, err := up.open(n.fullPath)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	if up.fileOpts.exifTime {
		ra, ok := file.(io.ReaderAt)
		if !ok {
			return nil, errors.New("Error asserting local file to io.ReaderAt")
		}
		modtime, err := schema.FileTime(ra)
		if err != nil {
			log.Printf("warning: getting time from EXIF failed for %v: %v", n.fullPath, err)
		} else {
			filebb.SetModTime(modtime)
		}
	}

	var (
		size                       = n.fi.Size()
		fileContents io.Reader     = io.LimitReader(file, size)
		br           *blobref.BlobRef  // of the file schema ref
		sum          string            // sha1 sum of the file to upload
		pr           *client.PutResult // of the final "file" schema blob
	)

	const dupCheckThreshold = 256 << 10
	if size > dupCheckThreshold {
		sumRef, err := up.wholeFileDigest(n.fullPath)
		if err == nil {
			sum = sumRef.String()
			ok := false
			pr, ok = up.fileMapFromDuplicate(up.statReceiver(n), filebb, sum)
			if ok {
				br = pr.BlobRef
				client.NoteFileUploaded(n.fullPath, !pr.Skipped)
				if up.fileOpts.wantVivify() {
					// We can return early in that case, because the other options
					// are disallowed in the vivify case.
					return pr, nil
				}
			}
		}
	}

	if up.fileOpts.wantVivify() {
		// If vivify wasn't already done in fileMapFromDuplicate.
		err := schema.WriteFileChunks(up.statReceiver(n), filebb, fileContents)
		if err != nil {
			return nil, err
		}
		json, err := filebb.JSON()
		if err != nil {
			return nil, err
		}
		br = blobref.SHA1FromString(json)
		h := &client.UploadHandle{
			BlobRef:  br,
			Size:     int64(len(json)),
			Contents: strings.NewReader(json),
			Vivify:   true,
		}
		pr, err = up.Upload(h)
		if err != nil {
			return nil, err
		}
		client.NoteFileUploaded(n.fullPath, true)
		return pr, nil
	}

	if br == nil {
		// A nil br here means fileMapFromDuplicate did not find the file on
		// the server, and the file wasn't just uploaded by the vivify path
		// above, so we do the full file + file schema upload here.
		if sum == "" && up.fileOpts.wantFilePermanode() {
			fileContents = &trackDigestReader{r: fileContents}
		}
		br, err = schema.WriteFileMap(up.statReceiver(n), filebb, fileContents)
		if err != nil {
			return nil, err
		}
	}

	// The work for those planned permanodes (and the claims) is redone
	// every time we get here (i.e. past the stat cache). However, they're
	// caught by the have cache, so at least they won't be re-uploaded for
	// nothing.
	if up.fileOpts.wantFilePermanode() {
		if td, ok := fileContents.(*trackDigestReader); ok {
			sum = td.Sum()
		}
		// claimTime is both the time of the "claimDate" in the
		// JSON claim, as well as the date in the OpenPGP
		// header.
		// TODO(bradfitz): this is a little clumsy to do by hand.
		// There should probably be a method on *Uploader to do this
		// from an unsigned schema map. Maybe ditch the schema.Claimer
		// type and just have the Uploader override the claimDate.
		claimTime, ok := filebb.ModTime()
		if !ok {
			return nil, fmt.Errorf("couldn't get modtime for file %v", n.fullPath)
		}
		err = up.uploadFilePermanode(sum, br, claimTime)
		if err != nil {
			return nil, fmt.Errorf("Error uploading permanode for node %v: %v", n, err)
		}
	}

	// TODO(bradfitz): faking a PutResult here to return
	// is kinda gross. should instead make a
	// blobserver.Storage wrapper type (wrapping
	// statReceiver) that can track some of this? or make
	// schemaWriteFileMap return it?
	json, _ := filebb.JSON()
	pr = &client.PutResult{BlobRef: br, Size: int64(len(json)), Skipped: false}
	return pr, nil
}
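// trackDigestReader is referenced above but defined elsewhere in this
// package. As a rough sketch of the idea (an assumption, not the actual
// implementation): it wraps an io.Reader, hashes every byte that passes
// through it, and exposes the digest once the reader has been fully
// consumed, so WriteFileMap's single pass over the file also yields the
// sum needed for the permanode claim. Something like (imports: crypto/sha1,
// fmt, hash, io):
//
//	type trackDigestReader struct {
//		r io.Reader
//		h hash.Hash
//	}
//
//	func (t *trackDigestReader) Read(p []byte) (n int, err error) {
//		if t.h == nil {
//			t.h = sha1.New()
//		}
//		n, err = t.r.Read(p)
//		t.h.Write(p[:n]) // hash.Hash.Write never returns an error
//		return
//	}
//
//	// Sum returns the digest in blobref string form, e.g. "sha1-<hex>".
//	func (t *trackDigestReader) Sum() string {
//		return fmt.Sprintf("sha1-%x", t.h.Sum(nil))
//	}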