func (a *archiver) stage2HashLoop() {
	defer close(a.stage3LookupChan)
	pool := common.NewGoroutinePriorityPool(a.maxConcurrentContains, a.canceler)
	defer func() {
		_ = pool.Wait()
	}()
	for file := range a.stage2HashChan {
		// This loop will implicitly buffer when stage1 is too fast by creating a
		// lot of hung goroutines in pool. This permits reducing the contention on
		// a.closeLock.
		// TODO(tandrii): Implement backpressure in GoroutinePool, e.g. when it
		// exceeds 20k or something similar.
		item := file
		pool.Schedule(item.priority, func() {
			// calcDigest calls setErr() and updates wgHashed even on failure.
			end := tracer.Span(a, "hash", tracer.Args{"name": item.DisplayName()})
			if err := item.calcDigest(); err != nil {
				end(tracer.Args{"err": err})
				a.Cancel(err)
				item.Close()
				return
			}
			end(tracer.Args{"size": float64(item.digestItem.Size)})
			tracer.CounterAdd(a, "bytesHashed", float64(item.digestItem.Size))
			a.progress.Update(groupHash, groupHashDone, 1)
			a.progress.Update(groupHash, groupHashDoneSize, item.digestItem.Size)
			a.progress.Update(groupLookup, groupLookupTodo, 1)
			a.stage3LookupChan <- item
		}, func() {
			item.setErr(a.CancelationReason())
			item.wgHashed.Done()
			item.Close()
		})
	}
}
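// The TODO above asks for backpressure in GoroutinePool. Below is a minimal
// sketch of one possible approach, using a buffered channel as a counting
// semaphore so Schedule() blocks once too many tasks are pending. The
// boundedPool type and its fields are hypothetical, not part of
// common.GoroutinePriorityPool (requires "sync").
type boundedPool struct {
	sem chan struct{} // capacity = max pending tasks, e.g. 20000
	wg  sync.WaitGroup
}

func newBoundedPool(maxPending int) *boundedPool {
	return &boundedPool{sem: make(chan struct{}, maxPending)}
}

// Schedule blocks the caller (providing backpressure) when maxPending tasks
// are already in flight, instead of accumulating hung goroutines.
func (p *boundedPool) Schedule(task func()) {
	p.sem <- struct{}{} // blocks while the pool is saturated
	p.wg.Add(1)
	go func() {
		defer func() {
			<-p.sem
			p.wg.Done()
		}()
		task()
	}()
}

func (p *boundedPool) Wait() {
	p.wg.Wait()
}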
func (i *isolateServer) Push(state *PushState, src io.ReadSeeker) (err error) {
	// This push operation may be a retry after a failed finalization call below;
	// no need to reupload the contents in that case.
	if !state.uploaded {
		// PUT the file to uploadURL.
		if err = i.doPush(state, src); err != nil {
			log.Printf("doPush(%s) failed: %s\n%#v", state.digest, err, state)
			return
		}
		state.uploaded = true
	}

	// Optionally notify the server that it's done.
	if state.status.GSUploadURL != "" {
		end := tracer.Span(i, "finalize", nil)
		defer func() { end(tracer.Args{"err": err}) }()
		// TODO(vadimsh): Calculate MD5 or CRC32C sum while uploading a file and
		// send it to the isolate server. That way the isolate server can verify
		// that the data safely reached Google Storage (GS provides MD5 and CRC32C
		// of stored files).
		in := isolated.FinalizeRequest{UploadTicket: state.status.UploadTicket}
		if err = i.postJSON("/_ah/api/isolateservice/v1/finalize_gs_upload", in, nil); err != nil {
			log.Printf("Push(%s) (finalize) failed: %s\n%#v", state.digest, err, state)
			return
		}
	}
	state.finalized = true
	return
}
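// Because Push() records state.uploaded before finalization, the whole call
// can be retried after a transient finalize failure without re-uploading the
// payload. A hypothetical retry wrapper; retryPush and maxAttempts are
// illustrative, not part of this package.
func retryPush(i *isolateServer, state *PushState, src io.ReadSeeker, maxAttempts int) error {
	var err error
	for attempt := 0; attempt < maxAttempts; attempt++ {
		// Rewind the reader so a retried upload starts from the beginning.
		if _, err = src.Seek(0, io.SeekStart); err != nil {
			return err
		}
		if err = i.Push(state, src); err == nil {
			return nil
		}
		log.Printf("Push attempt %d failed: %s", attempt+1, err)
	}
	return err
}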
// Archive processes a .isolate file, generates a .isolated file and archives
// it. It returns a Future to the .isolated.
func Archive(arch archiver.Archiver, opts *ArchiveOptions) archiver.Future {
	displayName := filepath.Base(opts.Isolated)
	defer tracer.Span(arch, strings.SplitN(displayName, ".", 2)[0]+":archive", nil)(nil)
	f, err := archive(arch, opts, displayName)
	if err != nil {
		arch.Cancel(err)
		s := archiver.NewSimpleFuture(displayName)
		s.Finalize("", err)
		return s
	}
	return f
}
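// A minimal usage sketch for Archive(); exampleArchive is hypothetical and
// only exercises the Future methods that appear elsewhere in this code.
func exampleArchive(arch archiver.Archiver, opts *ArchiveOptions) {
	future := Archive(arch, opts)
	// WaitForHashed() returns once the .isolated content is hashed; the
	// upload itself may still be in flight (see PushDirectory's contract).
	future.WaitForHashed()
	if err := future.Error(); err != nil {
		log.Printf("archive failed: %s", err)
		return
	}
	log.Printf("%s -> %s", future.DisplayName(), future.Digest())
}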
func (i *isolateServer) doPush(state *PushState, src io.ReadSeeker) (err error) {
	useDB := state.status.GSUploadURL == ""
	end := tracer.Span(i, "push", tracer.Args{"useDB": useDB, "size": state.size})
	defer func() { end(tracer.Args{"err": err}) }()
	if useDB {
		err = i.doPushDB(state, src)
	} else {
		err = i.doPushGCS(state, src)
	}
	if err == nil {
		// Only count the bytes once the upload actually succeeded.
		tracer.CounterAdd(i, "bytesUploaded", float64(state.size))
	}
	return err
}
func (i *isolateServer) Contains(items []*isolated.DigestItem) (out []*PushState, err error) {
	end := tracer.Span(i, "contains", tracer.Args{"number": len(items)})
	defer func() { end(tracer.Args{"err": err}) }()
	in := isolated.DigestCollection{Items: items}
	in.Namespace.Namespace = i.namespace
	data := &isolated.URLCollection{}
	if err = i.postJSON("/_ah/api/isolateservice/v1/preupload", in, data); err != nil {
		return nil, err
	}
	out = make([]*PushState, len(items))
	for _, e := range data.Items {
		index := int(e.Index)
		out[index] = &PushState{
			status: e,
			digest: items[index].Digest,
			size:   items[index].Size,
		}
	}
	return out, nil
}
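// A sketch of the lookup-then-upload flow implied by Contains(): out is
// allocated with one slot per input item, but only the indices returned by
// the preupload call are filled in, so a nil entry presumably means the item
// is already on the server. pushMissing and the open callback are
// hypothetical helpers, not part of this package.
func pushMissing(i *isolateServer, items []*isolated.DigestItem, open func(d isolated.HexDigest) (io.ReadSeeker, error)) error {
	states, err := i.Contains(items)
	if err != nil {
		return err
	}
	for n, state := range states {
		if state == nil {
			continue // assumed: already present server-side
		}
		src, err := open(items[n].Digest)
		if err != nil {
			return err
		}
		if err := i.Push(state, src); err != nil {
			return err
		}
	}
	return nil
}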
func archive(arch archiver.Archiver, opts *ArchiveOptions, displayName string) (archiver.Future, error) {
	end := tracer.Span(arch, strings.SplitN(displayName, ".", 2)[0]+":loading", nil)
	filesCount, dirsCount, deps, rootDir, i, err := processing(opts)
	end(tracer.Args{"err": err})
	if err != nil {
		return nil, err
	}

	// Handle each dependency, either a file or a directory.
	fileFutures := make([]archiver.Future, 0, filesCount)
	dirFutures := make([]archiver.Future, 0, dirsCount)
	for _, dep := range deps {
		relPath, err := filepath.Rel(rootDir, dep)
		if err != nil {
			return nil, err
		}
		if dep[len(dep)-1] == os.PathSeparator {
			dirFutures = append(dirFutures, archiver.PushDirectory(arch, dep, relPath, opts.Blacklist))
		} else {
			// Grab the stats right away.
			info, err := os.Lstat(dep)
			if err != nil {
				return nil, err
			}
			mode := info.Mode()
			if mode&os.ModeSymlink == os.ModeSymlink {
				l, err := os.Readlink(dep)
				if err != nil {
					return nil, err
				}
				i.Files[relPath] = isolated.File{Link: newString(l)}
			} else {
				i.Files[relPath] = isolated.File{Mode: newInt(int(mode.Perm())), Size: newInt64(info.Size())}
				fileFutures = append(fileFutures, arch.PushFile(relPath, dep, -info.Size()))
			}
		}
	}

	for _, future := range fileFutures {
		future.WaitForHashed()
		if err = future.Error(); err != nil {
			return nil, err
		}
		f := i.Files[future.DisplayName()]
		f.Digest = future.Digest()
		i.Files[future.DisplayName()] = f
	}

	// Avoid duplicated entries in includes.
	// TODO(tandrii): add test to reproduce the problem.
	includesSet := map[isolated.HexDigest]bool{}
	for _, future := range dirFutures {
		future.WaitForHashed()
		if err = future.Error(); err != nil {
			return nil, err
		}
		includesSet[future.Digest()] = true
	}
	for digest := range includesSet {
		i.Includes = append(i.Includes, digest)
	}

	raw := &bytes.Buffer{}
	if err = json.NewEncoder(raw).Encode(i); err != nil {
		return nil, err
	}
	if err := ioutil.WriteFile(opts.Isolated, raw.Bytes(), 0644); err != nil {
		return nil, err
	}
	return arch.Push(displayName, bytes.NewReader(raw.Bytes()), 0), nil
}
// PushDirectory walks a directory at root and creates a .isolated file.
//
// It walks the directories synchronously, then returns a Future to signal when
// the background work is completed. The Future is signaled once all files are
// hashed. In particular, the Future is signaled before server side cache
// lookups and uploads are completed. Use archiver.Close() to wait for
// completion.
//
// relDir is a relative directory to offset relative paths against in the
// generated .isolated file.
//
// blacklist is a list of globs of files to ignore.
func PushDirectory(a Archiver, root string, relDir string, blacklist []string) Future {
	total := 0
	end := tracer.Span(a, "PushDirectory", tracer.Args{"path": relDir, "root": root})
	defer func() { end(tracer.Args{"total": total}) }()
	c := make(chan *walkItem)
	go func() {
		walk(root, blacklist, c)
		close(c)
	}()

	displayName := filepath.Base(root) + ".isolated"
	i := isolated.Isolated{
		Algo:    "sha-1",
		Files:   map[string]isolated.File{},
		Version: isolated.IsolatedFormatVersion,
	}
	futures := []Future{}
	s := NewSimpleFuture(displayName)
	for item := range c {
		if s.Error() != nil {
			// Drain the queue.
			continue
		}
		if item.err != nil {
			s.Finalize("", item.err)
			continue
		}
		total++
		if relDir != "" {
			item.relPath = filepath.Join(relDir, item.relPath)
		}
		mode := item.info.Mode()
		if mode&os.ModeSymlink == os.ModeSymlink {
			l, err := os.Readlink(item.fullPath)
			if err != nil {
				s.Finalize("", fmt.Errorf("readlink(%s): %s", item.fullPath, err))
				continue
			}
			i.Files[item.relPath] = isolated.File{Link: newString(l)}
		} else {
			i.Files[item.relPath] = isolated.File{
				Mode: newInt(int(mode.Perm())),
				Size: newInt64(item.info.Size()),
			}
			futures = append(futures, a.PushFile(item.relPath, item.fullPath, -item.info.Size()))
		}
	}
	if s.Error() != nil {
		return s
	}
	log.Printf("PushDirectory(%s) = %d files", root, len(i.Files))

	// Hashing, cache lookups and uploads are done asynchronously.
	go func() {
		var err error
		for _, future := range futures {
			future.WaitForHashed()
			if err = future.Error(); err != nil {
				break
			}
			name := future.DisplayName()
			d := i.Files[name]
			d.Digest = future.Digest()
			i.Files[name] = d
		}
		var d isolated.HexDigest
		if err == nil {
			raw := &bytes.Buffer{}
			if err = json.NewEncoder(raw).Encode(i); err == nil {
				if f := a.Push(displayName, bytes.NewReader(raw.Bytes()), 0); f != nil {
					f.WaitForHashed()
					err = f.Error()
					d = f.Digest()
				}
			}
		}
		s.Finalize(d, err)
	}()
	return s
}
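// A minimal usage sketch for PushDirectory(). Note the contract documented
// above: WaitForHashed() only guarantees hashing is done, so a real caller
// must still wait on the Archiver (e.g. via its Close()) before trusting
// that uploads completed. The path and blacklist shown are arbitrary
// examples.
func examplePushDirectory(a Archiver) {
	f := PushDirectory(a, "/path/to/dir", "", []string{"*.pyc"})
	f.WaitForHashed()
	if err := f.Error(); err != nil {
		log.Printf("PushDirectory failed: %s", err)
		return
	}
	log.Printf("%s hashed as %s; uploads may still be in flight", f.DisplayName(), f.Digest())
}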
// walk() enumerates a directory tree synchronously and sends the items to
// channel c.
//
// blacklist is a list of globs of files to ignore. Each blacklist glob is
// relative to root.
func walk(root string, blacklist []string, c chan<- *walkItem) {
	// TODO(maruel): Walk() sorts the file names list, which is not needed here
	// and slows things down. Options:
	// #1 Use os.File.Readdir() directly. It's in the stdlib and works fine, but
	//    it's not the most efficient implementation. On POSIX it does an lstat()
	//    call per entry; on Windows it reads a Win32FileAttributeData.
	// #2 Use raw syscalls.
	//    - On POSIX, use syscall.ReadDirent(). See src/os/dir_unix.go.
	//    - On Windows, use syscall.FindFirstFile(), syscall.FindNextFile(),
	//      syscall.FindClose() directly. See src/os/file_windows.go. For odd
	//      reasons, Windows does not have a batched version to reduce the number
	//      of kernel calls. It's as if they didn't care about performance.
	//
	// In practice, #2 may not be needed; the performance of #1 may be good
	// enough relative to the other performance costs. This needs to be perf
	// tested at 100k+ files scale on Windows and OSX.
	//
	// TODO(maruel): Cache directory enumeration. In particular cases (Chromium),
	// the same directory may be enumerated multiple times. Caching the content
	// may be worthwhile. This needs to be perf tested.
	total := 0
	end := tracer.Span(root, "walk:"+filepath.Base(root), nil)
	defer func() { end(tracer.Args{"root": root, "total": total}) }()

	// Check the patterns upfront, so the function has consistent behavior
	// w.r.t. bad glob patterns.
	for _, b := range blacklist {
		if _, err := filepath.Match(b, b); err != nil {
			c <- &walkItem{err: fmt.Errorf("bad blacklist pattern \"%s\"", b)}
			return
		}
	}
	if strings.HasSuffix(root, string(filepath.Separator)) {
		root = root[:len(root)-1]
	}
	rootLen := len(root) + 1
	err := filepath.Walk(root, func(p string, info os.FileInfo, err error) error {
		total++
		if err != nil {
			return fmt.Errorf("walk(%s): %s", p, err)
		}
		if len(p) <= rootLen {
			// Root directory.
			return nil
		}
		relPath := p[rootLen:]
		for _, b := range blacklist {
			matched, _ := filepath.Match(b, relPath)
			if !matched {
				// Also check against the base file name.
				matched, _ = filepath.Match(b, filepath.Base(relPath))
			}
			if matched {
				// Must not return filepath.SkipDir for a file; filepath.Walk()
				// handles that badly.
				if info.IsDir() {
					return filepath.SkipDir
				}
				return nil
			}
		}
		if info.IsDir() {
			return nil
		}
		c <- &walkItem{fullPath: p, relPath: relPath, info: info}
		return nil
	})
	if err != nil {
		c <- &walkItem{err: err}
	}
}
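// A sketch of option #1 from the TODO above: enumerate with
// (*os.File).Readdir(-1) instead of filepath.Walk(), skipping the sort that
// Walk() performs. This is an unmeasured illustration, not a drop-in
// replacement for walk(); it omits the blacklist and channel plumbing, and
// readdirWalk is a hypothetical name.
func readdirWalk(dir string, fn func(fullPath string, info os.FileInfo)) error {
	f, err := os.Open(dir)
	if err != nil {
		return err
	}
	infos, err := f.Readdir(-1) // -1 means read all entries; order is unsorted
	f.Close()
	if err != nil {
		return err
	}
	for _, info := range infos {
		full := filepath.Join(dir, info.Name())
		if info.IsDir() {
			if err := readdirWalk(full, fn); err != nil {
				return err
			}
			continue
		}
		fn(full, info)
	}
	return nil
}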