Esempio n. 1
0
func (a *archiver) stage2HashLoop() {
	defer close(a.stage3LookupChan)
	pool := common.NewGoroutinePriorityPool(a.maxConcurrentContains, a.canceler)
	defer func() {
		_ = pool.Wait()
	}()
	for file := range a.stage2HashChan {
		// This loop will implicitly buffer when stage1 is too fast by creating a
		// lot of hung goroutines in pool. This permits reducing the contention on
		// a.closeLock.
		// TODO(tandrii): Implement backpressure in GoroutinePool, e.g. when it
		// exceeds 20k or something similar.
		item := file
		pool.Schedule(item.priority, func() {
			// calcDigest calls setErr() and update wgHashed even on failure.
			end := tracer.Span(a, "hash", tracer.Args{"name": item.DisplayName()})
			if err := item.calcDigest(); err != nil {
				end(tracer.Args{"err": err})
				a.Cancel(err)
				item.Close()
				return
			}
			end(tracer.Args{"size": float64(item.digestItem.Size)})
			tracer.CounterAdd(a, "bytesHashed", float64(item.digestItem.Size))
			a.progress.Update(groupHash, groupHashDone, 1)
			a.progress.Update(groupHash, groupHashDoneSize, item.digestItem.Size)
			a.progress.Update(groupLookup, groupLookupTodo, 1)
			a.stage3LookupChan <- item
		}, func() {
			item.setErr(a.CancelationReason())
			item.wgHashed.Done()
			item.Close()
		})
	}
}
Esempio n. 2
0
func (i *isolateServer) Push(state *PushState, src io.ReadSeeker) (err error) {
	// This push operation may be a retry after failed finalization call below,
	// no need to reupload contents in that case.
	if !state.uploaded {
		// PUT file to uploadURL.
		if err = i.doPush(state, src); err != nil {
			log.Printf("doPush(%s) failed: %s\n%#v", state.digest, err, state)
			return
		}
		state.uploaded = true
	}

	// Optionally notify the server that it's done.
	if state.status.GSUploadURL != "" {
		end := tracer.Span(i, "finalize", nil)
		defer func() { end(tracer.Args{"err": err}) }()
		// TODO(vadimsh): Calculate MD5 or CRC32C sum while uploading a file and
		// send it to isolated server. That way isolate server can verify that
		// the data safely reached Google Storage (GS provides MD5 and CRC32C of
		// stored files).
		in := isolated.FinalizeRequest{state.status.UploadTicket}
		if err = i.postJSON("/_ah/api/isolateservice/v1/finalize_gs_upload", in, nil); err != nil {
			log.Printf("Push(%s) (finalize) failed: %s\n%#v", state.digest, err, state)
			return
		}
	}
	state.finalized = true
	return
}
Esempio n. 3
0
// Archive processes a .isolate, generates a .isolated and archive it.
// Returns a Future to the .isolated.
func Archive(arch archiver.Archiver, opts *ArchiveOptions) archiver.Future {
	displayName := filepath.Base(opts.Isolated)
	defer tracer.Span(arch, strings.SplitN(displayName, ".", 2)[0]+":archive", nil)(nil)
	f, err := archive(arch, opts, displayName)
	if err != nil {
		arch.Cancel(err)
		s := archiver.NewSimpleFuture(displayName)
		s.Finalize("", err)
		return s
	}
	return f
}
Esempio n. 4
0
func (i *isolateServer) doPush(state *PushState, src io.ReadSeeker) (err error) {
	useDB := state.status.GSUploadURL == ""
	end := tracer.Span(i, "push", tracer.Args{"useDB": useDB, "size": state.size})
	defer func() { end(tracer.Args{"err": err}) }()
	if useDB {
		err = i.doPushDB(state, src)
	} else {
		err = i.doPushGCS(state, src)
	}
	if err != nil {
		tracer.CounterAdd(i, "bytesUploaded", float64(state.size))
	}
	return err
}
Esempio n. 5
0
func (i *isolateServer) Contains(items []*isolated.DigestItem) (out []*PushState, err error) {
	end := tracer.Span(i, "contains", tracer.Args{"number": len(items)})
	defer func() { end(tracer.Args{"err": err}) }()
	in := isolated.DigestCollection{Items: items}
	in.Namespace.Namespace = i.namespace
	data := &isolated.URLCollection{}
	if err = i.postJSON("/_ah/api/isolateservice/v1/preupload", in, data); err != nil {
		return nil, err
	}
	out = make([]*PushState, len(items))
	for _, e := range data.Items {
		index := int(e.Index)
		out[index] = &PushState{
			status: e,
			digest: items[index].Digest,
			size:   items[index].Size,
		}
	}
	return out, nil
}
Esempio n. 6
0
func archive(arch archiver.Archiver, opts *ArchiveOptions, displayName string) (archiver.Future, error) {
	end := tracer.Span(arch, strings.SplitN(displayName, ".", 2)[0]+":loading", nil)
	filesCount, dirsCount, deps, rootDir, i, err := processing(opts)
	end(tracer.Args{"err": err})
	if err != nil {
		return nil, err
	}
	// Handle each dependency, either a file or a directory..
	fileFutures := make([]archiver.Future, 0, filesCount)
	dirFutures := make([]archiver.Future, 0, dirsCount)
	for _, dep := range deps {
		relPath, err := filepath.Rel(rootDir, dep)
		if err != nil {
			return nil, err
		}
		if dep[len(dep)-1] == os.PathSeparator {
			relPath, err := filepath.Rel(rootDir, dep)
			if err != nil {
				return nil, err
			}
			dirFutures = append(dirFutures, archiver.PushDirectory(arch, dep, relPath, opts.Blacklist))
		} else {
			// Grab the stats right away.
			info, err := os.Lstat(dep)
			if err != nil {
				return nil, err
			}
			mode := info.Mode()
			if mode&os.ModeSymlink == os.ModeSymlink {
				l, err := os.Readlink(dep)
				if err != nil {
					return nil, err
				}
				i.Files[relPath] = isolated.File{Link: newString(l)}
			} else {
				i.Files[relPath] = isolated.File{Mode: newInt(int(mode.Perm())), Size: newInt64(info.Size())}
				fileFutures = append(fileFutures, arch.PushFile(relPath, dep, -info.Size()))
			}
		}
	}

	for _, future := range fileFutures {
		future.WaitForHashed()
		if err = future.Error(); err != nil {
			return nil, err
		}
		f := i.Files[future.DisplayName()]
		f.Digest = future.Digest()
		i.Files[future.DisplayName()] = f
	}
	// Avoid duplicated entries in includes.
	// TODO(tandrii): add test to reproduce the problem.
	includesSet := map[isolated.HexDigest]bool{}
	for _, future := range dirFutures {
		future.WaitForHashed()
		if err = future.Error(); err != nil {
			return nil, err
		}
		includesSet[future.Digest()] = true
	}
	for digest := range includesSet {
		i.Includes = append(i.Includes, digest)
	}

	raw := &bytes.Buffer{}
	if err = json.NewEncoder(raw).Encode(i); err != nil {
		return nil, err
	}

	if err := ioutil.WriteFile(opts.Isolated, raw.Bytes(), 0644); err != nil {
		return nil, err
	}
	return arch.Push(displayName, bytes.NewReader(raw.Bytes()), 0), nil
}
Esempio n. 7
0
// PushDirectory walks a directory at root and creates a .isolated file.
//
// It walks the directories synchronously, then returns a Future to signal when
// the background work is completed. The future is signaled once all files are
// hashed. In particular, the Future is signaled before server side cache
// lookups and upload is completed. Use archiver.Close() to wait for
// completion.
//
// relDir is a relative directory to offset relative paths against in the
// generated .isolated file.
//
// blacklist is a list of globs of files to ignore.
func PushDirectory(a Archiver, root string, relDir string, blacklist []string) Future {
	total := 0
	end := tracer.Span(a, "PushDirectory", tracer.Args{"path": relDir, "root": root})
	defer func() { end(tracer.Args{"total": total}) }()
	c := make(chan *walkItem)
	go func() {
		walk(root, blacklist, c)
		close(c)
	}()

	displayName := filepath.Base(root) + ".isolated"
	i := isolated.Isolated{
		Algo:    "sha-1",
		Files:   map[string]isolated.File{},
		Version: isolated.IsolatedFormatVersion,
	}
	futures := []Future{}
	s := NewSimpleFuture(displayName)
	for item := range c {
		if s.Error() != nil {
			// Empty the queue.
			continue
		}
		if item.err != nil {
			s.Finalize("", item.err)
			continue
		}
		total++
		if relDir != "" {
			item.relPath = filepath.Join(relDir, item.relPath)
		}
		mode := item.info.Mode()
		if mode&os.ModeSymlink == os.ModeSymlink {
			l, err := os.Readlink(item.fullPath)
			if err != nil {
				s.Finalize("", fmt.Errorf("readlink(%s): %s", item.fullPath, err))
				continue
			}
			i.Files[item.relPath] = isolated.File{Link: newString(l)}
		} else {
			i.Files[item.relPath] = isolated.File{
				Mode: newInt(int(mode.Perm())),
				Size: newInt64(item.info.Size()),
			}
			futures = append(futures, a.PushFile(item.relPath, item.fullPath, -item.info.Size()))
		}
	}
	if s.Error() != nil {
		return s
	}
	log.Printf("PushDirectory(%s) = %d files", root, len(i.Files))

	// Hashing, cache lookups and upload is done asynchronously.
	go func() {
		var err error
		for _, future := range futures {
			future.WaitForHashed()
			if err = future.Error(); err != nil {
				break
			}
			name := future.DisplayName()
			d := i.Files[name]
			d.Digest = future.Digest()
			i.Files[name] = d
		}
		var d isolated.HexDigest
		if err == nil {
			raw := &bytes.Buffer{}
			if err = json.NewEncoder(raw).Encode(i); err == nil {
				if f := a.Push(displayName, bytes.NewReader(raw.Bytes()), 0); f != nil {
					f.WaitForHashed()
					err = f.Error()
					d = f.Digest()
				}
			}
		}
		s.Finalize(d, err)
	}()
	return s
}
Esempio n. 8
0
// walk() enumerates a directory tree synchronously and sends the items to
// channel c.
//
// blacklist is a list of globs of files to ignore. Each blacklist glob is
// relative to root.
func walk(root string, blacklist []string, c chan<- *walkItem) {
	// TODO(maruel): Walk() sorts the file names list, which is not needed here
	// and slows things down. Options:
	// #1 Use os.File.Readdir() directly. It's in the stdlib and works fine, but
	//    it's not the most efficient implementation. On posix it does a lstat()
	//    call, on Windows it does a Win32FileAttributeData.
	// #2 Use raw syscalls.
	//   - On POSIX, use syscall.ReadDirent(). See src/os/dir_unix.go.
	//   - On Windows, use syscall.FindFirstFile(), syscall.FindNextFile(),
	//     syscall.FindClose() directly. See src/os/file_windows.go. For odd
	//     reasons, Windows does not have a batched version to reduce the number
	//     of kernel calls. It's as if they didn't care about performance.
	//
	// In practice, #2 may not be needed, the performance of #1 may be good
	// enough relative to the other performance costs. This needs to be perf
	// tested at 100k+ files scale on Windows and OSX.
	//
	// TODO(maruel): Cache directory enumeration. In particular cases (Chromium),
	// the same directory may be enumerated multiple times. Caching the content
	// may be worth. This needs to be perf tested.

	total := 0
	end := tracer.Span(root, "walk:"+filepath.Base(root), nil)
	defer func() { end(tracer.Args{"root": root, "total": total}) }()
	// Check patterns upfront, so it has consistent behavior w.r.t. bad glob
	// patterns.
	for _, b := range blacklist {
		if _, err := filepath.Match(b, b); err != nil {
			c <- &walkItem{err: fmt.Errorf("bad blacklist pattern \"%s\"", b)}
			return
		}
	}
	if strings.HasSuffix(root, string(filepath.Separator)) {
		root = root[:len(root)-1]
	}
	rootLen := len(root) + 1
	err := filepath.Walk(root, func(p string, info os.FileInfo, err error) error {
		total++
		if err != nil {
			return fmt.Errorf("walk(%s): %s", p, err)
		}
		if len(p) <= rootLen {
			// Root directory.
			return nil
		}
		relPath := p[rootLen:]
		for _, b := range blacklist {
			matched, _ := filepath.Match(b, relPath)
			if !matched {
				// Also check at the base file name.
				matched, _ = filepath.Match(b, filepath.Base(relPath))
			}
			if matched {
				// Must not return io.SkipDir for file, filepath.walk() handles this
				// badly.
				if info.IsDir() {
					return filepath.SkipDir
				}
				return nil
			}
		}
		if info.IsDir() {
			return nil
		}
		c <- &walkItem{fullPath: p, relPath: relPath, info: info}
		return nil
	})
	if err != nil {
		c <- &walkItem{err: err}
	}
}