func TestPipelineWalkerWithSplit(t *testing.T) { if TestWalkerPath == "" { t.Skipf("walkerpath not set, skipping TestPipelineWalker") } var err error if !filepath.IsAbs(TestWalkerPath) { TestWalkerPath, err = filepath.Abs(TestWalkerPath) OK(t, err) } before, err := statPath(TestWalkerPath) OK(t, err) t.Logf("walking path %s with %d dirs, %d files", TestWalkerPath, before.dirs, before.files) // account for top level dir before.dirs++ after := stats{} m := sync.Mutex{} worker := func(wg *sync.WaitGroup, done <-chan struct{}, entCh <-chan pipe.Entry, dirCh <-chan pipe.Dir) { defer wg.Done() for { select { case e, ok := <-entCh: if !ok { // channel is closed return } m.Lock() after.files++ m.Unlock() e.Result() <- true case dir, ok := <-dirCh: if !ok { // channel is closed return } // wait for all content for _, ch := range dir.Entries { <-ch } m.Lock() after.dirs++ m.Unlock() dir.Result() <- true case <-done: // pipeline was cancelled return } } } var wg sync.WaitGroup done := make(chan struct{}) entCh := make(chan pipe.Entry) dirCh := make(chan pipe.Dir) for i := 0; i < maxWorkers; i++ { wg.Add(1) go worker(&wg, done, entCh, dirCh) } jobs := make(chan pipe.Job, 200) wg.Add(1) go func() { pipe.Split(jobs, dirCh, entCh) close(entCh) close(dirCh) wg.Done() }() resCh := make(chan pipe.Result, 1) pipe.Walk([]string{TestWalkerPath}, acceptAll, done, jobs, resCh) // wait for all workers to terminate wg.Wait() // wait for top-level blob <-resCh t.Logf("walked path %s with %d dirs, %d files", TestWalkerPath, after.dirs, after.files) Assert(t, before == after, "stats do not match, expected %v, got %v", before, after) }
// Snapshot creates a snapshot of the given paths. If parentID is set, this is // used to compare the files to the ones archived at the time this snapshot was // taken. func (arch *Archiver) Snapshot(p *Progress, paths []string, parentID *backend.ID) (*Snapshot, backend.ID, error) { paths = unique(paths) sort.Sort(baseNameSlice(paths)) debug.Log("Archiver.Snapshot", "start for %v", paths) debug.RunHook("Archiver.Snapshot", nil) // signal the whole pipeline to stop done := make(chan struct{}) var err error p.Start() defer p.Done() // create new snapshot sn, err := NewSnapshot(paths) if err != nil { return nil, backend.ID{}, err } sn.Excludes = arch.Excludes jobs := archivePipe{} // use parent snapshot (if some was given) if parentID != nil { sn.Parent = parentID // load parent snapshot parent, err := LoadSnapshot(arch.repo, *parentID) if err != nil { return nil, backend.ID{}, err } // start walker on old tree ch := make(chan WalkTreeJob) go WalkTree(arch.repo, *parent.Tree, done, ch) jobs.Old = ch } else { // use closed channel ch := make(chan WalkTreeJob) close(ch) jobs.Old = ch } // start walker pipeCh := make(chan pipe.Job) resCh := make(chan pipe.Result, 1) go func() { pipe.Walk(paths, arch.SelectFilter, done, pipeCh, resCh) debug.Log("Archiver.Snapshot", "pipe.Walk done") }() jobs.New = pipeCh ch := make(chan pipe.Job) go jobs.compare(done, ch) var wg sync.WaitGroup entCh := make(chan pipe.Entry) dirCh := make(chan pipe.Dir) // split wg.Add(1) go func() { pipe.Split(ch, dirCh, entCh) debug.Log("Archiver.Snapshot", "split done") close(dirCh) close(entCh) wg.Done() }() // run workers for i := 0; i < maxConcurrency; i++ { wg.Add(2) go arch.fileWorker(&wg, p, done, entCh) go arch.dirWorker(&wg, p, done, dirCh) } // run index saver var wgIndexSaver sync.WaitGroup stopIndexSaver := make(chan struct{}) wgIndexSaver.Add(1) go arch.saveIndexes(&wgIndexSaver, stopIndexSaver) // wait for all workers to terminate debug.Log("Archiver.Snapshot", "wait for workers") wg.Wait() // stop index saver close(stopIndexSaver) wgIndexSaver.Wait() debug.Log("Archiver.Snapshot", "workers terminated") // receive the top-level tree root := (<-resCh).(*Node) debug.Log("Archiver.Snapshot", "root node received: %v", root.Subtree.Str()) sn.Tree = root.Subtree // save snapshot id, err := arch.repo.SaveJSONUnpacked(backend.Snapshot, sn) if err != nil { return nil, backend.ID{}, err } // store ID in snapshot struct sn.id = &id debug.Log("Archiver.Snapshot", "saved snapshot %v", id.Str()) // flush repository err = arch.repo.Flush() if err != nil { return nil, backend.ID{}, err } // save index err = arch.repo.SaveIndex() if err != nil { debug.Log("Archiver.Snapshot", "error saving index: %v", err) return nil, backend.ID{}, err } debug.Log("Archiver.Snapshot", "saved indexes") return sn, id, nil }
func BenchmarkPipelineWalker(b *testing.B) { if TestWalkerPath == "" { b.Skipf("walkerpath not set, skipping BenchPipelineWalker") } var max time.Duration m := sync.Mutex{} fileWorker := func(wg *sync.WaitGroup, done <-chan struct{}, ch <-chan pipe.Entry) { defer wg.Done() for { select { case e, ok := <-ch: if !ok { // channel is closed return } // simulate backup //time.Sleep(10 * time.Millisecond) e.Result() <- true case <-done: // pipeline was cancelled return } } } dirWorker := func(wg *sync.WaitGroup, done <-chan struct{}, ch <-chan pipe.Dir) { defer wg.Done() for { select { case dir, ok := <-ch: if !ok { // channel is closed return } start := time.Now() // wait for all content for _, ch := range dir.Entries { <-ch } d := time.Since(start) m.Lock() if d > max { max = d } m.Unlock() dir.Result() <- true case <-done: // pipeline was cancelled return } } } for i := 0; i < b.N; i++ { max = 0 done := make(chan struct{}) entCh := make(chan pipe.Entry, 200) dirCh := make(chan pipe.Dir, 200) var wg sync.WaitGroup b.Logf("starting %d workers", maxWorkers) for i := 0; i < maxWorkers; i++ { wg.Add(2) go dirWorker(&wg, done, dirCh) go fileWorker(&wg, done, entCh) } jobs := make(chan pipe.Job, 200) wg.Add(1) go func() { pipe.Split(jobs, dirCh, entCh) close(entCh) close(dirCh) wg.Done() }() resCh := make(chan pipe.Result, 1) pipe.Walk([]string{TestWalkerPath}, acceptAll, done, jobs, resCh) // wait for all workers to terminate wg.Wait() // wait for final result <-resCh b.Logf("max duration for a dir: %v", max) } }