func testPipeWalkerRootWithPath(path string, t *testing.T) { pattern := filepath.Join(path, "*") rootPaths, err := filepath.Glob(pattern) OK(t, err) for i, p := range rootPaths { rootPaths[i], err = filepath.Rel(path, p) OK(t, err) } t.Logf("paths in %v (pattern %q) expanded to %v items", path, pattern, len(rootPaths)) done := make(chan struct{}) defer close(done) jobCh := make(chan pipe.Job) var jobs []pipe.Job worker := func(wg *sync.WaitGroup) { defer wg.Done() for job := range jobCh { jobs = append(jobs, job) } } var wg sync.WaitGroup wg.Add(1) go worker(&wg) filter := func(p string, fi os.FileInfo) bool { p, err := filepath.Rel(path, p) OK(t, err) return dirsInPath(p) <= 1 } resCh := make(chan pipe.Result, 1) pipe.Walk([]string{path}, filter, done, jobCh, resCh) wg.Wait() t.Logf("received %d jobs", len(jobs)) for i, job := range jobs[:len(jobs)-1] { path := job.Path() if path == "." || path == ".." || path == string(filepath.Separator) { t.Errorf("job %v has invalid path %q", i, path) } } lastPath := jobs[len(jobs)-1].Path() if lastPath != "" { t.Errorf("last job has non-empty path %q", lastPath) } if len(jobs) < len(rootPaths) { t.Errorf("want at least %v jobs, got %v for path %v\n", len(rootPaths), len(jobs), path) } }
func TestWalkTree(t *testing.T) { repo, cleanup := repository.TestRepository(t) defer cleanup() dirs, err := filepath.Glob(TestWalkerPath) OK(t, err) // archive a few files arch := archiver.New(repo) sn, _, err := arch.Snapshot(nil, dirs, nil, nil) OK(t, err) // flush repo, write all packs OK(t, repo.Flush()) done := make(chan struct{}) // start tree walker treeJobs := make(chan walk.TreeJob) go walk.Tree(repo, *sn.Tree, done, treeJobs) // start filesystem walker fsJobs := make(chan pipe.Job) resCh := make(chan pipe.Result, 1) f := func(string, os.FileInfo) bool { return true } go pipe.Walk(dirs, f, done, fsJobs, resCh) for { // receive fs job fsJob, fsChOpen := <-fsJobs Assert(t, !fsChOpen || fsJob != nil, "received nil job from filesystem: %v %v", fsJob, fsChOpen) if fsJob != nil { OK(t, fsJob.Error()) } var path string fsEntries := 1 switch j := fsJob.(type) { case pipe.Dir: path = j.Path() fsEntries = len(j.Entries) case pipe.Entry: path = j.Path() } // receive tree job treeJob, treeChOpen := <-treeJobs treeEntries := 1 OK(t, treeJob.Error) if treeJob.Tree != nil { treeEntries = len(treeJob.Tree.Nodes) } Assert(t, fsChOpen == treeChOpen, "one channel closed too early: fsChOpen %v, treeChOpen %v", fsChOpen, treeChOpen) if !fsChOpen || !treeChOpen { break } Assert(t, filepath.Base(path) == filepath.Base(treeJob.Path), "paths do not match: %q != %q", filepath.Base(path), filepath.Base(treeJob.Path)) Assert(t, fsEntries == treeEntries, "wrong number of entries: %v != %v", fsEntries, treeEntries) } }
// Snapshot creates a snapshot of the given paths. If parentID is set, this is
// used to compare the files to the ones archived at the time this snapshot was
// taken.
//
// Progress is reported through p. On success the new snapshot and the ID
// under which it was saved are returned; on failure the snapshot is nil, the
// ID is the zero backend.ID and the error is returned.
func (arch *Archiver) Snapshot(p *Progress, paths []string, parentID *backend.ID) (*Snapshot, backend.ID, error) {
	// deduplicate and sort the paths (presumably by base name, judging from
	// the name baseNameSlice; confirm against its definition)
	paths = unique(paths)
	sort.Sort(baseNameSlice(paths))

	debug.Log("Archiver.Snapshot", "start for %v", paths)

	debug.RunHook("Archiver.Snapshot", nil)

	// signal the whole pipeline to stop
	// NOTE(review): done is handed to every pipeline stage below but is never
	// closed inside this function; confirm that this is intended.
	done := make(chan struct{})
	var err error

	p.Start()
	defer p.Done()

	// create new snapshot
	sn, err := NewSnapshot(paths)
	if err != nil {
		return nil, backend.ID{}, err
	}
	sn.Excludes = arch.Excludes

	// jobs.Old carries the jobs of the parent snapshot's tree, jobs.New the
	// jobs of the filesystem walker
	jobs := archivePipe{}

	// use parent snapshot (if some was given)
	if parentID != nil {
		sn.Parent = parentID

		// load parent snapshot
		parent, err := LoadSnapshot(arch.repo, *parentID)
		if err != nil {
			return nil, backend.ID{}, err
		}

		// start walker on old tree
		ch := make(chan WalkTreeJob)
		go WalkTree(arch.repo, *parent.Tree, done, ch)
		jobs.Old = ch
	} else {
		// use closed channel, so that receiving from jobs.Old never blocks
		ch := make(chan WalkTreeJob)
		close(ch)
		jobs.Old = ch
	}

	// start walker
	pipeCh := make(chan pipe.Job)
	// resCh is buffered with capacity one; the single result is received
	// further down, after all workers have terminated
	resCh := make(chan pipe.Result, 1)
	go func() {
		pipe.Walk(paths, arch.SelectFilter, done, pipeCh, resCh)
		debug.Log("Archiver.Snapshot", "pipe.Walk done")
	}()
	jobs.New = pipeCh

	// jobs.compare is given ch, the channel that pipe.Split reads from below
	ch := make(chan pipe.Job)
	go jobs.compare(done, ch)

	// wg counts the splitter goroutine and all file and dir workers
	var wg sync.WaitGroup
	entCh := make(chan pipe.Entry)
	dirCh := make(chan pipe.Dir)

	// split
	wg.Add(1)
	go func() {
		pipe.Split(ch, dirCh, entCh)
		debug.Log("Archiver.Snapshot", "split done")
		// closed once pipe.Split has returned
		close(dirCh)
		close(entCh)
		wg.Done()
	}()

	// run workers
	for i := 0; i < maxConcurrency; i++ {
		// one file worker and one dir worker per iteration
		wg.Add(2)
		go arch.fileWorker(&wg, p, done, entCh)
		go arch.dirWorker(&wg, p, done, dirCh)
	}

	// run index saver
	var wgIndexSaver sync.WaitGroup
	stopIndexSaver := make(chan struct{})
	wgIndexSaver.Add(1)
	go arch.saveIndexes(&wgIndexSaver, stopIndexSaver)

	// wait for all workers to terminate
	debug.Log("Archiver.Snapshot", "wait for workers")
	wg.Wait()

	// stop index saver
	close(stopIndexSaver)
	wgIndexSaver.Wait()

	debug.Log("Archiver.Snapshot", "workers terminated")

	// receive the top-level tree
	// NOTE(review): the unchecked type assertion panics when the received
	// result is not a *Node.
	root := (<-resCh).(*Node)
	debug.Log("Archiver.Snapshot", "root node received: %v", root.Subtree.Str())
	sn.Tree = root.Subtree

	// save snapshot
	id, err := arch.repo.SaveJSONUnpacked(backend.Snapshot, sn)
	if err != nil {
		return nil, backend.ID{}, err
	}

	// store ID in snapshot struct
	sn.id = &id
	debug.Log("Archiver.Snapshot", "saved snapshot %v", id.Str())

	// flush repository
	err = arch.repo.Flush()
	if err != nil {
		return nil, backend.ID{}, err
	}

	// save index
	err = arch.repo.SaveIndex()
	if err != nil {
		debug.Log("Archiver.Snapshot", "error saving index: %v", err)
		return nil, backend.ID{}, err
	}

	debug.Log("Archiver.Snapshot", "saved indexes")

	return sn, id, nil
}
func TestPipelineWalkerWithSplit(t *testing.T) { if TestWalkerPath == "" { t.Skipf("walkerpath not set, skipping TestPipelineWalker") } var err error if !filepath.IsAbs(TestWalkerPath) { TestWalkerPath, err = filepath.Abs(TestWalkerPath) OK(t, err) } before, err := statPath(TestWalkerPath) OK(t, err) t.Logf("walking path %s with %d dirs, %d files", TestWalkerPath, before.dirs, before.files) // account for top level dir before.dirs++ after := stats{} m := sync.Mutex{} worker := func(wg *sync.WaitGroup, done <-chan struct{}, entCh <-chan pipe.Entry, dirCh <-chan pipe.Dir) { defer wg.Done() for { select { case e, ok := <-entCh: if !ok { // channel is closed return } m.Lock() after.files++ m.Unlock() e.Result() <- true case dir, ok := <-dirCh: if !ok { // channel is closed return } // wait for all content for _, ch := range dir.Entries { <-ch } m.Lock() after.dirs++ m.Unlock() dir.Result() <- true case <-done: // pipeline was cancelled return } } } var wg sync.WaitGroup done := make(chan struct{}) entCh := make(chan pipe.Entry) dirCh := make(chan pipe.Dir) for i := 0; i < maxWorkers; i++ { wg.Add(1) go worker(&wg, done, entCh, dirCh) } jobs := make(chan pipe.Job, 200) wg.Add(1) go func() { pipe.Split(jobs, dirCh, entCh) close(entCh) close(dirCh) wg.Done() }() resCh := make(chan pipe.Result, 1) pipe.Walk([]string{TestWalkerPath}, acceptAll, done, jobs, resCh) // wait for all workers to terminate wg.Wait() // wait for top-level blob <-resCh t.Logf("walked path %s with %d dirs, %d files", TestWalkerPath, after.dirs, after.files) Assert(t, before == after, "stats do not match, expected %v, got %v", before, after) }
func TestPipelineWalkerMultiple(t *testing.T) { if TestWalkerPath == "" { t.Skipf("walkerpath not set, skipping TestPipelineWalker") } paths, err := filepath.Glob(filepath.Join(TestWalkerPath, "*")) OK(t, err) before, err := statPath(TestWalkerPath) OK(t, err) t.Logf("walking paths %v with %d dirs, %d files", paths, before.dirs, before.files) after := stats{} m := sync.Mutex{} worker := func(wg *sync.WaitGroup, done <-chan struct{}, jobs <-chan pipe.Job) { defer wg.Done() for { select { case job, ok := <-jobs: if !ok { // channel is closed return } Assert(t, job != nil, "job is nil") switch j := job.(type) { case pipe.Dir: // wait for all content for _, ch := range j.Entries { <-ch } m.Lock() after.dirs++ m.Unlock() j.Result() <- true case pipe.Entry: m.Lock() after.files++ m.Unlock() j.Result() <- true } case <-done: // pipeline was cancelled return } } } var wg sync.WaitGroup done := make(chan struct{}) jobs := make(chan pipe.Job) for i := 0; i < maxWorkers; i++ { wg.Add(1) go worker(&wg, done, jobs) } resCh := make(chan pipe.Result, 1) pipe.Walk(paths, acceptAll, done, jobs, resCh) // wait for all workers to terminate wg.Wait() // wait for top-level blob <-resCh t.Logf("walked %d paths with %d dirs, %d files", len(paths), after.dirs, after.files) Assert(t, before == after, "stats do not match, expected %v, got %v", before, after) }
func BenchmarkPipelineWalker(b *testing.B) { if TestWalkerPath == "" { b.Skipf("walkerpath not set, skipping BenchPipelineWalker") } var max time.Duration m := sync.Mutex{} fileWorker := func(wg *sync.WaitGroup, done <-chan struct{}, ch <-chan pipe.Entry) { defer wg.Done() for { select { case e, ok := <-ch: if !ok { // channel is closed return } // simulate backup //time.Sleep(10 * time.Millisecond) e.Result() <- true case <-done: // pipeline was cancelled return } } } dirWorker := func(wg *sync.WaitGroup, done <-chan struct{}, ch <-chan pipe.Dir) { defer wg.Done() for { select { case dir, ok := <-ch: if !ok { // channel is closed return } start := time.Now() // wait for all content for _, ch := range dir.Entries { <-ch } d := time.Since(start) m.Lock() if d > max { max = d } m.Unlock() dir.Result() <- true case <-done: // pipeline was cancelled return } } } for i := 0; i < b.N; i++ { max = 0 done := make(chan struct{}) entCh := make(chan pipe.Entry, 200) dirCh := make(chan pipe.Dir, 200) var wg sync.WaitGroup b.Logf("starting %d workers", maxWorkers) for i := 0; i < maxWorkers; i++ { wg.Add(2) go dirWorker(&wg, done, dirCh) go fileWorker(&wg, done, entCh) } jobs := make(chan pipe.Job, 200) wg.Add(1) go func() { pipe.Split(jobs, dirCh, entCh) close(entCh) close(dirCh) wg.Done() }() resCh := make(chan pipe.Result, 1) pipe.Walk([]string{TestWalkerPath}, acceptAll, done, jobs, resCh) // wait for all workers to terminate wg.Wait() // wait for final result <-resCh b.Logf("max duration for a dir: %v", max) } }
func TestPipeWalkerError(t *testing.T) { dir, err := ioutil.TempDir("", "restic-test-") OK(t, err) base := filepath.Base(dir) var testjobs = []struct { path []string err bool }{ {[]string{base, "a", "file_a"}, false}, {[]string{base, "a"}, false}, {[]string{base, "b"}, true}, {[]string{base, "c", "file_c"}, false}, {[]string{base, "c"}, false}, {[]string{base}, false}, {[]string{}, false}, } OK(t, os.Mkdir(filepath.Join(dir, "a"), 0755)) OK(t, os.Mkdir(filepath.Join(dir, "b"), 0755)) OK(t, os.Mkdir(filepath.Join(dir, "c"), 0755)) OK(t, createFile(filepath.Join(dir, "a", "file_a"), "file a")) OK(t, createFile(filepath.Join(dir, "b", "file_b"), "file b")) OK(t, createFile(filepath.Join(dir, "c", "file_c"), "file c")) ranHook := false testdir := filepath.Join(dir, "b") // install hook that removes the dir right before readdirnames() debug.Hook("pipe.readdirnames", func(context interface{}) { path := context.(string) if path != testdir { return } t.Logf("in hook, removing test file %v", testdir) ranHook = true OK(t, os.RemoveAll(testdir)) }) done := make(chan struct{}) ch := make(chan pipe.Job) resCh := make(chan pipe.Result, 1) go pipe.Walk([]string{dir}, acceptAll, done, ch, resCh) i := 0 for job := range ch { if i == len(testjobs) { t.Errorf("too many jobs received") break } p := filepath.Join(testjobs[i].path...) if p != job.Path() { t.Errorf("job %d has wrong path: expected %q, got %q", i, p, job.Path()) } if testjobs[i].err { if job.Error() == nil { t.Errorf("job %d expected error but got nil", i) } } else { if job.Error() != nil { t.Errorf("job %d expected no error but got %v", i, job.Error()) } } i++ } if i != len(testjobs) { t.Errorf("expected %d jobs, got %d", len(testjobs), i) } close(done) Assert(t, ranHook, "hook did not run") OK(t, os.RemoveAll(dir)) }