// handleFile queues the copies and pulls as necessary for a single new or // changed file. func (p *rwFolder) handleFile(file protocol.FileInfo, copyChan chan<- copyBlocksState, finisherChan chan<- *sharedPullerState) { curFile, ok := p.model.CurrentFolderFile(p.folder, file.Name) if ok && len(curFile.Blocks) == len(file.Blocks) && scanner.BlocksEqual(curFile.Blocks, file.Blocks) { // We are supposed to copy the entire file, and then fetch nothing. We // are only updating metadata, so we don't actually *need* to make the // copy. if debug { l.Debugln(p, "taking shortcut on", file.Name) } events.Default.Log(events.ItemStarted, map[string]string{ "folder": p.folder, "item": file.Name, "type": "file", "action": "metadata", }) p.queue.Done(file.Name) var err error if file.IsSymlink() { err = p.shortcutSymlink(file) } else { err = p.shortcutFile(file) } events.Default.Log(events.ItemFinished, map[string]interface{}{ "folder": p.folder, "item": file.Name, "error": events.Error(err), "type": "file", "action": "metadata", }) if err != nil { l.Infoln("Puller: shortcut:", err) p.newError(file.Name, err) } else { p.dbUpdates <- dbUpdateJob{file, dbUpdateShortcutFile} } return } events.Default.Log(events.ItemStarted, map[string]string{ "folder": p.folder, "item": file.Name, "type": "file", "action": "update", }) scanner.PopulateOffsets(file.Blocks) // Figure out the absolute filenames we need once and for all tempName := filepath.Join(p.dir, defTempNamer.TempName(file.Name)) realName := filepath.Join(p.dir, file.Name) reused := 0 var blocks []protocol.BlockInfo // Check for an old temporary file which might have some blocks we could // reuse. tempBlocks, err := scanner.HashFile(tempName, protocol.BlockSize) if err == nil { // Check for any reusable blocks in the temp file tempCopyBlocks, _ := scanner.BlockDiff(tempBlocks, file.Blocks) // block.String() returns a string unique to the block existingBlocks := make(map[string]struct{}, len(tempCopyBlocks)) for _, block := range tempCopyBlocks { existingBlocks[block.String()] = struct{}{} } // Since the blocks are already there, we don't need to get them. for _, block := range file.Blocks { _, ok := existingBlocks[block.String()] if !ok { blocks = append(blocks, block) } } // The sharedpullerstate will know which flags to use when opening the // temp file depending if we are reusing any blocks or not. reused = len(file.Blocks) - len(blocks) if reused == 0 { // Otherwise, discard the file ourselves in order for the // sharedpuller not to panic when it fails to exclusively create a // file which already exists os.Remove(tempName) } } else { blocks = file.Blocks } s := sharedPullerState{ file: file, folder: p.folder, tempName: tempName, realName: realName, copyTotal: len(blocks), copyNeeded: len(blocks), reused: reused, ignorePerms: p.ignorePermissions(file), version: curFile.Version, mut: sync.NewMutex(), } if debug { l.Debugf("%v need file %s; copy %d, reused %v", p, file.Name, len(blocks), reused) } cs := copyBlocksState{ sharedPullerState: &s, blocks: blocks, } copyChan <- cs }
// handleFile queues the copies and pulls as necessary for a single new or // changed file. func (p *Puller) handleFile(file protocol.FileInfo, copyChan chan<- copyBlocksState, finisherChan chan<- *sharedPullerState) { curFile := p.model.CurrentFolderFile(p.folder, file.Name) if len(curFile.Blocks) == len(file.Blocks) && scanner.BlocksEqual(curFile.Blocks, file.Blocks) { // We are supposed to copy the entire file, and then fetch nothing. We // are only updating metadata, so we don't actually *need* to make the // copy. if debug { l.Debugln(p, "taking shortcut on", file.Name) } if file.IsSymlink() { p.shortcutSymlink(curFile, file) } else { p.shortcutFile(file) } return } scanner.PopulateOffsets(file.Blocks) // Figure out the absolute filenames we need once and for all tempName := filepath.Join(p.dir, defTempNamer.TempName(file.Name)) realName := filepath.Join(p.dir, file.Name) reused := 0 var blocks []protocol.BlockInfo // Check for an old temporary file which might have some blocks we could // reuse. tempBlocks, err := scanner.HashFile(tempName, protocol.BlockSize) if err == nil { // Check for any reusable blocks in the temp file tempCopyBlocks, _ := scanner.BlockDiff(tempBlocks, file.Blocks) // block.String() returns a string unique to the block existingBlocks := make(map[string]bool, len(tempCopyBlocks)) for _, block := range tempCopyBlocks { existingBlocks[block.String()] = true } // Since the blocks are already there, we don't need to get them. for _, block := range file.Blocks { _, ok := existingBlocks[block.String()] if !ok { blocks = append(blocks, block) } } // The sharedpullerstate will know which flags to use when opening the // temp file depending if we are reusing any blocks or not. reused = len(file.Blocks) - len(blocks) if reused == 0 { // Otherwise, discard the file ourselves in order for the // sharedpuller not to panic when it fails to exlusively create a // file which already exists os.Remove(tempName) } } else { blocks = file.Blocks } s := sharedPullerState{ file: file, folder: p.folder, tempName: tempName, realName: realName, copyTotal: uint32(len(blocks)), copyNeeded: uint32(len(blocks)), reused: uint32(reused), } if debug { l.Debugf("%v need file %s; copy %d, reused %v", p, file.Name, len(blocks), reused) } cs := copyBlocksState{ sharedPullerState: &s, blocks: blocks, } copyChan <- cs }
// pullerIteration runs a single puller iteration for the given folder and // returns the number items that should have been synced (even those that // might have failed). One puller iteration handles all files currently // flagged as needed in the folder. func (p *rwFolder) pullerIteration(ignores *ignore.Matcher) int { pullChan := make(chan pullBlockState) copyChan := make(chan copyBlocksState) finisherChan := make(chan *sharedPullerState) updateWg := sync.NewWaitGroup() copyWg := sync.NewWaitGroup() pullWg := sync.NewWaitGroup() doneWg := sync.NewWaitGroup() if debug { l.Debugln(p, "c", p.copiers, "p", p.pullers) } p.dbUpdates = make(chan dbUpdateJob) updateWg.Add(1) go func() { // dbUpdaterRoutine finishes when p.dbUpdates is closed p.dbUpdaterRoutine() updateWg.Done() }() for i := 0; i < p.copiers; i++ { copyWg.Add(1) go func() { // copierRoutine finishes when copyChan is closed p.copierRoutine(copyChan, pullChan, finisherChan) copyWg.Done() }() } for i := 0; i < p.pullers; i++ { pullWg.Add(1) go func() { // pullerRoutine finishes when pullChan is closed p.pullerRoutine(pullChan, finisherChan) pullWg.Done() }() } doneWg.Add(1) // finisherRoutine finishes when finisherChan is closed go func() { p.finisherRoutine(finisherChan) doneWg.Done() }() p.model.fmut.RLock() folderFiles := p.model.folderFiles[p.folder] p.model.fmut.RUnlock() // !!! // WithNeed takes a database snapshot (by necessity). By the time we've // handled a bunch of files it might have become out of date and we might // be attempting to sync with an old version of a file... // !!! changed := 0 fileDeletions := map[string]protocol.FileInfo{} dirDeletions := []protocol.FileInfo{} buckets := map[string][]protocol.FileInfo{} folderFiles.WithNeed(protocol.LocalDeviceID, func(intf db.FileIntf) bool { // Needed items are delivered sorted lexicographically. We'll handle // directories as they come along, so parents before children. Files // are queued and the order may be changed later. file := intf.(protocol.FileInfo) if ignores.Match(file.Name) { // This is an ignored file. Skip it, continue iteration. return true } if debug { l.Debugln(p, "handling", file.Name) } switch { case file.IsDeleted(): // A deleted file, directory or symlink if file.IsDirectory() { dirDeletions = append(dirDeletions, file) } else { fileDeletions[file.Name] = file df, ok := p.model.CurrentFolderFile(p.folder, file.Name) // Local file can be already deleted, but with a lower version // number, hence the deletion coming in again as part of // WithNeed, furthermore, the file can simply be of the wrong // type if we haven't yet managed to pull it. if ok && !df.IsDeleted() && !df.IsSymlink() && !df.IsDirectory() { // Put files into buckets per first hash key := string(df.Blocks[0].Hash) buckets[key] = append(buckets[key], df) } } case file.IsDirectory() && !file.IsSymlink(): // A new or changed directory if debug { l.Debugln("Creating directory", file.Name) } p.handleDir(file) default: // A new or changed file or symlink. This is the only case where we // do stuff concurrently in the background p.queue.Push(file.Name, file.Size(), file.Modified) } changed++ return true }) // Reorder the file queue according to configuration switch p.order { case config.OrderRandom: p.queue.Shuffle() case config.OrderAlphabetic: // The queue is already in alphabetic order. case config.OrderSmallestFirst: p.queue.SortSmallestFirst() case config.OrderLargestFirst: p.queue.SortLargestFirst() case config.OrderOldestFirst: p.queue.SortOldestFirst() case config.OrderNewestFirst: p.queue.SortOldestFirst() } // Process the file queue nextFile: for { fileName, ok := p.queue.Pop() if !ok { break } f, ok := p.model.CurrentGlobalFile(p.folder, fileName) if !ok { // File is no longer in the index. Mark it as done and drop it. p.queue.Done(fileName) continue } // Local file can be already deleted, but with a lower version // number, hence the deletion coming in again as part of // WithNeed, furthermore, the file can simply be of the wrong type if // the global index changed while we were processing this iteration. if !f.IsDeleted() && !f.IsSymlink() && !f.IsDirectory() { key := string(f.Blocks[0].Hash) for i, candidate := range buckets[key] { if scanner.BlocksEqual(candidate.Blocks, f.Blocks) { // Remove the candidate from the bucket lidx := len(buckets[key]) - 1 buckets[key][i] = buckets[key][lidx] buckets[key] = buckets[key][:lidx] // candidate is our current state of the file, where as the // desired state with the delete bit set is in the deletion // map. desired := fileDeletions[candidate.Name] // Remove the pending deletion (as we perform it by renaming) delete(fileDeletions, candidate.Name) p.renameFile(desired, f) p.queue.Done(fileName) continue nextFile } } } // Not a rename or a symlink, deal with it. p.handleFile(f, copyChan, finisherChan) } // Signal copy and puller routines that we are done with the in data for // this iteration. Wait for them to finish. close(copyChan) copyWg.Wait() close(pullChan) pullWg.Wait() // Signal the finisher chan that there will be no more input. close(finisherChan) // Wait for the finisherChan to finish. doneWg.Wait() for _, file := range fileDeletions { if debug { l.Debugln("Deleting file", file.Name) } p.deleteFile(file) } for i := range dirDeletions { dir := dirDeletions[len(dirDeletions)-i-1] if debug { l.Debugln("Deleting dir", dir.Name) } p.deleteDir(dir) } // Wait for db updates to complete close(p.dbUpdates) updateWg.Wait() return changed }
// pullerIteration runs a single puller iteration for the given folder and // returns the number items that should have been synced (even those that // might have failed). One puller iteration handles all files currently // flagged as needed in the folder. func (p *Puller) pullerIteration(ignores *ignore.Matcher) int { pullChan := make(chan pullBlockState) copyChan := make(chan copyBlocksState) finisherChan := make(chan *sharedPullerState) var copyWg sync.WaitGroup var pullWg sync.WaitGroup var doneWg sync.WaitGroup if debug { l.Debugln(p, "c", p.copiers, "p", p.pullers) } for i := 0; i < p.copiers; i++ { copyWg.Add(1) go func() { // copierRoutine finishes when copyChan is closed p.copierRoutine(copyChan, pullChan, finisherChan) copyWg.Done() }() } for i := 0; i < p.pullers; i++ { pullWg.Add(1) go func() { // pullerRoutine finishes when pullChan is closed p.pullerRoutine(pullChan, finisherChan) pullWg.Done() }() } doneWg.Add(1) // finisherRoutine finishes when finisherChan is closed go func() { p.finisherRoutine(finisherChan) doneWg.Done() }() p.model.fmut.RLock() folderFiles := p.model.folderFiles[p.folder] p.model.fmut.RUnlock() // !!! // WithNeed takes a database snapshot (by necessity). By the time we've // handled a bunch of files it might have become out of date and we might // be attempting to sync with an old version of a file... // !!! changed := 0 fileDeletions := map[string]protocol.FileInfo{} dirDeletions := []protocol.FileInfo{} buckets := map[string][]protocol.FileInfo{} folderFiles.WithNeed(protocol.LocalDeviceID, func(intf db.FileIntf) bool { // Needed items are delivered sorted lexicographically. This isn't // really optimal from a performance point of view - it would be // better if files were handled in random order, to spread the load // over the cluster. But it means that we can be sure that we fully // handle directories before the files that go inside them, which is // nice. file := intf.(protocol.FileInfo) if ignores.Match(file.Name) { // This is an ignored file. Skip it, continue iteration. return true } events.Default.Log(events.ItemStarted, map[string]string{ "folder": p.folder, "item": file.Name, }) if debug { l.Debugln(p, "handling", file.Name) } switch { case file.IsDeleted(): // A deleted file, directory or symlink if file.IsDirectory() { dirDeletions = append(dirDeletions, file) } else { fileDeletions[file.Name] = file df, ok := p.model.CurrentFolderFile(p.folder, file.Name) // Local file can be already deleted, but with a lower version // number, hence the deletion coming in again as part of // WithNeed if ok && !df.IsDeleted() { // Put files into buckets per first hash key := string(df.Blocks[0].Hash) buckets[key] = append(buckets[key], df) } } case file.IsDirectory() && !file.IsSymlink(): // A new or changed directory p.handleDir(file) default: // A new or changed file or symlink. This is the only case where we // do stuff concurrently in the background p.queue.Push(file.Name) } changed++ return true }) nextFile: for { fileName, ok := p.queue.Pop() if !ok { break } f, ok := p.model.CurrentGlobalFile(p.folder, fileName) if !ok { // File is no longer in the index. Mark it as done and drop it. p.queue.Done(fileName) continue } // Local file can be already deleted, but with a lower version // number, hence the deletion coming in again as part of // WithNeed if !f.IsSymlink() && !f.IsDeleted() { key := string(f.Blocks[0].Hash) for i, candidate := range buckets[key] { if scanner.BlocksEqual(candidate.Blocks, f.Blocks) { // Remove the candidate from the bucket l := len(buckets[key]) - 1 buckets[key][i] = buckets[key][l] buckets[key] = buckets[key][:l] // Remove the pending deletion (as we perform it by renaming) delete(fileDeletions, candidate.Name) p.renameFile(candidate, f) p.queue.Done(fileName) continue nextFile } } } // Not a rename or a symlink, deal with it. p.handleFile(f, copyChan, finisherChan) } // Signal copy and puller routines that we are done with the in data for // this iteration. Wait for them to finish. close(copyChan) copyWg.Wait() close(pullChan) pullWg.Wait() // Signal the finisher chan that there will be no more input. close(finisherChan) // Wait for the finisherChan to finish. doneWg.Wait() for _, file := range fileDeletions { p.deleteFile(file) } for i := range dirDeletions { p.deleteDir(dirDeletions[len(dirDeletions)-i-1]) } return changed }