// handleFile queues the copies and pulls as necessary for a single new or // changed file. func (f *rwFolder) handleFile(file protocol.FileInfo, copyChan chan<- copyBlocksState, finisherChan chan<- *sharedPullerState) { curFile, hasCurFile := f.model.CurrentFolderFile(f.folderID, file.Name) if hasCurFile && len(curFile.Blocks) == len(file.Blocks) && scanner.BlocksEqual(curFile.Blocks, file.Blocks) { // We are supposed to copy the entire file, and then fetch nothing. We // are only updating metadata, so we don't actually *need* to make the // copy. l.Debugln(f, "taking shortcut on", file.Name) events.Default.Log(events.ItemStarted, map[string]string{ "folder": f.folderID, "item": file.Name, "type": "file", "action": "metadata", }) f.queue.Done(file.Name) err := f.shortcutFile(file) events.Default.Log(events.ItemFinished, map[string]interface{}{ "folder": f.folderID, "item": file.Name, "error": events.Error(err), "type": "file", "action": "metadata", }) if err != nil { l.Infoln("Puller: shortcut:", err) f.newError(file.Name, err) } else { f.dbUpdates <- dbUpdateJob{file, dbUpdateShortcutFile} } return } // Figure out the absolute filenames we need once and for all tempName, err := rootedJoinedPath(f.dir, defTempNamer.TempName(file.Name)) if err != nil { f.newError(file.Name, err) return } realName, err := rootedJoinedPath(f.dir, file.Name) if err != nil { f.newError(file.Name, err) return } if hasCurFile && !curFile.IsDirectory() && !curFile.IsSymlink() { // Check that the file on disk is what we expect it to be according to // the database. If there's a mismatch here, there might be local // changes that we don't know about yet and we should scan before // touching the file. If we can't stat the file we'll just pull it. if info, err := f.mtimeFS.Lstat(realName); err == nil { if !info.ModTime().Equal(curFile.ModTime()) || info.Size() != curFile.Size { l.Debugln("file modified but not rescanned; not pulling:", realName) // Scan() is synchronous (i.e. blocks until the scan is // completed and returns an error), but a scan can't happen // while we're in the puller routine. Request the scan in the // background and it'll be handled when the current pulling // sweep is complete. As we do retries, we'll queue the scan // for this file up to ten times, but the last nine of those // scans will be cheap... go f.scan.Scan([]string{file.Name}) return } } } scanner.PopulateOffsets(file.Blocks) var blocks []protocol.BlockInfo var blocksSize int64 var reused []int32 // Check for an old temporary file which might have some blocks we could // reuse. tempBlocks, err := scanner.HashFile(tempName, protocol.BlockSize, nil) if err == nil { // Check for any reusable blocks in the temp file tempCopyBlocks, _ := scanner.BlockDiff(tempBlocks, file.Blocks) // block.String() returns a string unique to the block existingBlocks := make(map[string]struct{}, len(tempCopyBlocks)) for _, block := range tempCopyBlocks { existingBlocks[block.String()] = struct{}{} } // Since the blocks are already there, we don't need to get them. for i, block := range file.Blocks { _, ok := existingBlocks[block.String()] if !ok { blocks = append(blocks, block) blocksSize += int64(block.Size) } else { reused = append(reused, int32(i)) } } // The sharedpullerstate will know which flags to use when opening the // temp file depending if we are reusing any blocks or not. if len(reused) == 0 { // Otherwise, discard the file ourselves in order for the // sharedpuller not to panic when it fails to exclusively create a // file which already exists osutil.InWritableDir(os.Remove, tempName) } } else { // Copy the blocks, as we don't want to shuffle them on the FileInfo blocks = append(blocks, file.Blocks...) blocksSize = file.Size } if f.checkFreeSpace { if free, err := osutil.DiskFreeBytes(f.dir); err == nil && free < blocksSize { l.Warnf(`Folder "%s": insufficient disk space in %s for %s: have %.2f MiB, need %.2f MiB`, f.folderID, f.dir, file.Name, float64(free)/1024/1024, float64(blocksSize)/1024/1024) f.newError(file.Name, errors.New("insufficient space")) return } } // Shuffle the blocks for i := range blocks { j := rand.Intn(i + 1) blocks[i], blocks[j] = blocks[j], blocks[i] } events.Default.Log(events.ItemStarted, map[string]string{ "folder": f.folderID, "item": file.Name, "type": "file", "action": "update", }) s := sharedPullerState{ file: file, folder: f.folderID, tempName: tempName, realName: realName, copyTotal: len(blocks), copyNeeded: len(blocks), reused: len(reused), updated: time.Now(), available: reused, availableUpdated: time.Now(), ignorePerms: f.ignorePermissions(file), version: curFile.Version, mut: sync.NewRWMutex(), sparse: f.allowSparse, created: time.Now(), } l.Debugf("%v need file %s; copy %d, reused %v", f, file.Name, len(blocks), reused) cs := copyBlocksState{ sharedPullerState: &s, blocks: blocks, } copyChan <- cs }
// handleFile queues the copies and pulls as necessary for a single new or // changed file. func (p *rwFolder) handleFile(file protocol.FileInfo, copyChan chan<- copyBlocksState, finisherChan chan<- *sharedPullerState) { curFile, ok := p.model.CurrentFolderFile(p.folder, file.Name) if ok && len(curFile.Blocks) == len(file.Blocks) && scanner.BlocksEqual(curFile.Blocks, file.Blocks) { // We are supposed to copy the entire file, and then fetch nothing. We // are only updating metadata, so we don't actually *need* to make the // copy. if debug { l.Debugln(p, "taking shortcut on", file.Name) } events.Default.Log(events.ItemStarted, map[string]string{ "folder": p.folder, "item": file.Name, "type": "file", "action": "metadata", }) p.queue.Done(file.Name) var err error if file.IsSymlink() { err = p.shortcutSymlink(file) } else { err = p.shortcutFile(file) } events.Default.Log(events.ItemFinished, map[string]interface{}{ "folder": p.folder, "item": file.Name, "error": events.Error(err), "type": "file", "action": "metadata", }) if err != nil { l.Infoln("Puller: shortcut:", err) p.newError(file.Name, err) } else { p.dbUpdates <- dbUpdateJob{file, dbUpdateShortcutFile} } return } events.Default.Log(events.ItemStarted, map[string]string{ "folder": p.folder, "item": file.Name, "type": "file", "action": "update", }) scanner.PopulateOffsets(file.Blocks) // Figure out the absolute filenames we need once and for all tempName := filepath.Join(p.dir, defTempNamer.TempName(file.Name)) realName := filepath.Join(p.dir, file.Name) reused := 0 var blocks []protocol.BlockInfo // Check for an old temporary file which might have some blocks we could // reuse. tempBlocks, err := scanner.HashFile(tempName, protocol.BlockSize) if err == nil { // Check for any reusable blocks in the temp file tempCopyBlocks, _ := scanner.BlockDiff(tempBlocks, file.Blocks) // block.String() returns a string unique to the block existingBlocks := make(map[string]struct{}, len(tempCopyBlocks)) for _, block := range tempCopyBlocks { existingBlocks[block.String()] = struct{}{} } // Since the blocks are already there, we don't need to get them. for _, block := range file.Blocks { _, ok := existingBlocks[block.String()] if !ok { blocks = append(blocks, block) } } // The sharedpullerstate will know which flags to use when opening the // temp file depending if we are reusing any blocks or not. reused = len(file.Blocks) - len(blocks) if reused == 0 { // Otherwise, discard the file ourselves in order for the // sharedpuller not to panic when it fails to exclusively create a // file which already exists os.Remove(tempName) } } else { blocks = file.Blocks } s := sharedPullerState{ file: file, folder: p.folder, tempName: tempName, realName: realName, copyTotal: len(blocks), copyNeeded: len(blocks), reused: reused, ignorePerms: p.ignorePermissions(file), version: curFile.Version, mut: sync.NewMutex(), } if debug { l.Debugf("%v need file %s; copy %d, reused %v", p, file.Name, len(blocks), reused) } cs := copyBlocksState{ sharedPullerState: &s, blocks: blocks, } copyChan <- cs }
// pullerIteration runs a single puller iteration for the given folder and // returns the number items that should have been synced (even those that // might have failed). One puller iteration handles all files currently // flagged as needed in the folder. func (f *rwFolder) pullerIteration(ignores *ignore.Matcher) int { pullChan := make(chan pullBlockState) copyChan := make(chan copyBlocksState) finisherChan := make(chan *sharedPullerState) updateWg := sync.NewWaitGroup() copyWg := sync.NewWaitGroup() pullWg := sync.NewWaitGroup() doneWg := sync.NewWaitGroup() l.Debugln(f, "c", f.copiers, "p", f.pullers) f.dbUpdates = make(chan dbUpdateJob) updateWg.Add(1) go func() { // dbUpdaterRoutine finishes when f.dbUpdates is closed f.dbUpdaterRoutine() updateWg.Done() }() for i := 0; i < f.copiers; i++ { copyWg.Add(1) go func() { // copierRoutine finishes when copyChan is closed f.copierRoutine(copyChan, pullChan, finisherChan) copyWg.Done() }() } for i := 0; i < f.pullers; i++ { pullWg.Add(1) go func() { // pullerRoutine finishes when pullChan is closed f.pullerRoutine(pullChan, finisherChan) pullWg.Done() }() } doneWg.Add(1) // finisherRoutine finishes when finisherChan is closed go func() { f.finisherRoutine(finisherChan) doneWg.Done() }() f.model.fmut.RLock() folderFiles := f.model.folderFiles[f.folderID] f.model.fmut.RUnlock() // !!! // WithNeed takes a database snapshot (by necessity). By the time we've // handled a bunch of files it might have become out of date and we might // be attempting to sync with an old version of a file... // !!! changed := 0 fileDeletions := map[string]protocol.FileInfo{} dirDeletions := []protocol.FileInfo{} buckets := map[string][]protocol.FileInfo{} handleItem := func(fi protocol.FileInfo) bool { switch { case fi.IsDeleted(): // A deleted file, directory or symlink if fi.IsDirectory() { dirDeletions = append(dirDeletions, fi) } else { fileDeletions[fi.Name] = fi df, ok := f.model.CurrentFolderFile(f.folderID, fi.Name) // Local file can be already deleted, but with a lower version // number, hence the deletion coming in again as part of // WithNeed, furthermore, the file can simply be of the wrong // type if we haven't yet managed to pull it. if ok && !df.IsDeleted() && !df.IsSymlink() && !df.IsDirectory() { // Put files into buckets per first hash key := string(df.Blocks[0].Hash) buckets[key] = append(buckets[key], df) } } changed++ case fi.IsDirectory() && !fi.IsSymlink(): l.Debugln("Handling directory", fi.Name) f.handleDir(fi) changed++ case fi.IsSymlink(): l.Debugln("Handling symlink", fi.Name) f.handleSymlink(fi) changed++ default: return false } return true } folderFiles.WithNeed(protocol.LocalDeviceID, func(intf db.FileIntf) bool { // Needed items are delivered sorted lexicographically. We'll handle // directories as they come along, so parents before children. Files // are queued and the order may be changed later. if shouldIgnore(intf, ignores, f.ignoreDelete) { return true } if err := fileValid(intf); err != nil { // The file isn't valid so we can't process it. Pretend that we // tried and set the error for the file. f.newError(intf.FileName(), err) changed++ return true } file := intf.(protocol.FileInfo) l.Debugln(f, "handling", file.Name) if !handleItem(file) { // A new or changed file or symlink. This is the only case where // we do stuff concurrently in the background. We only queue // files where we are connected to at least one device that has // the file. devices := folderFiles.Availability(file.Name) for _, dev := range devices { if f.model.ConnectedTo(dev) { f.queue.Push(file.Name, file.Size, file.ModTime()) changed++ break } } } return true }) // Reorder the file queue according to configuration switch f.order { case config.OrderRandom: f.queue.Shuffle() case config.OrderAlphabetic: // The queue is already in alphabetic order. case config.OrderSmallestFirst: f.queue.SortSmallestFirst() case config.OrderLargestFirst: f.queue.SortLargestFirst() case config.OrderOldestFirst: f.queue.SortOldestFirst() case config.OrderNewestFirst: f.queue.SortNewestFirst() } // Process the file queue nextFile: for { select { case <-f.stop: // Stop processing files if the puller has been told to stop. break default: } fileName, ok := f.queue.Pop() if !ok { break } fi, ok := f.model.CurrentGlobalFile(f.folderID, fileName) if !ok { // File is no longer in the index. Mark it as done and drop it. f.queue.Done(fileName) continue } // Handles races where an index update arrives changing what the file // is between queueing and retrieving it from the queue, effectively // changing how the file should be handled. if handleItem(fi) { continue } // Check our list of files to be removed for a match, in which case // we can just do a rename instead. key := string(fi.Blocks[0].Hash) for i, candidate := range buckets[key] { if scanner.BlocksEqual(candidate.Blocks, fi.Blocks) { // Remove the candidate from the bucket lidx := len(buckets[key]) - 1 buckets[key][i] = buckets[key][lidx] buckets[key] = buckets[key][:lidx] // candidate is our current state of the file, where as the // desired state with the delete bit set is in the deletion // map. desired := fileDeletions[candidate.Name] // Remove the pending deletion (as we perform it by renaming) delete(fileDeletions, candidate.Name) f.renameFile(desired, fi) f.queue.Done(fileName) continue nextFile } } // Handle the file normally, by coping and pulling, etc. f.handleFile(fi, copyChan, finisherChan) } // Signal copy and puller routines that we are done with the in data for // this iteration. Wait for them to finish. close(copyChan) copyWg.Wait() close(pullChan) pullWg.Wait() // Signal the finisher chan that there will be no more input. close(finisherChan) // Wait for the finisherChan to finish. doneWg.Wait() for _, file := range fileDeletions { l.Debugln("Deleting file", file.Name) f.deleteFile(file) } for i := range dirDeletions { dir := dirDeletions[len(dirDeletions)-i-1] l.Debugln("Deleting dir", dir.Name) f.deleteDir(dir, ignores) } // Wait for db updates to complete close(f.dbUpdates) updateWg.Wait() return changed }
// pullerIteration runs a single puller iteration for the given folder and // returns the number items that should have been synced (even those that // might have failed). One puller iteration handles all files currently // flagged as needed in the folder. func (p *rwFolder) pullerIteration(ignores *ignore.Matcher) int { pullChan := make(chan pullBlockState) copyChan := make(chan copyBlocksState) finisherChan := make(chan *sharedPullerState) updateWg := sync.NewWaitGroup() copyWg := sync.NewWaitGroup() pullWg := sync.NewWaitGroup() doneWg := sync.NewWaitGroup() if debug { l.Debugln(p, "c", p.copiers, "p", p.pullers) } p.dbUpdates = make(chan dbUpdateJob) updateWg.Add(1) go func() { // dbUpdaterRoutine finishes when p.dbUpdates is closed p.dbUpdaterRoutine() updateWg.Done() }() for i := 0; i < p.copiers; i++ { copyWg.Add(1) go func() { // copierRoutine finishes when copyChan is closed p.copierRoutine(copyChan, pullChan, finisherChan) copyWg.Done() }() } for i := 0; i < p.pullers; i++ { pullWg.Add(1) go func() { // pullerRoutine finishes when pullChan is closed p.pullerRoutine(pullChan, finisherChan) pullWg.Done() }() } doneWg.Add(1) // finisherRoutine finishes when finisherChan is closed go func() { p.finisherRoutine(finisherChan) doneWg.Done() }() p.model.fmut.RLock() folderFiles := p.model.folderFiles[p.folder] p.model.fmut.RUnlock() // !!! // WithNeed takes a database snapshot (by necessity). By the time we've // handled a bunch of files it might have become out of date and we might // be attempting to sync with an old version of a file... // !!! changed := 0 pullFileSize := int64(0) fileDeletions := map[string]protocol.FileInfo{} dirDeletions := []protocol.FileInfo{} buckets := map[string][]protocol.FileInfo{} folderFiles.WithNeed(protocol.LocalDeviceID, func(intf db.FileIntf) bool { // Needed items are delivered sorted lexicographically. We'll handle // directories as they come along, so parents before children. Files // are queued and the order may be changed later. file := intf.(protocol.FileInfo) if ignores.Match(file.Name) { // This is an ignored file. Skip it, continue iteration. return true } if debug { l.Debugln(p, "handling", file.Name) } switch { case file.IsDeleted(): // A deleted file, directory or symlink if file.IsDirectory() { dirDeletions = append(dirDeletions, file) } else { fileDeletions[file.Name] = file df, ok := p.model.CurrentFolderFile(p.folder, file.Name) // Local file can be already deleted, but with a lower version // number, hence the deletion coming in again as part of // WithNeed, furthermore, the file can simply be of the wrong // type if we haven't yet managed to pull it. if ok && !df.IsDeleted() && !df.IsSymlink() && !df.IsDirectory() { // Put files into buckets per first hash key := string(df.Blocks[0].Hash) buckets[key] = append(buckets[key], df) } } case file.IsDirectory() && !file.IsSymlink(): // A new or changed directory if debug { l.Debugln("Creating directory", file.Name) } p.handleDir(file) default: // A new or changed file or symlink. This is the only case where we // do stuff concurrently in the background pullFileSize += file.Size() p.queue.Push(file.Name, file.Size(), file.Modified) } changed++ return true }) // Check if we are able to store all files on disk if pullFileSize > 0 { folder, ok := p.model.cfg.Folders()[p.folder] if ok { if free, err := osutil.DiskFreeBytes(folder.Path()); err == nil && free < pullFileSize { l.Infof("Puller (folder %q): insufficient disk space available to pull %d files (%.2fMB)", p.folder, changed, float64(pullFileSize)/1024/1024) return 0 } } } // Reorder the file queue according to configuration switch p.order { case config.OrderRandom: p.queue.Shuffle() case config.OrderAlphabetic: // The queue is already in alphabetic order. case config.OrderSmallestFirst: p.queue.SortSmallestFirst() case config.OrderLargestFirst: p.queue.SortLargestFirst() case config.OrderOldestFirst: p.queue.SortOldestFirst() case config.OrderNewestFirst: p.queue.SortOldestFirst() } // Process the file queue nextFile: for { fileName, ok := p.queue.Pop() if !ok { break } f, ok := p.model.CurrentGlobalFile(p.folder, fileName) if !ok { // File is no longer in the index. Mark it as done and drop it. p.queue.Done(fileName) continue } // Local file can be already deleted, but with a lower version // number, hence the deletion coming in again as part of // WithNeed, furthermore, the file can simply be of the wrong type if // the global index changed while we were processing this iteration. if !f.IsDeleted() && !f.IsSymlink() && !f.IsDirectory() { key := string(f.Blocks[0].Hash) for i, candidate := range buckets[key] { if scanner.BlocksEqual(candidate.Blocks, f.Blocks) { // Remove the candidate from the bucket lidx := len(buckets[key]) - 1 buckets[key][i] = buckets[key][lidx] buckets[key] = buckets[key][:lidx] // candidate is our current state of the file, where as the // desired state with the delete bit set is in the deletion // map. desired := fileDeletions[candidate.Name] // Remove the pending deletion (as we perform it by renaming) delete(fileDeletions, candidate.Name) p.renameFile(desired, f) p.queue.Done(fileName) continue nextFile } } } // Not a rename or a symlink, deal with it. p.handleFile(f, copyChan, finisherChan) } // Signal copy and puller routines that we are done with the in data for // this iteration. Wait for them to finish. close(copyChan) copyWg.Wait() close(pullChan) pullWg.Wait() // Signal the finisher chan that there will be no more input. close(finisherChan) // Wait for the finisherChan to finish. doneWg.Wait() for _, file := range fileDeletions { if debug { l.Debugln("Deleting file", file.Name) } p.deleteFile(file) } for i := range dirDeletions { dir := dirDeletions[len(dirDeletions)-i-1] if debug { l.Debugln("Deleting dir", dir.Name) } p.deleteDir(dir) } // Wait for db updates to complete close(p.dbUpdates) updateWg.Wait() return changed }