// handleFile queues the copies and pulls as necessary for a single new or // changed file. func (p *rwFolder) handleFile(file protocol.FileInfo, copyChan chan<- copyBlocksState, finisherChan chan<- *sharedPullerState) { curFile, hasCurFile := p.model.CurrentFolderFile(p.folder, file.Name) if hasCurFile && len(curFile.Blocks) == len(file.Blocks) && scanner.BlocksEqual(curFile.Blocks, file.Blocks) { // We are supposed to copy the entire file, and then fetch nothing. We // are only updating metadata, so we don't actually *need* to make the // copy. l.Debugln(p, "taking shortcut on", file.Name) events.Default.Log(events.ItemStarted, map[string]string{ "folder": p.folder, "item": file.Name, "type": "file", "action": "metadata", }) p.queue.Done(file.Name) var err error if file.IsSymlink() { err = p.shortcutSymlink(file) } else { err = p.shortcutFile(file) } events.Default.Log(events.ItemFinished, map[string]interface{}{ "folder": p.folder, "item": file.Name, "error": events.Error(err), "type": "file", "action": "metadata", }) if err != nil { l.Infoln("Puller: shortcut:", err) p.newError(file.Name, err) } else { p.dbUpdates <- dbUpdateJob{file, dbUpdateShortcutFile} } return } // Figure out the absolute filenames we need once and for all tempName := filepath.Join(p.dir, defTempNamer.TempName(file.Name)) realName := filepath.Join(p.dir, file.Name) if hasCurFile && !curFile.IsDirectory() && !curFile.IsSymlink() { // Check that the file on disk is what we expect it to be according to // the database. If there's a mismatch here, there might be local // changes that we don't know about yet and we should scan before // touching the file. If we can't stat the file we'll just pull it. if info, err := osutil.Lstat(realName); err == nil { mtime := p.virtualMtimeRepo.GetMtime(file.Name, info.ModTime()) if mtime.Unix() != curFile.Modified || info.Size() != curFile.Size() { l.Debugln("file modified but not rescanned; not pulling:", realName) // Scan() is synchronous (i.e. blocks until the scan is // completed and returns an error), but a scan can't happen // while we're in the puller routine. Request the scan in the // background and it'll be handled when the current pulling // sweep is complete. As we do retries, we'll queue the scan // for this file up to ten times, but the last nine of those // scans will be cheap... go p.Scan([]string{file.Name}) return } } } scanner.PopulateOffsets(file.Blocks) reused := 0 var blocks []protocol.BlockInfo var blocksSize int64 // Check for an old temporary file which might have some blocks we could // reuse. tempBlocks, err := scanner.HashFile(tempName, protocol.BlockSize, 0, nil) if err == nil { // Check for any reusable blocks in the temp file tempCopyBlocks, _ := scanner.BlockDiff(tempBlocks, file.Blocks) // block.String() returns a string unique to the block existingBlocks := make(map[string]struct{}, len(tempCopyBlocks)) for _, block := range tempCopyBlocks { existingBlocks[block.String()] = struct{}{} } // Since the blocks are already there, we don't need to get them. for _, block := range file.Blocks { _, ok := existingBlocks[block.String()] if !ok { blocks = append(blocks, block) blocksSize += int64(block.Size) } } // The sharedpullerstate will know which flags to use when opening the // temp file depending if we are reusing any blocks or not. reused = len(file.Blocks) - len(blocks) if reused == 0 { // Otherwise, discard the file ourselves in order for the // sharedpuller not to panic when it fails to exclusively create a // file which already exists osutil.InWritableDir(osutil.Remove, tempName) } } else { blocks = file.Blocks blocksSize = file.Size() } if p.checkFreeSpace { if free, err := osutil.DiskFreeBytes(p.dir); err == nil && free < blocksSize { l.Warnf(`Folder "%s": insufficient disk space in %s for %s: have %.2f MiB, need %.2f MiB`, p.folder, p.dir, file.Name, float64(free)/1024/1024, float64(blocksSize)/1024/1024) p.newError(file.Name, errors.New("insufficient space")) return } } events.Default.Log(events.ItemStarted, map[string]string{ "folder": p.folder, "item": file.Name, "type": "file", "action": "update", }) s := sharedPullerState{ file: file, folder: p.folder, tempName: tempName, realName: realName, copyTotal: len(blocks), copyNeeded: len(blocks), reused: reused, ignorePerms: p.ignorePermissions(file), version: curFile.Version, mut: sync.NewMutex(), sparse: p.allowSparse, } l.Debugf("%v need file %s; copy %d, reused %v", p, file.Name, len(blocks), reused) cs := copyBlocksState{ sharedPullerState: &s, blocks: blocks, } copyChan <- cs }
// pullerIteration runs a single puller iteration for the given folder and // returns the number items that should have been synced (even those that // might have failed). One puller iteration handles all files currently // flagged as needed in the folder. func (p *rwFolder) pullerIteration(ignores *ignore.Matcher) int { pullChan := make(chan pullBlockState) copyChan := make(chan copyBlocksState) finisherChan := make(chan *sharedPullerState) updateWg := sync.NewWaitGroup() copyWg := sync.NewWaitGroup() pullWg := sync.NewWaitGroup() doneWg := sync.NewWaitGroup() l.Debugln(p, "c", p.copiers, "p", p.pullers) p.dbUpdates = make(chan dbUpdateJob) updateWg.Add(1) go func() { // dbUpdaterRoutine finishes when p.dbUpdates is closed p.dbUpdaterRoutine() updateWg.Done() }() for i := 0; i < p.copiers; i++ { copyWg.Add(1) go func() { // copierRoutine finishes when copyChan is closed p.copierRoutine(copyChan, pullChan, finisherChan) copyWg.Done() }() } for i := 0; i < p.pullers; i++ { pullWg.Add(1) go func() { // pullerRoutine finishes when pullChan is closed p.pullerRoutine(pullChan, finisherChan) pullWg.Done() }() } doneWg.Add(1) // finisherRoutine finishes when finisherChan is closed go func() { p.finisherRoutine(finisherChan) doneWg.Done() }() p.model.fmut.RLock() folderFiles := p.model.folderFiles[p.folder] p.model.fmut.RUnlock() // !!! // WithNeed takes a database snapshot (by necessity). By the time we've // handled a bunch of files it might have become out of date and we might // be attempting to sync with an old version of a file... // !!! changed := 0 fileDeletions := map[string]protocol.FileInfo{} dirDeletions := []protocol.FileInfo{} buckets := map[string][]protocol.FileInfo{} handleFile := func(f protocol.FileInfo) bool { switch { case f.IsDeleted(): // A deleted file, directory or symlink if f.IsDirectory() { dirDeletions = append(dirDeletions, f) } else { fileDeletions[f.Name] = f df, ok := p.model.CurrentFolderFile(p.folder, f.Name) // Local file can be already deleted, but with a lower version // number, hence the deletion coming in again as part of // WithNeed, furthermore, the file can simply be of the wrong // type if we haven't yet managed to pull it. if ok && !df.IsDeleted() && !df.IsSymlink() && !df.IsDirectory() { // Put files into buckets per first hash key := string(df.Blocks[0].Hash) buckets[key] = append(buckets[key], df) } } case f.IsDirectory() && !f.IsSymlink(): // A new or changed directory l.Debugln("Creating directory", f.Name) p.handleDir(f) default: return false } return true } folderFiles.WithNeed(protocol.LocalDeviceID, func(intf db.FileIntf) bool { // Needed items are delivered sorted lexicographically. We'll handle // directories as they come along, so parents before children. Files // are queued and the order may be changed later. file := intf.(protocol.FileInfo) if ignores.Match(file.Name) { // This is an ignored file. Skip it, continue iteration. return true } l.Debugln(p, "handling", file.Name) if !handleFile(file) { // A new or changed file or symlink. This is the only case where we // do stuff concurrently in the background p.queue.Push(file.Name, file.Size(), file.Modified) } changed++ return true }) // Reorder the file queue according to configuration switch p.order { case config.OrderRandom: p.queue.Shuffle() case config.OrderAlphabetic: // The queue is already in alphabetic order. case config.OrderSmallestFirst: p.queue.SortSmallestFirst() case config.OrderLargestFirst: p.queue.SortLargestFirst() case config.OrderOldestFirst: p.queue.SortOldestFirst() case config.OrderNewestFirst: p.queue.SortNewestFirst() } // Process the file queue nextFile: for { select { case <-p.stop: // Stop processing files if the puller has been told to stop. break default: } fileName, ok := p.queue.Pop() if !ok { break } f, ok := p.model.CurrentGlobalFile(p.folder, fileName) if !ok { // File is no longer in the index. Mark it as done and drop it. p.queue.Done(fileName) continue } // Handles races where an index update arrives changing what the file // is between queueing and retrieving it from the queue, effectively // changing how the file should be handled. if handleFile(f) { continue } if !f.IsSymlink() { key := string(f.Blocks[0].Hash) for i, candidate := range buckets[key] { if scanner.BlocksEqual(candidate.Blocks, f.Blocks) { // Remove the candidate from the bucket lidx := len(buckets[key]) - 1 buckets[key][i] = buckets[key][lidx] buckets[key] = buckets[key][:lidx] // candidate is our current state of the file, where as the // desired state with the delete bit set is in the deletion // map. desired := fileDeletions[candidate.Name] // Remove the pending deletion (as we perform it by renaming) delete(fileDeletions, candidate.Name) p.renameFile(desired, f) p.queue.Done(fileName) continue nextFile } } } // Not a rename or a symlink, deal with it. p.handleFile(f, copyChan, finisherChan) } // Signal copy and puller routines that we are done with the in data for // this iteration. Wait for them to finish. close(copyChan) copyWg.Wait() close(pullChan) pullWg.Wait() // Signal the finisher chan that there will be no more input. close(finisherChan) // Wait for the finisherChan to finish. doneWg.Wait() for _, file := range fileDeletions { l.Debugln("Deleting file", file.Name) p.deleteFile(file) } for i := range dirDeletions { dir := dirDeletions[len(dirDeletions)-i-1] l.Debugln("Deleting dir", dir.Name) p.deleteDir(dir) } // Wait for db updates to complete close(p.dbUpdates) updateWg.Wait() return changed }