// Retrieves the sets of duplicate files within the database. func DuplicateFiles(tx *Tx) ([]entities.Files, error) { sql := ` SELECT id, directory, name, fingerprint, mod_time, size, is_dir FROM file WHERE fingerprint IN (SELECT fingerprint FROM file WHERE fingerprint != '' GROUP BY fingerprint HAVING count(1) > 1 ) ORDER BY fingerprint, directory || '/' || name` rows, err := tx.Query(sql) if err != nil { return nil, err } defer rows.Close() fileSets := make([]entities.Files, 0, 10) var fileSet entities.Files var previousFingerprint fingerprint.Fingerprint for rows.Next() { if rows.Err() != nil { return nil, err } var fileId entities.FileId var directory, name, fp string var modTime time.Time var size int64 var isDir bool err = rows.Scan(&fileId, &directory, &name, &fp, &modTime, &size, &isDir) if err != nil { return nil, err } fingerprint := fingerprint.Fingerprint(fp) if fingerprint != previousFingerprint { if fileSet != nil { fileSets = append(fileSets, fileSet) } fileSet = make(entities.Files, 0, 10) previousFingerprint = fingerprint } fileSet = append(fileSet, &entities.File{fileId, directory, name, fingerprint, modTime, size, isDir}) } // ensure last file set is added if len(fileSet) > 0 { fileSets = append(fileSets, fileSet) } return fileSets, nil }
func readFile(rows *sql.Rows) (*entities.File, error) { if !rows.Next() { return nil, nil } if rows.Err() != nil { return nil, rows.Err() } var fileId entities.FileId var directory, name, fp string var modTime time.Time var size int64 var isDir bool err := rows.Scan(&fileId, &directory, &name, &fp, &modTime, &size, &isDir) if err != nil { return nil, err } return &entities.File{fileId, directory, name, fingerprint.Fingerprint(fp), modTime, size, isDir}, nil }
// findDuplicatesOf prints, for each of the given paths, the database files
// that share that path's fingerprint (other than the path itself).
//
// Non-fatal path problems (missing file, permission denied) are collected
// into the returned warnings rather than aborting. When recursive is true,
// the path list is expanded via filesystem.Enumerate before fingerprinting.
// With multiple paths, each path's duplicates are printed under a
// "<path>:" header, blank-line separated; with a single path, duplicates
// are printed bare, one per line.
//
// NOTE: returns (error, warnings) — error first — matching this file's
// existing convention.
func findDuplicatesOf(store *storage.Storage, tx *storage.Tx, paths []string, recursive bool) (error, warnings) {
	settings, err := store.Settings(tx)
	if err != nil {
		return err, nil
	}

	warnings := make(warnings, 0, 10)

	// Pre-validate every path so stat problems become warnings up front;
	// unexpected stat errors are fatal.
	for _, path := range paths {
		_, err := os.Stat(path)
		if err != nil {
			switch {
			case os.IsNotExist(err):
				warnings = append(warnings, fmt.Sprintf("%v: no such file", path))
				continue
			case os.IsPermission(err):
				warnings = append(warnings, fmt.Sprintf("%v: permission denied", path))
				continue
			default:
				return err, warnings
			}
		}
	}

	if recursive {
		// Replace the path list with the enumerated contents.
		// NOTE(review): presumably Enumerate includes the original paths
		// themselves — confirm against filesystem.Enumerate.
		p, err := filesystem.Enumerate(paths...)
		if err != nil {
			return fmt.Errorf("could not enumerate paths: %v", err), warnings
		}

		paths = make([]string, len(p))
		for index, path := range p {
			paths[index] = path.Path
		}
	}

	// 'first' suppresses the blank separator line before the first
	// printed group in multi-path output.
	first := true
	for _, path := range paths {
		log.Infof(2, "%v: identifying duplicate files.", path)

		fp, err := fingerprint.Create(path, settings.FileFingerprintAlgorithm(), settings.DirectoryFingerprintAlgorithm(), settings.SymlinkFingerprintAlgorithm())
		if err != nil {
			return fmt.Errorf("%v: could not create fingerprint: %v", path, err), warnings
		}

		// An empty fingerprint cannot meaningfully match anything; skip.
		if fp == fingerprint.Fingerprint("") {
			continue
		}

		files, err := store.FilesByFingerprint(tx, fp)
		if err != nil {
			return fmt.Errorf("%v: could not retrieve files matching fingerprint '%v': %v", path, fp, err), warnings
		}

		absPath, err := filepath.Abs(path)
		if err != nil {
			return fmt.Errorf("%v: could not determine absolute path: %v", path, err), warnings
		}

		// filter out the file we're searching on
		dupes := files.Where(func(file *entities.File) bool { return file.Path() != absPath })

		if len(paths) > 1 && len(dupes) > 0 {
			// Multi-path mode: group duplicates under a per-path header.
			if first {
				first = false
			} else {
				fmt.Println()
			}

			fmt.Printf("%v:\n", path)

			for _, dupe := range dupes {
				relPath := _path.Rel(dupe.Path())
				fmt.Printf("  %v\n", relPath)
			}
		} else {
			// Single-path mode (or no duplicates): bare listing.
			for _, dupe := range dupes {
				relPath := _path.Rel(dupe.Path())
				fmt.Println(relPath)
			}
		}
	}

	return nil, warnings
}