Exemplo n.º 1
0
// Retrieves the sets of duplicate files within the database.
func DuplicateFiles(tx *Tx) ([]entities.Files, error) {
	sql := `
SELECT id, directory, name, fingerprint, mod_time, size, is_dir
FROM file
WHERE fingerprint IN (SELECT fingerprint
                      FROM file
                      WHERE fingerprint != ''
                      GROUP BY fingerprint
                      HAVING count(1) > 1
)
ORDER BY fingerprint, directory || '/' || name`

	rows, err := tx.Query(sql)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	fileSets := make([]entities.Files, 0, 10)
	var fileSet entities.Files
	var previousFingerprint fingerprint.Fingerprint

	for rows.Next() {
		if rows.Err() != nil {
			return nil, err
		}

		var fileId entities.FileId
		var directory, name, fp string
		var modTime time.Time
		var size int64
		var isDir bool
		err = rows.Scan(&fileId, &directory, &name, &fp, &modTime, &size, &isDir)
		if err != nil {
			return nil, err
		}

		fingerprint := fingerprint.Fingerprint(fp)

		if fingerprint != previousFingerprint {
			if fileSet != nil {
				fileSets = append(fileSets, fileSet)
			}
			fileSet = make(entities.Files, 0, 10)
			previousFingerprint = fingerprint
		}

		fileSet = append(fileSet, &entities.File{fileId, directory, name, fingerprint, modTime, size, isDir})
	}

	// ensure last file set is added
	if len(fileSet) > 0 {
		fileSets = append(fileSets, fileSet)
	}

	return fileSets, nil
}
Exemplo n.º 2
0
func readFile(rows *sql.Rows) (*entities.File, error) {
	if !rows.Next() {
		return nil, nil
	}
	if rows.Err() != nil {
		return nil, rows.Err()
	}

	var fileId entities.FileId
	var directory, name, fp string
	var modTime time.Time
	var size int64
	var isDir bool
	err := rows.Scan(&fileId, &directory, &name, &fp, &modTime, &size, &isDir)
	if err != nil {
		return nil, err
	}

	return &entities.File{fileId, directory, name, fingerprint.Fingerprint(fp), modTime, size, isDir}, nil
}
Exemplo n.º 3
0
func findDuplicatesOf(store *storage.Storage, tx *storage.Tx, paths []string, recursive bool) (error, warnings) {
	settings, err := store.Settings(tx)
	if err != nil {
		return err, nil
	}

	warnings := make(warnings, 0, 10)
	for _, path := range paths {
		_, err := os.Stat(path)
		if err != nil {
			switch {
			case os.IsNotExist(err):
				warnings = append(warnings, fmt.Sprintf("%v: no such file", path))
				continue
			case os.IsPermission(err):
				warnings = append(warnings, fmt.Sprintf("%v: permission denied", path))
				continue
			default:
				return err, warnings
			}
		}
	}

	if recursive {
		p, err := filesystem.Enumerate(paths...)
		if err != nil {
			return fmt.Errorf("could not enumerate paths: %v", err), warnings
		}

		paths = make([]string, len(p))
		for index, path := range p {
			paths[index] = path.Path
		}
	}

	first := true
	for _, path := range paths {
		log.Infof(2, "%v: identifying duplicate files.", path)

		fp, err := fingerprint.Create(path, settings.FileFingerprintAlgorithm(), settings.DirectoryFingerprintAlgorithm(), settings.SymlinkFingerprintAlgorithm())
		if err != nil {
			return fmt.Errorf("%v: could not create fingerprint: %v", path, err), warnings
		}

		if fp == fingerprint.Fingerprint("") {
			continue
		}

		files, err := store.FilesByFingerprint(tx, fp)
		if err != nil {
			return fmt.Errorf("%v: could not retrieve files matching fingerprint '%v': %v", path, fp, err), warnings
		}

		absPath, err := filepath.Abs(path)
		if err != nil {
			return fmt.Errorf("%v: could not determine absolute path: %v", path, err), warnings
		}

		// filter out the file we're searching on
		dupes := files.Where(func(file *entities.File) bool { return file.Path() != absPath })

		if len(paths) > 1 && len(dupes) > 0 {
			if first {
				first = false
			} else {
				fmt.Println()
			}

			fmt.Printf("%v:\n", path)

			for _, dupe := range dupes {
				relPath := _path.Rel(dupe.Path())
				fmt.Printf("  %v\n", relPath)
			}
		} else {
			for _, dupe := range dupes {
				relPath := _path.Rel(dupe.Path())
				fmt.Println(relPath)
			}
		}
	}

	return nil, warnings
}