// scanBlobs walks every blob under blobDir, validates its contents, and
// records what it finds in the fsck state database at dbDir. Schema blobs
// are parsed and indexed; anything else is recorded as opaque data. The
// scan is resumable: each placed blob's token is persisted, and restart=true
// discards the saved marker and starts over.
func scanBlobs(dbDir, blobDir string, restart bool) {
	fsck, err := db.New(dbDir)
	if err != nil {
		log.Fatal(err)
	}
	last := fsck.Last()
	if last != "" {
		if restart {
			fmt.Println("overwriting blob scan resume marker at", last)
			last = ""
		} else {
			fmt.Println("resuming blob scan at", last)
		}
	}
	blobCh := streamBlobs(blobDir, last)

	stats := fs.NewStats()
	defer stats.LogEvery(10 * time.Second).Stop()
	defer log.Print(stats)

	for b := range blobCh {
		if !b.ValidContents() {
			stats.Add("corrupt")
			continue
		}
		ref := b.Ref()
		body := b.Open()
		s, ok := parseSchema(ref, body)
		body.Close()
		if !ok {
			// Not a schema blob; record it as plain data.
			stats.Add("data")
			if err := fsck.Place(ref.String(), b.Token, "", nil); err != nil {
				log.Fatal(err)
			}
			continue
		}
		needs := indexSchemaBlob(fsck, s)
		t := s.Type()
		stats.Add(t)
		if err := fsck.Place(ref.String(), b.Token, t, needs); err != nil {
			log.Fatal(err)
		}
	}
}
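// The streamBlobs helper used above is not shown; the following is a
// hypothetical, self-contained sketch of what it could look like, not the
// real Camlistore implementation. It assumes blobs are stored as files named
// "<blobref>.dat" (e.g. "sha1-0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33.dat")
// somewhere under blobDir, and it uses the ref string itself as the resume
// token. The blobRef and diskBlob types exist only for this sketch; only the
// standard library is needed (bytes, crypto/sha1, encoding/hex, io,
// io/ioutil, log, os, path/filepath, strings).
type blobRef string

func (r blobRef) String() string { return string(r) }

type diskBlob struct {
	ref   blobRef
	data  []byte
	Token string // resume token persisted via fsck.Place
}

func (b *diskBlob) Ref() blobRef { return b.ref }

// Open returns the blob contents; callers must Close the reader.
func (b *diskBlob) Open() io.ReadCloser {
	return ioutil.NopCloser(bytes.NewReader(b.data))
}

// ValidContents reports whether the contents hash back to the blob's ref.
func (b *diskBlob) ValidContents() bool {
	sum := sha1.Sum(b.data)
	return string(b.ref) == "sha1-"+hex.EncodeToString(sum[:])
}

// streamBlobs walks blobDir and sends every blob whose token sorts after
// last (empty means start from the beginning) on the returned channel.
func streamBlobs(blobDir, last string) <-chan *diskBlob {
	ch := make(chan *diskBlob)
	go func() {
		defer close(ch)
		err := filepath.Walk(blobDir, func(path string, fi os.FileInfo, err error) error {
			if err != nil || fi.IsDir() || !strings.HasSuffix(path, ".dat") {
				return err
			}
			ref := strings.TrimSuffix(filepath.Base(path), ".dat")
			if last != "" && ref <= last {
				return nil // already scanned in a previous run
			}
			data, err := ioutil.ReadFile(path)
			if err != nil {
				return err
			}
			ch <- &diskBlob{ref: blobRef(ref), data: data, Token: ref}
			return nil
		})
		if err != nil {
			log.Print(err)
		}
	}()
	return ch
}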
// main scans all files of the given MIME type in a Camlistore blob
// directory, decodes their EXIF data, and tallies camera models. With
// -print it also emits a best-effort unique ID per file, derived from the
// EXIF ImageUniqueID tag, the embedded JPEG thumbnail, or a SHA-1 of the
// file contents (for files under 10 MB).
func main() {
	dbDir := flag.String("db_dir", "", "FSCK state database directory")
	blobDir := flag.String("blob_dir", "", "Camlistore blob directory")
	mimeType := flag.String("mime_type", "image/jpeg", "MIME type of files to scan")
	print := flag.Bool("print", false, "Print ref and camera model")
	workers := fsck.Parallel{Workers: 32}
	flag.Var(&workers, "workers", "parallel worker goroutines")
	flag.Parse()

	fdb, err := db.New(*dbDir)
	if err != nil {
		log.Fatal(err)
	}
	bs, err := dir.New(*blobDir)
	if err != nil {
		log.Fatal(err)
	}

	stats := fsck.NewStats()
	defer stats.LogTopNEvery(10, 10*time.Second).Stop()
	defer log.Print(stats)

	files := fsck.NewFiles(bs)
	go func() {
		files.ReadRefs(fdb.ListMIME(*mimeType))
		files.Close()
	}()
	go files.LogErrors()

	workers.Go(func() {
		for r := range files.Readers {
			ex, err := exif.Decode(r)
			if err != nil {
				stats.Add("error")
				continue
			}
			tag, err := ex.Get(exif.Model)
			if err != nil {
				stats.Add("missing")
				continue
			}
			stats.Add(tag.String())
			if *print {
				id := "unknown"
				if tag, err := ex.Get(exif.ImageUniqueID); err == nil {
					id = tag.String()
					stats.Add("unique-id-exif")
				} else if thumb, err := ex.JpegThumbnail(); err == nil {
					hash := sha1.Sum(thumb)
					id = hex.EncodeToString(hash[:20])
					stats.Add("unique-id-thumb")
				} else if r.PartsSize() < 1e7 {
					// No EXIF-derived ID; fall back to hashing the file itself.
					if _, err := r.Seek(0, 0); err == nil {
						hash := sha1.New()
						io.Copy(hash, r)
						id = hex.EncodeToString(hash.Sum(nil))
						stats.Add("unique-id-sha1")
					} else {
						id = "read-error"
						stats.Add("unique-id-sha1-error")
					}
				} else {
					stats.Add("unique-id-too-big")
				}
				fmt.Printf("%s %s %q %q\n", r.BlobRef(), id, r.FileName(), tag)
			}
		}
	})
	workers.Wait()
}
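// The fsck.Parallel helper used in main above is part of this project's
// fsck package and is not shown; the following is a hypothetical sketch of
// one way it could work, not the real implementation. It satisfies
// flag.Value so "-workers N" can override the default count, and Go/Wait
// fan a worker function out over that many goroutines. Only the standard
// library (strconv, sync) is needed.
type Parallel struct {
	Workers int
	wg      sync.WaitGroup
}

func (p *Parallel) String() string { return strconv.Itoa(p.Workers) }

func (p *Parallel) Set(s string) error {
	n, err := strconv.Atoi(s)
	if err != nil {
		return err
	}
	p.Workers = n
	return nil
}

// Go starts f in p.Workers goroutines.
func (p *Parallel) Go(f func()) {
	for i := 0; i < p.Workers; i++ {
		p.wg.Add(1)
		go func() {
			defer p.wg.Done()
			f()
		}()
	}
}

// Wait blocks until every goroutine started by Go has returned.
func (p *Parallel) Wait() { p.wg.Wait() }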