func missingBlobs(dbDir, blobDir string) error { fsck, err := db.NewRO(dbDir) if err != nil { return err } defer fsck.Close() bs, err := dir.New(blobDir) if err != nil { return err } // defer s.Close() - where is this?? missing := 0 // TODO(dichro): cache Parents() call results? for ref := range fsck.Missing() { if body, size, err := bs.Fetch(blob.MustParse(ref)); err == nil { log.Printf("missing ref %q found with size %d", ref, size) body.Close() continue } fmt.Println(ref) missing++ nodes, err := fsck.Parents(ref) if err != nil { log.Print(err) continue } printHierarchy(fsck, bs, 1, "", nodes) } fmt.Println("total", missing) return nil }
func mimeScanBlobs(dbDir, blobDir string, workers int) error { fsck, err := db.NewRO(dbDir) if err != nil { return err } bs, err := dir.New(blobDir) if err != nil { return err } stats := fs.NewStats() defer stats.LogEvery(10 * time.Second).Stop() defer log.Print(stats) go func() { for _ = range time.Tick(10 * time.Second) { fmt.Println(time.Now(), stats) } }() blobCh := fsck.List("file") var wg sync.WaitGroup for i := 0; i < workers; i++ { wg.Add(1) go func() { defer wg.Done() for ref := range blobCh { s, err := schemaFromBlobRef(bs, ref) if err != nil { log.Printf("%s: previously indexed; now missing", ref) stats.Add("badschema") continue } file, err := s.NewFileReader(bs) if err != nil { log.Printf("%s: unreadable: %s", ref, err) stats.Add("unreadable") continue } mime, _ := magic.MIMETypeFromReader(file) file.Close() if mime != "" { if pos := strings.Index(mime, "; charset="); pos >= 0 { mime = mime[:pos] } if err := fsck.PlaceMIME(ref, mime); err != nil { log.Printf("%s: PlaceMIME(): %s", ref, mime) mime = "error" } } else { mime = "unknown" } stats.Add(mime) } }() } wg.Wait() return nil }
// filePath prints, for each of the given blob refs, every chain of
// parent blobs recorded in the fsck database that leads down to that
// ref, rendered as a directory/file-style path. It returns the first
// error encountered, or nil.
func filePath(dbDir, blobDir string, refs []string) error {
	fsck, err := db.NewRO(dbDir)
	if err != nil {
		return err
	}
	defer fsck.Close()
	bs, err := dir.New(blobDir)
	if err != nil {
		return err
	}
	for _, r := range refs {
		ch := make(chan []string, 10)
		// Producer goroutine: the range below drains ch to completion
		// (close happens after StreamAllParentPaths returns) before the
		// loop advances, so capturing r here is safe even pre-Go 1.22.
		go func() {
			fsck.StreamAllParentPaths(r, ch)
			close(ch)
		}()
		// TODO(dichro): print something if there's no paths
	PATH:
		for path := range ch {
			pretty := make([]string, 0, len(path))
			foundFile := false
			// Walk path from its far end so output reads top-down.
			// NOTE(review): this assumes StreamAllParentPaths emits paths
			// leaf-first — confirm against its implementation.
			for i := range path {
				p := path[len(path)-i-1]
				s, err := schemaFromBlobRef(bs, p)
				if err != nil {
					return err
				}
				// Default rendering for schema types not special-cased below.
				str := fmt.Sprintf("(%s:%s)->", s.Type(), p)
				switch s.Type() {
				case "directory":
					str = s.FileName() + "/"
				case "file":
					if foundFile {
						// we already found a "file" that contains the
						// target blob. If we're seeing another "file" on
						// the way up, then that "file" must actually
						// contain a schema blob that ultimately references
						// our target blob, which is not what we're looking
						// for.
						continue PATH
					}
					foundFile = true
					str = s.FileName()
				case "static-set":
					// Structural node; contributes nothing to the path text.
					continue
				case "bytes":
					// Structural node; contributes nothing to the path text.
					continue
				}
				pretty = append(pretty, str)
			}
			fmt.Println(r, strings.Join(pretty, ""))
		}
	}
	return nil
}
func (f *Flag) Set(path string) error { f.Path = path s, err := dir.New(path) _, ok := s.(blobserver.BlobStreamer) switch { case err != nil: return err case !ok: return errors.New("not a diskpacked repository") default: f.BS = s } return nil }
// newUploader builds the Uploader used by the upload commands. With
// -blobdir set it uploads to local disk storage (or to a discard sink
// when the value is "discard"); otherwise it builds a network client,
// optionally honoring proxy settings and wrapping the transport to
// collect HTTP stats. Fatal on any setup failure.
func newUploader() *Uploader {
	var cc *client.Client
	var httpStats *httputil.StatsTransport
	if d := *flagBlobDir; d != "" {
		ss, err := dir.New(d)
		if err != nil && d == "discard" {
			// Magic value: a sink that throws all uploaded blobs away.
			ss = discardStorage{}
			err = nil
		}
		if err != nil {
			log.Fatalf("Error using dir %s as storage: %v", d, err)
		}
		cc = client.NewStorageClient(ss)
	} else {
		cc = client.NewOrFail()
		proxy := http.ProxyFromEnvironment
		if flagProxyLocal {
			proxy = proxyFromEnvironment
		}
		tr := cc.TransportForConfig(
			&client.TransportConfig{
				Proxy:   proxy,
				Verbose: *flagHTTP,
			})
		// httpStats stays nil unless the transport is a StatsTransport.
		httpStats, _ = tr.(*httputil.StatsTransport)
		cc.SetHTTPClient(&http.Client{Transport: tr})
	}
	if *cmdmain.FlagVerbose {
		cc.SetLogger(log.New(cmdmain.Stderr, "", log.LstdFlags))
	} else {
		cc.SetLogger(nil)
	}
	pwd, err := os.Getwd()
	if err != nil {
		log.Fatalf("os.Getwd: %v", err)
	}
	return &Uploader{
		Client:    cc,
		transport: httpStats,
		pwd:       pwd,
		fdGate:    syncutil.NewGate(100), // gate things that waste fds, assuming a low system limit
	}
}
func main() { flag.Parse() if len(*blobDir)*len(*indexDir) == 0 { flag.Usage() return } s, err := dir.New(*blobDir) if err != nil { log.Fatal(err) } src, ok := s.(blobserver.BlobStreamer) if !ok { log.Fatalf("%v is not a BlobStreamer", s) } db, err := leveldb.NewStorage(*indexDir) if err != nil { log.Fatal(err) } defer db.Close() dst, err := index.New(db) if err != nil { log.Fatal(err) } fe := FetcherEnumerator{ FetcherEnumerator: s, c1: make(map[string]*blob.Blob), start: time.Now(), } dst.InitBlobSource(&fe) ch := make(chan blobserver.BlobAndToken) go fe.PrintStats() for i := 0; i < *parallel; i++ { go fe.Index(ch, dst) } ctx := context.New() if err := src.StreamBlobs(ctx, ch, *streamStart); err != nil { log.Fatal(err) } }
func streamBlobs(path, resume string) <-chan blobserver.BlobAndToken { s, err := dir.New(path) if err != nil { log.Fatal(err) } bs, ok := s.(blobserver.BlobStreamer) if !ok { log.Fatalf("%v is not a BlobStreamer", s) } ch := make(chan blobserver.BlobAndToken, 10) go func() { ctx := context.New() if err := bs.StreamBlobs(ctx, ch, resume); err != nil { log.Fatal(err) } }() return ch }
// main scans files of a given MIME type (default image/jpeg) recorded
// in the FSCK database, decodes their EXIF data, and tallies camera
// models; with -print it also emits each file's ref, a best-effort
// unique ID, filename, and model.
func main() {
	dbDir := flag.String("db_dir", "", "FSCK state database directory")
	blobDir := flag.String("blob_dir", "", "Camlistore blob directory")
	mimeType := flag.String("mime_type", "image/jpeg", "MIME type of files to scan")
	print := flag.Bool("print", false, "Print ref and camera model") // NOTE(review): shadows the predeclared print
	workers := fsck.Parallel{Workers: 32}
	// NOTE(review): flag.Var takes a flag.Value; workers is passed by
	// value, so Parallel presumably implements Value on a value
	// receiver — confirm, otherwise this should be &workers.
	flag.Var(workers, "workers", "parallel worker goroutines")
	flag.Parse()
	fdb, err := db.New(*dbDir)
	if err != nil {
		log.Fatal(err)
	}
	bs, err := dir.New(*blobDir)
	if err != nil {
		log.Fatal(err)
	}
	stats := fsck.NewStats()
	defer stats.LogTopNEvery(10, 10*time.Second).Stop()
	defer log.Print(stats)
	files := fsck.NewFiles(bs)
	go func() {
		// Feed refs of the requested MIME type, then signal completion.
		files.ReadRefs(fdb.ListMIME(*mimeType))
		files.Close()
	}()
	go files.LogErrors()
	workers.Go(func() {
		for r := range files.Readers {
			ex, err := exif.Decode(r)
			if err != nil {
				stats.Add("error")
				continue
			}
			tag, err := ex.Get(exif.Model)
			if err != nil {
				stats.Add("missing")
				continue
			}
			stats.Add(tag.String())
			if *print {
				// Best-effort unique ID: EXIF ImageUniqueID, else SHA-1 of
				// the JPEG thumbnail, else (small files only) SHA-1 of the
				// whole contents.
				id := "unknown"
				if tag, err := ex.Get(exif.ImageUniqueID); err == nil {
					id = tag.String()
					stats.Add("unique-id-exif")
				} else if thumb, err := ex.JpegThumbnail(); err == nil {
					hash := sha1.Sum(thumb)
					id = hex.EncodeToString(hash[:20])
					stats.Add("unique-id-thumb")
				} else if r.PartsSize() < 1e7 {
					// Only hash full contents when under ~10MB.
					if _, err := r.Seek(0, 0); err == nil {
						hash := sha1.New()
						io.Copy(hash, r)
						id = hex.EncodeToString(hash.Sum(nil))
						stats.Add("unique-id-sha1")
					} else {
						id = "read-error"
						stats.Add("unique-id-sha1-error")
					}
				} else {
					stats.Add("unique-id-too-big")
				}
				fmt.Printf("%s %s %q %q\n", r.BlobRef(), id, r.FileName(), tag)
			}
		}
	})
	workers.Wait()
}