func missingBlobs(dbDir, blobDir string) error { fsck, err := db.NewRO(dbDir) if err != nil { return err } defer fsck.Close() bs, err := dir.New(blobDir) if err != nil { return err } // defer s.Close() - where is this?? missing := 0 // TODO(dichro): cache Parents() call results? for ref := range fsck.Missing() { if body, size, err := bs.Fetch(blob.MustParse(ref)); err == nil { log.Printf("missing ref %q found with size %d", ref, size) body.Close() continue } fmt.Println(ref) missing++ nodes, err := fsck.Parents(ref) if err != nil { log.Print(err) continue } printHierarchy(fsck, bs, 1, "", nodes) } fmt.Println("total", missing) return nil }
func statsBlobs(dbDir string) error { fsck, err := db.NewRO(dbDir) if err != nil { return err } s := fsck.Stats() fmt.Println(s) if len(s.CamliTypes) != 0 { fmt.Println("camliTypes:") camliTypes := []string{} for t := range s.CamliTypes { camliTypes = append(camliTypes, t) } sort.Strings(camliTypes) for _, t := range camliTypes { fmt.Printf("\t%q: %d\n", t, s.CamliTypes[t]) } } if len(s.MIMETypes) != 0 { fmt.Println("MIMETypes:") types := []string{} for t := range s.MIMETypes { types = append(types, t) } sort.Strings(types) for _, t := range types { fmt.Printf("\t%q: %d\n", t, s.MIMETypes[t]) } } return nil }
func mimeScanBlobs(dbDir, blobDir string, workers int) error { fsck, err := db.NewRO(dbDir) if err != nil { return err } bs, err := dir.New(blobDir) if err != nil { return err } stats := fs.NewStats() defer stats.LogEvery(10 * time.Second).Stop() defer log.Print(stats) go func() { for _ = range time.Tick(10 * time.Second) { fmt.Println(time.Now(), stats) } }() blobCh := fsck.List("file") var wg sync.WaitGroup for i := 0; i < workers; i++ { wg.Add(1) go func() { defer wg.Done() for ref := range blobCh { s, err := schemaFromBlobRef(bs, ref) if err != nil { log.Printf("%s: previously indexed; now missing", ref) stats.Add("badschema") continue } file, err := s.NewFileReader(bs) if err != nil { log.Printf("%s: unreadable: %s", ref, err) stats.Add("unreadable") continue } mime, _ := magic.MIMETypeFromReader(file) file.Close() if mime != "" { if pos := strings.Index(mime, "; charset="); pos >= 0 { mime = mime[:pos] } if err := fsck.PlaceMIME(ref, mime); err != nil { log.Printf("%s: PlaceMIME(): %s", ref, mime) mime = "error" } } else { mime = "unknown" } stats.Add(mime) } }() } wg.Wait() return nil }
func filePath(dbDir, blobDir string, refs []string) error { fsck, err := db.NewRO(dbDir) if err != nil { return err } defer fsck.Close() bs, err := dir.New(blobDir) if err != nil { return err } for _, r := range refs { ch := make(chan []string, 10) go func() { fsck.StreamAllParentPaths(r, ch) close(ch) }() // TODO(dichro): print something if there's no paths PATH: for path := range ch { pretty := make([]string, 0, len(path)) foundFile := false for i := range path { p := path[len(path)-i-1] s, err := schemaFromBlobRef(bs, p) if err != nil { return err } str := fmt.Sprintf("(%s:%s)->", s.Type(), p) switch s.Type() { case "directory": str = s.FileName() + "/" case "file": if foundFile { // we already found a "file" that contains the // target blob. If we're seeing another "file" on // the way up, then that "file" must actually // contain a schema blob that ultimately references // our target blob, which is not what we're looking // for. continue PATH } foundFile = true str = s.FileName() case "static-set": continue case "bytes": continue } pretty = append(pretty, str) } fmt.Println(r, strings.Join(pretty, "")) } } return nil }
func listBlobs(dbDir string, args []string) error { fsck, err := db.NewRO(dbDir) if err != nil { return err } var ch <-chan string switch args[0] { case "camli": ch = fsck.List(args[1]) case "mime": ch = fsck.ListMIME(args[1]) default: return errors.New(`unknown index, use "camli" or "mime"`) } for ref := range ch { fmt.Println(ref) } return nil }