Пример #1
0
// mimeScanBlobs detects and records the MIME type of every ref that the
// database under dbDir lists for "file".
//
// The database is opened with db.NewRO and the blob store under blobDir with
// dir.New; a failure to open either one is returned to the caller. Up to
// `workers` goroutines then drain fsck.List("file") concurrently. For each
// ref a worker loads the schema, opens a file reader, sniffs the MIME type,
// strips any "; charset=..." suffix and stores the result with
// fsck.PlaceMIME. Per-ref failures are logged and tallied in stats
// ("badschema", "unreadable", "error", "unknown"); they never abort the scan.
//
// With workers <= 0 no goroutine is started and the function returns nil
// without consuming the listing.
func mimeScanBlobs(dbDir, blobDir string, workers int) error {
	fsck, err := db.NewRO(dbDir)
	if err != nil {
		return err
	}
	bs, err := dir.New(blobDir)
	if err != nil {
		return err
	}

	stats := fs.NewStats()
	// LogEvery is invoked right here; only the Stop call on its result is
	// deferred until return.
	defer stats.LogEvery(10 * time.Second).Stop()
	defer log.Print(stats)

	// Periodic progress on stdout. The reporter is tied to this call's
	// lifetime: closing done on return ends the goroutine and releases the
	// ticker, instead of leaving a time.Tick loop running forever.
	done := make(chan struct{})
	defer close(done)
	go func() {
		ticker := time.NewTicker(10 * time.Second)
		defer ticker.Stop()
		for {
			select {
			case <-ticker.C:
				fmt.Println(time.Now(), stats)
			case <-done:
				return
			}
		}
	}()

	blobCh := fsck.List("file")
	var wg sync.WaitGroup
	for i := 0; i < workers; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for ref := range blobCh {
				s, err := schemaFromBlobRef(bs, ref)
				if err != nil {
					// Include the underlying error so the cause is visible.
					log.Printf("%s: previously indexed; now missing: %v", ref, err)
					stats.Add("badschema")
					continue
				}
				file, err := s.NewFileReader(bs)
				if err != nil {
					log.Printf("%s: unreadable: %s", ref, err)
					stats.Add("unreadable")
					continue
				}
				// Only the detected type is needed; the second result is
				// not used here.
				mime, _ := magic.MIMETypeFromReader(file)
				file.Close()
				if mime == "" {
					stats.Add("unknown")
					continue
				}
				// Drop the charset parameter so that variants of the same
				// type are stored and counted together.
				if pos := strings.Index(mime, "; charset="); pos >= 0 {
					mime = mime[:pos]
				}
				if err := fsck.PlaceMIME(ref, mime); err != nil {
					// Report the actual failure, not just the MIME type.
					log.Printf("%s: PlaceMIME(%q): %v", ref, mime, err)
					mime = "error"
				}
				stats.Add(mime)
			}
		}()
	}
	wg.Wait()
	return nil
}
Пример #2
0
// scanBlobs walks the blobs streamed from blobDir and records each one in the
// database opened at dbDir.
//
// If the database reports a resume marker (fsck.Last), the scan continues
// from it, unless restart is set, in which case the marker is discarded and
// an empty starting point is handed to streamBlobs. Blobs whose contents fail
// validation are only counted ("corrupt"). Blobs that do not parse as a
// schema are placed with an empty type and counted as "data". Schema blobs
// are indexed and placed under their schema type.
//
// Any database error terminates the process via log.Fatal.
func scanBlobs(dbDir, blobDir string, restart bool) {
	fsck, err := db.New(dbDir)
	if err != nil {
		log.Fatal(err)
	}

	resumeAt := fsck.Last()
	switch {
	case resumeAt == "":
		// No marker stored: nothing to announce.
	case restart:
		fmt.Println("overwriting blob scan resume marker at", resumeAt)
		resumeAt = ""
	default:
		fmt.Println("resuming blob scan at", resumeAt)
	}

	blobCh := streamBlobs(blobDir, resumeAt)

	stats := fs.NewStats()
	// LogEvery is called now; only Stop on its result waits for return.
	defer stats.LogEvery(10 * time.Second).Stop()
	defer log.Print(stats)

	for blob := range blobCh {
		if !blob.ValidContents() {
			stats.Add("corrupt")
			continue
		}

		ref := blob.Ref()
		body := blob.Open()
		schema, isSchema := parseSchema(ref, body)
		body.Close()

		if isSchema {
			needs := indexSchemaBlob(fsck, schema)
			kind := schema.Type()
			stats.Add(kind)
			if err := fsck.Place(ref.String(), blob.Token, kind, needs); err != nil {
				log.Fatal(err)
			}
			continue
		}

		// Not a schema blob: record it as plain data with no type and no
		// dependencies.
		stats.Add("data")
		if err := fsck.Place(ref.String(), blob.Token, "", nil); err != nil {
			log.Fatal(err)
		}
	}
}
Пример #3
0
// main scans every file that the FSCK database lists under the requested
// MIME type, decodes its EXIF data and tallies the camera model tag.
//
// Flags:
//
//	-db_dir     FSCK state database directory
//	-blob_dir   Camlistore blob directory
//	-mime_type  MIME type of files to scan (default "image/jpeg")
//	-print      also print ref, an identifier, file name and model per file
//	-workers    parallel worker goroutines
//
// With -print, the identifier is the first of these that is available:
// the EXIF ImageUniqueID tag, the SHA-1 of the embedded JPEG thumbnail, or,
// for files whose PartsSize is below 1e7, the SHA-1 of the whole file.
// Otherwise it stays "unknown" (or "read-error" if the file cannot be
// re-read).
func main() {
	dbDir := flag.String("db_dir", "", "FSCK state database directory")
	blobDir := flag.String("blob_dir", "", "Camlistore blob directory")
	mimeType := flag.String("mime_type", "image/jpeg", "MIME type of files to scan")
	// Named printRefs rather than print so the builtin is not shadowed;
	// the flag name itself is unchanged.
	printRefs := flag.Bool("print", false, "Print ref and camera model")
	workers := fsck.Parallel{Workers: 32}
	flag.Var(workers, "workers", "parallel worker goroutines")
	flag.Parse()

	fdb, err := db.New(*dbDir)
	if err != nil {
		log.Fatal(err)
	}
	bs, err := dir.New(*blobDir)
	if err != nil {
		log.Fatal(err)
	}

	stats := fsck.NewStats()
	// LogTopNEvery is called now; only Stop on its result is deferred.
	defer stats.LogTopNEvery(10, 10*time.Second).Stop()
	defer log.Print(stats)

	files := fsck.NewFiles(bs)
	go func() {
		files.ReadRefs(fdb.ListMIME(*mimeType))
		files.Close()
	}()
	go files.LogErrors()

	workers.Go(func() {
		for r := range files.Readers {
			ex, err := exif.Decode(r)
			if err != nil {
				stats.Add("error")
				continue
			}
			tag, err := ex.Get(exif.Model)
			if err != nil {
				stats.Add("missing")
				continue
			}
			stats.Add(tag.String())
			if !*printRefs {
				continue
			}

			id := "unknown"
			if uid, err := ex.Get(exif.ImageUniqueID); err == nil {
				id = uid.String()
				stats.Add("unique-id-exif")
			} else if thumb, err := ex.JpegThumbnail(); err == nil {
				sum := sha1.Sum(thumb)
				id = hex.EncodeToString(sum[:])
				stats.Add("unique-id-thumb")
			} else if r.PartsSize() < 1e7 {
				// Rewind (whence 0 = from start) because exif.Decode has
				// already consumed part of the reader, then hash the whole
				// file. A failure in either step must not produce an id
				// from partial content.
				digest := sha1.New()
				_, err := r.Seek(0, 0)
				if err == nil {
					_, err = io.Copy(digest, r)
				}
				if err != nil {
					id = "read-error"
					stats.Add("unique-id-sha1-error")
				} else {
					id = hex.EncodeToString(digest.Sum(nil))
					stats.Add("unique-id-sha1")
				}
			} else {
				stats.Add("unique-id-too-big")
			}
			fmt.Printf("%s %s %q %q\n", r.BlobRef(), id, r.FileName(), tag)
		}
	})
	workers.Wait()
}