Ejemplo n.º 1
0
// missingBlobs walks every ref that the read-only database at dbDir reports
// via Missing() and tries to fetch it from the blob directory at blobDir.
//
// Refs that can be fetched after all are only logged. Every other ref is
// printed to stdout, counted, and followed by its parent hierarchy as
// rendered by printHierarchy. A final "total" line reports the count.
//
// An error is returned only when the database or blob directory cannot be
// opened; a failing Parents() lookup is logged and that ref is skipped.
func missingBlobs(dbDir, blobDir string) error {
	state, err := db.NewRO(dbDir)
	if err != nil {
		return err
	}
	defer state.Close()

	store, err := dir.New(blobDir)
	if err != nil {
		return err
	}
	// NOTE: the blob store is not explicitly closed here; the original
	// author left this as an open question.

	var unresolved int
	// TODO(dichro): cache Parents() call results?
	for ref := range state.Missing() {
		rc, size, fetchErr := store.Fetch(blob.MustParse(ref))
		if fetchErr == nil {
			// The blob is present on disk after all.
			log.Printf("missing ref %q found with size %d", ref, size)
			rc.Close()
			continue
		}

		fmt.Println(ref)
		unresolved++

		parents, parentsErr := state.Parents(ref)
		if parentsErr != nil {
			log.Print(parentsErr)
			continue
		}
		printHierarchy(state, store, 1, "", parents)
	}

	fmt.Println("total", unresolved)
	return nil
}
Ejemplo n.º 2
0
// mimeScanBlobs sniffs the MIME type of every ref produced by
// fsck.List("file") and records it in the database via PlaceMIME.
//
// dbDir and blobDir name the state database and the blob directory;
// workers is the number of goroutines that consume the ref channel
// concurrently. Per-ref failures are logged and counted in stats rather
// than aborting the scan. The function returns an error only when the
// database or blob directory cannot be opened.
func mimeScanBlobs(dbDir, blobDir string, workers int) error {
	fsck, err := db.NewRO(dbDir)
	if err != nil {
		return err
	}
	// Close the database like the sibling commands do; this runs last,
	// after all workers have finished.
	defer fsck.Close()
	bs, err := dir.New(blobDir)
	if err != nil {
		return err
	}

	stats := fs.NewStats()
	defer stats.LogEvery(10 * time.Second).Stop()
	defer log.Print(stats)

	// Periodically print the stats to stdout. A stoppable ticker plus a
	// done channel lets the reporter goroutine exit when the scan ends
	// instead of leaking for the rest of the process lifetime.
	ticker := time.NewTicker(10 * time.Second)
	defer ticker.Stop()
	done := make(chan struct{})
	defer close(done)
	go func() {
		for {
			select {
			case <-ticker.C:
				fmt.Println(time.Now(), stats)
			case <-done:
				return
			}
		}
	}()

	blobCh := fsck.List("file")
	var wg sync.WaitGroup
	for i := 0; i < workers; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for ref := range blobCh {
				s, err := schemaFromBlobRef(bs, ref)
				if err != nil {
					log.Printf("%s: previously indexed; now missing", ref)
					stats.Add("badschema")
					continue
				}
				file, err := s.NewFileReader(bs)
				if err != nil {
					log.Printf("%s: unreadable: %s", ref, err)
					stats.Add("unreadable")
					continue
				}
				mime, _ := magic.MIMETypeFromReader(file)
				file.Close()
				if mime != "" {
					// Strip any "; charset=..." suffix so only the bare
					// type is stored and counted.
					if pos := strings.Index(mime, "; charset="); pos >= 0 {
						mime = mime[:pos]
					}
					if err := fsck.PlaceMIME(ref, mime); err != nil {
						// Report the actual failure, not just the MIME type.
						log.Printf("%s: PlaceMIME(%s): %s", ref, mime, err)
						mime = "error"
					}
				} else {
					mime = "unknown"
				}
				stats.Add(mime)
			}
		}()
	}
	wg.Wait()
	return nil
}
Ejemplo n.º 3
0
// filePath prints, for each ref in refs, every parent path streamed by
// fsck.StreamAllParentPaths, rendered as a human-readable string.
//
// Each path is walked from its last element to its first. "directory"
// schemas contribute "<name>/", the first "file" schema contributes its file
// name, "static-set" and "bytes" schemas are omitted, and any other type is
// rendered as "(<type>:<ref>)->". A path that contains a second "file"
// schema is dropped entirely.
//
// An error is returned when the database or blob directory cannot be opened,
// or when a schema blob on a path cannot be loaded.
func filePath(dbDir, blobDir string, refs []string) error {
	fsck, err := db.NewRO(dbDir)
	if err != nil {
		return err
	}
	defer fsck.Close()
	bs, err := dir.New(blobDir)
	if err != nil {
		return err
	}
	for _, r := range refs {
		ch := make(chan []string, 10)
		// Pass the ref explicitly so the goroutine never depends on the
		// loop variable's capture semantics.
		go func(ref string) {
			fsck.StreamAllParentPaths(ref, ch)
			close(ch)
		}(r)
		// TODO(dichro): print something if there's no paths
	PATH:
		for path := range ch {
			pretty := make([]string, 0, len(path))
			foundFile := false
			for i := range path {
				p := path[len(path)-i-1]
				s, err := schemaFromBlobRef(bs, p)
				if err != nil {
					// Drain the channel so the producer goroutine can
					// finish and close it. Otherwise it would stay blocked
					// on a send and could still be using fsck when the
					// deferred Close runs.
					for range ch {
					}
					return err
				}
				str := fmt.Sprintf("(%s:%s)->", s.Type(), p)
				switch s.Type() {
				case "directory":
					str = s.FileName() + "/"
				case "file":
					if foundFile {
						// we already found a "file" that contains the
						// target blob. If we're seeing another "file" on
						// the way up, then that "file" must actually
						// contain a schema blob that ultimately references
						// our target blob, which is not what we're looking
						// for.
						continue PATH
					}
					foundFile = true
					str = s.FileName()
				case "static-set", "bytes":
					// Not rendered in the printed path.
					continue
				}
				pretty = append(pretty, str)
			}
			fmt.Println(r, strings.Join(pretty, ""))
		}
	}
	return nil
}
Ejemplo n.º 4
0
// Set stores path in f.Path and opens it with dir.New.
//
// It returns the error from dir.New if opening fails, or a
// "not a diskpacked repository" error when the opened storage does not
// implement blobserver.BlobStreamer. On success the storage is stored in
// f.BS. Note that f.Path is assigned even when an error is returned.
func (f *Flag) Set(path string) error {
	f.Path = path

	store, err := dir.New(path)
	if err != nil {
		return err
	}
	if _, isStreamer := store.(blobserver.BlobStreamer); !isStreamer {
		return errors.New("not a diskpacked repository")
	}

	f.BS = store
	return nil
}
Ejemplo n.º 5
0
// newUploader builds an Uploader from the command-line flags.
//
// When *flagBlobDir is set, the uploader writes to storage opened with
// dir.New; if that fails and the value is literally "discard", a
// discardStorage is used instead. Otherwise a network client is created with
// client.NewOrFail and given an HTTP transport configured from the proxy and
// verbosity flags.
//
// The process exits via log.Fatalf when the storage directory cannot be used
// or the working directory cannot be determined.
func newUploader() *Uploader {
	var (
		cc        *client.Client
		httpStats *httputil.StatsTransport
	)

	if blobDir := *flagBlobDir; blobDir == "" {
		// No local directory requested: talk to a server over HTTP.
		cc = client.NewOrFail()
		proxy := http.ProxyFromEnvironment
		if flagProxyLocal {
			proxy = proxyFromEnvironment
		}
		tr := cc.TransportForConfig(&client.TransportConfig{
			Proxy:   proxy,
			Verbose: *flagHTTP,
		})
		// httpStats stays nil if the transport is not a StatsTransport.
		httpStats, _ = tr.(*httputil.StatsTransport)
		cc.SetHTTPClient(&http.Client{Transport: tr})
	} else {
		ss, err := dir.New(blobDir)
		switch {
		case err == nil:
			// Directory opened fine; use it as is.
		case blobDir == "discard":
			// Fall back to discarding storage only when opening failed.
			ss = discardStorage{}
		default:
			log.Fatalf("Error using dir %s as storage: %v", blobDir, err)
		}
		cc = client.NewStorageClient(ss)
	}

	if *cmdmain.FlagVerbose {
		cc.SetLogger(log.New(cmdmain.Stderr, "", log.LstdFlags))
	} else {
		cc.SetLogger(nil)
	}

	pwd, err := os.Getwd()
	if err != nil {
		log.Fatalf("os.Getwd: %v", err)
	}

	up := &Uploader{
		Client:    cc,
		transport: httpStats,
		pwd:       pwd,
		// gate things that waste fds, assuming a low system limit
		fdGate: syncutil.NewGate(100),
	}
	return up
}
Ejemplo n.º 6
0
// main streams every blob from the directory named by *blobDir into an index
// stored in the leveldb database at *indexDir.
//
// It prints usage and returns when either flag is empty. Otherwise it starts
// fe.PrintStats and *parallel fe.Index goroutines reading from a shared
// channel, then blocks in StreamBlobs, which feeds that channel starting at
// *streamStart. Any setup or streaming error terminates the process via
// log.Fatal.
func main() {
	flag.Parse()

	// Both directories are required.
	if *blobDir == "" || *indexDir == "" {
		flag.Usage()
		return
	}

	store, err := dir.New(*blobDir)
	if err != nil {
		log.Fatal(err)
	}
	streamer, isStreamer := store.(blobserver.BlobStreamer)
	if !isStreamer {
		log.Fatalf("%v is not a BlobStreamer", store)
	}

	kv, err := leveldb.NewStorage(*indexDir)
	if err != nil {
		log.Fatal(err)
	}
	defer kv.Close()

	idx, err := index.New(kv)
	if err != nil {
		log.Fatal(err)
	}

	fe := FetcherEnumerator{
		FetcherEnumerator: store,
		c1:                make(map[string]*blob.Blob),
		start:             time.Now(),
	}
	idx.InitBlobSource(&fe)

	blobs := make(chan blobserver.BlobAndToken)
	go fe.PrintStats()
	for worker := 0; worker < *parallel; worker++ {
		go fe.Index(blobs, idx)
	}

	err = streamer.StreamBlobs(context.New(), blobs, *streamStart)
	if err != nil {
		log.Fatal(err)
	}
}
Ejemplo n.º 7
0
// streamBlobs opens the blob directory at path and returns a channel
// (buffered to 10 entries) that a background goroutine fills by calling
// StreamBlobs with the given resume token.
//
// The process exits via log.Fatal if the directory cannot be opened, if the
// storage does not implement blobserver.BlobStreamer, or if streaming
// returns an error.
func streamBlobs(path, resume string) <-chan blobserver.BlobAndToken {
	store, err := dir.New(path)
	if err != nil {
		log.Fatal(err)
	}

	streamer, isStreamer := store.(blobserver.BlobStreamer)
	if !isStreamer {
		log.Fatalf("%v is not a BlobStreamer", store)
	}

	out := make(chan blobserver.BlobAndToken, 10)
	go func() {
		streamErr := streamer.StreamBlobs(context.New(), out, resume)
		if streamErr != nil {
			log.Fatal(streamErr)
		}
	}()
	return out
}
Ejemplo n.º 8
0
// main scans files of a given MIME type for EXIF data and tallies their
// camera model tags.
//
// Flags select the state database directory, the blob directory, the MIME
// type to scan, whether to print one line per file, and the number of worker
// goroutines. Refs come from fdb.ListMIME; readers for them are consumed
// from files.Readers by the workers.
//
// With -print, each file is also given an identifier chosen in order of
// preference: the EXIF ImageUniqueID tag, the SHA-1 of the embedded JPEG
// thumbnail, or, for files whose PartsSize() is below 1e7, the SHA-1 of the
// full contents. If none apply the identifier is "unknown"; if the contents
// cannot be read it is "read-error".
func main() {
	dbDir := flag.String("db_dir", "", "FSCK state database directory")
	blobDir := flag.String("blob_dir", "", "Camlistore blob directory")
	mimeType := flag.String("mime_type", "image/jpeg", "MIME type of files to scan")
	print := flag.Bool("print", false, "Print ref and camera model")
	workers := fsck.Parallel{Workers: 32}
	flag.Var(workers, "workers", "parallel worker goroutines")
	flag.Parse()

	fdb, err := db.New(*dbDir)
	if err != nil {
		log.Fatal(err)
	}
	bs, err := dir.New(*blobDir)
	if err != nil {
		log.Fatal(err)
	}

	stats := fsck.NewStats()
	defer stats.LogTopNEvery(10, 10*time.Second).Stop()
	defer log.Print(stats)

	files := fsck.NewFiles(bs)
	go func() {
		files.ReadRefs(fdb.ListMIME(*mimeType))
		files.Close()
	}()
	go files.LogErrors()

	workers.Go(func() {
		for r := range files.Readers {
			ex, err := exif.Decode(r)
			if err != nil {
				stats.Add("error")
				continue
			}
			tag, err := ex.Get(exif.Model)
			if err != nil {
				stats.Add("missing")
				continue
			}
			stats.Add(tag.String())
			if *print {
				id := "unknown"
				if idTag, err := ex.Get(exif.ImageUniqueID); err == nil {
					id = idTag.String()
					stats.Add("unique-id-exif")
				} else if thumb, err := ex.JpegThumbnail(); err == nil {
					hash := sha1.Sum(thumb)
					id = hex.EncodeToString(hash[:])
					stats.Add("unique-id-thumb")
				} else if r.PartsSize() < 1e7 {
					// Rewind, since exif.Decode has already consumed part
					// of the reader, then hash the full contents.
					if _, err := r.Seek(0, 0); err != nil {
						id = "read-error"
						stats.Add("unique-id-sha1-error")
					} else {
						hash := sha1.New()
						if _, err := io.Copy(hash, r); err != nil {
							// A partial read would yield a bogus digest;
							// report it as a read error instead.
							id = "read-error"
							stats.Add("unique-id-sha1-error")
						} else {
							id = hex.EncodeToString(hash.Sum(nil))
							stats.Add("unique-id-sha1")
						}
					}
				} else {
					stats.Add("unique-id-too-big")
				}
				fmt.Printf("%s %s %q %q\n", r.BlobRef(), id, r.FileName(), tag)
			}
		}
	})
	workers.Wait()
}