func fpridentify(s *siegfried.Siegfried, path string) []byte { fi, err := os.Open(path) defer fi.Close() if err != nil { return reply("error: failed to open " + path + "; got " + err.Error()) } c, err := s.Identify(path, fi) if err != nil { return reply("error: failed to scan " + path + "; got " + err.Error()) } var ids []string var warn string for i := range c { ids = append(ids, i.String()) if !i.Known() { warn = i.(*pronom.Identification).Warning } } switch len(ids) { case 0: return reply("error: scanning " + path + ": no puids returned") case 1: if warn != "" { return reply("error: format unknown; got " + warn) } return reply(ids[0]) default: return reply("error: multiple formats returned; got " + strings.Join(ids, ", ")) } }
func identifyT(s *siegfried.Siegfried, p string) ([]string, error) { ids := make([]string, 0) file, err := os.Open(p) if err != nil { return nil, fmt.Errorf("failed to open %v, got: %v", p, err) } t := time.Now() c, _ := s.Identify(file, p, "") for _, i := range c { ids = append(ids, i.String()) } err = file.Close() if err != nil { return nil, err } if len(ids) > 10 { fmt.Printf("test file %s has %d ids\n", p, len(ids)) } tooLong := time.Millisecond * 500 elapsed := time.Since(t) if elapsed > tooLong { fmt.Printf("[WARNING] time to match %s was %s\n", p, elapsed.String()) } return ids, nil }
func (c *csvWriter) writeHead(s *siegfried.Siegfried, ht hashTyp) { fields := s.Fields() c.names = make([]string, len(fields)) l := 4 if ht >= 0 { l++ } for i, f := range fields { l += len(f) c.names[i] = f[0] } c.recs = make([][]string, 1) c.recs[0] = make([]string, l) c.recs[0][0], c.recs[0][1], c.recs[0][2], c.recs[0][3] = "filename", "filesize", "modified", "errors" idx := 4 if ht >= 0 { c.recs[0][4] = ht.header(false) idx++ } for _, f := range fields { copy(c.recs[0][idx:], f) idx += len(f) } c.w.Write(c.recs[0]) }
func fpridentify(s *siegfried.Siegfried, path string) []byte { fi, err := os.Open(path) defer fi.Close() if err != nil { return reply("error: failed to open " + path + "; got " + err.Error()) } ids, err := s.Identify(fi, path, "") if ids == nil { return reply("error: failed to scan " + path + "; got " + err.Error()) } switch len(ids) { case 0: return reply("error: scanning " + path + ": no formats returned") case 1: if !ids[0].Known() { return reply("error: format unknown; got " + ids[0].Warn()) } return reply(ids[0].String()) default: strs := make([]string, len(ids)) for i, v := range ids { strs[i] = v.String() } return reply("error: multiple formats returned; got " + strings.Join(strs, ", ")) } }
func identify(s *siegfried.Siegfried) func(w http.ResponseWriter, r *http.Request) { return func(w http.ResponseWriter, r *http.Request) { mime, wr, nr := parseRequest(w, r) if r.Method == "POST" { f, h, err := r.FormFile("file") if err != nil { handleErr(w, http.StatusNotFound, err) return } defer f.Close() var sz int64 var mod string osf, ok := f.(*os.File) if ok { info, err := osf.Stat() if err != nil { handleErr(w, http.StatusInternalServerError, err) } sz = info.Size() mod = info.ModTime().String() } else { sz = r.ContentLength } w.Header().Set("Content-Type", mime) wr.writeHead(s) c, err := s.Identify(h.Filename, f) if c == nil { wr.writeFile(h.Filename, sz, mod, nil, fmt.Errorf("failed to identify %s, got: %v", h.Filename, err), nil) return } wr.writeFile(h.Filename, sz, mod, nil, err, idChan(c)) wr.writeTail() return } else { path, err := decodePath(r.URL.Path) if err != nil { handleErr(w, http.StatusNotFound, err) return } info, err := os.Stat(path) if err != nil { handleErr(w, http.StatusNotFound, err) return } w.Header().Set("Content-Type", mime) wr.writeHead(s) if info.IsDir() { multiIdentifyS(wr, s, path, nr) wr.writeTail() return } identifyFile(wr, s, path, info.Size(), info.ModTime().String()) wr.writeTail() return } } }
func makegob(s *siegfried.Siegfried, opts []config.Option) error { p, err := pronom.New(opts...) if err != nil { return err } err = s.Add(p) if err != nil { return err } return s.Save(config.Signature()) }
func multiIdentifyP(w writer, s *siegfried.Siegfried, r string, norecurse bool) error { wg := &sync.WaitGroup{} runtime.GOMAXPROCS(PROCS) resc := make(chan chan res, *multi) go printer(w, resc, wg) wf := func(path string, info os.FileInfo, err error) error { if err != nil { return fmt.Errorf("walking %s; got %v", path, err) } if info.IsDir() { if norecurse && path != r { return filepath.SkipDir } if *droido { wg.Add(1) rchan := make(chan res, 1) resc <- rchan go func() { rchan <- res{path, -1, info.ModTime().String(), nil, nil} // write directory with a -1 size for droid output only }() } return nil } wg.Add(1) rchan := make(chan res, 1) resc <- rchan go func() { f, err := os.Open(path) if err != nil { rchan <- res{path, 0, "", nil, err.(*os.PathError).Err} // return summary error only return } c, err := s.Identify(f, path, "") if c == nil { f.Close() rchan <- res{path, 0, "", nil, err} return } ids := makeIdSlice(idChan(c)) f.Close() rchan <- res{path, info.Size(), info.ModTime().Format(time.RFC3339), ids, err} }() return nil } err := filepath.Walk(r, wf) wg.Wait() close(resc) return err }
func identifyT(s *siegfried.Siegfried, p string) ([]string, error) { ids := make([]string, 0) file, err := os.Open(p) if err != nil { return nil, fmt.Errorf("failed to open %v, got: %v", p, err) } c, err := s.Identify(p, file) if c == nil { return nil, fmt.Errorf("failed to identify %v, got: %v", p, err) } for i := range c { ids = append(ids, i.String()) } err = file.Close() if err != nil { return nil, err } return ids, nil }
func makegob(s *siegfried.Siegfried, opts []config.Option) error { var id core.Identifier var err error if *mi != "" { id, err = mimeinfo.New(opts...) } else if *locfdd || *fdd != "" { id, err = loc.New(opts...) } else { id, err = pronom.New(opts...) } if err != nil { return err } err = s.Add(id) if err != nil { return err } return s.Save(config.Signature()) }
func identifyRdr(w writer, s *siegfried.Siegfried, r io.Reader, sz int64, path, mime, mod string) { lg.set(path) c, err := s.Identify(r, path, mime) lg.err(err) if c == nil { w.writeFile(path, sz, mod, nil, err, nil) lg.reset() return } var b *siegreader.Buffer var cs []byte if checksum != nil { b = s.Buffer() var i int64 l := checksum.BlockSize() for ; ; i += int64(l) { buf, _ := b.Slice(i, l) if buf == nil { break } checksum.Write(buf) } cs = checksum.Sum(nil) checksum.Reset() } a := w.writeFile(path, sz, mod, cs, err, idChan(c)) lg.reset() if !*archive || a == config.None { return } var d decompressor if b == nil { b = s.Buffer() } switch a { case config.Zip: d, err = newZip(siegreader.ReaderFrom(b), path, sz) case config.Gzip: d, err = newGzip(b, path) case config.Tar: d, err = newTar(siegreader.ReaderFrom(b), path) case config.ARC: d, err = newARC(siegreader.ReaderFrom(b), path) case config.WARC: d, err = newWARC(siegreader.ReaderFrom(b), path) } if err != nil { writeError(w, path, sz, mod, fmt.Errorf("failed to decompress, got: %v", err)) return } for err = d.next(); err == nil; err = d.next() { if *droido { for _, v := range d.dirs() { w.writeFile(v, -1, "", nil, nil, nil) } } identifyRdr(w, s, d.reader(), d.size(), d.path(), d.mime(), d.mod()) } }
func identifyRdr(w writer, s *siegfried.Siegfried, r io.Reader, path string, sz int64, mod string) { c, err := s.Identify(path, r) if c == nil { w.writeFile(path, sz, mod, nil, fmt.Errorf("failed to identify %s, got: %v", path, err), nil) return } var b siegreader.Buffer var cs []byte if checksum != nil { b = s.Buffer() checksum.Write(siegreader.Bytes(b)) // ignore error returned here cs = checksum.Sum(nil) checksum.Reset() } a := w.writeFile(path, sz, mod, cs, err, idChan(c)) if !*archive || a == config.None { return } var d decompressor if b == nil { b = s.Buffer() } switch a { case config.Zip: d, err = newZip(siegreader.ReaderFrom(b), path, sz) case config.Gzip: d, err = newGzip(b, path) case config.Tar: d, err = newTar(siegreader.ReaderFrom(b), path) } if err != nil { w.writeFile(path, sz, mod, nil, fmt.Errorf("failed to decompress %s, got: %v", path, err), nil) return } for err = d.next(); err == nil; err = d.next() { if *droido { for _, v := range d.dirs() { w.writeFile(v, -1, "", nil, nil, nil) } } identifyRdr(w, s, d.reader(), d.path(), d.size(), d.mod()) } }
func (j *jsonWriter) writeHead(s *siegfried.Siegfried) { j.w.WriteString(s.JSON()) j.w.WriteString("\"files\":[") }
// writeHead writes the string returned by s.YAML() to y.w.
func (y *yamlWriter) writeHead(s *siegfried.Siegfried) {
	header := s.YAML()
	y.w.WriteString(header)
}
func (j *jsonWriter) writeHead(s *siegfried.Siegfried, ht hashTyp) { j.hh = ht.header(false) j.w.WriteString(s.JSON()) j.w.WriteString("\"files\":[") }
func (y *yamlWriter) writeHead(s *siegfried.Siegfried, ht hashTyp) { y.hh = ht.header(true) y.w.WriteString(s.YAML()) }
func multiIdentifyP(w writer, s *siegfried.Siegfried, r string, norecurse bool) error { wg := &sync.WaitGroup{} runtime.GOMAXPROCS(PROCS) resc := make(chan chan res, *multi) go printer(w, resc, wg) var wf filepath.WalkFunc var origPath string wf = func(path string, info os.FileInfo, err error) error { var retry, origReset bool if err != nil { info, err = retryStat(path, err) if err != nil { return fmt.Errorf("walking %s; got %v", path, err) // fatal: return error and quit } retry = true } if info.IsDir() { if norecurse && path != r { return filepath.SkipDir } if *droido { wg.Add(1) rchan := make(chan res, 1) resc <- rchan go func() { rchan <- res{path, -1, info.ModTime().String(), nil, nil} // write directory with a -1 size for droid output only }() } if retry { if origPath == "" { origPath = path origReset = true } err = filepath.Walk(longpath(path), wf) if origReset { origPath = "" } return filepath.SkipDir } return nil } path = shortpath(path, origPath) wg.Add(1) rchan := make(chan res, 1) resc <- rchan go func() { f, err := os.Open(path) if err != nil { f, err = retryOpen(path, err) if err != nil { rchan <- res{path, 0, "", nil, err} return } } c, err := s.Identify(f, path, "") if c == nil { f.Close() rchan <- res{path, 0, "", nil, err} return } ids := makeIdSlice(idChan(c)) f.Close() rchan <- res{path, info.Size(), info.ModTime().Format(time.RFC3339), ids, err} }() return nil } err := filepath.Walk(r, wf) wg.Wait() close(resc) return err }