Example #1
0
// fpridentify opens the file at path, identifies it with s and returns the
// value produced by reply: the single identification string when exactly one
// known format is reported, otherwise a message prefixed with "error:".
func fpridentify(s *siegfried.Siegfried, path string) []byte {
	fi, err := os.Open(path)
	if err != nil {
		return reply("error: failed to open " + path + "; got " + err.Error())
	}
	// Register the Close only after the open is known to have succeeded, so
	// the deferred call never runs against a nil *os.File.
	defer fi.Close()
	c, err := s.Identify(path, fi)
	if err != nil {
		return reply("error: failed to scan " + path + "; got " + err.Error())
	}
	var ids []string
	var warn string
	// Drain every identification; remember the warning of an unknown result
	// so the single-result case below can report it.
	for i := range c {
		ids = append(ids, i.String())
		if !i.Known() {
			warn = i.(*pronom.Identification).Warning
		}
	}
	switch len(ids) {
	case 0:
		return reply("error: scanning " + path + ": no puids returned")
	case 1:
		if warn != "" {
			return reply("error: format unknown; got " + warn)
		}
		return reply(ids[0])
	default:
		return reply("error: multiple formats returned; got " + strings.Join(ids, ", "))
	}
}
Example #2
0
// identifyT identifies the file at p with s and returns the string form of
// every identification. It prints a note when more than ten ids come back and
// a warning when the elapsed time since just before Identify exceeds 500ms.
// The error returned by Identify is discarded; only open and close failures
// are reported to the caller.
func identifyT(s *siegfried.Siegfried, p string) ([]string, error) {
	const slowMatch = 500 * time.Millisecond

	f, err := os.Open(p)
	if err != nil {
		return nil, fmt.Errorf("failed to open %v, got: %v", p, err)
	}
	start := time.Now()
	matches, _ := s.Identify(f, p, "")
	names := make([]string, 0)
	for _, m := range matches {
		names = append(names, m.String())
	}
	if err = f.Close(); err != nil {
		return nil, err
	}
	if n := len(names); n > 10 {
		fmt.Printf("test file %s has %d ids\n", p, n)
	}
	// The clock is read after the close and the id-count report, so the
	// measured span covers those steps as well as the match itself.
	if took := time.Since(start); took > slowMatch {
		fmt.Printf("[WARNING] time to match %s was %s\n", p, took.String())
	}
	return names, nil
}
Example #3
0
// writeHead builds the CSV header row, stores it as the first record in
// c.recs, and writes it to c.w. The row holds four fixed columns, an optional
// hash column (when ht >= 0), and then every entry of each field group
// returned by s.Fields(). The first entry of each group is also recorded in
// c.names.
func (c *csvWriter) writeHead(s *siegfried.Siegfried, ht hashTyp) {
	groups := s.Fields()
	withHash := ht >= 0

	// Work out the total column count up front so the row is allocated once.
	width := 4
	if withHash {
		width++
	}
	c.names = make([]string, len(groups))
	for i := range groups {
		width += len(groups[i])
		c.names[i] = groups[i][0]
	}

	header := make([]string, width)
	c.recs = [][]string{header}
	header[0], header[1], header[2], header[3] = "filename", "filesize", "modified", "errors"
	next := 4
	if withHash {
		header[next] = ht.header(false)
		next++
	}
	for _, g := range groups {
		// The row was sized to fit every group, so copy always moves len(g) entries.
		next += copy(header[next:], g)
	}
	c.w.Write(header)
}
Example #4
0
// fpridentify opens the file at path, identifies it with s and returns the
// value produced by reply: the identification string when exactly one known
// format is reported, otherwise a message prefixed with "error:".
func fpridentify(s *siegfried.Siegfried, path string) []byte {
	fi, err := os.Open(path)
	if err != nil {
		return reply("error: failed to open " + path + "; got " + err.Error())
	}
	// Register the Close only after the open is known to have succeeded.
	defer fi.Close()
	ids, err := s.Identify(fi, path, "")
	if ids == nil {
		// A nil result with a nil error must not dereference err; report it
		// the same way as an empty result.
		if err == nil {
			return reply("error: scanning " + path + ": no formats returned")
		}
		return reply("error: failed to scan " + path + "; got " + err.Error())
	}
	switch len(ids) {
	case 0:
		return reply("error: scanning " + path + ": no formats returned")
	case 1:
		if !ids[0].Known() {
			return reply("error: format unknown; got " + ids[0].Warn())
		}
		return reply(ids[0].String())
	default:
		strs := make([]string, len(ids))
		for i, v := range ids {
			strs[i] = v.String()
		}
		return reply("error: multiple formats returned; got " + strings.Join(strs, ", "))
	}
}
Example #5
0
// identify returns an HTTP handler that identifies content with s and writes
// the results through the writer obtained from parseRequest.
//
// A POST request identifies the uploaded "file" form field. Any other method
// decodes a filesystem path from the request URL and identifies that file, or
// hands a directory to multiIdentifyS. Failures that occur before any output
// has been started are reported with handleErr.
func identify(s *siegfried.Siegfried) func(w http.ResponseWriter, r *http.Request) {
	return func(w http.ResponseWriter, r *http.Request) {
		mime, wr, nr := parseRequest(w, r)
		if r.Method == "POST" {
			f, h, err := r.FormFile("file")
			if err != nil {
				handleErr(w, http.StatusNotFound, err)
				return
			}
			defer f.Close()
			var sz int64
			var mod string
			// When the upload is backed by an *os.File, take size and
			// modification time from it; otherwise fall back to the request's
			// content length.
			if osf, ok := f.(*os.File); ok {
				info, err := osf.Stat()
				if err != nil {
					handleErr(w, http.StatusInternalServerError, err)
					// Stop here: info is not usable after a failed Stat.
					return
				}
				sz = info.Size()
				mod = info.ModTime().String()
			} else {
				sz = r.ContentLength
			}
			w.Header().Set("Content-Type", mime)
			wr.writeHead(s)
			c, err := s.Identify(h.Filename, f)
			if c == nil {
				wr.writeFile(h.Filename, sz, mod, nil, fmt.Errorf("failed to identify %s, got: %v", h.Filename, err), nil)
				// The head has already been written, so close the output
				// here as every other path does.
				wr.writeTail()
				return
			}
			wr.writeFile(h.Filename, sz, mod, nil, err, idChan(c))
			wr.writeTail()
			return
		}
		path, err := decodePath(r.URL.Path)
		if err != nil {
			handleErr(w, http.StatusNotFound, err)
			return
		}
		info, err := os.Stat(path)
		if err != nil {
			handleErr(w, http.StatusNotFound, err)
			return
		}
		w.Header().Set("Content-Type", mime)
		wr.writeHead(s)
		if info.IsDir() {
			multiIdentifyS(wr, s, path, nr)
			wr.writeTail()
			return
		}
		identifyFile(wr, s, path, info.Size(), info.ModTime().String())
		wr.writeTail()
	}
}
Example #6
0
// makegob creates a pronom identifier from opts, adds it to s, and saves s to
// the location given by config.Signature(). The first error encountered is
// returned.
func makegob(s *siegfried.Siegfried, opts []config.Option) error {
	id, err := pronom.New(opts...)
	if err == nil {
		err = s.Add(id)
	}
	if err != nil {
		return err
	}
	return s.Save(config.Signature())
}
Example #7
0
// multiIdentifyP walks the tree rooted at r and identifies every file with s,
// starting one goroutine per file. For each entry a single-slot result
// channel is queued on resc before its goroutine starts; printer is started
// with w, resc and wg to consume them (presumably in queue order, calling
// wg.Done for each result; confirm against printer).
//
// When norecurse is set, directories other than r itself are skipped. The
// error from filepath.Walk is returned after all queued results have been
// waited for.
func multiIdentifyP(w writer, s *siegfried.Siegfried, r string, norecurse bool) error {
	wg := &sync.WaitGroup{}
	runtime.GOMAXPROCS(PROCS)
	// resc is buffered to *multi entries, so the walk blocks on the send once
	// that many results are queued and not yet received.
	resc := make(chan chan res, *multi)
	go printer(w, resc, wg)
	wf := func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return fmt.Errorf("walking %s; got %v", path, err)
		}
		if info.IsDir() {
			if norecurse && path != r {
				return filepath.SkipDir
			}
			if *droido {
				wg.Add(1)
				rchan := make(chan res, 1)
				resc <- rchan
				go func() {
					rchan <- res{path, -1, info.ModTime().String(), nil, nil} // write directory with a -1 size for droid output only
				}()
			}
			return nil
		}
		// Count the result before queueing its channel, so wg.Wait below
		// cannot return while this file is still outstanding.
		wg.Add(1)
		rchan := make(chan res, 1)
		resc <- rchan
		go func() {
			f, err := os.Open(path)
			if err != nil {
				rchan <- res{path, 0, "", nil, err.(*os.PathError).Err} // return summary error only
				return
			}
			c, err := s.Identify(f, path, "")
			// A nil c is treated as a failed identification: only the error
			// is reported.
			if c == nil {
				f.Close()
				rchan <- res{path, 0, "", nil, err}
				return
			}
			// The ids are collected before the file is closed.
			ids := makeIdSlice(idChan(c))
			f.Close()
			rchan <- res{path, info.Size(), info.ModTime().Format(time.RFC3339), ids, err}
		}()
		return nil
	}
	err := filepath.Walk(r, wf)
	wg.Wait()
	close(resc)
	return err
}
Example #8
0
// identifyT identifies the file at p with s and returns the string form of
// every identification received. It returns an error if the file cannot be
// opened, if Identify yields a nil channel, or if closing the file fails.
func identifyT(s *siegfried.Siegfried, p string) ([]string, error) {
	file, err := os.Open(p)
	if err != nil {
		return nil, fmt.Errorf("failed to open %v, got: %v", p, err)
	}
	c, err := s.Identify(p, file)
	if c == nil {
		// Release the handle on the failure path too. The close error is
		// dropped because the identify failure is the one worth reporting.
		file.Close()
		return nil, fmt.Errorf("failed to identify %v, got: %v", p, err)
	}
	// Keep the result non-nil even when no identifications arrive.
	ids := make([]string, 0)
	for i := range c {
		ids = append(ids, i.String())
	}
	if err := file.Close(); err != nil {
		return nil, err
	}
	return ids, nil
}
Example #9
0
// makegob builds one identifier from opts, adds it to s, and saves s to the
// location given by config.Signature(). The identifier is chosen from the
// command-line flags: mimeinfo when *mi is non-empty, loc when *locfdd is set
// or *fdd is non-empty, and pronom otherwise.
func makegob(s *siegfried.Siegfried, opts []config.Option) error {
	var (
		id  core.Identifier
		err error
	)
	switch {
	case *mi != "":
		id, err = mimeinfo.New(opts...)
	case *locfdd || *fdd != "":
		id, err = loc.New(opts...)
	default:
		id, err = pronom.New(opts...)
	}
	if err != nil {
		return err
	}
	if err = s.Add(id); err != nil {
		return err
	}
	return s.Save(config.Signature())
}
Example #10
0
// identifyRdr identifies the content of r with s and writes the result to w
// under the given path, size and modification string. When the package-level
// checksum is set, a digest of the buffered content is written with the
// result. If archive scanning is enabled (*archive) and writeFile reports an
// archive type, each member of the archive is identified recursively.
func identifyRdr(w writer, s *siegfried.Siegfried, r io.Reader, sz int64, path, mime, mod string) {
	lg.set(path)
	c, err := s.Identify(r, path, mime)
	lg.err(err)
	if c == nil {
		w.writeFile(path, sz, mod, nil, err, nil)
		lg.reset()
		return
	}
	var b *siegreader.Buffer
	var cs []byte
	if checksum != nil {
		b = s.Buffer()
		var i int64
		l := checksum.BlockSize()
		// Feed the buffer to the hash one block at a time; a nil slice ends
		// the loop.
		for ; ; i += int64(l) {
			buf, _ := b.Slice(i, l)
			if buf == nil {
				break
			}
			checksum.Write(buf)
		}
		cs = checksum.Sum(nil)
		checksum.Reset()
	}
	a := w.writeFile(path, sz, mod, cs, err, idChan(c))
	lg.reset()
	if !*archive || a == config.None {
		return
	}
	var d decompressor
	if b == nil {
		b = s.Buffer()
	}
	switch a {
	case config.Zip:
		d, err = newZip(siegreader.ReaderFrom(b), path, sz)
	case config.Gzip:
		d, err = newGzip(b, path)
	case config.Tar:
		d, err = newTar(siegreader.ReaderFrom(b), path)
	case config.ARC:
		d, err = newARC(siegreader.ReaderFrom(b), path)
	case config.WARC:
		d, err = newWARC(siegreader.ReaderFrom(b), path)
	default:
		// No decompressor exists for this archive type. Without this return
		// d would stay nil and err would still hold the Identify result, so
		// the code below would either report a bogus decompress failure or
		// call next on a nil decompressor.
		return
	}
	if err != nil {
		writeError(w, path, sz, mod, fmt.Errorf("failed to decompress, got: %v", err))
		return
	}
	// Iterate until next reports an error, identifying each member in turn.
	for err = d.next(); err == nil; err = d.next() {
		if *droido {
			for _, v := range d.dirs() {
				w.writeFile(v, -1, "", nil, nil, nil)
			}
		}
		identifyRdr(w, s, d.reader(), d.size(), d.path(), d.mime(), d.mod())
	}
}
Example #11
0
// identifyRdr identifies the content of r with s and writes the result to w
// under the given path, size and modification string. When the package-level
// checksum is set, a digest of the buffered content is written with the
// result. If archive scanning is enabled (*archive) and writeFile reports an
// archive type, each member of the archive is identified recursively.
func identifyRdr(w writer, s *siegfried.Siegfried, r io.Reader, path string, sz int64, mod string) {
	c, err := s.Identify(path, r)
	if c == nil {
		w.writeFile(path, sz, mod, nil, fmt.Errorf("failed to identify %s, got: %v", path, err), nil)
		return
	}
	var b siegreader.Buffer
	var cs []byte
	if checksum != nil {
		b = s.Buffer()
		checksum.Write(siegreader.Bytes(b)) // ignore error returned here
		cs = checksum.Sum(nil)
		checksum.Reset()
	}
	a := w.writeFile(path, sz, mod, cs, err, idChan(c))
	if !*archive || a == config.None {
		return
	}
	var d decompressor
	if b == nil {
		b = s.Buffer()
	}
	switch a {
	case config.Zip:
		d, err = newZip(siegreader.ReaderFrom(b), path, sz)
	case config.Gzip:
		d, err = newGzip(b, path)
	case config.Tar:
		d, err = newTar(siegreader.ReaderFrom(b), path)
	default:
		// No decompressor exists for this archive type. Without this return
		// d would stay nil and err would still hold the Identify result, so
		// the code below would either report a bogus decompress failure or
		// call next on a nil decompressor.
		return
	}
	if err != nil {
		w.writeFile(path, sz, mod, nil, fmt.Errorf("failed to decompress %s, got: %v", path, err), nil)
		return
	}
	// Iterate until next reports an error, identifying each member in turn.
	for err = d.next(); err == nil; err = d.next() {
		if *droido {
			for _, v := range d.dirs() {
				w.writeFile(v, -1, "", nil, nil, nil)
			}
		}
		identifyRdr(w, s, d.reader(), d.path(), d.size(), d.mod())
	}
}
Example #12
0
// writeHead writes the string returned by s.JSON() to j.w, followed by the
// opening of the "files" array.
func (j *jsonWriter) writeHead(s *siegfried.Siegfried) {
	const filesOpen = `"files":[`
	preamble := s.JSON()
	j.w.WriteString(preamble)
	j.w.WriteString(filesOpen)
}
Example #13
0
// writeHead writes the string returned by s.YAML() to y.w.
func (y *yamlWriter) writeHead(s *siegfried.Siegfried) {
	preamble := s.YAML()
	y.w.WriteString(preamble)
}
Example #14
0
// writeHead stores the hash header for ht (obtained with header(false)) in
// j.hh, then writes the string returned by s.JSON() to j.w, followed by the
// opening of the "files" array.
func (j *jsonWriter) writeHead(s *siegfried.Siegfried, ht hashTyp) {
	const filesOpen = `"files":[`
	j.hh = ht.header(false)
	preamble := s.JSON()
	j.w.WriteString(preamble)
	j.w.WriteString(filesOpen)
}
Example #15
0
// writeHead stores the hash header for ht (obtained with header(true)) in
// y.hh, then writes the string returned by s.YAML() to y.w.
func (y *yamlWriter) writeHead(s *siegfried.Siegfried, ht hashTyp) {
	y.hh = ht.header(true)
	preamble := s.YAML()
	y.w.WriteString(preamble)
}
Example #16
0
// multiIdentifyP walks the tree rooted at r and identifies every file with s,
// starting one goroutine per file. For each entry a single-slot result
// channel is queued on resc before its goroutine starts; printer is started
// with w, resc and wg to consume them (presumably in queue order, calling
// wg.Done for each result; confirm against printer).
//
// When norecurse is set, directories other than r itself are skipped. A walk
// error is retried through retryStat; if a directory only became readable
// through that retry, the walk is restarted beneath it using longpath(path).
// The error from the top-level filepath.Walk is returned after all queued
// results have been waited for.
func multiIdentifyP(w writer, s *siegfried.Siegfried, r string, norecurse bool) error {
	wg := &sync.WaitGroup{}
	runtime.GOMAXPROCS(PROCS)
	// resc is buffered to *multi entries, so the walk blocks on the send once
	// that many results are queued and not yet received.
	resc := make(chan chan res, *multi)
	go printer(w, resc, wg)
	// wf is declared before it is assigned so the closure can pass itself to
	// the nested filepath.Walk below.
	var wf filepath.WalkFunc
	// origPath holds the directory at which the outermost retry walk began;
	// it is empty when no retry walk is in progress.
	var origPath string
	wf = func(path string, info os.FileInfo, err error) error {
		var retry, origReset bool
		if err != nil {
			info, err = retryStat(path, err)
			if err != nil {
				return fmt.Errorf("walking %s; got %v", path, err) // fatal: return error and quit
			}
			retry = true
		}
		if info.IsDir() {
			if norecurse && path != r {
				return filepath.SkipDir
			}
			if *droido {
				wg.Add(1)
				rchan := make(chan res, 1)
				resc <- rchan
				go func() {
					rchan <- res{path, -1, info.ModTime().String(), nil, nil} // write directory with a -1 size for droid output only
				}()
			}
			if retry {
				// Only the outermost retry records origPath, and only that
				// same invocation clears it again after its nested walk.
				if origPath == "" {
					origPath = path
					origReset = true
				}
				// NOTE(review): the error from this nested Walk is assigned
				// but never inspected; SkipDir is returned regardless.
				// Confirm that dropping it is intended.
				err = filepath.Walk(longpath(path), wf)
				if origReset {
					origPath = ""
				}
				// The nested walk has covered this directory, so the outer
				// walk must not descend into it again.
				return filepath.SkipDir
			}
			return nil
		}
		// presumably maps a path from a retry walk back to its short form
		// relative to origPath; confirm against shortpath
		path = shortpath(path, origPath)
		// Count the result before queueing its channel, so wg.Wait below
		// cannot return while this file is still outstanding.
		wg.Add(1)
		rchan := make(chan res, 1)
		resc <- rchan
		go func() {
			f, err := os.Open(path)
			if err != nil {
				f, err = retryOpen(path, err)
				if err != nil {
					rchan <- res{path, 0, "", nil, err}
					return
				}
			}
			c, err := s.Identify(f, path, "")
			// A nil c is treated as a failed identification: only the error
			// is reported.
			if c == nil {
				f.Close()
				rchan <- res{path, 0, "", nil, err}
				return
			}
			// The ids are collected before the file is closed.
			ids := makeIdSlice(idChan(c))
			f.Close()
			rchan <- res{path, info.Size(), info.ModTime().Format(time.RFC3339), ids, err}
		}()
		return nil
	}
	err := filepath.Walk(r, wf)
	wg.Wait()
	close(resc)
	return err
}