Beispiel #1
0
// identify walks the entries exposed by rdr and, for each entry whose name is
// a key of c.nameCTest, runs that container test and hands the hits to
// c.processHits. It stops walking when processHits returns true or when
// rdr.Next returns an error. If nothing has set id.result and the extension
// of n equals c.extension, a default hit is sent. res is always closed before
// returning.
//
// It is safe to call on a nil matcher (i.e. container matching switched off):
// res is closed immediately.
func (c *ContainerMatcher) identify(n string, rdr Reader, res chan core.Result) {
	if c == nil {
		close(res)
		return
	}
	id := c.newIdentifier(len(c.parts))
	for rdr.Next() == nil {
		test, matched := c.nameCTest[rdr.Name()]
		if matched {
			if config.Debug() {
				fmt.Fprintf(config.Out(), "{Name match - %s (container %d))}\n", rdr.Name(), c.conType)
			}
			// the entry name is known: collect the hits for its tests, then
			// let processHits decide whether the walk can end early
			hits := test.identify(c, id, rdr, rdr.Name())
			if c.processHits(hits, id, test, rdr.Name(), res) {
				break
			}
		}
	}
	// fall back to a default hit when there is no result but the extension matches
	if c.extension != "" && !id.result {
		if filepath.Ext(n) == "."+c.extension {
			res <- defaultHit(-1 - int(c.conType))
		}
	}
	close(res)
}
Beispiel #2
0
// identifyFile registers ctx on its wait group, queues it on ctxts and then
// reads the file with readFile.
//
// The read happens inline when -multi is 1, when ctx.z is set, or when slow or
// debug logging is on; otherwise it runs on its own goroutine. gf is passed
// through to readFile unchanged.
func identifyFile(ctx *context, ctxts chan *context, gf getFn) {
	// this Add is not balanced in this function; presumably the consumer of
	// ctxts calls Done once it has handled ctx - TODO confirm
	ctx.wg.Add(1)
	ctxts <- ctx
	if *multi == 1 || ctx.z || config.Slow() || config.Debug() {
		readFile(ctx, ctxts, gf)
		return
	}
	// Register the goroutine before starting it: calling Add from inside the
	// goroutine can race with a concurrent Wait on the same group.
	ctx.wg.Add(1)
	go func() {
		defer ctx.wg.Done()
		readFile(ctx, ctxts, gf)
	}()
}
Beispiel #3
0
// newLogger builds a logger from the comma-separated -log option string.
//
// The logger writes to stderr unless one of the stdout options is given. An
// empty opts returns the default logger. The debug and slow options are set
// on the config package rather than on the logger; when either is active,
// progress logging is switched off and the config output is pointed at the
// logger's writer.
//
// An unrecognised option returns a nil logger and an error that lists every
// accepted option. Options seen before the bad one have already been applied
// to config (debug/slow) by then.
func newLogger(opts string) (*logger, error) {
	lg := &logger{w: os.Stderr}
	if opts == "" {
		return lg, nil
	}
	for _, o := range strings.Split(opts, ",") {
		switch o {
		case "stderr":
			// nothing to do: stderr is already the default writer
		case "stdout", "out", "o":
			lg.w = os.Stdout
		case "progress", "p":
			lg.progress = true
		case "time", "t":
			lg.start = time.Now()
		case "error", "err", "e":
			lg.e = true
		case "warning", "warn", "w":
			lg.warn = true
		case "debug", "d":
			config.SetDebug()
		case "slow", "s":
			config.SetSlow()
		case "unknown", "u":
			lg.unknown = true
		case "known", "k":
			lg.known = true
		default:
			return nil, fmt.Errorf("unknown -log input %s; expect a comma-separated list of stderr,stdout,out,o,progress,p,time,t,error,err,e,warning,warn,w,debug,d,slow,s,unknown,u,known,k", opts)
		}
	}
	if config.Debug() || config.Slow() {
		lg.progress = false // progress reported internally
		config.SetOut(lg.w)
	}
	return lg, nil
}
Beispiel #4
0
// identify scans buf for byte signatures and sends every hit, as a strike, on
// the channel returned by b.scorer (called with buf, the wait set, quit and r).
// That channel is closed on every return path.
//
// Order of work:
//  1. BOF frames (b.bofFrames)
//  2. an initial pass over BOF sequences (b.bAho), which may break off early
//  3. EOF frames and EOF sequences, then any BOF results still pending -
//     sequentially when maxEOF >= 0, otherwise interleaved via select
//
// After steps 1 and 2 the function checks quit without blocking and, if it is
// signalled, drains the current channel, closes the scorer channel and returns.
//
// exclude is passed to b.priorities.WaitSet; when it is non-empty the BOF/EOF
// maxima come from that wait set instead of from the matcher's own fields.
func (b *Matcher) identify(buf *siegreader.Buffer, quit chan struct{}, r chan core.Result, exclude ...int) {
	buf.Quit = quit
	waitSet := b.priorities.WaitSet(exclude...)
	var maxBOF, maxEOF int
	// with exclusions, use the offsets of the reduced wait set; otherwise the matcher's own maxima
	if len(exclude) > 0 {
		maxBOF, maxEOF = waitSet.MaxOffsets()
	} else {
		maxBOF, maxEOF = b.maxBOF, b.maxEOF
	}
	// every strike below is sent on incoming
	incoming := b.scorer(buf, waitSet, quit, r)
	rdr := siegreader.LimitReaderFrom(buf, maxBOF)
	// First test BOF frameset
	bfchan := b.bofFrames.index(buf, false, quit)
	for bf := range bfchan {
		if config.Debug() {
			fmt.Fprintln(config.Out(), strike{b.bofFrames.testTreeIndex[bf.idx], 0, bf.off, bf.length, false, true})
		}
		incoming <- strike{b.bofFrames.testTreeIndex[bf.idx], 0, bf.off, bf.length, false, true}
	}
	// non-blocking check: stop here if quit has been signalled
	select {
	case <-quit: // the matcher has called quit
		// the range loop above only ends once bfchan is closed, so this drain returns immediately
		for range bfchan {
		} // drain first
		close(incoming)
		return
	default:
	}

	// Do an initial check of BOF sequences
	b.start(true) // start bof matcher if not yet started
	var bchan chan wac.Result
	bchan = b.bAho.Index(rdr)
	for br := range bchan {
		// an Index[0] of -1 is handled as a progress report (offset only) rather than a match
		if br.Index[0] == -1 {
			incoming <- progressStrike(br.Offset, false)
			// Leave the initial BOF pass once it is beyond 131072 bytes (2^17) when maxBOF
			// is negative or more than five times maxEOF. Results still queued on bchan are
			// consumed further down, after the EOF tests have been set up.
			if br.Offset > 131072 && (maxBOF < 0 || maxBOF > maxEOF*5) {
				break
			}
		} else {
			if config.Debug() {
				fmt.Fprintln(config.Out(), strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false})
			}
			incoming <- strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false}
		}
	}
	// non-blocking check: stop here if quit has been signalled
	select {
	case <-quit: // the matcher has called quit
		// the loop above may have ended via break, so bchan can still hold results
		for range bchan {
		} // drain first
		close(incoming)
		return
	default:
	}

	// Setup EOF tests
	efchan := b.eofFrames.index(buf, true, quit)
	b.start(false) // counterpart of b.start(true) above; presumably starts the EOF matcher - TODO confirm
	rrdr := siegreader.LimitReverseReaderFrom(buf, maxEOF)
	echan := b.eAho.Index(rrdr)

	// if we have a maximum value on EOF do a sequential search
	if maxEOF >= 0 {
		if maxEOF != 0 {
			_, _ = buf.CanSeek(0, true) // force a full read to enable EOF scan to proceed for streams
		}
		// EOF frames first
		for ef := range efchan {
			if config.Debug() {
				fmt.Fprintln(config.Out(), strike{b.eofFrames.testTreeIndex[ef.idx], 0, ef.off, ef.length, true, true})
			}
			incoming <- strike{b.eofFrames.testTreeIndex[ef.idx], 0, ef.off, ef.length, true, true}
		}
		// Scan complete EOF
		for er := range echan {
			if er.Index[0] == -1 {
				incoming <- progressStrike(er.Offset, true)
			} else {
				if config.Debug() {
					fmt.Fprintln(config.Out(), strike{b.eofSeq.testTreeIndex[er.Index[0]], er.Index[1], er.Offset, er.Length, true, false})
				}
				incoming <- strike{b.eofSeq.testTreeIndex[er.Index[0]], er.Index[1], er.Offset, er.Length, true, false}
			}
		}
		// send a final progress strike with the maximum EOF
		incoming <- progressStrike(int64(maxEOF), true)
		// Finally, finish BOF scan (whatever is left on bchan after the initial pass)
		for br := range bchan {
			if br.Index[0] == -1 {
				incoming <- progressStrike(br.Offset, false)
			} else {
				if config.Debug() {
					fmt.Fprintln(config.Out(), strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false})
				}
				incoming <- strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false}
			}
		}
		close(incoming)
		return
	}
	// If no maximum on EOF do a parallel search
	// Each channel is set to nil once it is closed, which removes its case from the
	// select; the loop ends when all three are nil.
	for {
		select {
		case br, ok := <-bchan:
			if !ok {
				// BOF scan finished: force a full read so the EOF scan can proceed for streams.
				// NOTE(review): the original comment described this as the "limit BOF reader"
				// case, but the condition tested is maxBOF < 0 - confirm which is intended.
				if maxBOF < 0 && maxEOF != 0 {
					_, _ = buf.CanSeek(0, true)
				}
				bchan = nil
			} else {
				if br.Index[0] == -1 {
					incoming <- progressStrike(br.Offset, false)
				} else {
					if config.Debug() {
						fmt.Fprintln(config.Out(), strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false})
					}
					incoming <- strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false}
				}
			}
		case ef, ok := <-efchan:
			if !ok {
				efchan = nil
			} else {
				if config.Debug() {
					fmt.Fprintln(config.Out(), strike{b.eofFrames.testTreeIndex[ef.idx], 0, ef.off, ef.length, true, true})
				}
				incoming <- strike{b.eofFrames.testTreeIndex[ef.idx], 0, ef.off, ef.length, true, true}
			}
		case er, ok := <-echan:
			if !ok {
				echan = nil
			} else {
				if er.Index[0] == -1 {
					incoming <- progressStrike(er.Offset, true)
				} else {
					if config.Debug() {
						fmt.Fprintln(config.Out(), strike{b.eofSeq.testTreeIndex[er.Index[0]], er.Index[1], er.Offset, er.Length, true, false})
					}
					incoming <- strike{b.eofSeq.testTreeIndex[er.Index[0]], er.Index[1], er.Offset, er.Length, true, false}
				}
			}
		}
		if bchan == nil && efchan == nil && echan == nil {
			close(incoming)
			return
		}
	}
}
Beispiel #5
0
// main is the command-line entry point.
//
// After parsing flags it handles the modes that return early (-update,
// -version, -fpr and -serve). Otherwise it identifies the single file or
// directory argument, or, when that argument is "-", every file path read
// line by line from stdin. Results are queued on ctxts for the printer
// goroutine and the selected writer produces the output head and tail.
//
// Fatal conditions are reported with log.Fatal*; a normal run ends with
// os.Exit(0).
func main() {
	flag.Parse()
	/*//UNCOMMENT TO RUN PROFILER
	go func() {
		log.Println(http.ListenAndServe("localhost:6060", nil))
	}()*/
	// configure home and signature if not default
	if *home != config.Home() {
		config.SetHome(*home)
	}
	if *sig != config.SignatureBase() {
		config.SetSignature(*sig)
	}
	// handle -update
	if *update {
		msg, err := updateSigs()
		if err != nil {
			log.Fatalf("[FATAL] failed to update signature file, %v", err)
		}
		fmt.Println(msg)
		return
	}
	// handle -hash error
	hashT := getHash(*hashf)
	if *hashf != "" && hashT < 0 {
		log.Fatalf("[FATAL] invalid hash type; choose from %s", hashChoices)
	}
	// load and handle signature errors
	s, err := siegfried.Load(config.Signature())
	if err != nil {
		log.Fatalf("[FATAL] error loading signature file, got: %v", err)
	}
	// handle -version
	if *version {
		version := config.Version()
		fmt.Printf("siegfried %d.%d.%d\n%s", version[0], version[1], version[2], s)
		return
	}
	// handle -fpr
	if *fprflag {
		log.Printf("FPR server started at %s. Use CTRL-C to quit.\n", config.Fpr())
		serveFpr(config.Fpr(), s)
		return
	}
	// check -multi
	if *multi > maxMulti || *multi < 1 || (*archive && *multi > 1) {
		log.Println("[WARN] -multi must be > 0 and =< 1024. If -z, -multi must be 1. Resetting -multi to 1")
		*multi = 1
	}
	// start logger
	lg, err := newLogger(*logf)
	if err != nil {
		log.Fatalln(err)
	}
	if config.Slow() || config.Debug() {
		if *serve != "" || *fprflag {
			log.Fatalln("[FATAL] debug and slow logging cannot be run in server mode")
		}
	}
	// start throttle
	if *throttlef != 0 {
		throttle = time.NewTicker(*throttlef)
		defer throttle.Stop()
	}
	// start the printer; the queue holds -multi contexts, or 8 when -multi is 1
	lenCtxts := *multi
	if lenCtxts == 1 {
		lenCtxts = 8
	}
	ctxts := make(chan *context, lenCtxts)
	go printer(ctxts, lg)
	// set default writer
	var w writer
	switch {
	case *csvo:
		w = newCSV(os.Stdout)
	case *jsono:
		w = newJSON(os.Stdout)
	case *droido:
		w = newDroid(os.Stdout)
		if len(s.Fields()) != 1 || len(s.Fields()[0]) != 7 {
			close(ctxts)
			log.Fatalln("[FATAL] DROID output is limited to signature files with a single PRONOM identifier")
		}
	default:
		w = newYAML(os.Stdout)
	}
	// overwrite writer with the log writer if logging is to stdout
	if lg != nil && lg.w == os.Stdout {
		w = logWriter{}
	}
	// setup default waitgroup
	wg := &sync.WaitGroup{}
	// setup context pool
	setCtxPool(s, w, wg, hashT, *archive)
	// handle -serve
	if *serve != "" {
		log.Printf("Starting server at %s. Use CTRL-C to quit.\n", *serve)
		listen(*serve, s, ctxts)
		return
	}
	// handle no file/directory argument
	if flag.NArg() != 1 {
		close(ctxts)
		log.Fatalln("[FATAL] expecting a single file or directory argument")
	}

	w.writeHead(s, hashT)
	// support reading list files from stdin
	if flag.Arg(0) == "-" {
		scanner := bufio.NewScanner(os.Stdin)
		for scanner.Scan() {
			info, err := os.Stat(scanner.Text())
			if err != nil {
				info, err = retryStat(scanner.Text(), err)
			}
			if err != nil || info.IsDir() {
				// unreadable paths and directories are reported as a failed result
				ctx := getCtx(scanner.Text(), "", "", 0)
				ctx.res <- results{fmt.Errorf("failed to identify %s (in scanning mode, inputs must all be files and not directories), got: %v", scanner.Text(), err), nil, nil}
				ctx.wg.Add(1)
				ctxts <- ctx
			} else {
				identifyFile(getCtx(scanner.Text(), "", info.ModTime().Format(time.RFC3339), info.Size()), ctxts, getCtx)
			}
		}
		// Scan returns false on a read error (or an over-long line) as well as at
		// EOF; record the failure so it is reported once queued work has finished.
		if serr := scanner.Err(); serr != nil {
			err = fmt.Errorf("[FATAL] error reading file list from stdin, got: %v", serr)
		}
	} else {
		err = identify(ctxts, flag.Arg(0), "", *nr, getCtx)
	}
	// wait for outstanding work before closing the queue and writing the tail
	wg.Wait()
	close(ctxts)
	w.writeTail()
	// log time elapsed
	if !lg.start.IsZero() {
		fmt.Fprintf(lg.w, "%s %v\n", timeString, time.Since(lg.start))
	}
	if err != nil {
		log.Fatal(err)
	}
	os.Exit(0)
}
Beispiel #6
0
// IdentifyBuffer identifies a siegreader buffer. Supply the error from Get as the second argument.
//
// name and mime are optional hints: a non-empty value activates and runs the
// name or MIME matcher. Any err other than siegreader.ErrEmpty is returned
// immediately, wrapped. With siegreader.ErrEmpty the XML and text matchers
// are not activated on the recorders, and that error is returned alongside
// the identifications.
//
// One recorder is created per identifier in s.ids. Matchers run in the order
// name, MIME, container, XML, RIFF, byte, text; each result is offered to the
// recorders in turn until one of them accepts it. The XML, RIFF, byte and
// text matchers only run while at least one recorder reports that it is not
// yet satisfied for that matcher.
//
// The returned slice joins the reports of all recorders in order. The
// returned error is the first one seen from the incoming err and the
// container, XML and RIFF matchers; errors from the other matchers are
// ignored. If there are no identifiers, no identifications are returned.
func (s *Siegfried) IdentifyBuffer(buffer *siegreader.Buffer, err error, name, mime string) ([]core.Identification, error) {
	if err != nil && err != siegreader.ErrEmpty {
		return nil, fmt.Errorf("siegfried: error reading file; got %v", err)
	}
	recs := make([]core.Recorder, len(s.ids))
	for i, v := range s.ids {
		recs[i] = v.Recorder()
		if name != "" {
			recs[i].Active(core.NameMatcher)
		}
		if mime != "" {
			recs[i].Active(core.MIMEMatcher)
		}
		// err is either nil or ErrEmpty here; only a nil err activates XML and text
		if err == nil {
			recs[i].Active(core.XMLMatcher)
			recs[i].Active(core.TextMatcher)
		}
	}
	// Log name for debug/slow
	if config.Debug() || config.Slow() {
		fmt.Fprintf(config.Out(), "[FILE] %s\n", name)
	}
	// Name Matcher
	if len(name) > 0 && s.nm != nil {
		nms, _ := s.nm.Identify(name, nil) // we don't care about an error here
		for v := range nms {
			for _, rec := range recs {
				if rec.Record(core.NameMatcher, v) {
					break
				}
			}
		}
	}
	// MIME Matcher
	if len(mime) > 0 && s.mm != nil {
		mms, _ := s.mm.Identify(mime, nil) // we don't care about an error here
		for v := range mms {
			for _, rec := range recs {
				if rec.Record(core.MIMEMatcher, v) {
					break
				}
			}
		}
	}
	// Container Matcher
	if s.cm != nil {
		if config.Debug() {
			fmt.Fprintln(config.Out(), ">>START CONTAINER MATCHER")
		}
		cms, cerr := s.cm.Identify(name, buffer)
		for v := range cms {
			for _, rec := range recs {
				if rec.Record(core.ContainerMatcher, v) {
					break
				}
			}
		}
		// keep the first error only
		if err == nil {
			err = cerr
		}
	}
	satisfied := true
	// XML Matcher
	if s.xm != nil {
		for _, rec := range recs {
			if ok, _ := rec.Satisfied(core.XMLMatcher); !ok {
				satisfied = false
				break
			}
		}
		if !satisfied {
			if config.Debug() {
				fmt.Fprintln(config.Out(), ">>START XML MATCHER")
			}
			xms, xerr := s.xm.Identify("", buffer)
			for v := range xms {
				for _, rec := range recs {
					if rec.Record(core.XMLMatcher, v) {
						break
					}
				}
			}
			if err == nil {
				err = xerr
			}
		}
	}
	satisfied = true
	// RIFF Matcher
	if s.rm != nil {
		for _, rec := range recs {
			if ok, _ := rec.Satisfied(core.RIFFMatcher); !ok {
				satisfied = false
				break
			}
		}
		if !satisfied {
			if config.Debug() {
				fmt.Fprintln(config.Out(), ">>START RIFF MATCHER")
			}
			rms, rerr := s.rm.Identify("", buffer)
			for v := range rms {
				for _, rec := range recs {
					if rec.Record(core.RIFFMatcher, v) {
						break
					}
				}
			}
			if err == nil {
				err = rerr
			}
		}
	}
	satisfied = true
	// every recorder is consulted (no early break): the second value returned by
	// each satisfied recorder is collected and passed to the byte matcher as exclusions
	exclude := make([]int, 0, len(recs))
	for _, rec := range recs {
		ok, ex := rec.Satisfied(core.ByteMatcher)
		if !ok {
			satisfied = false
		} else {
			exclude = append(exclude, ex)
		}
	}
	// Byte Matcher
	if s.bm != nil && !satisfied {
		if config.Debug() {
			fmt.Fprintln(config.Out(), ">>START BYTE MATCHER")
		}
		ids, _ := s.bm.Identify("", buffer, exclude...) // we don't care about an error here
		for v := range ids {
			for _, rec := range recs {
				if rec.Record(core.ByteMatcher, v) {
					break
				}
			}
		}
	}
	satisfied = true
	for _, rec := range recs {
		if ok, _ := rec.Satisfied(core.TextMatcher); !ok {
			satisfied = false
			break
		}
	}
	// Text Matcher
	if s.tm != nil && !satisfied {
		ids, _ := s.tm.Identify("", buffer) // we don't care about an error here
		for v := range ids {
			for _, rec := range recs {
				if rec.Record(core.TextMatcher, v) {
					break
				}
			}
		}
	}
	switch len(recs) {
	case 0:
		// no identifiers loaded: nothing to report, and recs[0] would panic
		return nil, err
	case 1:
		// single identifier: its report is returned directly, without the "matched" log lines
		return recs[0].Report(), err
	}
	var res []core.Identification
	for idx, rec := range recs {
		if config.Slow() || config.Debug() {
			for _, id := range rec.Report() {
				fmt.Fprintf(config.Out(), "matched: %s\n", id.String())
			}
		}
		if idx == 0 {
			res = rec.Report()
			continue
		}
		res = append(res, rec.Report()...)
	}
	return res, err
}
Beispiel #7
0
// Identify checks that b starts with the bytes "RIFF" and, if so, walks the
// RIFF chunks on a separate goroutine, sending a result for each hit in
// m.riffs that the wait set still accepts. The returned channel is closed
// when the walk ends.
//
// When the first eight bytes cannot be read, the magic does not match, or the
// RIFF reader cannot be created, an already-closed channel is returned. The
// returned error is always nil. na is not used; exclude is passed to
// m.priorities.WaitSet.
func (m Matcher) Identify(na string, b *siegreader.Buffer, exclude ...int) (chan core.Result, error) {
	// noResults hands back an already-closed channel
	noResults := func() (chan core.Result, error) {
		empty := make(chan core.Result)
		close(empty)
		return empty, nil
	}
	magic, err := b.Slice(0, 8)
	if err != nil || magic[0] != 'R' || magic[1] != 'I' || magic[2] != 'F' || magic[3] != 'F' {
		return noResults()
	}
	fileType, top, err := riff.NewReader(siegreader.ReaderFrom(b))
	if err != nil {
		return noResults()
	}
	// state shared by the closures below
	seen := make(map[riff.FourCC]bool)
	out := make(chan core.Result)
	pending := m.priorities.WaitSet(exclude...)
	// emit sends the hits registered for cc, once per distinct cc, and reports
	// true as soon as pending.Put returns true for a hit
	emit := func(cc riff.FourCC) bool {
		if config.Debug() {
			fmt.Fprintf(config.Out(), "riff match %s\n", string(cc[:]))
		}
		if seen[cc] {
			return false
		}
		seen[cc] = true
		for _, hit := range m.riffs[cc] {
			if !pending.Check(hit) {
				continue
			}
			if config.Debug() {
				fmt.Fprintf(config.Out(), "sending riff match %s\n", string(cc[:]))
			}
			out <- result{hit, cc}
			if pending.Put(hit) {
				return true
			}
		}
		return false
	}
	// walk reads chunks from r, recursing into LIST chunks.
	// NOTE(review): every return path yields true, so the walk ends after the
	// first LIST chunk has been read through, even if the enclosing reader has
	// more chunks - confirm this is intended.
	var walk func(*riff.Reader) bool
	walk = func(r *riff.Reader) bool {
		for {
			id, size, data, err := r.Next()
			if err != nil {
				return true
			}
			if emit(id) {
				return true
			}
			if id != riff.LIST {
				continue
			}
			listType, sub, err := riff.NewListReader(size, data)
			if err != nil {
				return true
			}
			if emit(listType) {
				return true
			}
			if walk(sub) {
				return true
			}
		}
	}
	// run the walk in the background; the caller drains out
	go func() {
		if !emit(fileType) {
			walk(top)
		}
		close(out)
	}()
	return out, nil
}