Beispiel #1
0
// identifyFile queues ctx on ctxts and then reads the file it describes via
// readFile, passing gf through unchanged.
//
// The read happens inline when *multi is 1, when ctx.z is set, or when slow or
// debug mode is on; otherwise it runs on its own goroutine so that several
// files can be processed concurrently.
//
// ctx.wg is incremented once before the context is queued (presumably released
// by whatever consumes ctxts - confirm against the receiver) and once more for
// the background goroutine, which releases its own count when readFile returns.
func identifyFile(ctx *context, ctxts chan *context, gf getFn) {
	ctx.wg.Add(1)
	ctxts <- ctx
	if *multi == 1 || ctx.z || config.Slow() || config.Debug() {
		readFile(ctx, ctxts, gf)
		return
	}
	// Register the goroutine with the wait group *before* launching it, so the
	// count never depends on when the scheduler first runs the goroutine.
	ctx.wg.Add(1)
	go func() {
		defer ctx.wg.Done()
		readFile(ctx, ctxts, gf)
	}()
}
Beispiel #2
0
// newLogger builds a logger from the comma-separated -log option string.
//
// An empty string yields a logger that writes to os.Stderr with no other
// options set. Each item either redirects output ("stdout"), switches on a
// logger field (progress, time, error, warning, unknown, known) or sets a
// global config mode (debug, slow). "stderr" is accepted and is a no-op since
// it is already the default.
//
// When debug or slow mode is active (whether set here or earlier), progress
// reporting is switched off and the logger's writer is handed to config.SetOut.
//
// An unrecognised item returns a nil logger and an error that names that item.
func newLogger(opts string) (*logger, error) {
	lg := &logger{w: os.Stderr}
	if opts == "" {
		return lg, nil
	}
	for _, o := range strings.Split(opts, ",") {
		switch o {
		case "stderr":
			// default writer; nothing to change
		case "stdout", "out", "o":
			lg.w = os.Stdout
		case "progress", "p":
			lg.progress = true
		case "time", "t":
			lg.start = time.Now()
		case "error", "err", "e":
			lg.e = true
		case "warning", "warn", "w":
			lg.warn = true
		case "debug", "d":
			config.SetDebug()
		case "slow", "s":
			config.SetSlow()
		case "unknown", "u":
			lg.unknown = true
		case "known", "k":
			lg.known = true
		default:
			// report the single offending item rather than the whole option string
			return nil, fmt.Errorf("unknown -log input %s; expect a comma-separated list of stderr,stdout,out,o,progress,p,time,t,error,err,e,warning,warn,w,debug,d,slow,s,unknown,u,known,k", o)
		}
	}
	if config.Debug() || config.Slow() {
		lg.progress = false // progress reported internally
		config.SetOut(lg.w)
	}
	return lg, nil
}
Beispiel #3
0
// scorer starts a goroutine that consumes strikes and turns them into results.
//
// It returns the send side of a fresh unbuffered strike channel. The goroutine
// ranges over that channel until the caller closes it, and then closes r.
//
//   - buf is consulted for its size and for the byte slices needed to test the
//     bytes to the left and right of a strike.
//   - waitSet is asked which signatures are still worth checking, is told about
//     each match (PutAt), and supplies the current wait list (WaitingOnAt).
//   - q is closed (once, via quit) when the scorer decides that scanning can stop.
//   - r receives one result per satisfied signature.
//
// A strike whose idxa is -1 is treated as a progress report carrying the current
// scan offset rather than as a real hit. Once quit has been called, any further
// strikes are read and discarded so that senders are not left blocked.
//
// All state below (hits, strikes, bof, eof, quitting) is only touched from the
// closures defined here, which are only called from the single goroutine
// started at the bottom of this function.
func (b *Matcher) scorer(buf *siegreader.Buffer, waitSet *priority.WaitSet, q chan struct{}, r chan<- core.Result) chan<- strike {
	incoming := make(chan strike)
	// hits is keyed by signature index; strikes is keyed by idxa+idxb of the strike
	hits := make(map[int]*hitItem)
	strikes := make(map[int]*strikeItem)

	// latest offsets reported by progress strikes (forward and reverse scans respectively)
	var bof int64
	var eof int64

	var quitting bool
	// quit signals via q that scanning can stop and switches the loop below into drain mode
	quit := func() {
		close(q)
		quitting = true
	}

	// newHit creates and registers an empty hitItem for signature i, sized to its number of key frames
	newHit := func(i int) *hitItem {
		l := len(b.keyFrames[i])
		hit := &hitItem{
			potentialIdxs: make([]int, l),
			partials:      make([][][2]int64, l),
		}
		hits[i] = hit
		return hit
	}

	// given the current bof and eof, is there anything worth waiting for?
	continueWaiting := func(w []int) bool {
		var keepScanning bool
		// now for each of the possible signatures we are either waiting on or have partial/potential matches for, check whether there are live contenders
		for _, v := range w {
			kf := b.keyFrames[v]
			for i, f := range kf {
				// compare against the eof offset for frame types greater than frames.PREV, otherwise the bof offset
				off := bof
				if f.typ > frames.PREV {
					off = eof
				}
				var waitfor, excludable bool
				if f.key.pMax == -1 || f.key.pMax+int64(f.key.lMax) > off {
					waitfor = true
				} else if hit, ok := hits[v]; ok {
					if hit.partials[i] != nil {
						waitfor = true
					} else if hit.potentialIdxs[i] > 0 && strikes[hit.potentialIdxs[i]-1].hasPotential() {
						// potentialIdxs stores the strike key plus one, so zero means "no cached strike"
						waitfor, excludable = true, true
					}
				}
				// if we've got to the end of the signature, and have determined this is a live one - return immediately & continue scan
				if waitfor {
					if i == len(kf)-1 {
						if !config.Slow() || !config.Checkpoint(bof) {
							return true
						}
						// in slow mode at a checkpoint: report every live signature instead of returning on the first
						keepScanning = true
						fmt.Fprintf(config.Out(), "waiting on: %d, potentially excludable: %t\n", v, excludable)
					}
					continue
				}
				// this key frame is not live, so the signature as a whole is not; move on to the next one
				break
			}
		}
		return keepScanning
	}

	// testStrike returns the key frame hits that the strike st satisfies, testing the bytes to the
	// left and right of the strike where the relevant test tree has incomplete key frames
	testStrike := func(st strike) []kfHit {
		// the offsets we *record* are always BOF offsets - these can be interpreted as EOF offsets when necessary
		off := st.offset
		if st.reverse {
			off = buf.Size() - st.offset - int64(st.length)
		}
		// grab the relevant testTree
		t := b.tests[st.idxa+st.idxb]
		res := make([]kfHit, 0, 10)
		// immediately apply key frames for the completes
		for _, kf := range t.complete {
			if b.keyFrames[kf[0]][kf[1]].check(st.offset) && waitSet.Check(kf[0]) {
				res = append(res, kfHit{kf, off, st.length})
			}
		}
		// if there are no incompletes, we are done
		if len(t.incomplete) < 1 {
			return res
		}
		// see what incompletes are worth pursuing
		var checkl, checkr bool
		for _, v := range t.incomplete {
			if checkl && checkr {
				break
			}
			if b.keyFrames[v.kf[0]][v.kf[1]].check(st.offset) && waitSet.Check(v.kf[0]) {
				if v.l {
					checkl = true
				}
				if v.r {
					checkr = true
				}
			}
		}
		if !checkl && !checkr {
			return res
		}
		// calculate the offset and lengths for the left and right test slices
		var lslc, rslc []byte
		var lpos, rpos int64
		var llen, rlen int
		if st.reverse {
			lpos, llen = st.offset+int64(st.length), t.maxLeftDistance
			rpos, rlen = st.offset-int64(t.maxRightDistance), t.maxRightDistance
			// clamp at zero, shortening the slice by the amount it overshot
			if rpos < 0 {
				rlen = rlen + int(rpos)
				rpos = 0
			}
		} else {
			lpos, llen = st.offset-int64(t.maxLeftDistance), t.maxLeftDistance
			rpos, rlen = st.offset+int64(st.length), t.maxRightDistance
			// clamp at zero, shortening the slice by the amount it overshot
			if lpos < 0 {
				llen = llen + int(lpos)
				lpos = 0
			}
		}
		// the partials slice has a mirror entry for each of the testTree incompletes
		partials := make([]partial, len(t.incomplete))
		// test left (if there are valid left tests to try)
		if checkl {
			// errors from the buffer are ignored here; whatever slice is returned is tested
			if st.reverse {
				lslc, _ = buf.EofSlice(lpos, llen)
			} else {
				lslc, _ = buf.Slice(lpos, llen)
			}
			left := matchTestNodes(t.left, lslc, true)
			for _, lp := range left {
				if partials[lp.followUp].l {
					partials[lp.followUp].ldistances = append(partials[lp.followUp].ldistances, lp.distances...)
				} else {
					partials[lp.followUp].l = true
					partials[lp.followUp].ldistances = lp.distances
				}
			}
		}
		// test right (if there are valid right tests to try)
		if checkr {
			// errors from the buffer are ignored here; whatever slice is returned is tested
			if st.reverse {
				rslc, _ = buf.EofSlice(rpos, rlen)
			} else {
				rslc, _ = buf.Slice(rpos, rlen)
			}
			right := matchTestNodes(t.right, rslc, false)
			for _, rp := range right {
				if partials[rp.followUp].r {
					partials[rp.followUp].rdistances = append(partials[rp.followUp].rdistances, rp.distances...)
				} else {
					partials[rp.followUp].r = true
					partials[rp.followUp].rdistances = rp.distances
				}
			}
		}
		// now iterate through the partials, checking whether they fulfil any of the incompletes
		for i, p := range partials {
			// an incomplete is fulfilled only when exactly the sides it requires were matched
			if p.l == t.incomplete[i].l && p.r == t.incomplete[i].r {
				kf := t.incomplete[i].kf
				if b.keyFrames[kf[0]][kf[1]].check(st.offset) && waitSet.Check(kf[0]) {
					// a side that needed no test contributes a single zero distance
					if !p.l {
						p.ldistances = []int{0}
					}
					if !p.r {
						p.rdistances = []int{0}
					}
					// when oneEnough reports true only the first left/right distance pair is recorded
					if oneEnough(kf[1], b.keyFrames[kf[0]]) {
						res = append(res, kfHit{kf, off - int64(p.ldistances[0]), p.ldistances[0] + st.length + p.rdistances[0]})
						continue
					}
					// otherwise record every combination of left and right distances
					for _, ldistance := range p.ldistances {
						for _, rdistance := range p.rdistances {
							res = append(res, kfHit{kf, off - int64(ldistance), ldistance + st.length + rdistance})
						}
					}
				}
			}
		}
		return res
	}

	// applyKeyFrame records hit against its signature and reports whether the signature is now
	// fully matched, along with a human-readable basis string for the match
	applyKeyFrame := func(hit kfHit) (bool, string) {
		kfs := b.keyFrames[hit.id[0]]
		// a signature with a single key frame is matched by any one hit
		if len(kfs) == 1 {
			return true, fmt.Sprintf("byte match at %d, %d", hit.offset, hit.length)
		}
		h, ok := hits[hit.id[0]]
		if !ok {
			h = newHit(hit.id[0])
		}
		if h.partials[hit.id[1]] == nil {
			h.partials[hit.id[1]] = [][2]int64{{hit.offset, int64(hit.length)}}
		} else {
			h.partials[hit.id[1]] = append(h.partials[hit.id[1]], [2]int64{hit.offset, int64(hit.length)})
		}
		// no match is possible until every key frame has at least one partial
		for _, p := range h.partials {
			if p == nil {
				return false, ""
			}
		}
		// walk the key frames in order, checking each against its predecessor (and successor, where there is one)
		prevOff := h.partials[0]
		basis := make([][][2]int64, len(kfs))
		basis[0] = prevOff
		prevKf := kfs[0]
		ok = false
		for i, kf := range kfs[1:] {
			// i indexes kfs[1:], so kf is kfs[i+1] and its successor (if any) is kfs[i+2]
			var nextKf keyFrame
			if i+2 < len(kfs) {
				nextKf = kfs[i+2]
			}
			thisOff := h.partials[i+1]
			prevOff, ok = kf.checkRelated(prevKf, nextKf, thisOff, prevOff)
			if !ok {
				return false, ""
			}
			basis[i+1] = prevOff
			prevKf = kf
		}
		return true, fmt.Sprintf("byte match at %v", basis)
	}

	go func() {
		for in := range incoming {
			// if we've got a positive result, drain any remaining strikes from the matchers
			if quitting {
				continue
			}
			// if the strike reports progress, check if we should be continuing to wait
			if in.idxa == -1 {
				// update with the latest offset
				if in.reverse {
					eof = in.offset
				} else {
					bof = in.offset
				}
				w := waitSet.WaitingOnAt(bof, eof)
				// if any of the waitlists are nil, we will continue - unless we are past the known bof and known eof (points at which we *should* have got at least partial matches), in which case we will check if any partial/potential matches are live
				if w == nil {
					// keep going if we don't have a maximum known bof, or if our current bof/eof are less than the maximum known bof/eof
					if b.knownBOF < 0 || int64(b.knownBOF) > bof || int64(b.knownEOF) > eof {
						continue
					}
					// if we don't have a waitlist, and we are past the known bof and known eof, grab all the partials and potentials to check if any are live
					w = all(hits)
				}
				// exhausted all contenders, we can stop scanning
				if !continueWaiting(w) {
					quit()
				}
				continue
			}
			// now cache or satisfy the strike
			var hasPotential bool
			potentials := filterKF(b.tests[in.idxa+in.idxb].keyFrames(), waitSet)
			for _, pot := range potentials {
				// if any of the signatures are single keyframe we can satisfy immediately and skip cache
				if len(b.keyFrames[pot[0]]) == 1 {
					hasPotential = true
					break
				}
				if hit, ok := hits[pot[0]]; ok && hit.potentiallyComplete(pot[1], strikes) {
					hasPotential = true
					break
				}
			}
			if !hasPotential {
				// cache the strike
				s, ok := strikes[in.idxa+in.idxb]
				if !ok {
					s = &strikeItem{in, -1, nil}
					strikes[in.idxa+in.idxb] = s
				} else {
					// a repeat of an already-cached strike: remember its offset and length alongside the first
					if s.successive == nil {
						s.successive = make([][2]int64, 0, 10)
					}
					s.successive = append(s.successive, [2]int64{in.offset, int64(in.length)})
				}
				// range over the potentials, linking to the strike
				for _, pot := range potentials {
					if b.keyFrames[pot[0]][pot[1]].check(in.offset) {
						hit, ok := hits[pot[0]]
						if !ok {
							hit = newHit(pot[0])
						}
						// store the strike key plus one so that zero can mean "no cached strike"
						hit.potentialIdxs[pot[1]] = in.idxa + in.idxb + 1
					}
				}
				goto end
			}
			// satisfy the strike
			for {
				ks := testStrike(in)
				for _, k := range ks {
					if match, basis := applyKeyFrame(k); match {
						if waitSet.Check(k.id[0]) {
							r <- result{k.id[0], basis}
							// PutAt returning true is treated as "nothing further to wait for": stop scanning
							if waitSet.PutAt(k.id[0], bof, eof) {
								quit()
								goto end
							}
						}
						if h, ok := hits[k.id[0]]; ok {
							h.matched = true
						}
					}
				}
				// re-filter, then pull the next cached strike (if any) that the remaining potentials point at
				potentials = filterKF(potentials, waitSet)
				var ok bool
				for _, pot := range potentials {
					in, ok = hits[pot[0]].nextPotential(strikes)
					if ok {
						break
					}
				}
				if !ok {
					break
				}
			}
		end: // keep looping until incoming is closed
		}
		close(r)
	}()
	return incoming
}
Beispiel #4
0
// main is the command-line entry point.
//
// After parsing flags it handles, in order: -home/-sig overrides, -update,
// -hash validation, loading the signature file, -version, -fpr, -multi
// validation, logger and throttle setup, starting the printer goroutine,
// choosing the output writer, and -serve. If none of the early-exit modes
// applies it identifies the single file or directory argument, or, when the
// argument is "-", every file path read line by line from stdin.
//
// Any error from the identification run, including a failure while reading the
// stdin file list, is reported with log.Fatal once the output has been
// finished; otherwise the process exits with status 0.
func main() {
	flag.Parse()
	/*//UNCOMMENT TO RUN PROFILER
	go func() {
		log.Println(http.ListenAndServe("localhost:6060", nil))
	}()*/
	// configure home and signature if not default
	if *home != config.Home() {
		config.SetHome(*home)
	}
	if *sig != config.SignatureBase() {
		config.SetSignature(*sig)
	}
	// handle -update
	if *update {
		msg, err := updateSigs()
		if err != nil {
			log.Fatalf("[FATAL] failed to update signature file, %v", err)
		}
		fmt.Println(msg)
		return
	}
	// handle -hash error
	hashT := getHash(*hashf)
	if *hashf != "" && hashT < 0 {
		log.Fatalf("[FATAL] invalid hash type; choose from %s", hashChoices)
	}
	// load and handle signature errors
	s, err := siegfried.Load(config.Signature())
	if err != nil {
		log.Fatalf("[FATAL] error loading signature file, got: %v", err)
	}
	// handle -version
	if *version {
		v := config.Version()
		fmt.Printf("siegfried %d.%d.%d\n%s", v[0], v[1], v[2], s)
		return
	}
	// handle -fpr
	if *fprflag {
		log.Printf("FPR server started at %s. Use CTRL-C to quit.\n", config.Fpr())
		serveFpr(config.Fpr(), s)
		return
	}
	// check -multi
	if *multi > maxMulti || *multi < 1 || (*archive && *multi > 1) {
		log.Println("[WARN] -multi must be > 0 and =< 1024. If -z, -multi must be 1. Resetting -multi to 1")
		*multi = 1
	}
	// start logger
	lg, err := newLogger(*logf)
	if err != nil {
		log.Fatalln(err)
	}
	if config.Slow() || config.Debug() {
		if *serve != "" || *fprflag {
			log.Fatalln("[FATAL] debug and slow logging cannot be run in server mode")
		}
	}
	// start throttle
	if *throttlef != 0 {
		throttle = time.NewTicker(*throttlef)
		defer throttle.Stop()
	}
	// start the printer
	lenCtxts := *multi
	if lenCtxts == 1 {
		lenCtxts = 8
	}
	ctxts := make(chan *context, lenCtxts)
	go printer(ctxts, lg)
	// set default writer
	var w writer
	switch {
	case *csvo:
		w = newCSV(os.Stdout)
	case *jsono:
		w = newJSON(os.Stdout)
	case *droido:
		w = newDroid(os.Stdout)
		if len(s.Fields()) != 1 || len(s.Fields()[0]) != 7 {
			close(ctxts)
			log.Fatalln("[FATAL] DROID output is limited to signature files with a single PRONOM identifier")
		}
	default:
		w = newYAML(os.Stdout)
	}
	// overwrite writer with nil writer if logging is to stdout
	if lg != nil && lg.w == os.Stdout {
		w = logWriter{}
	}
	// setup default waitgroup
	wg := &sync.WaitGroup{}
	// setup context pool
	setCtxPool(s, w, wg, hashT, *archive)
	// handle -serve
	if *serve != "" {
		log.Printf("Starting server at %s. Use CTRL-C to quit.\n", *serve)
		listen(*serve, s, ctxts)
		return
	}
	// handle no file/directory argument
	if flag.NArg() != 1 {
		close(ctxts)
		log.Fatalln("[FATAL] expecting a single file or directory argument")
	}

	w.writeHead(s, hashT)
	// support reading list files from stdin
	if flag.Arg(0) == "-" {
		scanner := bufio.NewScanner(os.Stdin)
		for scanner.Scan() {
			path := scanner.Text()
			info, err := os.Stat(path)
			if err != nil {
				info, err = retryStat(path, err)
			}
			if err != nil || info.IsDir() {
				// queue a context that carries only the error so the failure appears in the output
				ctx := getCtx(path, "", "", 0)
				ctx.res <- results{fmt.Errorf("failed to identify %s (in scanning mode, inputs must all be files and not directories), got: %v", path, err), nil, nil}
				ctx.wg.Add(1)
				ctxts <- ctx
			} else {
				identifyFile(getCtx(path, "", info.ModTime().Format(time.RFC3339), info.Size()), ctxts, getCtx)
			}
		}
		// a failed read of stdin (or an over-long line) ends the loop early; surface it
		// through the final error check instead of silently truncating the file list
		if serr := scanner.Err(); serr != nil {
			err = fmt.Errorf("[FATAL] error reading file list from stdin, got: %v", serr)
		}
	} else {
		err = identify(ctxts, flag.Arg(0), "", *nr, getCtx)
	}
	wg.Wait()
	close(ctxts)
	w.writeTail()
	// log time elapsed
	if !lg.start.IsZero() {
		fmt.Fprintf(lg.w, "%s %v\n", timeString, time.Since(lg.start))
	}
	if err != nil {
		log.Fatal(err)
	}
	os.Exit(0)
}
Beispiel #5
0
// IdentifyBuffer identifies a siegreader buffer. Supply the error from Get as the second argument.
//
// name and mime are optional hints: a non-empty name activates and runs the
// name matcher, a non-empty mime does the same for the MIME matcher. One
// recorder is created per identifier in s, and every matcher result is offered
// to the recorders in order until one of them accepts it.
//
// Matchers run in a fixed sequence - name, MIME, container, XML, RIFF, byte,
// text - and each of the XML, RIFF, byte and text matchers is skipped when every
// recorder already reports itself satisfied for that matcher type.
//
// Errors: if err is non-nil and is not siegreader.ErrEmpty, no identification is
// attempted and a wrapped error is returned. Otherwise the returned error is err
// itself (so siegreader.ErrEmpty is passed back alongside the results) or, when
// err was nil, the first non-nil error from the container, XML or RIFF matcher.
// Errors from the name, MIME, byte and text matchers are deliberately ignored.
//
// The result is the concatenation of each recorder's report; it is nil when s
// has no identifiers.
func (s *Siegfried) IdentifyBuffer(buffer *siegreader.Buffer, err error, name, mime string) ([]core.Identification, error) {
	if err != nil && err != siegreader.ErrEmpty {
		return nil, fmt.Errorf("siegfried: error reading file; got %v", err)
	}
	recs := make([]core.Recorder, len(s.ids))
	for i, v := range s.ids {
		recs[i] = v.Recorder()
		if name != "" {
			recs[i].Active(core.NameMatcher)
		}
		if mime != "" {
			recs[i].Active(core.MIMEMatcher)
		}
		// XML and text matching are only activated when the buffer was read without error (i.e. not empty)
		if err == nil {
			recs[i].Active(core.XMLMatcher)
			recs[i].Active(core.TextMatcher)
		}
	}
	// Log name for debug/slow
	if config.Debug() || config.Slow() {
		fmt.Fprintf(config.Out(), "[FILE] %s\n", name)
	}
	// Name Matcher
	if len(name) > 0 && s.nm != nil {
		nms, _ := s.nm.Identify(name, nil) // we don't care about an error here
		for v := range nms {
			for _, rec := range recs {
				if rec.Record(core.NameMatcher, v) {
					break
				}
			}
		}
	}
	// MIME Matcher
	if len(mime) > 0 && s.mm != nil {
		mms, _ := s.mm.Identify(mime, nil) // we don't care about an error here
		for v := range mms {
			for _, rec := range recs {
				if rec.Record(core.MIMEMatcher, v) {
					break
				}
			}
		}
	}
	// Container Matcher
	if s.cm != nil {
		if config.Debug() {
			fmt.Fprintln(config.Out(), ">>START CONTAINER MATCHER")
		}
		cms, cerr := s.cm.Identify(name, buffer)
		for v := range cms {
			for _, rec := range recs {
				if rec.Record(core.ContainerMatcher, v) {
					break
				}
			}
		}
		// keep the earliest error: never overwrite one that is already set
		if err == nil {
			err = cerr
		}
	}
	satisfied := true
	// XML Matcher
	if s.xm != nil {
		for _, rec := range recs {
			if ok, _ := rec.Satisfied(core.XMLMatcher); !ok {
				satisfied = false
				break
			}
		}
		if !satisfied {
			if config.Debug() {
				fmt.Fprintln(config.Out(), ">>START XML MATCHER")
			}
			xms, xerr := s.xm.Identify("", buffer)
			for v := range xms {
				for _, rec := range recs {
					if rec.Record(core.XMLMatcher, v) {
						break
					}
				}
			}
			if err == nil {
				err = xerr
			}
		}
	}
	satisfied = true
	// RIFF Matcher
	if s.rm != nil {
		for _, rec := range recs {
			if ok, _ := rec.Satisfied(core.RIFFMatcher); !ok {
				satisfied = false
				break
			}
		}
		if !satisfied {
			if config.Debug() {
				fmt.Fprintln(config.Out(), ">>START RIFF MATCHER")
			}
			rms, rerr := s.rm.Identify("", buffer)
			for v := range rms {
				for _, rec := range recs {
					if rec.Record(core.RIFFMatcher, v) {
						break
					}
				}
			}
			if err == nil {
				err = rerr
			}
		}
	}
	// Unlike the checks above, every recorder is visited here (no early break):
	// each satisfied recorder contributes a value that is passed on to the byte matcher.
	satisfied = true
	exclude := make([]int, 0, len(recs))
	for _, rec := range recs {
		ok, ex := rec.Satisfied(core.ByteMatcher)
		if !ok {
			satisfied = false
		} else {
			exclude = append(exclude, ex)
		}
	}
	// Byte Matcher
	if s.bm != nil && !satisfied {
		if config.Debug() {
			fmt.Fprintln(config.Out(), ">>START BYTE MATCHER")
		}
		ids, _ := s.bm.Identify("", buffer, exclude...) // we don't care about an error here
		for v := range ids {
			for _, rec := range recs {
				if rec.Record(core.ByteMatcher, v) {
					break
				}
			}
		}
	}
	satisfied = true
	for _, rec := range recs {
		if ok, _ := rec.Satisfied(core.TextMatcher); !ok {
			satisfied = false
			break
		}
	}
	// Text Matcher
	if s.tm != nil && !satisfied {
		ids, _ := s.tm.Identify("", buffer) // we don't care about an error here
		for v := range ids {
			for _, rec := range recs {
				if rec.Record(core.TextMatcher, v) {
					break
				}
			}
		}
	}
	// Single-identifier shortcut. The test is for exactly one recorder so that a
	// Siegfried with no identifiers falls through to the loop below (yielding a
	// nil result) instead of indexing into an empty slice.
	if len(recs) == 1 {
		return recs[0].Report(), err
	}
	var res []core.Identification
	for idx, rec := range recs {
		if config.Slow() || config.Debug() {
			for _, id := range rec.Report() {
				fmt.Fprintf(config.Out(), "matched: %s\n", id.String())
			}
		}
		if idx == 0 {
			res = rec.Report()
			continue
		}
		res = append(res, rec.Report()...)
	}
	return res, err
}