func identifyFile(ctx *context, ctxts chan *context, gf getFn) { ctx.wg.Add(1) ctxts <- ctx if *multi == 1 || ctx.z || config.Slow() || config.Debug() { readFile(ctx, ctxts, gf) return } go func() { ctx.wg.Add(1) readFile(ctx, ctxts, gf) ctx.wg.Done() }() }
func newLogger(opts string) (*logger, error) { lg := &logger{w: os.Stderr} if opts == "" { return lg, nil } for _, o := range strings.Split(opts, ",") { switch o { case "stderr": case "stdout", "out", "o": lg.w = os.Stdout case "progress", "p": lg.progress = true case "time", "t": lg.start = time.Now() case "error", "err", "e": lg.e = true case "warning", "warn", "w": lg.warn = true case "debug", "d": config.SetDebug() case "slow", "s": config.SetSlow() case "unknown", "u": lg.unknown = true case "known", "k": lg.known = true default: return nil, fmt.Errorf("unknown -log input %s; expect be comma-separated list of stdout,out,o,progress,p,error,err,e,warning,warn,w,debug,d,slow,s,unknown,u,known,k", opts) } } if config.Debug() || config.Slow() { lg.progress = false // progress reported internally config.SetOut(lg.w) } return lg, nil }
// scorer starts a goroutine that consumes strikes from the returned channel and
// turns them into results.
//
// For each strike the goroutine either caches it (when no signature could yet be
// completed by it) or tests it against the relevant test tree and applies the
// resulting key frame hits. Whenever applyKeyFrame reports a match for a signature
// that waitSet.Check still accepts, a result is sent on r.
//
// Scanning is asked to stop by closing q; this happens when waitSet.PutAt returns
// true after a match, or when a progress strike (idxa == -1) shows there is nothing
// left worth waiting for. After that, remaining strikes are drained without being
// processed. r is closed once the incoming channel has been closed by the sender.
func (b *Matcher) scorer(buf *siegreader.Buffer, waitSet *priority.WaitSet, q chan struct{}, r chan<- core.Result) chan<- strike {
	incoming := make(chan strike)
	// hits is keyed by signature index (the index into b.keyFrames)
	hits := make(map[int]*hitItem)
	// strikes caches unsatisfied strikes, keyed by test index (idxa+idxb, the index into b.tests)
	strikes := make(map[int]*strikeItem)

	// latest BOF and EOF offsets reported by progress strikes
	var bof int64
	var eof int64

	// quitting is set once q has been closed; the receive loop checks it before
	// processing a strike, so quit is not invoked a second time from there
	var quitting bool
	quit := func() {
		close(q)
		quitting = true
	}

	// newHit creates a hitItem with one slot per key frame of signature i and registers it in hits
	newHit := func(i int) *hitItem {
		l := len(b.keyFrames[i])
		hit := &hitItem{
			potentialIdxs: make([]int, l),
			partials:      make([][][2]int64, l),
		}
		hits[i] = hit
		return hit
	}

	// given the current bof and eof, is there anything worth waiting for?
	// w lists the signature indexes to examine. Returns true if at least one of them is still live.
	continueWaiting := func(w []int) bool {
		var keepScanning bool
		// now for each of the possible signatures we are either waiting on or have partial/potential matches for, check whether there are live contenders
		for _, v := range w {
			kf := b.keyFrames[v]
			for i, f := range kf {
				// frames with a type beyond frames.PREV are measured against the eof offset, the rest against bof
				off := bof
				if f.typ > frames.PREV {
					off = eof
				}
				var waitfor, excludable bool
				// a frame is live if it has no maximum position (pMax == -1), if its furthest possible
				// extent is still ahead of the current offset, if a partial match is already recorded,
				// or if a cached strike linked to it still has potential
				if f.key.pMax == -1 || f.key.pMax+int64(f.key.lMax) > off {
					waitfor = true
				} else if hit, ok := hits[v]; ok {
					if hit.partials[i] != nil {
						waitfor = true
					} else if hit.potentialIdxs[i] > 0 && strikes[hit.potentialIdxs[i]-1].hasPotential() { // potentialIdxs stores the strike key + 1, so 0 means "none"
						waitfor, excludable = true, true
					}
				}
				// if we've got to the end of the signature, and have determined this is a live one - return immediately & continue scan
				if waitfor {
					if i == len(kf)-1 {
						// in slow mode, when config.Checkpoint(bof) is true, don't return early: log every live signature instead
						if !config.Slow() || !config.Checkpoint(bof) {
							return true
						}
						keepScanning = true
						fmt.Fprintf(config.Out(), "waiting on: %d, potentially excludable: %t\n", v, excludable)
					}
					continue
				}
				// this frame is not live, so stop examining this signature
				break
			}
		}
		return keepScanning
	}

	// testStrike converts a strike into zero or more key frame hits: directly for the
	// test tree's completes, and after testing the bytes to the left and right of the
	// strike for its incompletes.
	testStrike := func(st strike) []kfHit {
		// the offsets we *record* are always BOF offsets - these can be interpreted as EOF offsets when necessary
		off := st.offset
		if st.reverse {
			off = buf.Size() - st.offset - int64(st.length)
		}

		// grab the relevant testTree
		t := b.tests[st.idxa+st.idxb]

		res := make([]kfHit, 0, 10)
		// immediately apply key frames for the completes
		for _, kf := range t.complete {
			if b.keyFrames[kf[0]][kf[1]].check(st.offset) && waitSet.Check(kf[0]) {
				res = append(res, kfHit{kf, off, st.length})
			}
		}

		// if there are no incompletes, we are done
		if len(t.incomplete) < 1 {
			return res
		}

		// see what incompletes are worth pursuing
		var checkl, checkr bool
		for _, v := range t.incomplete {
			// both sides already needed, no point looking further
			if checkl && checkr {
				break
			}
			if b.keyFrames[v.kf[0]][v.kf[1]].check(st.offset) && waitSet.Check(v.kf[0]) {
				if v.l {
					checkl = true
				}
				if v.r {
					checkr = true
				}
			}
		}
		if !checkl && !checkr {
			return res
		}

		// calculate the offset and lengths for the left and right test slices
		var lslc, rslc []byte
		var lpos, rpos int64
		var llen, rlen int
		if st.reverse {
			lpos, llen = st.offset+int64(st.length), t.maxLeftDistance
			rpos, rlen = st.offset-int64(t.maxRightDistance), t.maxRightDistance
			// clamp a negative position to zero, shortening the slice by the overshoot
			if rpos < 0 {
				rlen = rlen + int(rpos)
				rpos = 0
			}
		} else {
			lpos, llen = st.offset-int64(t.maxLeftDistance), t.maxLeftDistance
			rpos, rlen = st.offset+int64(st.length), t.maxRightDistance
			// clamp a negative position to zero, shortening the slice by the overshoot
			if lpos < 0 {
				llen = llen + int(lpos)
				lpos = 0
			}
		}

		// the partials slice has a mirror entry for each of the testTree incompletes
		partials := make([]partial, len(t.incomplete))

		// test left (if there are valid left tests to try)
		if checkl {
			// the slice error is discarded
			// NOTE(review): presumably a short or empty slice is acceptable here - confirm
			if st.reverse {
				lslc, _ = buf.EofSlice(lpos, llen)
			} else {
				lslc, _ = buf.Slice(lpos, llen)
			}
			left := matchTestNodes(t.left, lslc, true)
			for _, lp := range left {
				if partials[lp.followUp].l {
					partials[lp.followUp].ldistances = append(partials[lp.followUp].ldistances, lp.distances...)
				} else {
					partials[lp.followUp].l = true
					partials[lp.followUp].ldistances = lp.distances
				}
			}
		}
		// test right (if there are valid right tests to try)
		if checkr {
			// the slice error is discarded, as for the left test
			if st.reverse {
				rslc, _ = buf.EofSlice(rpos, rlen)
			} else {
				rslc, _ = buf.Slice(rpos, rlen)
			}
			right := matchTestNodes(t.right, rslc, false)
			for _, rp := range right {
				if partials[rp.followUp].r {
					partials[rp.followUp].rdistances = append(partials[rp.followUp].rdistances, rp.distances...)
				} else {
					partials[rp.followUp].r = true
					partials[rp.followUp].rdistances = rp.distances
				}
			}
		}

		// now iterate through the partials, checking whether they fulfil any of the incompletes
		for i, p := range partials {
			// fulfilled only when the sides that matched are exactly the sides the incomplete requires
			if p.l == t.incomplete[i].l && p.r == t.incomplete[i].r {
				kf := t.incomplete[i].kf
				if b.keyFrames[kf[0]][kf[1]].check(st.offset) && waitSet.Check(kf[0]) {
					// a side with no test contributes a single zero distance
					// (p is a copy of the slice element, so these assignments are local to this iteration)
					if !p.l {
						p.ldistances = []int{0}
					}
					if !p.r {
						p.rdistances = []int{0}
					}
					// when oneEnough reports true, only the first left/right distance pair is recorded
					if oneEnough(kf[1], b.keyFrames[kf[0]]) {
						res = append(res, kfHit{kf, off - int64(p.ldistances[0]), p.ldistances[0] + st.length + p.rdistances[0]})
						continue
					}
					// otherwise record a hit for every combination of left and right distance
					for _, ldistance := range p.ldistances {
						for _, rdistance := range p.rdistances {
							res = append(res, kfHit{kf, off - int64(ldistance), ldistance + st.length + rdistance})
						}
					}
				}
			}
		}
		return res
	}

	// applyKeyFrame records a key frame hit against its signature and reports whether the
	// signature is now matched, together with a description of the basis for the match.
	applyKeyFrame := func(hit kfHit) (bool, string) {
		kfs := b.keyFrames[hit.id[0]]
		// a signature with a single key frame is matched by any hit
		if len(kfs) == 1 {
			return true, fmt.Sprintf("byte match at %d, %d", hit.offset, hit.length)
		}
		h, ok := hits[hit.id[0]]
		if !ok {
			h = newHit(hit.id[0])
		}
		// record the hit as an [offset, length] pair against its key frame
		if h.partials[hit.id[1]] == nil {
			h.partials[hit.id[1]] = [][2]int64{{hit.offset, int64(hit.length)}}
		} else {
			h.partials[hit.id[1]] = append(h.partials[hit.id[1]], [2]int64{hit.offset, int64(hit.length)})
		}
		// every key frame needs at least one hit before the signature can match
		for _, p := range h.partials {
			if p == nil {
				return false, ""
			}
		}
		// walk the key frames in order, checking each against its predecessor (and successor, if any);
		// the offsets returned by checkRelated are carried forward as prevOff and recorded in basis
		prevOff := h.partials[0]
		basis := make([][][2]int64, len(kfs))
		basis[0] = prevOff
		prevKf := kfs[0]
		ok = false
		for i, kf := range kfs[1:] {
			// i indexes kfs[1:], so kf is kfs[i+1] and the following key frame is kfs[i+2]
			var nextKf keyFrame
			if i+2 < len(kfs) {
				nextKf = kfs[i+2]
			}
			thisOff := h.partials[i+1]
			prevOff, ok = kf.checkRelated(prevKf, nextKf, thisOff, prevOff)
			if !ok {
				return false, ""
			}
			basis[i+1] = prevOff
			prevKf = kf
		}
		return true, fmt.Sprintf("byte match at %v", basis)
	}

	go func() {
		for in := range incoming {
			// if we've got a positive result, drain any remaining strikes from the matchers
			if quitting {
				continue
			}
			// if the strike reports progress, check if we should be continuing to wait
			if in.idxa == -1 {
				// update with the latest offset
				if in.reverse {
					eof = in.offset
				} else {
					bof = in.offset
				}
				w := waitSet.WaitingOnAt(bof, eof)
				// if any of the waitlists are nil, we will continue - unless we are past the known bof and known eof (points at which we *should* have got at least partial matches), in which case we will check if any partial/potential matches are live
				if w == nil {
					// keep going if we don't have a maximum known bof, or if our current bof/eof are less than the maximum known bof/eof
					if b.knownBOF < 0 || int64(b.knownBOF) > bof || int64(b.knownEOF) > eof {
						continue
					}
					// if we don't have a waitlist, and we are past the known bof and known eof, grab all the partials and potentials to check if any are live
					w = all(hits)
				}
				// exhausted all contenders, we can stop scanning
				if !continueWaiting(w) {
					quit()
				}
				continue
			}
			// now cache or satisfy the strike
			var hasPotential bool
			potentials := filterKF(b.tests[in.idxa+in.idxb].keyFrames(), waitSet)
			for _, pot := range potentials {
				// if any of the signatures are single keyframe we can satisfy immediately and skip cache
				if len(b.keyFrames[pot[0]]) == 1 {
					hasPotential = true
					break
				}
				if hit, ok := hits[pot[0]]; ok && hit.potentiallyComplete(pot[1], strikes) {
					hasPotential = true
					break
				}
			}
			if !hasPotential {
				// cache the strike
				s, ok := strikes[in.idxa+in.idxb]
				if !ok {
					s = &strikeItem{in, -1, nil}
					strikes[in.idxa+in.idxb] = s
				} else {
					// a repeat strike for the same test: remember its offset and length alongside the first
					if s.successive == nil {
						s.successive = make([][2]int64, 0, 10)
					}
					s.successive = append(s.successive, [2]int64{in.offset, int64(in.length)})
				}
				// range over the potentials, linking to the strike
				for _, pot := range potentials {
					if b.keyFrames[pot[0]][pot[1]].check(in.offset) {
						hit, ok := hits[pot[0]]
						if !ok {
							hit = newHit(pot[0])
						}
						// stored as key + 1 so that the zero value means "no cached strike"
						hit.potentialIdxs[pot[1]] = in.idxa + in.idxb + 1
					}
				}
				goto end
			}
			// satisfy the strike
			for {
				ks := testStrike(in)
				for _, k := range ks {
					if match, basis := applyKeyFrame(k); match {
						if waitSet.Check(k.id[0]) {
							r <- result{k.id[0], basis}
							// stop scanning when PutAt returns true
							if waitSet.PutAt(k.id[0], bof, eof) {
								quit()
								goto end
							}
						}
						if h, ok := hits[k.id[0]]; ok {
							h.matched = true
						}
					}
				}
				// re-filter against the wait set, then replay the next cached strike (if any) for the remaining potentials
				potentials = filterKF(potentials, waitSet)
				var ok bool
				for _, pot := range potentials {
					// NOTE(review): hits[pot[0]] is not checked for presence here; nextPotential is called on whatever the map returns
					in, ok = hits[pot[0]].nextPotential(strikes)
					if ok {
						break
					}
				}
				if !ok {
					break
				}
			}
		end:
			// keep looping until incoming is closed
		}
		close(r)
	}()
	return incoming
}
func main() { flag.Parse() /*//UNCOMMENT TO RUN PROFILER go func() { log.Println(http.ListenAndServe("localhost:6060", nil)) }()*/ // configure home and signature if not default if *home != config.Home() { config.SetHome(*home) } if *sig != config.SignatureBase() { config.SetSignature(*sig) } // handle -update if *update { msg, err := updateSigs() if err != nil { log.Fatalf("[FATAL] failed to update signature file, %v", err) } fmt.Println(msg) return } // handle -hash error hashT := getHash(*hashf) if *hashf != "" && hashT < 0 { log.Fatalf("[FATAL] invalid hash type; choose from %s", hashChoices) } // load and handle signature errors s, err := siegfried.Load(config.Signature()) if err != nil { log.Fatalf("[FATAL] error loading signature file, got: %v", err) } // handle -version if *version { version := config.Version() fmt.Printf("siegfried %d.%d.%d\n%s", version[0], version[1], version[2], s) return } // handle -fpr if *fprflag { log.Printf("FPR server started at %s. Use CTRL-C to quit.\n", config.Fpr()) serveFpr(config.Fpr(), s) return } // check -multi if *multi > maxMulti || *multi < 1 || (*archive && *multi > 1) { log.Println("[WARN] -multi must be > 0 and =< 1024. If -z, -multi must be 1. 
Resetting -multi to 1") *multi = 1 } // start logger lg, err := newLogger(*logf) if err != nil { log.Fatalln(err) } if config.Slow() || config.Debug() { if *serve != "" || *fprflag { log.Fatalln("[FATAL] debug and slow logging cannot be run in server mode") } } // start throttle if *throttlef != 0 { throttle = time.NewTicker(*throttlef) defer throttle.Stop() } // start the printer lenCtxts := *multi if lenCtxts == 1 { lenCtxts = 8 } ctxts := make(chan *context, lenCtxts) go printer(ctxts, lg) // set default writer var w writer switch { case *csvo: w = newCSV(os.Stdout) case *jsono: w = newJSON(os.Stdout) case *droido: w = newDroid(os.Stdout) if len(s.Fields()) != 1 || len(s.Fields()[0]) != 7 { close(ctxts) log.Fatalln("[FATAL] DROID output is limited to signature files with a single PRONOM identifier") } default: w = newYAML(os.Stdout) } // overrite writer with nil writer if logging is to stdout if lg != nil && lg.w == os.Stdout { w = logWriter{} } // setup default waitgroup wg := &sync.WaitGroup{} // setup context pool setCtxPool(s, w, wg, hashT, *archive) // handle -serve if *serve != "" { log.Printf("Starting server at %s. 
Use CTRL-C to quit.\n", *serve) listen(*serve, s, ctxts) return } // handle no file/directory argument if flag.NArg() != 1 { close(ctxts) log.Fatalln("[FATAL] expecting a single file or directory argument") } w.writeHead(s, hashT) // support reading list files from stdin if flag.Arg(0) == "-" { scanner := bufio.NewScanner(os.Stdin) for scanner.Scan() { info, err := os.Stat(scanner.Text()) if err != nil { info, err = retryStat(scanner.Text(), err) } if err != nil || info.IsDir() { ctx := getCtx(scanner.Text(), "", "", 0) ctx.res <- results{fmt.Errorf("failed to identify %s (in scanning mode, inputs must all be files and not directories), got: %v", scanner.Text(), err), nil, nil} ctx.wg.Add(1) ctxts <- ctx } else { identifyFile(getCtx(scanner.Text(), "", info.ModTime().Format(time.RFC3339), info.Size()), ctxts, getCtx) } } } else { err = identify(ctxts, flag.Arg(0), "", *nr, getCtx) } wg.Wait() close(ctxts) w.writeTail() // log time elapsed if !lg.start.IsZero() { fmt.Fprintf(lg.w, "%s %v\n", timeString, time.Since(lg.start)) } if err != nil { log.Fatal(err) } os.Exit(0) }
// IdentifyBuffer identifies a siegreader buffer. Supply the error from Get as the second argument. func (s *Siegfried) IdentifyBuffer(buffer *siegreader.Buffer, err error, name, mime string) ([]core.Identification, error) { if err != nil && err != siegreader.ErrEmpty { return nil, fmt.Errorf("siegfried: error reading file; got %v", err) } recs := make([]core.Recorder, len(s.ids)) for i, v := range s.ids { recs[i] = v.Recorder() if name != "" { recs[i].Active(core.NameMatcher) } if mime != "" { recs[i].Active(core.MIMEMatcher) } if err == nil { recs[i].Active(core.XMLMatcher) recs[i].Active(core.TextMatcher) } } // Log name for debug/slow if config.Debug() || config.Slow() { fmt.Fprintf(config.Out(), "[FILE] %s\n", name) } // Name Matcher if len(name) > 0 && s.nm != nil { nms, _ := s.nm.Identify(name, nil) // we don't care about an error here for v := range nms { for _, rec := range recs { if rec.Record(core.NameMatcher, v) { break } } } } // MIME Matcher if len(mime) > 0 && s.mm != nil { mms, _ := s.mm.Identify(mime, nil) // we don't care about an error here for v := range mms { for _, rec := range recs { if rec.Record(core.MIMEMatcher, v) { break } } } } // Container Matcher if s.cm != nil { if config.Debug() { fmt.Fprintln(config.Out(), ">>START CONTAINER MATCHER") } cms, cerr := s.cm.Identify(name, buffer) for v := range cms { for _, rec := range recs { if rec.Record(core.ContainerMatcher, v) { break } } } if err == nil { err = cerr } } satisfied := true // XML Matcher if s.xm != nil { for _, rec := range recs { if ok, _ := rec.Satisfied(core.XMLMatcher); !ok { satisfied = false break } } if !satisfied { if config.Debug() { fmt.Fprintln(config.Out(), ">>START XML MATCHER") } xms, xerr := s.xm.Identify("", buffer) for v := range xms { for _, rec := range recs { if rec.Record(core.XMLMatcher, v) { break } } } if err == nil { err = xerr } } } satisfied = true // RIFF Matcher if s.rm != nil { for _, rec := range recs { if ok, _ := rec.Satisfied(core.RIFFMatcher); 
!ok { satisfied = false break } } if !satisfied { if config.Debug() { fmt.Fprintln(config.Out(), ">>START RIFF MATCHER") } rms, rerr := s.rm.Identify("", buffer) for v := range rms { for _, rec := range recs { if rec.Record(core.RIFFMatcher, v) { break } } } if err == nil { err = rerr } } } satisfied = true exclude := make([]int, 0, len(recs)) for _, rec := range recs { ok, ex := rec.Satisfied(core.ByteMatcher) if !ok { satisfied = false } else { exclude = append(exclude, ex) } } // Byte Matcher if s.bm != nil && !satisfied { if config.Debug() { fmt.Fprintln(config.Out(), ">>START BYTE MATCHER") } ids, _ := s.bm.Identify("", buffer, exclude...) // we don't care about an error here for v := range ids { for _, rec := range recs { if rec.Record(core.ByteMatcher, v) { break } } } } satisfied = true for _, rec := range recs { if ok, _ := rec.Satisfied(core.TextMatcher); !ok { satisfied = false break } } // Text Matcher if s.tm != nil && !satisfied { ids, _ := s.tm.Identify("", buffer) // we don't care about an error here for v := range ids { for _, rec := range recs { if rec.Record(core.TextMatcher, v) { break } } } } if len(recs) < 2 { return recs[0].Report(), err } var res []core.Identification for idx, rec := range recs { if config.Slow() || config.Debug() { for _, id := range rec.Report() { fmt.Fprintf(config.Out(), "matched: %s\n", id.String()) } } if idx == 0 { res = rec.Report() continue } res = append(res, rec.Report()...) } return res, err }