func (c *ContainerMatcher) identify(n string, rdr Reader, res chan core.Result) { // safe to call on a nil matcher (i.e. container matching switched off) if c == nil { close(res) return } id := c.newIdentifier(len(c.parts)) var err error for err = rdr.Next(); err == nil; err = rdr.Next() { ct, ok := c.nameCTest[rdr.Name()] if !ok { continue } if config.Debug() { fmt.Fprintf(config.Out(), "{Name match - %s (container %d))}\n", rdr.Name(), c.conType) } // name has matched, let's test the CTests // ct.identify will generate a slice of hits which pass to // processHits which will return true if we can stop if c.processHits(ct.identify(c, id, rdr, rdr.Name()), id, ct, rdr.Name(), res) { break } } // send a default hit if no result and extension matches if c.extension != "" && !id.result && filepath.Ext(n) == "."+c.extension { res <- defaultHit(-1 - int(c.conType)) } close(res) }
func identifyFile(ctx *context, ctxts chan *context, gf getFn) { ctx.wg.Add(1) ctxts <- ctx if *multi == 1 || ctx.z || config.Slow() || config.Debug() { readFile(ctx, ctxts, gf) return } go func() { ctx.wg.Add(1) readFile(ctx, ctxts, gf) ctx.wg.Done() }() }
func newLogger(opts string) (*logger, error) { lg := &logger{w: os.Stderr} if opts == "" { return lg, nil } for _, o := range strings.Split(opts, ",") { switch o { case "stderr": case "stdout", "out", "o": lg.w = os.Stdout case "progress", "p": lg.progress = true case "time", "t": lg.start = time.Now() case "error", "err", "e": lg.e = true case "warning", "warn", "w": lg.warn = true case "debug", "d": config.SetDebug() case "slow", "s": config.SetSlow() case "unknown", "u": lg.unknown = true case "known", "k": lg.known = true default: return nil, fmt.Errorf("unknown -log input %s; expect be comma-separated list of stdout,out,o,progress,p,error,err,e,warning,warn,w,debug,d,slow,s,unknown,u,known,k", opts) } } if config.Debug() || config.Slow() { lg.progress = false // progress reported internally config.SetOut(lg.w) } return lg, nil }
// identify runs the byte matcher over buf and feeds every strike it finds to
// the scorer through the incoming channel. It always closes incoming before
// returning.
//
// The phases are, in order:
//  1. BOF frames;
//  2. an initial pass over BOF sequences, which may stop early (see below);
//  3. EOF frames and EOF sequences, either sequentially (when maxEOF >= 0) or
//     in parallel with the remaining BOF sequence scan (when maxEOF < 0).
//
// After phases 1 and 2 the quit channel is polled; if it has fired, the
// current source channel is drained and the function returns.
//
// exclude is passed to the priority wait set; when it is non-empty the BOF/EOF
// limits are taken from that wait set instead of the matcher's own limits.
func (b *Matcher) identify(buf *siegreader.Buffer, quit chan struct{}, r chan core.Result, exclude ...int) {
	buf.Quit = quit
	waitSet := b.priorities.WaitSet(exclude...)
	var maxBOF, maxEOF int
	if len(exclude) > 0 {
		maxBOF, maxEOF = waitSet.MaxOffsets()
	} else {
		maxBOF, maxEOF = b.maxBOF, b.maxEOF
	}
	// incoming is the channel on which strikes are handed to the scorer
	incoming := b.scorer(buf, waitSet, quit, r)
	rdr := siegreader.LimitReaderFrom(buf, maxBOF)
	// First test BOF frameset
	bfchan := b.bofFrames.index(buf, false, quit)
	for bf := range bfchan {
		if config.Debug() {
			fmt.Fprintln(config.Out(), strike{b.bofFrames.testTreeIndex[bf.idx], 0, bf.off, bf.length, false, true})
		}
		incoming <- strike{b.bofFrames.testTreeIndex[bf.idx], 0, bf.off, bf.length, false, true}
	}
	// non-blocking check of quit before moving on to the next phase
	select {
	case <-quit: // the matcher has called quit
		for range bfchan {
		} // drain first
		close(incoming)
		return
	default:
	}
	// Do an initial check of BOF sequences
	b.start(true) // start bof matcher if not yet started
	var bchan chan wac.Result
	bchan = b.bAho.Index(rdr)
	for br := range bchan {
		// an Index[0] of -1 is treated as a progress report rather than a hit
		if br.Index[0] == -1 {
			incoming <- progressStrike(br.Offset, false)
			// Past offset 131072 (2^17), pause the BOF pass when maxBOF is
			// negative or more than five times maxEOF; the rest of bchan is
			// consumed later, after or alongside the EOF scan.
			// NOTE(review): maxBOF < 0 presumably means "no BOF limit" - confirm.
			if br.Offset > 131072 && (maxBOF < 0 || maxBOF > maxEOF*5) { // del buf.Stream 2^16 65536 2^17 131072
				break
			}
		} else {
			if config.Debug() {
				fmt.Fprintln(config.Out(), strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false})
			}
			incoming <- strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false}
		}
	}
	// non-blocking check of quit before starting the EOF phase
	select {
	case <-quit: // the matcher has called quit
		for range bchan {
		} // drain first
		close(incoming)
		return
	default:
	}
	// Setup EOF tests
	efchan := b.eofFrames.index(buf, true, quit)
	b.start(false)
	rrdr := siegreader.LimitReverseReaderFrom(buf, maxEOF)
	echan := b.eAho.Index(rrdr)
	// if we have a maximum value on EOF do a sequential search
	if maxEOF >= 0 {
		if maxEOF != 0 {
			_, _ = buf.CanSeek(0, true) // force a full read to enable EOF scan to proceed for streams
		}
		for ef := range efchan {
			if config.Debug() {
				fmt.Fprintln(config.Out(), strike{b.eofFrames.testTreeIndex[ef.idx], 0, ef.off, ef.length, true, true})
			}
			incoming <- strike{b.eofFrames.testTreeIndex[ef.idx], 0, ef.off, ef.length, true, true}
		}
		// Scan complete EOF
		for er := range echan {
			if er.Index[0] == -1 {
				incoming <- progressStrike(er.Offset, true)
			} else {
				if config.Debug() {
					fmt.Fprintln(config.Out(), strike{b.eofSeq.testTreeIndex[er.Index[0]], er.Index[1], er.Offset, er.Length, true, false})
				}
				incoming <- strike{b.eofSeq.testTreeIndex[er.Index[0]], er.Index[1], er.Offset, er.Length, true, false}
			}
		}
		// send a final progress strike with the maximum EOF
		incoming <- progressStrike(int64(maxEOF), true)
		// Finally, finish BOF scan
		for br := range bchan {
			if br.Index[0] == -1 {
				incoming <- progressStrike(br.Offset, false)
			} else {
				if config.Debug() {
					fmt.Fprintln(config.Out(), strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false})
				}
				incoming <- strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false}
			}
		}
		close(incoming)
		return
	}
	// If no maximum on EOF do a parallel search.
	// Each source channel is set to nil once it is closed so that its select
	// case can never fire again; the loop ends when all three are nil.
	for {
		select {
		case br, ok := <-bchan:
			if !ok {
				if maxBOF < 0 && maxEOF != 0 {
					_, _ = buf.CanSeek(0, true) // if we've a limit BOF reader, force a full read to enable EOF scan to proceed for streams
				}
				bchan = nil
			} else {
				if br.Index[0] == -1 {
					incoming <- progressStrike(br.Offset, false)
				} else {
					if config.Debug() {
						fmt.Fprintln(config.Out(), strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false})
					}
					incoming <- strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false}
				}
			}
		case ef, ok := <-efchan:
			if !ok {
				efchan = nil
			} else {
				if config.Debug() {
					fmt.Fprintln(config.Out(), strike{b.eofFrames.testTreeIndex[ef.idx], 0, ef.off, ef.length, true, true})
				}
				incoming <- strike{b.eofFrames.testTreeIndex[ef.idx], 0, ef.off, ef.length, true, true}
			}
		case er, ok := <-echan:
			if !ok {
				echan = nil
			} else {
				if er.Index[0] == -1 {
					incoming <- progressStrike(er.Offset, true)
				} else {
					if config.Debug() {
						fmt.Fprintln(config.Out(), strike{b.eofSeq.testTreeIndex[er.Index[0]], er.Index[1], er.Offset, er.Length, true, false})
					}
					incoming <- strike{b.eofSeq.testTreeIndex[er.Index[0]], er.Index[1], er.Offset, er.Length, true, false}
				}
			}
		}
		if bchan == nil && efchan == nil && echan == nil {
			close(incoming)
			return
		}
	}
}
func main() { flag.Parse() /*//UNCOMMENT TO RUN PROFILER go func() { log.Println(http.ListenAndServe("localhost:6060", nil)) }()*/ // configure home and signature if not default if *home != config.Home() { config.SetHome(*home) } if *sig != config.SignatureBase() { config.SetSignature(*sig) } // handle -update if *update { msg, err := updateSigs() if err != nil { log.Fatalf("[FATAL] failed to update signature file, %v", err) } fmt.Println(msg) return } // handle -hash error hashT := getHash(*hashf) if *hashf != "" && hashT < 0 { log.Fatalf("[FATAL] invalid hash type; choose from %s", hashChoices) } // load and handle signature errors s, err := siegfried.Load(config.Signature()) if err != nil { log.Fatalf("[FATAL] error loading signature file, got: %v", err) } // handle -version if *version { version := config.Version() fmt.Printf("siegfried %d.%d.%d\n%s", version[0], version[1], version[2], s) return } // handle -fpr if *fprflag { log.Printf("FPR server started at %s. Use CTRL-C to quit.\n", config.Fpr()) serveFpr(config.Fpr(), s) return } // check -multi if *multi > maxMulti || *multi < 1 || (*archive && *multi > 1) { log.Println("[WARN] -multi must be > 0 and =< 1024. If -z, -multi must be 1. 
Resetting -multi to 1") *multi = 1 } // start logger lg, err := newLogger(*logf) if err != nil { log.Fatalln(err) } if config.Slow() || config.Debug() { if *serve != "" || *fprflag { log.Fatalln("[FATAL] debug and slow logging cannot be run in server mode") } } // start throttle if *throttlef != 0 { throttle = time.NewTicker(*throttlef) defer throttle.Stop() } // start the printer lenCtxts := *multi if lenCtxts == 1 { lenCtxts = 8 } ctxts := make(chan *context, lenCtxts) go printer(ctxts, lg) // set default writer var w writer switch { case *csvo: w = newCSV(os.Stdout) case *jsono: w = newJSON(os.Stdout) case *droido: w = newDroid(os.Stdout) if len(s.Fields()) != 1 || len(s.Fields()[0]) != 7 { close(ctxts) log.Fatalln("[FATAL] DROID output is limited to signature files with a single PRONOM identifier") } default: w = newYAML(os.Stdout) } // overrite writer with nil writer if logging is to stdout if lg != nil && lg.w == os.Stdout { w = logWriter{} } // setup default waitgroup wg := &sync.WaitGroup{} // setup context pool setCtxPool(s, w, wg, hashT, *archive) // handle -serve if *serve != "" { log.Printf("Starting server at %s. 
Use CTRL-C to quit.\n", *serve) listen(*serve, s, ctxts) return } // handle no file/directory argument if flag.NArg() != 1 { close(ctxts) log.Fatalln("[FATAL] expecting a single file or directory argument") } w.writeHead(s, hashT) // support reading list files from stdin if flag.Arg(0) == "-" { scanner := bufio.NewScanner(os.Stdin) for scanner.Scan() { info, err := os.Stat(scanner.Text()) if err != nil { info, err = retryStat(scanner.Text(), err) } if err != nil || info.IsDir() { ctx := getCtx(scanner.Text(), "", "", 0) ctx.res <- results{fmt.Errorf("failed to identify %s (in scanning mode, inputs must all be files and not directories), got: %v", scanner.Text(), err), nil, nil} ctx.wg.Add(1) ctxts <- ctx } else { identifyFile(getCtx(scanner.Text(), "", info.ModTime().Format(time.RFC3339), info.Size()), ctxts, getCtx) } } } else { err = identify(ctxts, flag.Arg(0), "", *nr, getCtx) } wg.Wait() close(ctxts) w.writeTail() // log time elapsed if !lg.start.IsZero() { fmt.Fprintf(lg.w, "%s %v\n", timeString, time.Since(lg.start)) } if err != nil { log.Fatal(err) } os.Exit(0) }
// IdentifyBuffer identifies a siegreader buffer. Supply the error from Get as the second argument. func (s *Siegfried) IdentifyBuffer(buffer *siegreader.Buffer, err error, name, mime string) ([]core.Identification, error) { if err != nil && err != siegreader.ErrEmpty { return nil, fmt.Errorf("siegfried: error reading file; got %v", err) } recs := make([]core.Recorder, len(s.ids)) for i, v := range s.ids { recs[i] = v.Recorder() if name != "" { recs[i].Active(core.NameMatcher) } if mime != "" { recs[i].Active(core.MIMEMatcher) } if err == nil { recs[i].Active(core.XMLMatcher) recs[i].Active(core.TextMatcher) } } // Log name for debug/slow if config.Debug() || config.Slow() { fmt.Fprintf(config.Out(), "[FILE] %s\n", name) } // Name Matcher if len(name) > 0 && s.nm != nil { nms, _ := s.nm.Identify(name, nil) // we don't care about an error here for v := range nms { for _, rec := range recs { if rec.Record(core.NameMatcher, v) { break } } } } // MIME Matcher if len(mime) > 0 && s.mm != nil { mms, _ := s.mm.Identify(mime, nil) // we don't care about an error here for v := range mms { for _, rec := range recs { if rec.Record(core.MIMEMatcher, v) { break } } } } // Container Matcher if s.cm != nil { if config.Debug() { fmt.Fprintln(config.Out(), ">>START CONTAINER MATCHER") } cms, cerr := s.cm.Identify(name, buffer) for v := range cms { for _, rec := range recs { if rec.Record(core.ContainerMatcher, v) { break } } } if err == nil { err = cerr } } satisfied := true // XML Matcher if s.xm != nil { for _, rec := range recs { if ok, _ := rec.Satisfied(core.XMLMatcher); !ok { satisfied = false break } } if !satisfied { if config.Debug() { fmt.Fprintln(config.Out(), ">>START XML MATCHER") } xms, xerr := s.xm.Identify("", buffer) for v := range xms { for _, rec := range recs { if rec.Record(core.XMLMatcher, v) { break } } } if err == nil { err = xerr } } } satisfied = true // RIFF Matcher if s.rm != nil { for _, rec := range recs { if ok, _ := rec.Satisfied(core.RIFFMatcher); 
!ok { satisfied = false break } } if !satisfied { if config.Debug() { fmt.Fprintln(config.Out(), ">>START RIFF MATCHER") } rms, rerr := s.rm.Identify("", buffer) for v := range rms { for _, rec := range recs { if rec.Record(core.RIFFMatcher, v) { break } } } if err == nil { err = rerr } } } satisfied = true exclude := make([]int, 0, len(recs)) for _, rec := range recs { ok, ex := rec.Satisfied(core.ByteMatcher) if !ok { satisfied = false } else { exclude = append(exclude, ex) } } // Byte Matcher if s.bm != nil && !satisfied { if config.Debug() { fmt.Fprintln(config.Out(), ">>START BYTE MATCHER") } ids, _ := s.bm.Identify("", buffer, exclude...) // we don't care about an error here for v := range ids { for _, rec := range recs { if rec.Record(core.ByteMatcher, v) { break } } } } satisfied = true for _, rec := range recs { if ok, _ := rec.Satisfied(core.TextMatcher); !ok { satisfied = false break } } // Text Matcher if s.tm != nil && !satisfied { ids, _ := s.tm.Identify("", buffer) // we don't care about an error here for v := range ids { for _, rec := range recs { if rec.Record(core.TextMatcher, v) { break } } } } if len(recs) < 2 { return recs[0].Report(), err } var res []core.Identification for idx, rec := range recs { if config.Slow() || config.Debug() { for _, id := range rec.Report() { fmt.Fprintf(config.Out(), "matched: %s\n", id.String()) } } if idx == 0 { res = rec.Report() continue } res = append(res, rec.Report()...) } return res, err }
func (m Matcher) Identify(na string, b *siegreader.Buffer, exclude ...int) (chan core.Result, error) { buf, err := b.Slice(0, 8) if err != nil || buf[0] != 'R' || buf[1] != 'I' || buf[2] != 'F' || buf[3] != 'F' { res := make(chan core.Result) close(res) return res, nil } rcc, rrdr, err := riff.NewReader(siegreader.ReaderFrom(b)) if err != nil { res := make(chan core.Result) close(res) return res, nil } // now make structures for testing uniqs := make(map[riff.FourCC]bool) res := make(chan core.Result) waitset := m.priorities.WaitSet(exclude...) // send and report if satisified send := func(cc riff.FourCC) bool { if config.Debug() { fmt.Fprintf(config.Out(), "riff match %s\n", string(cc[:])) } if uniqs[cc] { return false } uniqs[cc] = true for _, hit := range m.riffs[cc] { if waitset.Check(hit) { if config.Debug() { fmt.Fprintf(config.Out(), "sending riff match %s\n", string(cc[:])) } res <- result{hit, cc} if waitset.Put(hit) { return true } } } return false } // riff walk var descend func(*riff.Reader) bool descend = func(r *riff.Reader) bool { for { chunkID, chunkLen, chunkData, err := r.Next() if err != nil || send(chunkID) { return true } if chunkID == riff.LIST { listType, list, err := riff.NewListReader(chunkLen, chunkData) if err != nil || send(listType) { return true } if descend(list) { return true } } } } // go time go func() { if send(rcc) { close(res) return } descend(rrdr) close(res) }() return res, nil }