func TestParseHex(t *testing.T) { ts, _, _, err := process("x-fmt/8", bsStub1.Hex, false) if err != nil { t.Error("Parse items: Error", err) } if len(ts) != 6 { t.Error("Parse items: Expecting 6 patterns, got", len(ts)) } tok := ts[5] if tok.Min() != 10 || tok.Max() != 10 { t.Error("Parse items: Expecting 10,10, got", tok.Min(), tok.Max()) } tok = ts[3] if tok.Min() != 2 || tok.Max() != 2 { t.Error("Parse items: Expecting 2,2, got", tok.Min(), tok.Max()) } if !tok.Pat().Equals(Range{[]byte{0}, []byte{3}}) { t.Error("Parse items: Expecting [00:03], got", tok.Pat()) } ts, _, _, _ = process("fmt/390", bsStub5.Hex, false) tok = ts[12] if tok.Min() != 5 || tok.Max() != -1 { t.Error("Parse items: Expecting 5-0, got", tok.Min(), tok.Max()) } if !tok.Pat().Equals(patterns.Sequence(processText("7E41"))) { t.Error("Parse items: Expecting 7E41, got", tok.Pat()) } ts, _, _, _ = process("x-fmt/317", bsStub6.Hex, false) seqs := ts[2].Pat().Sequences() if !seqs[0].Equals(patterns.Sequence(processText("0D0A"))) { t.Error("Parse items: Expecting [13 10], got", []byte(seqs[0])) } }
func TestMask(t *testing.T) { apple := Mask{ pat: patterns.Sequence{'a', 'p', 'p', 0, 0, 'l', 'e'}, val: []byte{255, 255, 255, 0, 0, 255, 255}, } apple2 := Mask{ pat: patterns.Sequence{'a', 'p', 'p', 0, 0, 'l', 'e'}, val: []byte{255, 255, 255, 0, 0, 255, 255}, } if !apple.Equals(apple2) { t.Error("Mask fail: Equality") } if r, _ := apple.Test([]byte("apPyzle")); r { t.Error("Mask fail: shouldn't match") } if r, l := apple.Test([]byte("appyzle")); !r || l != 7 { t.Error("Mask fail: should match") } if r, l := apple.TestR([]byte("appyzle")); !r || l != 7 { t.Error("Mask fail: should match reverse") } saver := persist.NewLoadSaver(nil) apple.Save(saver) loader := persist.NewLoadSaver(saver.Bytes()) _ = loader.LoadByte() p := loadMask(loader) if !p.Equals(apple) { t.Errorf("expecting %s, got %s", apple, p) } seqsTest := Mask{ pat: patterns.Sequence("ap"), val: []byte{0xFF, 0xFE}, } if seqs := seqsTest.Sequences(); len(seqs) != 2 || seqs[1][1] != 'q' { t.Error(seqs) } pats, ints := unmask(apple) if len(ints) != 2 || ints[0] != 0 || ints[1] != 2 { t.Errorf("Unmask fail, got ints %v", ints) } if len(pats) != 2 || !pats[0].Equals(patterns.Sequence("app")) || !pats[1].Equals(patterns.Sequence("le")) { t.Errorf("Unmask fail, got pats %v", pats) } pats, ints = unmask(Mask{ pat: patterns.Sequence{'A', 'C', '0', '0', '0', '0'}, val: []byte{0xFF, 0xFF, 0xF0, 0xF0, 0xF0, 0xF0}, }) if len(ints) != 2 || ints[0] != 0 || ints[1] != 0 { t.Errorf("Unmask fail, got ints %v", ints) } if len(pats) != 2 || !pats[0].Equals(patterns.Sequence("AC")) || !pats[1].Equals(Mask{ pat: patterns.Sequence{'0', '0', '0', '0'}, val: []byte{0xF0, 0xF0, 0xF0, 0xF0}, }) { t.Errorf("Unmask fail, got pats %v", pats) } }
func TestProcessGroup(t *testing.T) { // try PRONOM form l := lexPRONOM("test", "(FF|10[!00:10])") <-l.items // discard group entry pat, err := processGroup(l) if err != nil { t.Fatal(err) } expect := patterns.Choice{ patterns.Sequence([]byte{255}), patterns.List{ patterns.Sequence([]byte{16}), patterns.Not{Range{[]byte{0}, []byte{16}}}, }, } if !pat.Equals(expect) { t.Errorf("expecting %v, got %v", expect, pat) } // try container form l = lexPRONOM("test2", "[10 'cats']") <-l.items pat, err = processGroup(l) if err != nil { t.Fatal(err) } expect = patterns.Choice{ patterns.Sequence([]byte{16}), patterns.Sequence([]byte("cats")), } if !pat.Equals(expect) { t.Errorf("expecting %v, got %v", expect, pat) } // try simple l = lexPRONOM("test3", "[00:10]") <-l.items pat, err = processGroup(l) if err != nil { t.Fatal(err) } rng := Range{[]byte{0}, []byte{16}} if !pat.Equals(rng) { t.Errorf("expecting %v, got %v", expect, rng) } }
func magics(m []string) ([]frames.Signature, error) { hx, ascii, hxx, asciix, err := characterise(m) if err != nil { return nil, err } if len(hx) > 0 { sigs := make([]frames.Signature, len(hx)) for i, v := range hx { byts, offs, masks, err := dehex(v, hxx[i]) if err != nil { return nil, err } sigs[i] = make(frames.Signature, len(byts)) for ii, vv := range byts { rel := frames.BOF if ii > 0 { rel = frames.PREV } var pat patterns.Pattern if masks[ii] { pat = patterns.Mask(vv[0]) } else { pat = patterns.Sequence(vv) } sigs[i][ii] = frames.NewFrame(rel, pat, offs[ii], offs[ii]) } } return sigs, nil } else if len(ascii) > 0 { sigs := make([]frames.Signature, len(ascii)) for i, v := range ascii { pat := patterns.Sequence(v) sigs[i] = frames.Signature{frames.NewFrame(frames.BOF, pat, asciix[i], asciix[i])} } return sigs, nil } return nil, nil }
func process(puid, seq string, eof bool) (frames.Signature, int, int, error) { typ := frames.PREV if eof { typ = frames.SUCC } var min, max int l := lexPRONOM(puid, seq) sig := frames.Signature{} for i := l.nextItem(); i.typ != itemEOF; i = l.nextItem() { switch i.typ { case itemError: return nil, 0, 0, errors.New("parse error " + puid + ": " + i.String()) case itemWildSingle: min++ max++ case itemWildStart: min, _ = decodeNum(i.val) case itemCurlyRight: //detect {n} wildcards by checking if the max value has been set if max == 0 { max = min } case itemWildEnd: if i.val == "*" { max = -1 } else { max, _ = decodeNum(i.val) } case itemWild: max = -1 case itemEnterGroup: pat, err := processGroup(l) if err != nil { return nil, 0, 0, errors.New("parse error " + puid + ": " + err.Error()) } sig = append(sig, frames.NewFrame(typ, pat, min, max)) min, max = 0, 0 case itemUnprocessedText: sig = append(sig, frames.NewFrame(typ, patterns.Sequence(processText(i.val)), min, max)) min, max = 0, 0 } } return sig, min, max, nil }
func toPattern(m mappings.Match) (patterns.Pattern, int, int, error) { min, max, err := toOffset(m.Offset) if err != nil { return nil, min, max, err } var pat patterns.Pattern switch m.Typ { case "byte": i, err := strconv.ParseInt(m.Value, 0, 16) if err != nil { return nil, min, max, err } pat = Int8(i) case "big16": i, err := strconv.ParseInt(m.Value, 0, 32) if err != nil { return nil, min, max, err } pat = Big16(i) case "little16": i, err := strconv.ParseInt(m.Value, 0, 32) if err != nil { return nil, min, max, err } pat = Little16(i) case "host16": i, err := strconv.ParseInt(m.Value, 0, 32) if err != nil { return nil, min, max, err } pat = Host16(i) case "big32": i, err := strconv.ParseInt(m.Value, 0, 64) if err != nil { return nil, min, max, err } pat = Big32(i) case "little32": i, err := strconv.ParseInt(m.Value, 0, 64) if err != nil { return nil, min, max, err } pat = Little32(i) case "host32": i, err := strconv.ParseInt(m.Value, 0, 64) if err != nil { return nil, min, max, err } pat = Host32(i) case "string", "": // if no type given, assume string pat = patterns.Sequence(unquote(m.Value)) case "stringignorecase": pat = IgnoreCase(unquote(m.Value)) case "unicodeLE": uints := utf16.Encode([]rune(string(unquote(m.Value)))) buf := make([]byte, len(uints)*2) for i, u := range uints { binary.LittleEndian.PutUint16(buf[i*2:], u) } pat = patterns.Sequence(buf) case "regex": return nil, min, max, nil // ignore regex magic default: return nil, min, max, errors.New("unknown magic type: " + m.Typ + " val: " + m.Value) } if len(m.Mask) > 0 { pat = Mask{pat, unquote(m.Mask)} } return pat, min, max, err }
// groups are chunks of PRONOM/Droid patterns delimited by parentheses or brackets // these chunks represent any non-sequence pattern (choices, ranges, bitmasks, not-patterns etc.) func processGroup(l *lexer) (patterns.Pattern, error) { var ( list patterns.List // bucket to stuff patterns into choice patterns.Choice // bucket to stuff choices into val []byte // bucket to stuff text values not, mask, anyMask, rng bool // retains state from previous tokens ) // when commit a pattern (to the list), go back to zero state reset := func() { val = []byte{} not, mask, anyMask, rng = false, false, false, false } // make a pattern based on the current state makePat := func() patterns.Pattern { if len(val) == 0 { return nil } var pat patterns.Pattern switch { case mask: pat = patterns.Mask(val[0]) case anyMask: pat = patterns.AnyMask(val[0]) default: pat = patterns.Sequence(val) } if not { pat = patterns.Not{pat} } reset() return pat } // add patterns to the choice addChoice := func() (patterns.Choice, error) { switch len(list) { case 0: return nil, errors.New(l.name + " has choice marker without preceding pattern") case 1: choice = append(choice, list[0]) default: choice = append(choice, list) } list = patterns.List{} return choice, nil } for { i := <-l.items switch i.typ { default: return nil, errors.New(l.name + " encountered unexpected token " + i.val) case itemEnterGroup: // recurse e.g. for a range nested within a choice if pat := makePat(); pat != nil { list = append(list, pat) } pat, err := processGroup(l) if err != nil { return nil, err } list = append(list, pat) case itemExitGroup: if pat := makePat(); pat != nil { list = append(list, pat) } if len(choice) > 0 { return addChoice() } else { switch len(list) { case 0: return nil, errors.New(l.name + " has group with no legal pattern") case 1: return list[0], nil default: return list, nil } } case itemRangeMarker: rng = true case itemChoiceMarker: if pat := makePat(); pat != nil { list = append(list, pat) } _, err := addChoice() if err != nil { return nil, err } case itemNotMarker: not = true case itemMaskMarker: mask = true case itemAnyMaskMarker: anyMask = true case itemUnprocessedText: v := processText(i.val) // if it is a range, we need values before and after the range marker, so add it here if rng { r := Range{val, v} if not { list = append(list, patterns.Not{r}) } else { list = append(list, r) } reset() } else { val = v } } } }
Fixed{PREV, 0, patterns.Choice{patterns.Sequence{16}, patterns.Sequence{17}, patterns.Sequence{18}, patterns.Sequence{19}, patterns.Sequence{20}}}, Window{PREV, 46, 1439, patterns.Sequence{255, 254}}, Fixed{PREV, 0, patterns.Choice{patterns.Sequence{16}, patterns.Sequence{17}, patterns.Sequence{18}, patterns.Sequence{19}, patterns.Sequence{20}}}, Window{PREV, 46, 1439, patterns.Sequence{255, 254}}, Fixed{PREV, 0, patterns.Choice{patterns.Sequence{16}, patterns.Sequence{17}, patterns.Sequence{18}, patterns.Sequence{19}, patterns.Sequence{20}}}, Window{PREV, 46, 1439, patterns.Sequence{255, 254}}, Fixed{PREV, 0, patterns.Choice{patterns.Sequence{16}, patterns.Sequence{17}, patterns.Sequence{18}, patterns.Sequence{19}, patterns.Sequence{20}}}, Window{PREV, 46, 1439, patterns.Sequence{255, 254}}, Fixed{PREV, 0, patterns.Choice{patterns.Sequence{16}, patterns.Sequence{17}, patterns.Sequence{18}, patterns.Sequence{19}, patterns.Sequence{20}}}, Window{PREV, 46, 1439, patterns.Sequence{255, 254}}, Fixed{PREV, 0, patterns.Choice{patterns.Sequence{16}, patterns.Sequence{17}, patterns.Sequence{18}, patterns.Sequence{19}, patterns.Sequence{20}}}, Window{PREV, 46, 1439, patterns.Sequence{255, 254}}, Fixed{PREV, 0, patterns.Choice{patterns.Sequence{16}, patterns.Sequence{17}, patterns.Sequence{18}, patterns.Sequence{19}, patterns.Sequence{20}}}, }, 418: { Fixed{BOF, 0, patterns.Sequence("%!PS-Adobe-2.0")}, Window{PREV, 16, 512, patterns.Sequence("%%DocumentNeededResources:")}, Window{PREV, 1, 512, patterns.Sequence("%%+ procset Adobe_Illustrator")}, Fixed{PREV, 0, patterns.Choice{patterns.Sequence("_AI3"), patterns.Sequence("A_AI3")}}, }, 363: { Window{BOF, 0, 320, patterns.Sequence("@@@@@@@@@@@@@@@@@@@@@@")}, Fixed{BOF, 3200, patterns.Sequence{0, 0}}, Fixed{PREV, 15, patterns.Not{patterns.Sequence{0}}}, Fixed{PREV, 3, patterns.Not{patterns.Sequence{0}}}, Fixed{PREV, 2, patterns.Choice{ patterns.Sequence{1, 0}, patterns.List{ patterns.Sequence{0}, patterns.Sequence{8}, // Actual signature has range here },