func processSubSequence(puid string, ss mappings.SubSequence, eof, vry bool) (frames.Signature, error) { sig, _, _, err := process(puid, ss.Sequence, eof) if err != nil { return nil, err } if len(ss.LeftFragments) > 0 { sig, err = appendFragments(puid, sig, ss.LeftFragments, true, eof) if err != nil { return nil, err } } if len(ss.RightFragments) > 0 { sig, err = appendFragments(puid, sig, ss.RightFragments, false, eof) if err != nil { return nil, err } } if ss.Position > 1 { vry = true } calcOffset := func(minS, maxS string, vry bool) (int, int, error) { min, err := decodeNum(minS) if err != nil { return 0, 0, err } if maxS == "" { if vry { return min, -1, nil } return min, 0, nil } max, err := decodeNum(maxS) if err != nil { return 0, 0, err } return min, max, nil } min, max, err := calcOffset(ss.SubSeqMinOffset, ss.SubSeqMaxOffset, vry) if err != nil { return nil, err } if eof { if ss.Position == 1 { sig[len(sig)-1] = frames.NewFrame(frames.EOF, sig[len(sig)-1].Pat(), min, max) } else { sig[len(sig)-1] = frames.NewFrame(frames.SUCC, sig[len(sig)-1].Pat(), min, max) } } else { if ss.Position == 1 { sig[0] = frames.NewFrame(frames.BOF, sig[0].Pat(), min, max) } else { sig[0] = frames.NewFrame(frames.PREV, sig[0].Pat(), min, max) } } return sig, nil }
func process(puid, seq string, eof bool) (frames.Signature, int, int, error) { typ := frames.PREV if eof { typ = frames.SUCC } var min, max int l := lexPRONOM(puid, seq) sig := frames.Signature{} for i := l.nextItem(); i.typ != itemEOF; i = l.nextItem() { switch i.typ { case itemError: return nil, 0, 0, errors.New("parse error " + puid + ": " + i.String()) case itemWildSingle: min++ max++ case itemWildStart: min, _ = decodeNum(i.val) case itemCurlyRight: //detect {n} wildcards by checking if the max value has been set if max == 0 { max = min } case itemWildEnd: if i.val == "*" { max = -1 } else { max, _ = decodeNum(i.val) } case itemWild: max = -1 case itemEnterGroup: pat, err := processGroup(l) if err != nil { return nil, 0, 0, errors.New("parse error " + puid + ": " + err.Error()) } sig = append(sig, frames.NewFrame(typ, pat, min, max)) min, max = 0, 0 case itemUnprocessedText: sig = append(sig, frames.NewFrame(typ, patterns.Sequence(processText(i.val)), min, max)) min, max = 0, 0 } } return sig, min, max, nil }
// PRONOM func processPRONOM(puid string, s mappings.Signature) (frames.Signature, error) { sig := make(frames.Signature, 0, 1) for _, bs := range s.ByteSequences { // check if <Offset> or <MaxOffset> elements are present min, err := decodeNum(bs.Offset) if err != nil { return nil, err } max, err := decodeNum(bs.MaxOffset) if err != nil { return nil, err } // lack of a max offset implies a fixed offset for BOF and EOF seqs (not VAR) if max == 0 { max = min } else { max = max + min // the max offset in a PRONOM report is relative to the "offset" value, not to the BOF/EOF } var eof bool if bs.Position == pronomeof { eof = true } // parse the hexstring seg, lmin, lmax, err := process(puid, bs.Hex, eof) if err != nil { return nil, err } // check position and add patterns to signature switch bs.Position { case pronombof: if seg[0].Min() != 0 || seg[0].Max() != 0 { min, max = seg[0].Min(), seg[0].Max() } seg[0] = frames.NewFrame(frames.BOF, seg[0].Pat(), min, max) case pronomvry: if max == 0 { max = -1 } if seg[0].Min() != 0 || seg[0].Max() != 0 { min, max = seg[0].Min(), seg[0].Max() } if min == max { max = -1 } seg[0] = frames.NewFrame(frames.BOF, seg[0].Pat(), min, max) case pronomeof: if len(seg) > 1 { for i, f := range seg[:len(seg)-1] { seg[i] = frames.NewFrame(frames.SUCC, f.Pat(), seg[i+1].Min(), seg[i+1].Max()) } } // handle edge case where there is a {x-y} at end of EOF seq e.g. x-fmt/263 if lmin != 0 || lmax != 0 { min, max = lmin, lmax } seg[len(seg)-1] = frames.NewFrame(frames.EOF, seg[len(seg)-1].Pat(), min, max) default: return nil, errors.New("Pronom parse error: invalid ByteSequence position " + bs.Position) } // add the segment to the complete signature sig = appendSig(sig, seg, bs.Position) } return sig, nil }
// append a slice of fragments (left or right) to the central droid sequence func appendFragments(puid string, sig frames.Signature, frags []mappings.Fragment, left, eof bool) (frames.Signature, error) { // First off, group the fragments: // droid fragments (right or left) can share positions. If such fragments have same offsets, they are a patterns.Choice. If not, then err. var maxPos int for _, f := range frags { if f.Position == 0 { return nil, errors.New("Pronom: encountered fragment without a position, puid " + puid) } if f.Position > maxPos { maxPos = f.Position } } fs := make([][]mappings.Fragment, maxPos) for _, f := range frags { fs[f.Position-1] = append(fs[f.Position-1], f) } for _, r := range fs { max, min := r[0].MaxOffset, r[0].MinOffset for _, v := range r { if v.MaxOffset != max || v.MinOffset != min { return nil, errors.New("Pronom: encountered fragments at same positions with different offsets, puid " + puid) } } } typ := frames.PREV if eof { typ = frames.SUCC } var choice patterns.Choice offs := make([][2]int, len(fs)) ns := make([]frames.Signature, len(fs)) //l := len(sig) // iterate over the grouped fragments for i, v := range fs { if len(v) > 1 { choice = patterns.Choice{} for _, c := range v { pats, _, _, err := process(puid, c.Value, eof) if err != nil { return nil, err } if len(pats) > 1 { list := make(patterns.List, len(pats)) for i, v := range pats { list[i] = v.Pat() } choice = append(choice, list) } else { choice = append(choice, pats[0].Pat()) } } ns[i] = frames.Signature{frames.NewFrame(typ, choice, 0, 0)} } else { pats, _, _, err := process(puid, v[0].Value, eof) if err != nil { return nil, err } ns[i] = pats } min, err := decodeNum(v[0].MinOffset) if err != nil { return nil, err } var max int if v[0].MaxOffset == "" { max = -1 } else { max, err = decodeNum(v[0].MaxOffset) if err != nil { return nil, err } } offs[i] = [2]int{min, max} } // Now make the frames by adding in offset information (if left fragments, this needs to be taken from their neighbour) if left { if eof { for i, v := range ns { v[len(v)-1] = frames.NewFrame(frames.SUCC, v[len(v)-1].Pat(), offs[i][0], offs[i][1]) sig = append(v, sig...) } } else { for i, v := range ns { sig[0] = frames.NewFrame(frames.PREV, sig[0].Pat(), offs[i][0], offs[i][1]) sig = append(v, sig...) } } } else { if eof { for i, v := range ns { sig[len(sig)-1] = frames.NewFrame(frames.SUCC, sig[len(sig)-1].Pat(), offs[i][0], offs[i][1]) sig = append(sig, v...) } } else { for i, v := range ns { v[0] = frames.NewFrame(frames.PREV, v[0].Pat(), offs[i][0], offs[i][1]) sig = append(sig, v...) } } } return sig, nil }