Пример #1
0
// processSubSequence converts a single DROID subsequence into a frames.Signature.
//
// The central ss.Sequence is parsed with process, then any left and right
// fragments are attached with appendFragments. Finally the anchoring frame
// (the first frame, or the last frame when eof is set) is rebuilt so that it
// carries the subsequence's own min/max offsets:
//   - a subsequence at Position 1 is anchored with BOF (EOF when eof is set);
//   - any later subsequence is anchored with PREV (SUCC when eof is set) and
//     vry is forced to true for it.
//
// When no maximum offset is supplied, max is -1 if vry is set; otherwise the
// offset is treated as fixed and max is set equal to min, which is the same
// rule processPRONOM applies to a missing max offset.
//
// An error is returned if parsing fails, if an offset cannot be decoded, or
// if the resulting signature contains no frames to anchor.
func processSubSequence(puid string, ss mappings.SubSequence, eof, vry bool) (frames.Signature, error) {
	sig, _, _, err := process(puid, ss.Sequence, eof)
	if err != nil {
		return nil, err
	}
	if len(ss.LeftFragments) > 0 {
		sig, err = appendFragments(puid, sig, ss.LeftFragments, true, eof)
		if err != nil {
			return nil, err
		}
	}
	if len(ss.RightFragments) > 0 {
		sig, err = appendFragments(puid, sig, ss.RightFragments, false, eof)
		if err != nil {
			return nil, err
		}
	}
	if ss.Position > 1 {
		vry = true
	}
	min, err := decodeNum(ss.SubSeqMinOffset)
	if err != nil {
		return nil, err
	}
	var max int
	switch {
	case ss.SubSeqMaxOffset != "":
		max, err = decodeNum(ss.SubSeqMaxOffset)
		if err != nil {
			return nil, err
		}
	case vry:
		max = -1
	default:
		// fixed offset: the max must not fall below the min
		max = min
	}
	// Without at least one frame there is nothing to anchor; indexing below
	// would otherwise panic.
	if len(sig) == 0 {
		return nil, errors.New("Pronom: encountered empty subsequence, puid " + puid)
	}
	last := len(sig) - 1
	switch {
	case eof && ss.Position == 1:
		sig[last] = frames.NewFrame(frames.EOF, sig[last].Pat(), min, max)
	case eof:
		sig[last] = frames.NewFrame(frames.SUCC, sig[last].Pat(), min, max)
	case ss.Position == 1:
		sig[0] = frames.NewFrame(frames.BOF, sig[0].Pat(), min, max)
	default:
		sig[0] = frames.NewFrame(frames.PREV, sig[0].Pat(), min, max)
	}
	return sig, nil
}
Пример #2
0
// process parses a PRONOM byte-sequence string into a frames.Signature.
//
// The string is tokenised with lexPRONOM. Wildcard items accumulate a pending
// min/max offset; each group or text item then becomes a frame carrying that
// pending offset, after which the offset is reset to 0, 0. Frames are typed
// PREV, or SUCC when eof is set. A max of -1 is produced for itemWild and for
// a "*" upper bound.
//
// Besides the signature, process returns whatever min/max offset is still
// pending after the last frame (i.e. wildcards trailing the final pattern).
//
// An error is returned if the lexer reports an error item, if a group fails
// to parse, or if a wildcard bound cannot be decoded as a number.
func process(puid, seq string, eof bool) (frames.Signature, int, int, error) {
	typ := frames.PREV
	if eof {
		typ = frames.SUCC
	}
	var min, max int
	l := lexPRONOM(puid, seq)
	sig := frames.Signature{}
	for i := l.nextItem(); i.typ != itemEOF; i = l.nextItem() {
		switch i.typ {
		case itemError:
			return nil, 0, 0, errors.New("parse error " + puid + ": " + i.String())
		case itemWildSingle:
			min++
			max++
		case itemWildStart:
			// NOTE(review): this overwrites (rather than adds to) any min
			// already accumulated from preceding single wildcards.
			n, err := decodeNum(i.val)
			if err != nil {
				return nil, 0, 0, errors.New("parse error " + puid + ": " + err.Error())
			}
			min = n
		case itemCurlyRight: //detect {n} wildcards by checking if the max value has been set
			if max == 0 {
				max = min
			}
		case itemWildEnd:
			if i.val == "*" {
				max = -1
			} else {
				n, err := decodeNum(i.val)
				if err != nil {
					return nil, 0, 0, errors.New("parse error " + puid + ": " + err.Error())
				}
				max = n
			}
		case itemWild:
			max = -1
		case itemEnterGroup:
			pat, err := processGroup(l)
			if err != nil {
				return nil, 0, 0, errors.New("parse error " + puid + ": " + err.Error())
			}
			sig = append(sig, frames.NewFrame(typ, pat, min, max))
			min, max = 0, 0
		case itemUnprocessedText:
			sig = append(sig, frames.NewFrame(typ, patterns.Sequence(processText(i.val)), min, max))
			min, max = 0, 0
		}
	}
	return sig, min, max, nil
}
Пример #3
0
// processPRONOM converts the byte sequences of a PRONOM signature into a
// single frames.Signature.
//
// For every byte sequence it decodes the Offset and MaxOffset values, parses
// the Hex string with process, re-anchors the resulting segment according to
// the byte sequence's Position (BOF, variable or EOF) and appends the segment
// to the signature with appendSig.
//
// An error is returned if an offset cannot be decoded, if the hex string
// fails to parse or yields no frames, or if the Position is not one of the
// recognised values.
func processPRONOM(puid string, s mappings.Signature) (frames.Signature, error) {
	sig := make(frames.Signature, 0, 1)
	for _, bs := range s.ByteSequences {
		// check if <Offset> or <MaxOffset> elements are present
		min, err := decodeNum(bs.Offset)
		if err != nil {
			return nil, err
		}
		max, err := decodeNum(bs.MaxOffset)
		if err != nil {
			return nil, err
		}
		// lack of a max offset implies a fixed offset for BOF and EOF seqs (not VAR)
		if max == 0 {
			max = min
		} else {
			max += min // the max offset in a PRONOM report is relative to the "offset" value, not to the BOF/EOF
		}
		eof := bs.Position == pronomeof
		// parse the hexstring
		seg, lmin, lmax, err := process(puid, bs.Hex, eof)
		if err != nil {
			return nil, err
		}
		// reject unknown positions before touching the segment
		switch bs.Position {
		case pronombof, pronomvry, pronomeof:
		default:
			return nil, errors.New("Pronom parse error: invalid ByteSequence position " + bs.Position)
		}
		// every branch below indexes into seg, so an empty segment must be
		// reported rather than allowed to panic
		if len(seg) == 0 {
			return nil, errors.New("Pronom parse error: empty ByteSequence, puid " + puid)
		}
		// check position and add patterns to signature
		switch bs.Position {
		case pronombof:
			// an offset parsed from the hex string itself takes precedence
			if seg[0].Min() != 0 || seg[0].Max() != 0 {
				min, max = seg[0].Min(), seg[0].Max()
			}
			seg[0] = frames.NewFrame(frames.BOF, seg[0].Pat(), min, max)
		case pronomvry:
			if max == 0 {
				max = -1
			}
			if seg[0].Min() != 0 || seg[0].Max() != 0 {
				min, max = seg[0].Min(), seg[0].Max()
			}
			// a variable sequence never gets a fixed offset
			if min == max {
				max = -1
			}
			seg[0] = frames.NewFrame(frames.BOF, seg[0].Pat(), min, max)
		case pronomeof:
			// shift each frame's offsets one place left: a SUCC frame takes
			// the offsets that were recorded on the frame following it
			if len(seg) > 1 {
				for i, f := range seg[:len(seg)-1] {
					seg[i] = frames.NewFrame(frames.SUCC, f.Pat(), seg[i+1].Min(), seg[i+1].Max())
				}
			}
			// handle edge case where there is a {x-y} at end of EOF seq e.g. x-fmt/263
			if lmin != 0 || lmax != 0 {
				min, max = lmin, lmax
			}
			seg[len(seg)-1] = frames.NewFrame(frames.EOF, seg[len(seg)-1].Pat(), min, max)
		}
		// add the segment to the complete signature
		sig = appendSig(sig, seg, bs.Position)
	}
	return sig, nil
}
Пример #4
0
// appendFragments attaches a slice of fragments (left or right) to the central
// droid sequence sig and returns the extended signature.
//
// Fragments are first grouped by Position. Fragments sharing a position must
// have identical MinOffset/MaxOffset strings; such a group becomes a single
// frame holding a patterns.Choice. A position holding one fragment contributes
// the frames parsed from that fragment's Value. A missing MaxOffset is decoded
// as -1, the same value process uses for a "*" maximum.
//
// Groups are then joined to sig in position order: left fragments are
// prepended, right fragments are appended. The group's offsets are written
// onto whichever frame sits at the join on the side dictated by the frame
// type: PREV frames (eof unset) carry the offset on the later frame, SUCC
// frames (eof set) carry it on the earlier frame.
//
// An error is returned if a fragment has no position or a negative one, if
// the positions are not contiguous, if fragments at one position disagree on
// offsets, if a value or offset fails to parse, or if a frame needed to carry
// an offset does not exist (empty fragment or empty sig).
func appendFragments(puid string, sig frames.Signature, frags []mappings.Fragment, left, eof bool) (frames.Signature, error) {
	// First off, group the fragments:
	// droid fragments (right or left) can share positions. If such fragments have same offsets, they are a patterns.Choice. If not, then err.
	var maxPos int
	for _, f := range frags {
		if f.Position == 0 {
			return nil, errors.New("Pronom: encountered fragment without a position, puid " + puid)
		}
		if f.Position > maxPos {
			maxPos = f.Position
		}
	}
	fs := make([][]mappings.Fragment, maxPos)
	for _, f := range frags {
		// zero was rejected above, so only negative positions can fail here
		if f.Position < 1 {
			return nil, errors.New("Pronom: encountered fragment with a negative position, puid " + puid)
		}
		fs[f.Position-1] = append(fs[f.Position-1], f)
	}
	for _, group := range fs {
		// positions must be contiguous; a skipped position leaves an empty group
		if len(group) == 0 {
			return nil, errors.New("Pronom: encountered gap in fragment positions, puid " + puid)
		}
		max, min := group[0].MaxOffset, group[0].MinOffset
		for _, f := range group {
			if f.MaxOffset != max || f.MinOffset != min {
				return nil, errors.New("Pronom: encountered fragments at same positions with different offsets, puid " + puid)
			}
		}
	}
	typ := frames.PREV
	if eof {
		typ = frames.SUCC
	}
	offs := make([][2]int, len(fs))
	ns := make([]frames.Signature, len(fs))
	// iterate over the grouped fragments
	for i, group := range fs {
		if len(group) > 1 {
			choice := patterns.Choice{}
			for _, c := range group {
				pats, _, _, err := process(puid, c.Value, eof)
				if err != nil {
					return nil, err
				}
				switch len(pats) {
				case 0:
					return nil, errors.New("Pronom: encountered empty fragment, puid " + puid)
				case 1:
					choice = append(choice, pats[0].Pat())
				default:
					// a multi-frame alternative is folded into one list pattern
					list := make(patterns.List, len(pats))
					for j, p := range pats {
						list[j] = p.Pat()
					}
					choice = append(choice, list)
				}
			}
			ns[i] = frames.Signature{frames.NewFrame(typ, choice, 0, 0)}
		} else {
			pats, _, _, err := process(puid, group[0].Value, eof)
			if err != nil {
				return nil, err
			}
			ns[i] = pats
		}
		min, err := decodeNum(group[0].MinOffset)
		if err != nil {
			return nil, err
		}
		max := -1
		if group[0].MaxOffset != "" {
			max, err = decodeNum(group[0].MaxOffset)
			if err != nil {
				return nil, err
			}
		}
		offs[i] = [2]int{min, max}
	}
	// Now make the frames by adding in offset information (if left fragments, this needs to be taken from their neighbour)
	switch {
	case left && eof:
		// offset sits on the last frame of the fragment being prepended
		for i, v := range ns {
			if len(v) == 0 {
				return nil, errors.New("Pronom: encountered empty fragment, puid " + puid)
			}
			v[len(v)-1] = frames.NewFrame(frames.SUCC, v[len(v)-1].Pat(), offs[i][0], offs[i][1])
			sig = append(v, sig...)
		}
	case left:
		// offset sits on the current head of sig, the fragment's right-hand neighbour
		for i, v := range ns {
			if len(sig) == 0 {
				return nil, errors.New("Pronom: cannot append fragments to an empty sequence, puid " + puid)
			}
			sig[0] = frames.NewFrame(frames.PREV, sig[0].Pat(), offs[i][0], offs[i][1])
			sig = append(v, sig...)
		}
	case eof:
		// offset sits on the current tail of sig, the fragment's left-hand neighbour
		for i, v := range ns {
			if len(sig) == 0 {
				return nil, errors.New("Pronom: cannot append fragments to an empty sequence, puid " + puid)
			}
			sig[len(sig)-1] = frames.NewFrame(frames.SUCC, sig[len(sig)-1].Pat(), offs[i][0], offs[i][1])
			sig = append(sig, v...)
		}
	default:
		// offset sits on the first frame of the fragment being appended
		for i, v := range ns {
			if len(v) == 0 {
				return nil, errors.New("Pronom: encountered empty fragment, puid " + puid)
			}
			v[0] = frames.NewFrame(frames.PREV, v[0].Pat(), offs[i][0], offs[i][1])
			sig = append(sig, v...)
		}
	}
	return sig, nil
}