コード例 #1
0
ファイル: parse.go プロジェクト: richardlehane/siegfried
// processSubSequence builds the frames.Signature for a single sub-sequence.
//
// The central sequence is parsed with process, the left and right fragments
// (if any) are attached with appendFragments, and finally the anchoring frame
// is re-created with the sub-sequence's own offsets: the last frame when eof
// is set, the first frame otherwise. A sub-sequence at Position 1 is anchored
// with frames.EOF or frames.BOF; any other position uses frames.SUCC or
// frames.PREV.
//
// vry marks the sub-sequence as variable, in which case a missing max offset
// is reported as -1. Sub-sequences past the first position are always treated
// as variable. Any error from process, appendFragments or decodeNum is
// returned unchanged with a nil signature.
func processSubSequence(puid string, ss mappings.SubSequence, eof, vry bool) (frames.Signature, error) {
	sig, _, _, err := process(puid, ss.Sequence, eof)
	if err != nil {
		return nil, err
	}
	// Attach the fragments: left ones first, then right ones.
	if len(ss.LeftFragments) > 0 {
		if sig, err = appendFragments(puid, sig, ss.LeftFragments, true, eof); err != nil {
			return nil, err
		}
	}
	if len(ss.RightFragments) > 0 {
		if sig, err = appendFragments(puid, sig, ss.RightFragments, false, eof); err != nil {
			return nil, err
		}
	}

	first := ss.Position == 1
	variable := vry || ss.Position > 1

	// Work out the offset range of the anchoring frame.
	lo, err := decodeNum(ss.SubSeqMinOffset)
	if err != nil {
		return nil, err
	}
	hi := lo // if not variable, max should be at least min (which is probably 0)
	if ss.SubSeqMaxOffset == "" {
		if variable {
			hi = -1
		}
	} else {
		if hi, err = decodeNum(ss.SubSeqMaxOffset); err != nil {
			return nil, err
		}
		if hi == 0 { // fix bug fmt/837 where there is a min but no max
			hi = lo
		}
	}

	// Pick the frame to re-anchor and the relation it is anchored with.
	rel, at := frames.PREV, 0
	switch {
	case eof && first:
		rel, at = frames.EOF, len(sig)-1
	case eof:
		rel, at = frames.SUCC, len(sig)-1
	case first:
		rel = frames.BOF
	}
	sig[at] = frames.NewFrame(rel, sig[at].Pat(), lo, hi)
	return sig, nil
}
コード例 #2
0
ファイル: mimeinfo.go プロジェクト: richardlehane/siegfried
// toFrames converts a match into the frames that represent it.
//
// toPattern supplies the pattern and its min/max offsets. If it reports an
// error, or yields a nil pattern, toFrames returns nil frames together with
// that error (which is nil in the nil-pattern case).
//
// A pattern that is not a Mask becomes a single frames.BOF frame. A Mask is
// split by unmask into sub-patterns and one integer per sub-pattern: the first
// sub-pattern becomes a frames.BOF frame whose min and max are both shifted by
// the first integer, and every later sub-pattern becomes a frames.PREV frame
// whose min and max both equal its own integer.
func toFrames(m mappings.Match) ([]frames.Frame, error) {
	pat, lo, hi, err := toPattern(m)
	if err != nil || pat == nil {
		return nil, err
	}
	if mask, masked := pat.(Mask); masked {
		pats, gaps := unmask(mask)
		out := make([]frames.Frame, 0, len(pats))
		out = append(out, frames.NewFrame(frames.BOF, pats[0], lo+gaps[0], hi+gaps[0]))
		for i := 1; i < len(pats); i++ {
			out = append(out, frames.NewFrame(frames.PREV, pats[i], gaps[i], gaps[i]))
		}
		return out, nil
	}
	return []frames.Frame{frames.NewFrame(frames.BOF, pat, lo, hi)}, nil
}
コード例 #3
0
ファイル: parse.go プロジェクト: richardlehane/siegfried
// process lexes the PRONOM sequence seq and converts it into a frames.Signature.
//
// Every group or run of text produced by the lexer becomes one frame. Frames
// are created with frames.SUCC when eof is set and frames.PREV otherwise. The
// min/max offsets of a frame are taken from the wildcard items seen since the
// previous frame:
//   - itemWildSingle adds one to both min and max;
//   - itemWildStart sets min;
//   - itemWildEnd sets max (-1 when the value is "*");
//   - itemCurlyRight copies min into max when no max has been set, i.e. {n};
//   - itemWild sets max to -1.
//
// Besides the signature, process returns the min and max accumulated after the
// last frame, i.e. wildcards that trail the final pattern.
//
// An error is returned (with a nil signature) when the lexer reports an error,
// when a group cannot be processed, or when a wildcard bound cannot be decoded
// as a number. puid is only used to label error messages and is passed to the
// lexer.
func process(puid, seq string, eof bool) (frames.Signature, int, int, error) {
	typ := frames.PREV
	if eof {
		typ = frames.SUCC
	}
	var min, max int
	l := lexPRONOM(puid, seq)
	sig := frames.Signature{}
	for i := l.nextItem(); i.typ != itemEOF; i = l.nextItem() {
		switch i.typ {
		case itemError:
			return nil, 0, 0, errors.New("parse error " + puid + ": " + i.String())
		case itemWildSingle:
			min++
			max++
		case itemWildStart:
			// Surface malformed bounds instead of silently using whatever
			// decodeNum returned alongside its error.
			n, err := decodeNum(i.val)
			if err != nil {
				return nil, 0, 0, errors.New("parse error " + puid + ": " + err.Error())
			}
			min = n
		case itemCurlyRight: // detect {n} wildcards by checking if the max value has been set
			if max == 0 {
				max = min
			}
		case itemWildEnd:
			if i.val == "*" {
				max = -1
			} else {
				n, err := decodeNum(i.val)
				if err != nil {
					return nil, 0, 0, errors.New("parse error " + puid + ": " + err.Error())
				}
				max = n
			}
		case itemWild:
			max = -1
		case itemEnterGroup:
			pat, err := processGroup(l)
			if err != nil {
				return nil, 0, 0, errors.New("parse error " + puid + ": " + err.Error())
			}
			sig = append(sig, frames.NewFrame(typ, pat, min, max))
			min, max = 0, 0 // offsets have been consumed by this frame
		case itemUnprocessedText:
			sig = append(sig, frames.NewFrame(typ, patterns.Sequence(processText(i.val)), min, max))
			min, max = 0, 0 // offsets have been consumed by this frame
		}
	}
	return sig, min, max, nil
}
コード例 #4
0
ファイル: parse.go プロジェクト: richardlehane/siegfried
// magics converts a list of magic strings into signatures.
//
// characterise splits the input into hex values (hx, with companion values
// hxx) and ASCII values (ascii, with companion offsets asciix). Hex values
// take precedence: if there is at least one, only the hex values are converted
// and the ASCII values are ignored.
//
// Each hex value is expanded by dehex into byte runs, offsets and mask flags.
// Every run becomes one frame: the first is anchored with frames.BOF, the rest
// with frames.PREV, each at a fixed offset (min == max). A run flagged as a
// mask becomes a patterns.Mask of its first byte; otherwise it becomes a
// patterns.Sequence.
//
// Each ASCII value becomes a single-frame signature anchored with frames.BOF
// at its fixed offset. If there are neither hex nor ASCII values the result is
// nil, nil.
func magics(m []string) ([]frames.Signature, error) {
	hx, ascii, hxx, asciix, err := characterise(m)
	if err != nil {
		return nil, err
	}
	switch {
	case len(hx) > 0:
		sigs := make([]frames.Signature, len(hx))
		for i := range hx {
			byts, offs, masks, err := dehex(hx[i], hxx[i])
			if err != nil {
				return nil, err
			}
			sig := make(frames.Signature, len(byts))
			for j := range byts {
				// Only the first frame is relative to the start of the file.
				rel := frames.PREV
				if j == 0 {
					rel = frames.BOF
				}
				var pat patterns.Pattern
				if !masks[j] {
					pat = patterns.Sequence(byts[j])
				} else {
					pat = patterns.Mask(byts[j][0])
				}
				sig[j] = frames.NewFrame(rel, pat, offs[j], offs[j])
			}
			sigs[i] = sig
		}
		return sigs, nil
	case len(ascii) > 0:
		sigs := make([]frames.Signature, len(ascii))
		for i := range ascii {
			frame := frames.NewFrame(frames.BOF, patterns.Sequence(ascii[i]), asciix[i], asciix[i])
			sigs[i] = frames.Signature{frame}
		}
		return sigs, nil
	}
	return nil, nil
}
コード例 #5
0
ファイル: parse.go プロジェクト: richardlehane/siegfried
// PRONOM

// processPRONOM converts a PRONOM signature into a frames.Signature.
//
// Each byte sequence is handled in turn: its <Offset>/<MaxOffset> values are
// decoded, its hex string is parsed with process, the anchoring frame of the
// resulting segment is rewritten according to the byte sequence's position
// (BOF, variable or EOF), and the segment is merged into the overall signature
// with appendSig.
//
// An error is returned (with a nil signature) when an offset cannot be
// decoded, when the hex string cannot be parsed or yields no frames, or when
// the position is not one of pronombof, pronomvry or pronomeof. puid is used
// for parsing and to label error messages.
func processPRONOM(puid string, s mappings.Signature) (frames.Signature, error) {
	sig := make(frames.Signature, 0, 1)
	for _, bs := range s.ByteSequences {
		// check if <Offset> or <MaxOffset> elements are present
		min, err := decodeNum(bs.Offset)
		if err != nil {
			return nil, err
		}
		max, err := decodeNum(bs.MaxOffset)
		if err != nil {
			return nil, err
		}
		// lack of a max offset implies a fixed offset for BOF and EOF seqs (not VAR)
		if max == 0 {
			max = min
		} else {
			max = max + min // the max offset in a PRONOM report is relative to the "offset" value, not to the BOF/EOF
		}
		eof := bs.Position == pronomeof
		// parse the hexstring
		seg, lmin, lmax, err := process(puid, bs.Hex, eof)
		if err != nil {
			return nil, err
		}
		// A hex string without any patterns (empty, or wildcards only) yields no
		// frames; report it rather than panicking on the index expressions below.
		if len(seg) == 0 {
			return nil, errors.New("Pronom parse error: empty ByteSequence, puid " + puid)
		}
		// check position and add patterns to signature
		switch bs.Position {
		case pronombof:
			// offsets embedded at the start of the hex string take precedence
			if seg[0].Min() != 0 || seg[0].Max() != 0 {
				min, max = seg[0].Min(), seg[0].Max()
			}
			seg[0] = frames.NewFrame(frames.BOF, seg[0].Pat(), min, max)
		case pronomvry:
			if max == 0 {
				max = -1
			}
			if seg[0].Min() != 0 || seg[0].Max() != 0 {
				min, max = seg[0].Min(), seg[0].Max()
			}
			// a variable sequence never has a fixed offset: leave the max unbounded
			if min == max {
				max = -1
			}
			seg[0] = frames.NewFrame(frames.BOF, seg[0].Pat(), min, max)
		case pronomeof:
			// Each frame takes the offsets of the frame that follows it; the
			// last frame is then anchored to EOF below.
			for i := 0; i < len(seg)-1; i++ {
				seg[i] = frames.NewFrame(frames.SUCC, seg[i].Pat(), seg[i+1].Min(), seg[i+1].Max())
			}
			// handle edge case where there is a {x-y} at end of EOF seq e.g. x-fmt/263
			if lmin != 0 || lmax != 0 {
				min, max = lmin, lmax
			}
			seg[len(seg)-1] = frames.NewFrame(frames.EOF, seg[len(seg)-1].Pat(), min, max)
		default:
			return nil, errors.New("Pronom parse error: invalid ByteSequence position " + bs.Position)
		}
		// add the segment to the complete signature
		sig = appendSig(sig, seg, bs.Position)
	}
	return sig, nil
}
コード例 #6
0
ファイル: parse.go プロジェクト: richardlehane/siegfried
// appendFragments attaches a slice of fragments (left or right) to the central
// droid sequence sig and returns the extended signature.
//
// Fragments are grouped by their 1-based Position. Fragments that share a
// position must have identical min/max offsets and are combined into a single
// patterns.Choice frame; a position with one fragment contributes the frames
// produced by process for that fragment. A missing max offset is treated as
// unbounded (-1).
//
// left selects whether the groups are prepended (true) or appended (false) to
// sig; eof selects whether frames are related with frames.SUCC (true) or
// frames.PREV (false). The offsets of a group are stored on whichever frame
// sits on the dependent side of the join, which for some combinations is the
// current edge frame of sig; that frame is rewritten in place.
//
// An error is returned (with a nil signature) when a fragment has a zero or
// negative position, when positions are not contiguous, when fragments at the
// same position disagree on offsets, when a fragment cannot be parsed or
// yields no frames where one is required, or when offsets must be written to
// an empty sig. puid is used for parsing and to label error messages.
func appendFragments(puid string, sig frames.Signature, frags []mappings.Fragment, left, eof bool) (frames.Signature, error) {
	// First off, group the fragments:
	// droid fragments (right or left) can share positions. If such fragments have same offsets, they are a patterns.Choice. If not, then err.
	var maxPos int
	for _, f := range frags {
		if f.Position == 0 {
			return nil, errors.New("Pronom: encountered fragment without a position, puid " + puid)
		}
		if f.Position < 0 {
			return nil, errors.New("Pronom: encountered fragment with a negative position, puid " + puid)
		}
		if f.Position > maxPos {
			maxPos = f.Position
		}
	}
	fs := make([][]mappings.Fragment, maxPos)
	for _, f := range frags {
		fs[f.Position-1] = append(fs[f.Position-1], f)
	}
	for _, r := range fs {
		// Positions must be contiguous: a gap leaves an empty group that has
		// neither a pattern nor offsets.
		if len(r) == 0 {
			return nil, errors.New("Pronom: encountered gap in fragment positions, puid " + puid)
		}
		wantMax, wantMin := r[0].MaxOffset, r[0].MinOffset
		for _, v := range r[1:] {
			if v.MaxOffset != wantMax || v.MinOffset != wantMin {
				return nil, errors.New("Pronom: encountered fragments at same positions with different offsets, puid " + puid)
			}
		}
	}
	typ := frames.PREV
	if eof {
		typ = frames.SUCC
	}
	offs := make([][2]int, len(fs))
	ns := make([]frames.Signature, len(fs))
	// iterate over the grouped fragments
	for i, v := range fs {
		if len(v) > 1 {
			// Several fragments at one position: any one of them may match.
			choice := make(patterns.Choice, 0, len(v))
			for _, c := range v {
				pats, _, _, err := process(puid, c.Value, eof)
				if err != nil {
					return nil, err
				}
				switch len(pats) {
				case 0:
					return nil, errors.New("Pronom: encountered fragment without any patterns, puid " + puid)
				case 1:
					choice = append(choice, pats[0].Pat())
				default:
					// a multi-frame alternative is flattened into a list of its patterns
					list := make(patterns.List, len(pats))
					for j, p := range pats {
						list[j] = p.Pat()
					}
					choice = append(choice, list)
				}
			}
			ns[i] = frames.Signature{frames.NewFrame(typ, choice, 0, 0)}
		} else {
			pats, _, _, err := process(puid, v[0].Value, eof)
			if err != nil {
				return nil, err
			}
			ns[i] = pats
		}
		lo, err := decodeNum(v[0].MinOffset)
		if err != nil {
			return nil, err
		}
		hi := -1 // no max offset given: unbounded
		if v[0].MaxOffset != "" {
			hi, err = decodeNum(v[0].MaxOffset)
			if err != nil {
				return nil, err
			}
		}
		offs[i] = [2]int{lo, hi}
	}
	// Now make the frames by adding in offset information (if left fragments, this needs to be taken from their neighbour)
	switch {
	case left && eof:
		for i, v := range ns {
			if len(v) == 0 {
				return nil, errors.New("Pronom: encountered fragment without any patterns, puid " + puid)
			}
			v[len(v)-1] = frames.NewFrame(frames.SUCC, v[len(v)-1].Pat(), offs[i][0], offs[i][1])
			sig = append(v, sig...)
		}
	case left:
		for i, v := range ns {
			if len(sig) == 0 {
				return nil, errors.New("Pronom: cannot attach fragments to an empty sequence, puid " + puid)
			}
			sig[0] = frames.NewFrame(frames.PREV, sig[0].Pat(), offs[i][0], offs[i][1])
			sig = append(v, sig...)
		}
	case eof:
		for i, v := range ns {
			if len(sig) == 0 {
				return nil, errors.New("Pronom: cannot attach fragments to an empty sequence, puid " + puid)
			}
			sig[len(sig)-1] = frames.NewFrame(frames.SUCC, sig[len(sig)-1].Pat(), offs[i][0], offs[i][1])
			sig = append(sig, v...)
		}
	default:
		for i, v := range ns {
			if len(v) == 0 {
				return nil, errors.New("Pronom: encountered fragment without any patterns, puid " + puid)
			}
			v[0] = frames.NewFrame(frames.PREV, v[0].Pat(), offs[i][0], offs[i][1])
			sig = append(sig, v...)
		}
	}
	return sig, nil
}