Esempio n. 1
0
// outputBigram builds a bigram token from two adjacent ring entries: the
// element immediately before r and the element that follows it.
//
// It returns nil unless *itemsInRing is exactly 2. Otherwise it returns a new
// analysis.Double token whose Term is the first token's Term followed by the
// second token's Term, whose Position is pos, and whose span runs from the
// first token's Start to the second token's End. The source tokens are not
// modified; the returned Term is a freshly allocated slice.
//
// Both ring elements that are read must hold a *analysis.Token; any other
// value makes the type assertion panic.
func (s *CJKBigramFilter) outputBigram(r *ring.Ring, itemsInRing *int, pos int) *analysis.Token {
	if *itemsInRing != 2 {
		return nil
	}

	// Step back one element to reach the older token, then forward again
	// for the newer one.
	node := r.Move(-1)
	prev := node.Value.(*analysis.Token)
	curr := node.Next().Value.(*analysis.Token)

	// The final length is known up front, so allocate once instead of
	// letting append grow the buffer.
	term := make([]byte, 0, len(prev.Term)+len(curr.Term))
	term = append(term, prev.Term...)
	term = append(term, curr.Term...)

	return &analysis.Token{
		Type:     analysis.Double,
		Term:     term,
		Position: pos,
		Start:    prev.Start,
		End:      curr.End,
	}
}
Esempio n. 2
0
// buildUnigram produces a single-character token copied from one ring entry.
//
// Which entry is used depends on *itemsInRing:
//   - 2: the element immediately before r;
//   - 1: the element at r itself;
//   - anything else: no token is produced and nil is returned.
//
// The returned analysis.Single token reuses the source token's Term, Start
// and End, and takes its Position from pos. The selected ring element must
// hold a *analysis.Token, otherwise the type assertion panics.
func (s *CJKBigramFilter) buildUnigram(r *ring.Ring, itemsInRing *int, pos int) *analysis.Token {
	var src *analysis.Token
	switch *itemsInRing {
	case 2:
		src = r.Move(-1).Value.(*analysis.Token)
	case 1:
		src = r.Value.(*analysis.Token)
	default:
		return nil
	}

	return &analysis.Token{
		Type:     analysis.Single,
		Term:     src.Term,
		Position: pos,
		Start:    src.Start,
		End:      src.End,
	}
}
Esempio n. 3
0
// current_extremes returns the int stored at r together with the int stored
// half a ring further on (r.Len()/2 steps forward).
//
// The ring must contain an even number of elements; if it does not, an error
// line is written to standard error and the process exits with status 1.
// The value at r is also printed to standard output on every call. Both
// elements that are read must hold an int, otherwise the type assertion
// panics.
func current_extremes(r *ring.Ring) (int, int) {
	size := r.Len()
	if size%2 == 1 {
		fmt.Fprintln(os.Stderr, "error: Ring must be even")
		os.Exit(1)
	}

	fmt.Println(r.Value)

	here := r.Value.(int)
	opposite := r.Move(size / 2)
	return here, opposite.Value.(int)
}
Esempio n. 4
0
// shingleCurrentRingState emits one shingle token for every size from s.min
// through s.max that the ring currently holds enough items for.
//
// A shingle of size n is assembled from the n ring elements ending at ring
// (that is, starting n-1 steps back), with their Terms joined by
// s.tokenSeparator. The resulting analysis.Shingle token takes:
//   - Position from the first constituent whose Position is non-zero
//     (0 if there is none);
//   - Start from the first constituent whose Start is not -1
//     (left at its zero value if there is none);
//   - End from the last constituent whose End is not -1 (0 if there is none).
//
// The returned stream is always non-nil and is empty when no size fits.
// Every ring element that is read must hold a *analysis.Token, otherwise the
// type assertion panics.
func (s *ShingleFilter) shingleCurrentRingState(ring *ring.Ring, itemsInRing int) analysis.TokenStream {
	shingles := make(analysis.TokenStream, 0)
	for size := s.min; size <= s.max; size++ {
		// Skip sizes the ring cannot fill yet.
		if itemsInRing < size {
			continue
		}

		// Rewind so the walk below ends on the element the caller passed in.
		node := ring.Move(1 - size)
		term := []byte{}
		position, startOffset, endOffset := 0, -1, 0
		for i := 0; i < size; i++ {
			if i > 0 {
				term = append(term, []byte(s.tokenSeparator)...)
			}
			part := node.Value.(*analysis.Token)
			// 0 and -1 act as "not set yet" markers: keep the first real
			// position and start offset, and the most recent real end.
			if position == 0 {
				position = part.Position
			}
			if startOffset == -1 {
				startOffset = part.Start
			}
			if part.End != -1 {
				endOffset = part.End
			}
			term = append(term, part.Term...)
			node = node.Next()
		}

		shingle := &analysis.Token{
			Type:     analysis.Shingle,
			Term:     term,
			Position: position,
			End:      endOffset,
		}
		if startOffset != -1 {
			shingle.Start = startOffset
		}
		shingles = append(shingles, shingle)
	}
	return shingles
}