示例#1
0
文件: index.go 项目: buckhx/diglet
// prefixes returns every \x00-delimited word in data that starts with q.
//
// The offsets reported by idx.Lookup are used directly as positions in data,
// so idx is expected to be the suffix array built over data. A match counts
// as a word prefix when it is immediately preceded by the delimiter or sits
// at the very start of data; the word then extends to the next delimiter or
// to the end of data. Treating both ends of data as implicit delimiters keeps
// the function from indexing out of range when the \x00 padding is missing.
//
// The returned keys are sub-slices of data (no copy is made), in the order
// idx.Lookup reports the matches.
func prefixes(idx *suffixarray.Index, data, q []byte) (keys [][]byte) {
	for _, i := range idx.Lookup(q, -1) {
		// Skip matches that begin in the middle of a word.
		if i > 0 && data[i-1] != zero {
			continue
		}
		// Advance to the delimiter closing this word, stopping at the end of
		// data if the final word is unterminated.
		end := i
		for end < len(data) && data[end] != zero {
			end++
		}
		keys = append(keys, data[i:end])
	}
	return keys
}
示例#2
0
// test_sa_speed times suffix-array construction and lookup for a text and
// pattern obtained from make_pattern_text(T, P, N).
//
// It runs two phases, each repeated until MAX_TIME seconds have elapsed:
// first building a suffixarray.Index over the text, then looking the pattern
// up in the last index built. Every lookup must report exactly N offsets,
// otherwise the function panics. It also panics when T/P is not positive.
//
// A summary line (total matches, seconds per index build, seconds per lookup,
// seconds per match, seconds per pattern byte) is printed to standard output.
//
// Returns the average seconds per index build and the average seconds per
// lookup, in that order.
func test_sa_speed(T int, // size of text
	P int, // size of pattern.
	N int, // number of occurrences of pattern in text
) (float64, float64) {
	ratio := T / P
	fmt.Printf("Test suffix array speed: T=%v, P=%v, N=%v, M=%v\n",
		T, P, N, ratio)
	if ratio <= 0 {
		panic("M must be greater than zero")
	}

	// pattern is the substring searched for; text is what gets indexed.
	pattern, text := make_pattern_text(T, P, N)

	// Phase 1: build the index over and over until the time budget is spent.
	var index *suffixarray.Index
	builds := 0
	began := time.Now()
	for time.Since(began).Seconds() < MAX_TIME {
		index = suffixarray.New(text[:])
		builds++
	}
	if builds < 1 {
		panic("Count index")
	}
	perIndex := time.Since(began).Seconds() / float64(builds)

	// Phase 2: repeat the lookup against the last index built.
	lookups, totalMatches := 0, 0
	began = time.Now()
	for time.Since(began).Seconds() < MAX_TIME {
		if found := len(index.Lookup(pattern, -1)); found != N {
			panic(fmt.Sprintf("%d matched, expected %d, count=%d", found, N, lookups))
		}
		totalMatches += N
		lookups++
	}
	if lookups < 1 {
		panic("Count lookup")
	}
	lookupTotal := time.Since(began).Seconds()
	perLookup := lookupTotal / float64(lookups)

	perMatch := lookupTotal / float64(totalMatches) // duration per pattern match
	perChar := perMatch / float64(P)

	fmt.Printf(" %7d %e %e : %e %e\n", totalMatches, perIndex, perLookup,
		perMatch, perChar)

	return perIndex, perLookup
}