//prefixes assumes a \x00 delimited data with \x00 padding func prefixes(idx *suffixarray.Index, data, q []byte) (keys [][]byte) { for _, i := range idx.Lookup(q, -1) { if data[i-1] == zero { //if previous byte is zero, it's a prefix var l int for l = i; data[l] != zero; l++ { // iterating until end of word } k := data[i:l] keys = append(keys, k) } } return }
func test_sa_speed(T int, // size of text P int, // size of pattern. N int, // number of occurrences of pattern in text ) (float64, float64) { M := int(T / P) fmt.Printf("Test suffix array speed: T=%v, P=%v, N=%v, M=%v\n", T, P, N, M) if M <= 0 { panic("M must be greater than zero") } // text: String to be indexed and searched // pattern: Substring to search for pattern, text := make_pattern_text(T, P, N) var ( count int start time.Time ) var text_index *suffixarray.Index count = 0 for start = time.Now(); time.Since(start).Seconds() < MAX_TIME; { text_index = suffixarray.New(text[:]) count++ } if count < 1 { panic("Count index") } duration_index := time.Since(start).Seconds() / float64(count) // fmt.Printf(" count=%d,dt_index=%.1f,duration_index=%g\n", // count, time.Since(start).Seconds(), duration_index) // matches := []int{} // number of pattern matches n_matches := 0 count = 0 for start = time.Now(); time.Since(start).Seconds() < MAX_TIME; { offsets := text_index.Lookup(pattern, -1) if len(offsets) != N { panic(fmt.Sprintf("%d matched, expected %d, count=%d", len(offsets), N, count)) } // matches = append(matches, len(offsets)) n_matches += N count++ } if count < 1 { panic("Count lookup") } duration_lookup_total := time.Since(start).Seconds() duration_lookup := duration_lookup_total / float64(count) // fmt.Printf(" count=%d,dt_lookup=%.1f, duration_lookup=%g\n", // count, duration_lookup_total, duration_lookup) // n_matches := sum(matches) // if n_matches < 1 { // panic("matches") // } duration_match := duration_lookup_total / float64(n_matches) // Duration per pattern match duration_char := duration_match / float64(P) fmt.Printf(" %7d %e %e : %e %e\n", n_matches, duration_index, duration_lookup, duration_match, duration_char) return duration_index, duration_lookup }