Golang New Beispiele, index/suffixarray.New Golang Beispiele

Beispiel #1

0

Datei anzeigen

Datei: gss.go Projekt: thinkination/genomic-spiral-sieve

// search looks for sequenceToSearchFor in the nucleotide-sequence file at
// fileName. Both the file contents and the query are passed through compress
// before a suffix array is built and queried.
//
// It returns whether a match was found together with the offsets reported by
// the lookup. The lookup is limited to one result, so at most one offset is
// returned. The process exits via log.Fatalf if the file cannot be read.
//
// NOTE(review): every line overwrites the previously compressed one, so only
// the last line of the file ends up in the index — confirm whether the lines
// were meant to be accumulated instead.
func search(fileName string, sequenceToSearchFor string) (bool, []int) {
	lines, err := readLines(fileName)
	if err != nil {
		log.Fatalf("readLines: %s", err)
	}

	var master []byte
	for _, line := range lines {
		master = *(compress(&line))
	}
	index := suffixarray.New(master)

	// Work on a copy so compress never sees a pointer to the parameter itself.
	needle := sequenceToSearchFor
	searchBytes := *(compress(&needle))

	// A limit of 1 stops after a single match; a negative limit would
	// collect every match instead.
	offsets := index.Lookup([]byte(searchBytes), 1)

	return len(offsets) == 1, offsets
}

Beispiel #2

0

Datei anzeigen

Datei: suffixArrayExample.go Projekt: nrshrivatsan/meg

// main builds a suffix array over a short space-separated word list and
// prints every offset at which "sphere" occurs.
func main() {
	// Space-delimited words that make up the indexed text.
	text := "a apple sphere atom atmosphere"

	// The index works on bytes, so the string is converted first.
	sa := suffixarray.New([]byte(text))

	// Lookup runs in O(log(N)*len(s) + len(result)) time, where
	//   N      is the size of the indexed data,
	//   s      is the substring being searched for, and
	//   result is the list of offsets at which s occurs in the data.
	//
	// The pattern is handed over as bytes as well; for example,
	// []byte("apple") is [97 112 112 108 101].
	pattern := []byte("sphere")

	// A negative limit asks for all occurrences.
	hits := sa.Lookup(pattern, -1)

	// The offsets are printed in the order Lookup returned them, which is
	// not sorted.
	fmt.Println(hits)
}

Beispiel #3

0

Datei anzeigen

Datei: main.go Projekt: kyokomi-sandbox/sandbox

// _indexSuffixArray concatenates a fixed list of planet names into one byte
// slice, indexes it with a suffix array, and reports which documents contain
// the query "earth".
//
// The end offset of every document is recorded while concatenating, so a
// match position can be mapped back to its document with a binary search. A
// match only counts when it lies entirely inside one document.
func _indexSuffixArray() string {
	query := "earth"
	planets := []string{
		"mercury", "venus", "earth", "mars",
		"jupiter", "saturn", "uranus", "pluto",
	}

	var (
		corpus []byte
		ends   []int // ends[k] is the exclusive end offset of planets[k] in corpus
	)
	for _, name := range planets {
		corpus = append(corpus, name...)
		ends = append(ends, len(corpus))
	}

	index := suffixarray.New(corpus)

	var results []int
	for _, pos := range index.Lookup([]byte(query), -1) {
		// First document whose end lies beyond the start of the match.
		doc := sort.Search(len(ends), func(k int) bool { return pos < ends[k] })
		if pos+len(query) > ends[doc] {
			continue // the match straddles a document boundary
		}
		results = append(results, doc)
	}

	return fmt.Sprintf("%q is in documents %v\n", query, results)
}

Beispiel #4

0

Datei anzeigen

Datei: utils.go Projekt: paulhammond/docker

// NewTruncIndex returns an empty TruncIndex. The byte buffer and the suffix
// array are each seeded with a single space, using separate slices.
func NewTruncIndex() *TruncIndex {
	idx := new(TruncIndex)
	idx.index = suffixarray.New([]byte{' '})
	idx.ids = make(map[string]bool)
	idx.bytes = []byte{' '}
	return idx
}

Beispiel #5

0

Datei anzeigen

Datei: stw.go Projekt: isaiah/go_scheduler_talk

// main is a stress program: it fills a large buffer with random bytes and
// then lets P goroutines repeatedly build a suffix array over it and run
// lookups for random patterns. It returns once every goroutine has reported
// completion.
func main() {
	const (
		N = 6e5 // size of the indexed buffer in bytes
		M = 100 // length of each random lookup pattern
		I = 20  // suffix arrays built per goroutine
		J = 10  // lookups per suffix array
		P = 2   // number of worker goroutines
	)

	data := make([]byte, N)
	for k := range data {
		data[k] = byte(rand.Intn(255))
	}

	// worker performs the build/lookup rounds and signals on finished.
	worker := func(finished chan<- bool) {
		for round := 0; round < I; round++ {
			sa := suffixarray.New(data)
			for q := 0; q < J; q++ {
				pattern := make([]byte, M)
				for k := range pattern {
					pattern[k] = byte(rand.Intn(255))
				}
				_ = sa.Lookup(pattern, 10)
			}
		}
		finished <- true
	}

	done := make(chan bool, P)
	for started := 0; started < P; started++ {
		go worker(done)
	}
	for pending := P; pending > 0; pending-- {
		<-done
	}
}

Beispiel #6

0

Datei anzeigen

Datei: utils.go Projekt: idvoretskyi/coreos-kubernetes

// Add registers id via addId while holding the index lock and, on success,
// rebuilds the suffix array from idx.bytes. An error from addId is returned
// unchanged, in which case the suffix array is not rebuilt.
func (idx *TruncIndex) Add(id string) error {
	idx.Lock()
	defer idx.Unlock()

	err := idx.addId(id)
	if err == nil {
		idx.index = suffixarray.New(idx.bytes)
	}
	return err
}

Beispiel #7

0

Datei anzeigen

Datei: words_benchmark_test.go Projekt: eliben/code-for-blog

// BenchmarkBuildSuffixArray measures joining the dictionary words into one
// NUL-delimited buffer and building a suffix array over it. Fetching the
// words happens before the timer is reset and is therefore not measured.
func BenchmarkBuildSuffixArray(b *testing.B) {
	dict := getDictWords()

	b.ResetTimer()
	for n := b.N; n > 0; n-- {
		joined := "\x00" + strings.Join(dict, "\x00") + "\x00"
		_ = suffixarray.New([]byte(joined))
	}
}

Beispiel #8

0

Datei anzeigen

Datei: waf.go Projekt: Zigazou/nataraja

// GoSufArray_UserAgentIsClean reports whether the user agent UA contains none
// of the patterns in waf.bad_robots. It builds a suffix array over UA and
// stops at the first pattern that is found.
func (waf *WAF) GoSufArray_UserAgentIsClean(UA []byte) bool {
	sa := suffixarray.New(UA)

	clean := true
	for _, pattern := range waf.bad_robots {
		// One hit is enough, so the lookup is limited to a single result.
		if hits := sa.Lookup(pattern, 1); len(hits) != 0 {
			clean = false
			break
		}
	}
	return clean
}

Beispiel #9

0

Datei anzeigen

Datei: rest.go Projekt: hobbeswalsh/cardgame

// check_url quotes both arguments with strconv.AppendQuoteToASCII, indexes
// the quoted str with a suffix array and looks the quoted substr up in it.
// It returns true only when a match exists and the first reported offset is 0.
//
// NOTE(review): the quoting wraps both values in double quotes, so the
// pattern carries its closing quote along; this looks like it can only match
// when substr and str are equal rather than on any prefix — confirm intent.
func check_url(substr, str string) bool {
	needle := strconv.AppendQuoteToASCII(nil, substr)
	haystack := strconv.AppendQuoteToASCII(nil, str)

	hits := suffixarray.New(haystack).Lookup(needle, -1)
	return len(hits) > 0 && hits[0] == 0
}

Beispiel #10

0

Datei anzeigen

Datei: strings.go Projekt: funkygao/dlogmon

// benchFast times LOOPS single-result lookups of substr in an index built
// over line, prints the total and the per-lookup duration, and returns the
// total. Building the index happens before the clock is started.
func benchFast(line string, substr string) time.Duration {
	idx := fs.New([]byte(line))

	began := time.Now()
	for n := 0; n < LOOPS; n++ {
		idx.Lookup([]byte(substr), 1)
	}
	elapsed := time.Since(began)

	perLookup := elapsed / LOOPS
	fmt.Printf("%10s: %20s\t%16s %10s\n", "fast", substr, elapsed, perLookup)
	return elapsed
}

Beispiel #11

0

Datei anzeigen

Datei: utils.go Projekt: ChaosCloud/docker

// NewTruncIndex builds a TruncIndex pre-populated with ids. Every id is
// recorded in the id set and appended, followed by a space, to the byte
// buffer (which itself starts with a space); the suffix array is then built
// once over the whole buffer.
func NewTruncIndex(ids []string) (idx *TruncIndex) {
	known := make(map[string]bool)
	buf := []byte{' '}
	for _, id := range ids {
		known[id] = true
		buf = append(buf, id...)
		buf = append(buf, ' ')
	}

	idx = &TruncIndex{ids: known, bytes: buf}
	idx.index = suffixarray.New(buf)
	return idx
}

Beispiel #12

0

Datei anzeigen

Datei: main.go Projekt: jstanley0/stripe-ctf-3

// indexFile has the signature of a filepath.WalkFunc. For every regular file
// smaller than 1<<20 bytes that belongs to this server's shard it builds a
// suffix array over the file contents and stores it in s.files, keyed by the
// path with the s.base_path prefix trimmed.
//
// Files are sharded by the last byte of their path modulo 4; paths of other
// shards are ignored. If the walker supplies no file info, its error is
// returned. A file that cannot be read is skipped rather than indexed as
// empty data.
func (s *Searcher) indexFile(path string, info os.FileInfo, err error) error {
	// only index 1/4 of the files per server
	if len(path) == 0 || int(path[len(path)-1])%4 != s.id {
		return nil
	}
	if info == nil {
		// Nothing to inspect for this path; report whatever the walker saw.
		return err
	}
	if !info.Mode().IsRegular() || info.Size() >= (1<<20) {
		return nil
	}

	data, readErr := ioutil.ReadFile(path)
	if readErr != nil {
		// Best effort: leave unreadable files out of the index.
		return nil
	}
	name := strings.TrimPrefix(path, s.base_path)
	s.files[name] = suffixarray.New(data)
	return nil
}

Beispiel #13

0

Datei anzeigen

Datei: utils.go Projekt: paulhammond/docker

// Add inserts id into the index and rebuilds the suffix array.
// It fails if id contains a space, which is the byte appended after every id
// in the buffer, or if id is already present in idx.ids.
func (idx *TruncIndex) Add(id string) error {
	switch _, known := idx.ids[id]; {
	case strings.Contains(id, " "):
		return fmt.Errorf("Illegal character: ' '")
	case known:
		return fmt.Errorf("Id already exists: %s", id)
	}

	idx.ids[id] = true
	idx.bytes = append(append(idx.bytes, id...), ' ')
	idx.index = suffixarray.New(idx.bytes)
	return nil
}

Beispiel #14

0

Datei anzeigen

Datei: words_benchmark_test.go Projekt: eliben/code-for-blog

// BenchmarkLookupXX measures a single-result Lookup of XXwordToTry in a
// suffix array built over the NUL-delimited dictionary words and, on a hit,
// the call to getStringFromIndex for the first offset. Building the index
// happens before the timer is reset.
func BenchmarkLookupXX(b *testing.B) {
	joined := "\x00" + strings.Join(getDictWords(), "\x00") + "\x00"
	data := []byte(joined)
	index := suffixarray.New(data)

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		hits := index.Lookup([]byte(XXwordToTry), 1)
		if len(hits) == 0 {
			continue
		}
		_ = getStringFromIndex(data, hits[0])
	}
}

Beispiel #15

0

Datei anzeigen

Datei: utils.go Projekt: paulhammond/docker

// Delete removes id from the index and rebuilds the suffix array.
// It returns an error if id is not present in idx.ids or if idx.lookup
// fails; in both cases it returns before changing any field itself.
func (idx *TruncIndex) Delete(id string) error {
	if _, known := idx.ids[id]; !known {
		return fmt.Errorf("No such id: %s", id)
	}

	start, end, err := idx.lookup(id)
	if err != nil {
		return err
	}

	delete(idx.ids, id)
	// Close the gap left by bytes[start:end] in place.
	remaining := append(idx.bytes[:start], idx.bytes[end:]...)
	idx.bytes = remaining
	idx.index = suffixarray.New(remaining)
	return nil
}

Beispiel #16

0

Datei anzeigen

Datei: manual_index.go Projekt: nullstyle/mcdev

// Index builds a new suffixarray for the package names previously registered
// with this instance. Every name is written to a buffer preceded by a NUL
// byte, and the suffix array is built over the resulting bytes. A write
// error aborts the build and is returned.
func (idx *ManualIndex) Index() error {
	joined := new(bytes.Buffer)
	for name := range idx.packages {
		if _, err := fmt.Fprintf(joined, "\x00%s", name); err != nil {
			return err
		}
	}

	idx.index = suffixarray.New(joined.Bytes())
	return nil
}

Beispiel #17

0

Datei anzeigen

Datei: index.go Projekt: buckhx/diglet

// sort lazily builds the suffix array over the keys of idx.tiles. The keys
// are joined with the zero separator and the result is wrapped in one more
// separator on each side; those bytes are kept in idx.indexed. Nothing
// happens if idx.index is already set.
func (idx *SuffixIndex) sort() {
	if idx.index != nil {
		return
	}

	keys := make([][]byte, 0, len(idx.tiles))
	for key := range idx.tiles {
		keys = append(keys, []byte(key))
	}

	sep := []byte{zero}
	joined := bytes.Join(keys, sep) // keys separated by zeros
	// Joining the pair {sep, sep} with the key block as the "separator"
	// yields sep + joined + sep, i.e. the block padded with a zero on each end.
	idx.indexed = bytes.Join([][]byte{sep, sep}, joined)
	idx.index = suffixarray.New(idx.indexed)
}

Beispiel #18

0

Datei anzeigen

Datei: words_benchmark_test.go Projekt: eliben/code-for-blog

// TestSuffixArrayFind builds a suffix array over the NUL-delimited dictionary
// words, prints the size of its serialized form, and checks that the
// substring "yrate" can be found.
func TestSuffixArrayFind(t *testing.T) {
	words := getDictWords()
	data := []byte("\x00" + strings.Join(words, "\x00") + "\x00")
	sa := suffixarray.New(data)

	// Serialize into memory only to report how large the index is.
	buf := &bytes.Buffer{}
	if err := sa.Write(buf); err != nil {
		t.Fatalf("writing suffix array: %v", err)
	}
	fmt.Println("size:", buf.Len())

	// The length of a nil slice is 0, so one check covers "nil" and "empty".
	if indices := sa.Lookup([]byte("yrate"), 1); len(indices) == 0 {
		t.Fatal("not found")
	}
}

Beispiel #19

0

Datei anzeigen

// test cross-checks suffixarray.Lookup against simple(data, what): both must
// yield the same number of offsets, and after sorting the Lookup result the
// two lists must agree element by element. Any disagreement panics with both
// offset lists in the message.
func test(data, what []byte) {
	got := suffixarray.New(data).Lookup(what, -1)
	want := simple(data, what)

	if len(got) != len(want) {
		panic(fmt.Sprintf("len mismatch: %+v, %+v", got, want))
	}

	sort.Ints(got)
	for k := range got {
		if got[k] != want[k] {
			panic(fmt.Sprintf("data mismatch: %+v, %+v", got, want))
		}
	}
}

Beispiel #20

0

Datei anzeigen

Datei: dogberry.go Projekt: NovemberFoxtrot/dogberry

// main indexes the file named by the first command-line argument and logs up
// to 100 offsets at which the byte '*' occurs, each together with the byte
// found at that offset.
func main() {
	if len(os.Args) < 2 {
		log.Fatal("usage: dogberry <file>")
	}

	sometext, err := ioutil.ReadFile(os.Args[1])
	if err != nil {
		// Without the file there is nothing to index.
		log.Fatal(err)
	}

	index := suffixarray.New(sometext)

	// Let Lookup cap the result at 100 instead of slicing a list that may
	// hold fewer than 100 matches.
	offsets := index.Lookup([]byte("*"), 100)

	for _, value := range offsets {
		log.Println(value, string(sometext[value]))
	}
}

Beispiel #21

0

Datei anzeigen

Datei: fuzzy.go Projekt: sparrc/fuzzy

// updateSuffixArr rebuilds the suffix array model from the terms in
// model.Data, replacing whatever was stored before. A term is included when
// its Corpus count exceeds model.Threshold or its Query count is positive.
// The terms are stored NUL-delimited in SuffixArrConcat and SuffDivergence is
// reset to 0. The call does nothing unless UseAutocomplete is set.
//
// NOTE(review): the model fields are assigned while only the read lock is
// held — confirm that this cannot race with concurrent readers.
func (model *Model) updateSuffixArr() {
	if !model.UseAutocomplete {
		return
	}

	model.RLock()

	terms := make([]string, 0, 1000)
	for term, count := range model.Data {
		// TODO: query threshold?
		wanted := count.Corpus > model.Threshold || count.Query > 0
		if wanted {
			terms = append(terms, term)
		}
	}

	joined := "\x00" + strings.Join(terms, "\x00") + "\x00"
	model.SuffixArrConcat = joined
	model.SuffixArr = suffixarray.New([]byte(joined))
	model.SuffDivergence = 0

	model.RUnlock()
}

Beispiel #22

0

Datei anzeigen

Datei: ind.go Projekt: hyndio/hyd.me

// The full-text search on golang.org is implemented with a suffix array
// [http://t.cn/hBJekg]; presumably it worked well enough that the suffix
// array was then added to Go's standard library. [http://t.cn/hBJekd]
// http://blog.csdn.net/fxsjy/article/details/6297523
//
// main prints a greeting, indexes a short description of Go and, for every
// occurrence of "hyd", prints the text from that occurrence to the end.
func main() {
	fmt.Println("Hello, 世界")

	text := `The Go programming language is an open source project to make programmers more productive. 
	Go is expressive, concise, clean, and efficient. 
	Its concurrency mechanisms make it easy to write programs that get the most out of multicore 
	and networked machines, hyd, while its novel type system enables flexible 
	and modular program construction. 
	Go compiles quickly to machine code yet has the convenience of garbage collection 
	and the power of run-time reflection. It's a fast, statically typed, 
	compiled language that feels like a dynamically typed, interpreted language.`

	sa := suffixarray.New([]byte(text))
	for _, at := range sa.Lookup([]byte("hyd"), -1) {
		fmt.Println(text[at:])
	}
}

Beispiel #23

0

Datei anzeigen

Datei: hello.go Projekt: peterwilliams97/go-work

// test_sa demonstrates suffix-array lookups: it takes bytes 2 and 3 of a
// repetitive sentence as the pattern, prints the pattern, all of its offsets
// and a list capped at three offsets, and finally prints the two bytes found
// at each offset.
func test_sa() {
	text := []byte("i am a test i am a test i am a test i am a test i am a test")
	pattern := text[2:4]

	sa := suffixarray.New(text)

	// Every offset at which pattern occurs in text.
	all := sa.Lookup(pattern, -1)
	// At most three of those offsets.
	capped := sa.Lookup(pattern, 3)

	fmt.Println("test_sa")
	fmt.Println(string(pattern))
	fmt.Println(all)
	fmt.Println(capped)
	for _, at := range all {
		fmt.Println(string(text[at : at+2]))
	}
}

Beispiel #24

0

Datei anzeigen

Datei: next.go Projekt: nise-nabe/misc-pages

// main reads large.in, splits it into runs of non-whitespace characters with
// suffixarray.FindAllIndex and measures how long 100000000 calls of the
// resulting token iterator take, logging the elapsed time in nanoseconds.
func main() {
	str, err := ioutil.ReadFile("large.in")
	if err != nil {
		// Without the input the loop below would only time an empty iterator.
		log.Fatal(err)
	}

	// next returns the following token on each call and "" once all tokens
	// have been handed out.
	next := func(str string) func() string {
		reg := regexp.MustCompile("\\S+") // constant pattern, always compiles
		is := suffixarray.New([]byte(str)).FindAllIndex(reg, -1)
		return func() (result string) {
			if len(is) < 1 {
				return ""
			}
			result = str[is[0][0]:is[0][1]]
			is = is[1:]
			return
		}
	}(string(str))

	t := time.Now().UnixNano()
	for i := 0; i < 100000000; i++ {
		next()
	}
	log.Println(time.Now().UnixNano() - t)
}

Beispiel #25

0

Datei anzeigen

Datei: next.go Projekt: nise-nabe/misc-pages

// main splits a small multi-line sample into runs of non-whitespace
// characters using suffixarray.FindAllIndex and logs each run in turn.
func main() {
	sample := "1234 2 3333 4 5  aaaaa\ns\naaa\n"

	// Spans of every run of non-whitespace characters in the sample.
	pattern := regexp.MustCompile("\\S+")
	spans := suffixarray.New([]byte(sample)).FindAllIndex(pattern, -1)

	// next yields the tokens one by one and "" when none are left.
	cursor := 0
	next := func() string {
		if cursor >= len(spans) {
			return ""
		}
		span := spans[cursor]
		cursor++
		return sample[span[0]:span[1]]
	}

	for {
		token := next()
		if token == "" {
			break
		}
		log.Println(token)
	}
}

Beispiel #26

0

Datei anzeigen

Datei: convert.go Projekt: ReanGD/go-web-search

// Convert reads the dictionary at path, concatenates the name of every word
// of every group into one '@'-delimited buffer (starting with '@'), builds a
// suffix array over it and writes the serialized array to "morph.dict" in
// the current directory, replacing any previous contents.
//
// It returns the first error from opening or parsing the input, or from
// creating, writing or closing the output file.
func Convert(path string) error {
	file, err := os.Open(path)
	if err != nil {
		return err
	}
	defer file.Close()

	d := createDictReader(createTokenReader(file))

	var buffer bytes.Buffer
	// Writes to a bytes.Buffer always return a nil error, so the results
	// are not checked.
	buffer.WriteRune('@')
	for !d.isDone() {
		g, err := d.nextGroup()
		if err != nil {
			return err
		}
		for _, w := range g.words {
			buffer.WriteString(w.name)
			buffer.WriteRune('@')
		}
	}
	sa := suffixarray.New(buffer.Bytes())

	// O_TRUNC empties an existing file first; without it a shorter index
	// would leave stale bytes of the previous one at the end.
	flags := os.O_CREATE | os.O_WRONLY | os.O_TRUNC
	dictFile, err := os.OpenFile("morph.dict", flags, 0666)
	if err != nil {
		return err
	}

	if err := sa.Write(dictFile); err != nil {
		dictFile.Close()
		return err
	}
	// Report a failed Close: it can mean the data was not fully written.
	return dictFile.Close()
}

Beispiel #27

0

Datei anzeigen

Datei: using_suffixarray.go Projekt: eliben/code-for-blog

// main indexes a small list of fruit names and shows two ways of finding the
// substring "an": Lookup, whose offsets are turned back into words with
// getStringFromIndex, and FindAllIndex with an equivalent literal regexp.
func main() {
	fruits := []string{
		"banana",
		"apple",
		"pear",
		"tangerine",
		"orange",
		"lemon",
		"peach",
		"persimmon",
	}

	// All words go into a single byte slice, separated by \x00 bytes (which
	// do not appear in the words), with one more \x00 on each end.
	data := []byte("\x00" + strings.Join(fruits, "\x00") + "\x00")
	sa := suffixarray.New(data)

	indices := sa.Lookup([]byte("an"), -1)
	if len(indices) == 0 {
		fmt.Println("Lookup: not found")
	} else {
		fmt.Println("Lookup returns:", indices)
	}

	// Reconstruct the matches from the offsets found by Lookup.
	for k := range indices {
		fmt.Println(getStringFromIndex(data, indices[k]))
	}

	// A completely "literal" regexp, mirroring the Lookup call above, to
	// compare what the two methods return. FindAllIndex accepts arbitrary
	// regexps - but beware of the caveat discussed in the blog post.
	literal := regexp.MustCompile("an")
	fmt.Println("FindAllIndex returns:", sa.FindAllIndex(literal, -1))
}

Beispiel #28

0

Datei anzeigen

Datei: index.go Projekt: gnanderson/go

// NewIndex creates a new index for the .go files
// in the directories given by dirnames.
//
// Directories that cannot be read are skipped. When fulltextIndex is set, a
// suffix array over the collected sources is built as well; otherwise the
// file set and sources are released once the files have been visited. The
// throttle value is passed to NewThrottle, and the resulting throttle is
// invoked after every directory entry and every reduced word.
func NewIndex(dirnames <-chan string, fulltextIndex bool, throttle float64) *Index {
	th := NewThrottle(throttle, 100*time.Millisecond) // run at least 0.1s at a time

	// Set up the Indexer, starting with reasonably sized maps.
	var x Indexer
	x.fset = token.NewFileSet()
	x.packages = make(map[string]*Pak, 256)
	x.words = make(map[string]*IndexResult, 8192)

	// Visit every non-directory entry of every directory received.
	for dirname := range dirnames {
		entries, err := fs.ReadDir(dirname)
		if err != nil {
			continue // ignore this directory
		}
		for _, entry := range entries {
			if !entry.IsDir() {
				x.visitFile(dirname, entry, fulltextIndex)
			}
			th.Throttle()
		}
	}

	if !fulltextIndex {
		// Without a text index the file set, the current file and the
		// sources are no longer needed - clear them to help the GC.
		x.fset = nil
		x.sources.Reset()
		x.current = nil // contains reference to fset!
	}

	// Reduce the RunLists of each word into a LookupResult, and pair each
	// word with its canonical spelling for the alternative-spelling pass.
	words := make(map[string]*LookupResult)
	var wlist RunList
	for word, result := range x.words {
		words[word] = &LookupResult{
			Decls:  reduce(result.Decls),
			Others: reduce(result.Others),
		}
		wlist = append(wlist, &wordPair{canonical(word), word})
		th.Throttle()
	}
	x.stats.Words = len(words)

	// Collapse the {canonical(w), w} pairs into AltWords runs
	// {canonical(w), {w}} and key them by canonical spelling.
	alist := wlist.reduce(lessWordPair, newAltWords)
	alts := make(map[string]*AltWords)
	for _, run := range alist {
		a := run.(*AltWords)
		alts[a.Canon] = a
	}

	// The text index is only built on request.
	var suffixes *suffixarray.Index
	if fulltextIndex {
		suffixes = suffixarray.New(x.sources.Bytes())
	}

	return &Index{x.fset, suffixes, words, alts, x.snippets, x.stats}
}

Beispiel #29

0

Datei anzeigen

Datei: index.go Projekt: Bosh-for-Cpi/bosh-2605

// NewIndex creates a new index for the .go files provided by the corpus.
//
// Directories yielded by c.fsDirnames are read in separate goroutines, with
// dirGate limiting how many are in flight, and every directory entry is
// handed to x.visitFile in its own goroutine. After all of that work has
// finished, the collected words are reduced into lookup results and
// alternative spellings, the full-text suffix array is built if
// c.IndexFullText is set, and each identifier list is sorted before the
// Index is assembled.
func (c *Corpus) NewIndex() *Index {
	// initialize Indexer
	// (use some reasonably sized maps to start)
	x := &Indexer{
		c:           c,
		fset:        token.NewFileSet(),
		fsOpenGate:  make(chan bool, maxOpenFiles),
		strings:     make(map[string]string),
		packages:    make(map[Pak]*Pak, 256),
		words:       make(map[string]*IndexResult, 8192),
		throttle:    util.NewThrottle(c.throttle(), 100*time.Millisecond), // run at least 0.1s at a time
		importCount: make(map[string]int),
		packagePath: make(map[string]map[string]bool),
		exports:     make(map[string]map[string]SpotKind),
		idents:      make(map[SpotKind]map[string][]Ident, 4),
	}

	// index all files in the directories given by dirnames
	var wg sync.WaitGroup // outstanding ReadDir + visitFile
	// dirGate has room for maxOpenDirs tokens; a token is taken before a
	// directory goroutine starts and given back when it returns.
	dirGate := make(chan bool, maxOpenDirs)
	for dirname := range c.fsDirnames() {
		// An optional filter on the corpus can exclude directories.
		if c.IndexDirectory != nil && !c.IndexDirectory(dirname) {
			continue
		}
		dirGate <- true
		wg.Add(1)
		go func(dirname string) {
			defer func() { <-dirGate }()
			defer wg.Done()

			list, err := c.fs.ReadDir(dirname)
			if err != nil {
				log.Printf("ReadDir(%q): %v; skipping directory", dirname, err)
				return // ignore this directory
			}
			for _, fi := range list {
				// Each entry is visited in its own goroutine, tracked by
				// the same WaitGroup.
				wg.Add(1)
				go func(fi os.FileInfo) {
					defer wg.Done()
					x.visitFile(dirname, fi)
				}(fi)
			}
		}(dirname)
	}
	// Block until every directory and file goroutine has finished.
	wg.Wait()

	if !c.IndexFullText {
		// the file set, the current file, and the sources are
		// not needed after indexing if no text index is built -
		// help GC and clear them
		x.fset = nil
		x.sources.Reset()
		x.current = nil // contains reference to fset!
	}

	// for each word, reduce the RunLists into a LookupResult;
	// also collect the word with its canonical spelling in a
	// word list for later computation of alternative spellings
	words := make(map[string]*LookupResult)
	var wlist RunList
	for w, h := range x.words {
		decls := reduce(h.Decls)
		others := reduce(h.Others)
		words[w] = &LookupResult{
			Decls:  decls,
			Others: others,
		}
		wlist = append(wlist, &wordPair{canonical(w), w})
		x.throttle.Throttle()
	}
	x.stats.Words = len(words)

	// reduce the word list {canonical(w), w} into
	// a list of AltWords runs {canonical(w), {w}}
	alist := wlist.reduce(lessWordPair, newAltWords)

	// convert alist into a map of alternative spellings
	alts := make(map[string]*AltWords)
	for i := 0; i < len(alist); i++ {
		a := alist[i].(*AltWords)
		alts[a.Canon] = a
	}

	// create text index
	var suffixes *suffixarray.Index
	if c.IndexFullText {
		suffixes = suffixarray.New(x.sources.Bytes())
	}

	// Sort every identifier list using the byPackage ordering.
	for _, idMap := range x.idents {
		for _, ir := range idMap {
			sort.Sort(byPackage(ir))
		}
	}

	return &Index{
		fset:        x.fset,
		suffixes:    suffixes,
		words:       words,
		alts:        alts,
		snippets:    x.snippets,
		stats:       x.stats,
		importCount: x.importCount,
		packagePath: x.packagePath,
		exports:     x.exports,
		idents:      x.idents,
		opts: indexOptions{
			Docs:       x.c.IndexDocs,
			GoCode:     x.c.IndexGoCode,
			FullText:   x.c.IndexFullText,
			MaxResults: x.c.MaxResults,
		},
	}
}

Beispiel #30

0

Datei anzeigen

Datei: index.go Projekt: go-nosql/golang

// NewIndex creates a new index for the .go files
// in the directories given by dirnames.
//
// Directories that cannot be read are skipped. When fulltextIndex is set, a
// suffix array over the collected sources is built as well; otherwise the
// file set and sources are released once the files have been visited.
func NewIndex(dirnames <-chan string, fulltextIndex bool) *Index {
	// Set up the Indexer.
	var x Indexer
	x.fset = token.NewFileSet()
	x.words = make(map[string]*IndexResult)

	// Visit every non-directory entry of every directory received.
	for dirname := range dirnames {
		entries, err := ioutil.ReadDir(dirname)
		if err != nil {
			continue // ignore this directory
		}
		for _, entry := range entries {
			if entry.IsDirectory() {
				continue
			}
			x.visitFile(dirname, entry, fulltextIndex)
		}
	}

	if !fulltextIndex {
		// Without a text index the file set, the current file and the
		// sources are no longer needed - clear them to help the GC.
		x.fset = nil
		x.sources.Reset()
		x.current = nil // contains reference to fset!
	}

	// Reduce the RunLists of each word into a LookupResult, and pair each
	// word with its canonical spelling for the alternative-spelling pass.
	words := make(map[string]*LookupResult)
	var wlist RunList
	for word, result := range x.words {
		words[word] = &LookupResult{
			Decls:  reduce(&result.Decls),
			Others: reduce(&result.Others),
		}
		wlist.Push(&wordPair{canonical(word), word})
	}
	x.stats.Words = len(words)

	// Collapse the {canonical(w), w} pairs into AltWords runs
	// {canonical(w), {w}} and key them by canonical spelling.
	alist := wlist.reduce(lessWordPair, newAltWords)
	alts := make(map[string]*AltWords)
	for i := 0; i < alist.Len(); i++ {
		a := alist.At(i).(*AltWords)
		alts[a.Canon] = a
	}

	// Copy the snippet vector into a plain slice.
	snippets := make([]*Snippet, x.snippets.Len())
	for i := range snippets {
		snippets[i] = x.snippets.At(i).(*Snippet)
	}

	// The text index is only built on request.
	var suffixes *suffixarray.Index
	if fulltextIndex {
		suffixes = suffixarray.New(x.sources.Bytes())
	}

	return &Index{x.fset, suffixes, words, alts, snippets, x.stats}
}