Golang New Examples, index/suffixarray.New Golang Examples

Example #1

0

Show file

File: gss.go Project: thinkination/genomic-spiral-sieve

// Searches the given Nucleotide-Sequence file-path for the given 'searchString' using sophisticated compression
func search(fileName string, sequenceToSearchFor string) (bool, []int) {
	var isPresent bool = false
	var master []byte
	var offsets []int

	lines, err := readLines(fileName)
	if err != nil {
		log.Fatalf("readLines: %s", err)
	} else {
		for _, line := range lines {
			master = *(compress(&line))
		}
	}

	// dat, _ := ioutil.ReadFile("./encoded.txt")
	index := suffixarray.New(master)

	searchString := sequenceToSearchFor
	searchBytes := *(compress(&searchString))

	//https://code.google.com/p/go/source/browse/src/pkg/index/suffixarray/suffixarray.go?name=release#190
	//Gets exactly the first match alone

	// offsets = index.Lookup([]byte(searchBytes), -1)

	//https://code.google.com/p/go/source/browse/src/pkg/index/suffixarray/suffixarray.go?name=release#174
	//Gets all matches
	offsets = index.Lookup([]byte(searchBytes), 1)

	if len(offsets) == 1 {
		isPresent = true
	}
	return isPresent, offsets
}

Example #2

0

Show file

File: suffixArrayExample.go Project: nrshrivatsan/meg

func main() {
	//A set of words delimited by space
	words := "a apple sphere atom atmosphere"

	//A suffix array created in golang by converting the given string into bytes
	index := suffixarray.New([]byte(words))

	//Lookup Time complexity =  O(log(N)*len(s) + len(result))

	// N : the size of the indexed data
	// s : substring to be seached for
	// result : array containing integers which represent the index of the given substring 's' in the suffix array

	//NOTE
	// Let's take the following example
	// var s string = "apple"
	// fmt.Println([]byte(s)) would print  = [97 112 112 108 101].
	// Golang Suffix array uses the byte representation of the sub-string "s" in order to perform most optimal comutation

	offsets1 := index.Lookup([]byte("sphere"), -1) // the list of all indices where s occurs in data

	//Prints unsorted array of integers which are the indices of the given substring
	fmt.Println(offsets1)

}

Example #3

0

Show file

File: main.go Project: kyokomi-sandbox/sandbox

func _indexSuffixArray() string {
	docs := []string{
		"mercury", "venus", "earth", "mars",
		"jupiter", "saturn", "uranus", "pluto",
	}

	var data []byte
	var offsets []int

	for _, d := range docs {
		data = append(data, []byte(d)...)
		offsets = append(offsets, len(data))
	}
	sfx := suffixarray.New(data)

	query := "earth"

	idxs := sfx.Lookup([]byte(query), -1)
	var results []int
	for _, idx := range idxs {
		i := sort.Search(len(offsets), func(i int) bool { return offsets[i] > idx })
		if idx+len(query) <= offsets[i] {
			results = append(results, i)
		}
	}

	return fmt.Sprintf("%q is in documents %v\n", query, results)
}

Example #4

0

Show file

File: utils.go Project: paulhammond/docker

func NewTruncIndex() *TruncIndex {
	return &TruncIndex{
		index: suffixarray.New([]byte{' '}),
		ids:   make(map[string]bool),
		bytes: []byte{' '},
	}
}

Example #5

0

Show file

File: stw.go Project: isaiah/go_scheduler_talk

func main() {
	const N = 6e5
	const M = 100
	const I = 20
	const J = 10
	const P = 2
	data := make([]byte, N)
	for i := 0; i < N; i++ {
		data[i] = byte(rand.Intn(255))
	}
	done := make(chan bool, P)
	for p := 0; p < P; p++ {
		go func() {
			for i := 0; i < I; i++ {
				suffix := suffixarray.New(data)
				for j := 0; j < J; j++ {
					str := make([]byte, M)
					for m := 0; m < M; m++ {
						str[m] = byte(rand.Intn(255))
					}
					_ = suffix.Lookup(str, 10)
				}
			}
			done <- true
		}()
	}
	for p := 0; p < P; p++ {
		<-done
	}
}

Example #6

0

Show file

File: utils.go Project: idvoretskyi/coreos-kubernetes

func (idx *TruncIndex) Add(id string) error {
	idx.Lock()
	defer idx.Unlock()
	if err := idx.addId(id); err != nil {
		return err
	}
	idx.index = suffixarray.New(idx.bytes)
	return nil
}

Example #7

0

Show file

File: words_benchmark_test.go Project: eliben/code-for-blog

func BenchmarkBuildSuffixArray(b *testing.B) {
	words := getDictWords()

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		data := []byte("\x00" + strings.Join(words, "\x00") + "\x00")
		_ = suffixarray.New(data)
	}
}

Example #8

0

Show file

File: waf.go Project: Zigazou/nataraja

func (waf *WAF) GoSufArray_UserAgentIsClean(UA []byte) bool {
	index := suffixarray.New(UA)
	for _, robot := range waf.bad_robots {
		if len(index.Lookup(robot, 1)) > 0 {
			return false
		}
	}

	return true
}

Example #9

0

Show file

File: rest.go Project: hobbeswalsh/cardgame

func check_url(substr, str string) bool {
	var dst []byte
	substr_bytes := strconv.AppendQuoteToASCII(dst, substr)
	str_bytes := strconv.AppendQuoteToASCII(dst, str)
	index := suffixarray.New(str_bytes)
	offsets := index.Lookup(substr_bytes, -1)
	if offsets == nil {
		return false
	}
	return offsets[0] == 0
}

Example #10

0

Show file

File: strings.go Project: funkygao/dlogmon

func benchFast(line string, substr string) time.Duration {
	index := fs.New([]byte(line))
	start := time.Now()
	for i := 0; i < LOOPS; i++ {
		index.Lookup([]byte(substr), 1)
	}
	end := time.Now()
	delta := end.Sub(start)
	fmt.Printf("%10s: %20s\t%16s %10s\n", "fast", substr, delta, delta/LOOPS)
	return delta
}

Example #11

0

Show file

File: utils.go Project: ChaosCloud/docker

func NewTruncIndex(ids []string) (idx *TruncIndex) {
	idx = &TruncIndex{
		ids:   make(map[string]bool),
		bytes: []byte{' '},
	}
	for _, id := range ids {
		idx.ids[id] = true
		idx.bytes = append(idx.bytes, []byte(id+" ")...)
	}
	idx.index = suffixarray.New(idx.bytes)
	return
}

Example #12

0

Show file

File: main.go Project: jstanley0/stripe-ctf-3

func (s *Searcher) indexFile(path string, info os.FileInfo, err error) error {
	// only index 1/4 of the files per server
	if int(path[len(path)-1])%4 != s.id {
		return nil
	}
	if info.Mode().IsRegular() && info.Size() < (1<<20) {
		name := strings.TrimPrefix(path, s.base_path)
		data, _ := ioutil.ReadFile(path)
		s.files[name] = suffixarray.New(data)
	}
	return nil
}

Example #13

0

Show file

File: utils.go Project: paulhammond/docker

func (idx *TruncIndex) Add(id string) error {
	if strings.Contains(id, " ") {
		return fmt.Errorf("Illegal character: ' '")
	}
	if _, exists := idx.ids[id]; exists {
		return fmt.Errorf("Id already exists: %s", id)
	}
	idx.ids[id] = true
	idx.bytes = append(idx.bytes, []byte(id+" ")...)
	idx.index = suffixarray.New(idx.bytes)
	return nil
}

Example #14

0

Show file

File: words_benchmark_test.go Project: eliben/code-for-blog

func BenchmarkLookupXX(b *testing.B) {
	words := getDictWords()
	data := []byte("\x00" + strings.Join(words, "\x00") + "\x00")
	sa := suffixarray.New(data)

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		indices := sa.Lookup([]byte(XXwordToTry), 1)
		if len(indices) > 0 {
			_ = getStringFromIndex(data, indices[0])
		}
	}
}

Example #15

0

Show file

File: utils.go Project: paulhammond/docker

func (idx *TruncIndex) Delete(id string) error {
	if _, exists := idx.ids[id]; !exists {
		return fmt.Errorf("No such id: %s", id)
	}
	before, after, err := idx.lookup(id)
	if err != nil {
		return err
	}
	delete(idx.ids, id)
	idx.bytes = append(idx.bytes[:before], idx.bytes[after:]...)
	idx.index = suffixarray.New(idx.bytes)
	return nil
}

Example #16

0

Show file

File: manual_index.go Project: nullstyle/mcdev

// Index builds a new suffixarray for the package names previously registered
// with this instance.
func (idx *ManualIndex) Index() error {
	var buf bytes.Buffer

	for pkg := range idx.packages {
		_, err := fmt.Fprintf(&buf, "\x00%s", pkg)
		if err != nil {
			return err
		}
	}

	idx.index = suffixarray.New(buf.Bytes())
	return nil
}

Example #17

0

Show file

File: index.go Project: buckhx/diglet

func (idx *SuffixIndex) sort() {
	if idx.index == nil {
		keys := make([][]byte, len(idx.tiles))
		i := 0
		for k := range idx.tiles {
			keys[i] = []byte(k)
			i++
		}
		d := []byte{zero}
		b := bytes.Join(keys, d)                    //join w/ zeros
		idx.indexed = bytes.Join([][]byte{d, d}, b) //pad w/ zeros
		idx.index = suffixarray.New(idx.indexed)
	}
}

Example #18

0

Show file

File: words_benchmark_test.go Project: eliben/code-for-blog

func TestSuffixArrayFind(t *testing.T) {
	words := getDictWords()
	data := []byte("\x00" + strings.Join(words, "\x00") + "\x00")
	sa := suffixarray.New(data)

	buf := &bytes.Buffer{}
	sa.Write(buf)
	fmt.Println("size:", buf.Len())

	indices := sa.Lookup([]byte("yrate"), 1)
	if indices == nil || len(indices) < 1 {
		t.Fatal("not found")
	}
}

Example #19

0

Show file

func test(data, what []byte) {
	s := suffixarray.New(data)
	idx0 := s.Lookup(what, -1)
	idx1 := simple(data, what)
	if len(idx0) != len(idx1) {
		panic(fmt.Sprintf("len mismatch: %+v, %+v", idx0, idx1))
	}
	sort.Ints(idx0)
	for i, x := range idx0 {
		if x != idx1[i] {
			panic(fmt.Sprintf("data mismatch: %+v, %+v", idx0, idx1))
		}
	}
}

Example #20

0

Show file

File: dogberry.go Project: NovemberFoxtrot/dogberry

func main() {
	sometext, err := ioutil.ReadFile(os.Args[1])

	if err != nil {
		log.Println(err)
	}

	index := suffixarray.New(sometext)

	offsets := index.Lookup([]byte("*"), -1)

	for _, value := range offsets[0:100] {
		log.Println(value, string(sometext[value]))
	}
}

Example #21

0

Show file

File: fuzzy.go Project: sparrc/fuzzy

// Takes the known dictionary listing and creates a suffix array
// model for these terms. If a model already existed, it is discarded
func (model *Model) updateSuffixArr() {
	if !model.UseAutocomplete {
		return
	}
	model.RLock()
	termArr := make([]string, 0, 1000)
	for term, count := range model.Data {
		if count.Corpus > model.Threshold || count.Query > 0 { // TODO: query threshold?
			termArr = append(termArr, term)
		}
	}
	model.SuffixArrConcat = "\x00" + strings.Join(termArr, "\x00") + "\x00"
	model.SuffixArr = suffixarray.New([]byte(model.SuffixArrConcat))
	model.SuffDivergence = 0
	model.RUnlock()
}

Example #22

0

Show file

File: ind.go Project: hyndio/hyd.me

// golang.org网站的全文搜索是基于suffix array实现的【http://t.cn/hBJekg】，
// 可能觉得效果不错，就把suffix array添加到golang的标准库里面了。【http://t.cn/hBJekd】
// http://blog.csdn.net/fxsjy/article/details/6297523
func main() {
	fmt.Println("Hello, 世界")
	str := `The Go programming language is an open source project to make programmers more productive. 
	Go is expressive, concise, clean, and efficient. 
	Its concurrency mechanisms make it easy to write programs that get the most out of multicore 
	and networked machines, hyd, while its novel type system enables flexible 
	and modular program construction. 
	Go compiles quickly to machine code yet has the convenience of garbage collection 
	and the power of run-time reflection. It's a fast, statically typed, 
	compiled language that feels like a dynamically typed, interpreted language.`

	index := suffixarray.New([]byte(str))
	offsets1 := index.Lookup([]byte("hyd"), -1)
	for _, i := range offsets1 {
		fmt.Println(str[i:])
	}

}

Example #23

0

Show file

File: hello.go Project: peterwilliams97/go-work

func test_sa() {

	data := []byte("i am a test i am a test i am a test i am a test i am a test")
	s := data[2:4]

	// create index for some data
	index := suffixarray.New(data)

	// lookup byte slice s
	offsets1 := index.Lookup(s, -1) // the list of all indices where s occurs in data
	offsets2 := index.Lookup(s, 3)  // the list of at most 3 indices where s occurs in data
	fmt.Println("test_sa")
	fmt.Println(string(s))
	fmt.Println(offsets1)
	fmt.Println(offsets2)
	for _, i := range offsets1 {
		m := data[i : i+2]
		fmt.Println(string(m))
	}
}

Example #24

0

Show file

File: next.go Project: nise-nabe/misc-pages

func main() {
	str, _ := ioutil.ReadFile("large.in")
	next := func(str string) func() string {
		reg, _ := regexp.Compile("\\S+")
		is := suffixarray.New([]byte(str)).FindAllIndex(reg, -1)
		return func() (result string) {
			if len(is) < 1 {
				return ""
			}
			result = str[is[0][0]:is[0][1]]
			is = is[1:]
			return
		}
	}(string(str))
	t := time.Now().UnixNano()
	for i := 0; i < 100000000; i++ {
		next()
	}
	log.Println(time.Now().UnixNano() - t)
}

Example #25

0

Show file

File: next.go Project: nise-nabe/misc-pages

func main() {
	str := `1234 2 3333 4 5  aaaaa
s
aaa
`
	next := func(str string) func() string {
		reg, _ := regexp.Compile("\\S+")
		is := suffixarray.New([]byte(str)).FindAllIndex(reg, -1)
		return func() (result string) {
			if len(is) < 1 {
				return ""
			}
			result = str[is[0][0]:is[0][1]]
			is = is[1:]
			return
		}
	}(str)
	for x := next(); x != ""; x = next() {
		log.Println(x)
	}
}

Example #26

0

Show file

File: convert.go Project: ReanGD/go-web-search

// Convert ...
func Convert(path string) error {
	file, err := os.Open(path)
	if err != nil {
		return err
	}

	defer file.Close()

	d := createDictReader(createTokenReader(file))

	var buffer bytes.Buffer
	_, _ = buffer.WriteRune('@')
	for !d.isDone() {
		g, err := d.nextGroup()
		if err != nil {
			return err
		}
		for _, w := range g.words {
			_, _ = buffer.WriteString(w.name)
			_, _ = buffer.WriteRune('@')
		}
	}
	sa := suffixarray.New(buffer.Bytes()[:])

	flags := os.O_CREATE | os.O_WRONLY
	dictFile, err := os.OpenFile("morph.dict", flags, 0666)
	if err != nil {
		return err
	}

	defer dictFile.Close()

	err = sa.Write(dictFile)
	if err != nil {
		return err
	}

	return nil

}

Example #27

0

Show file

File: using_suffixarray.go Project: eliben/code-for-blog

func main() {
	words := []string{
		"banana",
		"apple",
		"pear",
		"tangerine",
		"orange",
		"lemon",
		"peach",
		"persimmon",
	}

	// Combine all words into a single byte slice, separated by \x00 bytes (which
	// do not appear in words), adding one on each end too.
	data := []byte("\x00" + strings.Join(words, "\x00") + "\x00")
	sa := suffixarray.New(data)

	indices := sa.Lookup([]byte("an"), -1)
	if len(indices) > 0 {
		fmt.Println("Lookup returns:", indices)
	} else {
		fmt.Println("Lookup: not found")
	}

	// Reconstruct matches from indices found by Lookup.
	for _, idx := range indices {
		fmt.Println(getStringFromIndex(data, idx))
	}

	// Here using a completely "literal" regexp, similar to the usage of Lookup,
	// to compare what the two methods return. FindAllIndex can take an arbitrary
	// regexp - but beware of the caveat discussed in the blog post.
	r := regexp.MustCompile("an")
	matches := sa.FindAllIndex(r, -1)
	fmt.Println("FindAllIndex returns:", matches)
}

Example #28

0

Show file

File: index.go Project: gnanderson/go

// NewIndex creates a new index for the .go files
// in the directories given by dirnames.
//
func NewIndex(dirnames <-chan string, fulltextIndex bool, throttle float64) *Index {
	var x Indexer
	th := NewThrottle(throttle, 100*time.Millisecond) // run at least 0.1s at a time

	// initialize Indexer
	// (use some reasonably sized maps to start)
	x.fset = token.NewFileSet()
	x.packages = make(map[string]*Pak, 256)
	x.words = make(map[string]*IndexResult, 8192)

	// index all files in the directories given by dirnames
	for dirname := range dirnames {
		list, err := fs.ReadDir(dirname)
		if err != nil {
			continue // ignore this directory
		}
		for _, f := range list {
			if !f.IsDir() {
				x.visitFile(dirname, f, fulltextIndex)
			}
			th.Throttle()
		}
	}

	if !fulltextIndex {
		// the file set, the current file, and the sources are
		// not needed after indexing if no text index is built -
		// help GC and clear them
		x.fset = nil
		x.sources.Reset()
		x.current = nil // contains reference to fset!
	}

	// for each word, reduce the RunLists into a LookupResult;
	// also collect the word with its canonical spelling in a
	// word list for later computation of alternative spellings
	words := make(map[string]*LookupResult)
	var wlist RunList
	for w, h := range x.words {
		decls := reduce(h.Decls)
		others := reduce(h.Others)
		words[w] = &LookupResult{
			Decls:  decls,
			Others: others,
		}
		wlist = append(wlist, &wordPair{canonical(w), w})
		th.Throttle()
	}
	x.stats.Words = len(words)

	// reduce the word list {canonical(w), w} into
	// a list of AltWords runs {canonical(w), {w}}
	alist := wlist.reduce(lessWordPair, newAltWords)

	// convert alist into a map of alternative spellings
	alts := make(map[string]*AltWords)
	for i := 0; i < len(alist); i++ {
		a := alist[i].(*AltWords)
		alts[a.Canon] = a
	}

	// create text index
	var suffixes *suffixarray.Index
	if fulltextIndex {
		suffixes = suffixarray.New(x.sources.Bytes())
	}

	return &Index{x.fset, suffixes, words, alts, x.snippets, x.stats}
}

Example #29

0

Show file

File: index.go Project: Bosh-for-Cpi/bosh-2605

// NewIndex creates a new index for the .go files provided by the corpus.
func (c *Corpus) NewIndex() *Index {
	// initialize Indexer
	// (use some reasonably sized maps to start)
	x := &Indexer{
		c:           c,
		fset:        token.NewFileSet(),
		fsOpenGate:  make(chan bool, maxOpenFiles),
		strings:     make(map[string]string),
		packages:    make(map[Pak]*Pak, 256),
		words:       make(map[string]*IndexResult, 8192),
		throttle:    util.NewThrottle(c.throttle(), 100*time.Millisecond), // run at least 0.1s at a time
		importCount: make(map[string]int),
		packagePath: make(map[string]map[string]bool),
		exports:     make(map[string]map[string]SpotKind),
		idents:      make(map[SpotKind]map[string][]Ident, 4),
	}

	// index all files in the directories given by dirnames
	var wg sync.WaitGroup // outstanding ReadDir + visitFile
	dirGate := make(chan bool, maxOpenDirs)
	for dirname := range c.fsDirnames() {
		if c.IndexDirectory != nil && !c.IndexDirectory(dirname) {
			continue
		}
		dirGate <- true
		wg.Add(1)
		go func(dirname string) {
			defer func() { <-dirGate }()
			defer wg.Done()

			list, err := c.fs.ReadDir(dirname)
			if err != nil {
				log.Printf("ReadDir(%q): %v; skipping directory", dirname, err)
				return // ignore this directory
			}
			for _, fi := range list {
				wg.Add(1)
				go func(fi os.FileInfo) {
					defer wg.Done()
					x.visitFile(dirname, fi)
				}(fi)
			}
		}(dirname)
	}
	wg.Wait()

	if !c.IndexFullText {
		// the file set, the current file, and the sources are
		// not needed after indexing if no text index is built -
		// help GC and clear them
		x.fset = nil
		x.sources.Reset()
		x.current = nil // contains reference to fset!
	}

	// for each word, reduce the RunLists into a LookupResult;
	// also collect the word with its canonical spelling in a
	// word list for later computation of alternative spellings
	words := make(map[string]*LookupResult)
	var wlist RunList
	for w, h := range x.words {
		decls := reduce(h.Decls)
		others := reduce(h.Others)
		words[w] = &LookupResult{
			Decls:  decls,
			Others: others,
		}
		wlist = append(wlist, &wordPair{canonical(w), w})
		x.throttle.Throttle()
	}
	x.stats.Words = len(words)

	// reduce the word list {canonical(w), w} into
	// a list of AltWords runs {canonical(w), {w}}
	alist := wlist.reduce(lessWordPair, newAltWords)

	// convert alist into a map of alternative spellings
	alts := make(map[string]*AltWords)
	for i := 0; i < len(alist); i++ {
		a := alist[i].(*AltWords)
		alts[a.Canon] = a
	}

	// create text index
	var suffixes *suffixarray.Index
	if c.IndexFullText {
		suffixes = suffixarray.New(x.sources.Bytes())
	}

	for _, idMap := range x.idents {
		for _, ir := range idMap {
			sort.Sort(byPackage(ir))
		}
	}

	return &Index{
		fset:        x.fset,
		suffixes:    suffixes,
		words:       words,
		alts:        alts,
		snippets:    x.snippets,
		stats:       x.stats,
		importCount: x.importCount,
		packagePath: x.packagePath,
		exports:     x.exports,
		idents:      x.idents,
		opts: indexOptions{
			Docs:       x.c.IndexDocs,
			GoCode:     x.c.IndexGoCode,
			FullText:   x.c.IndexFullText,
			MaxResults: x.c.MaxResults,
		},
	}
}

Example #30

0

Show file

File: index.go Project: go-nosql/golang

// NewIndex creates a new index for the .go files
// in the directories given by dirnames.
//
func NewIndex(dirnames <-chan string, fulltextIndex bool) *Index {
	var x Indexer

	// initialize Indexer
	x.fset = token.NewFileSet()
	x.words = make(map[string]*IndexResult)

	// index all files in the directories given by dirnames
	for dirname := range dirnames {
		list, err := ioutil.ReadDir(dirname)
		if err != nil {
			continue // ignore this directory
		}
		for _, f := range list {
			if !f.IsDirectory() {
				x.visitFile(dirname, f, fulltextIndex)
			}
		}
	}

	if !fulltextIndex {
		// the file set, the current file, and the sources are
		// not needed after indexing if no text index is built -
		// help GC and clear them
		x.fset = nil
		x.sources.Reset()
		x.current = nil // contains reference to fset!
	}

	// for each word, reduce the RunLists into a LookupResult;
	// also collect the word with its canonical spelling in a
	// word list for later computation of alternative spellings
	words := make(map[string]*LookupResult)
	var wlist RunList
	for w, h := range x.words {
		decls := reduce(&h.Decls)
		others := reduce(&h.Others)
		words[w] = &LookupResult{
			Decls:  decls,
			Others: others,
		}
		wlist.Push(&wordPair{canonical(w), w})
	}
	x.stats.Words = len(words)

	// reduce the word list {canonical(w), w} into
	// a list of AltWords runs {canonical(w), {w}}
	alist := wlist.reduce(lessWordPair, newAltWords)

	// convert alist into a map of alternative spellings
	alts := make(map[string]*AltWords)
	for i := 0; i < alist.Len(); i++ {
		a := alist.At(i).(*AltWords)
		alts[a.Canon] = a
	}

	// convert snippet vector into a list
	snippets := make([]*Snippet, x.snippets.Len())
	for i := 0; i < x.snippets.Len(); i++ {
		snippets[i] = x.snippets.At(i).(*Snippet)
	}

	// create text index
	var suffixes *suffixarray.Index
	if fulltextIndex {
		suffixes = suffixarray.New(x.sources.Bytes())
	}

	return &Index{x.fset, suffixes, words, alts, snippets, x.stats}
}