Esempio n. 1
0
// main times per-variant index lookups. It reads every variant from the VCF
// named by os.Args[1], asks a bix index opened on that same path for a chunked
// reader at the variant's chromosome and start position, and counts the
// newline-terminated lines each reader yields. The total line count and the
// elapsed wall-clock seconds are logged, after which main2 is run.
func main() {
	// Fail with a usage message instead of an index-out-of-range panic.
	if len(os.Args) < 2 {
		log.Fatalf("usage: %s <file>", os.Args[0])
	}
	path := os.Args[1]

	f, err := xopen.Ropen(path)
	check(err)
	vcf, err := vcfgo.NewReader(f, true)
	check(err)
	tbx, err := bix.New(path)
	check(err)
	var rdr io.Reader
	tot := 0
	t0 := time.Now()
	for {
		// A nil variant ends the loop.
		v := vcf.Read()
		if v == nil {
			break
		}

		rdr, err = tbx.ChunkedReader(location{v.Chrom(), int(v.Start()), int(v.Start()) + 1})
		check(err)
		brdr := bufio.NewReader(rdr)
		// Only complete (newline-terminated) lines are counted; the loop
		// stops at the first read error, which includes io.EOF.
		for _, err := brdr.ReadString('\n'); err == nil; _, err = brdr.ReadString('\n') {
			tot++
		}
	}
	log.Println(tot, time.Since(t0).Seconds())

	main2()
}
Esempio n. 2
0
// benchmarkAnno runs b.N complete annotation passes over
// example/query.vcf.gz using the sources described in example/conf.toml and
// the Lua code in example/custom.lua. Every iteration builds a fresh
// annotator, re-opens the query file, and drains the irelate stream into a
// buffered writer that discards its output.
//
// Any setup failure terminates the process (panic for the config decode,
// log.Fatal for everything else).
func benchmarkAnno(b *testing.B) {
	var configs shared.Config
	if _, err := toml.DecodeFile("example/conf.toml", &configs); err != nil {
		panic(err)
	}

	out := bufio.NewWriter(ioutil.Discard)

	// Load the custom Lua source once; a missing or unreadable script is a
	// setup error, not something to silently benchmark without.
	luaFile, err := xopen.Ropen("example/custom.lua")
	if err != nil {
		log.Fatal(err)
	}
	luaBytes, err := ioutil.ReadAll(luaFile)
	if err != nil {
		log.Fatal(err)
	}
	luaString := string(luaBytes)

	srcs, err := configs.Sources()
	if err != nil {
		log.Fatal(err)
	}
	empty := make([]api.PostAnnotation, 0)
	for n := 0; n < b.N; n++ {
		a := api.NewAnnotator(srcs, luaString, false, true, empty)
		qrdr, err := xopen.Ropen("example/query.vcf.gz")
		if err != nil {
			log.Fatal(err)
		}
		qstream, query, err := parsers.VCFIterator(qrdr)
		// Check this error before a.Setup reuses err and hides it.
		if err != nil {
			log.Fatal(err)
		}
		queryables, err := a.Setup(query)
		if err != nil {
			log.Fatal(err)
		}

		fn := func(v interfaces.Relatable) {
			a.AnnotateEnds(v, api.INTERVAL)
		}
		stream := irelate.PIRelate(6000, 20000, qstream, false, fn, queryables...)

		for interval := range stream {
			fmt.Fprintf(out, "%s\n", interval)
		}
		out.Flush()
	}
}
Esempio n. 3
0
// benchmarkAnno runs b.N complete annotation passes over
// example/query.vcf.gz using the sources described in example/conf.toml and
// the JavaScript code in example/custom.js. The annotator and its list of
// annotation files are prepared once; every iteration re-opens the query and
// each annotation file through bix and drains the irelate stream into a
// buffered writer that discards its output.
//
// Any setup failure terminates the process (panic for the config decode,
// log.Fatal for everything else).
func benchmarkAnno(b *testing.B) {
	var configs shared.Config
	if _, err := toml.DecodeFile("example/conf.toml", &configs); err != nil {
		panic(err)
	}

	out := bufio.NewWriter(ioutil.Discard)

	// Load the custom JavaScript source once; a missing or unreadable
	// script is a setup error and must not be ignored.
	jsFile, err := xopen.Ropen("example/custom.js")
	if err != nil {
		log.Fatal(err)
	}
	jsBytes, err := ioutil.ReadAll(jsFile)
	if err != nil {
		log.Fatal(err)
	}
	jsString := string(jsBytes)

	srcs, err := configs.Sources()
	if err != nil {
		log.Fatal(err)
	}
	a := api.NewAnnotator(srcs, jsString, false, true)
	files, err := a.SetupStreams()
	if err != nil {
		log.Fatal(err)
	}
	for n := 0; n < b.N; n++ {
		q, err := bix.New("example/query.vcf.gz", 1)
		if err != nil {
			log.Fatal(err)
		}
		bx, err := q.Query(nil)
		if err != nil {
			log.Fatal(err)
		}
		a.UpdateHeader(q)

		queryables := make([]interfaces.Queryable, len(files))
		for i, f := range files {
			// Named src so it does not shadow the query handle q above.
			src, err := bix.New(f, 1)
			if err != nil {
				log.Fatal(err)
			}
			queryables[i] = src
		}

		fn := func(v interfaces.Relatable) {
			a.AnnotateEnds(v, api.INTERVAL)
		}
		stream := irelate.PIRelate(4000, 20000, bx, false, fn, queryables...)

		for interval := range stream {
			fmt.Fprintf(out, "%s\n", interval)
		}
		out.Flush()
	}
}
Esempio n. 4
0
// main2 times region lookups through cgotbx. It opens the index for
// os.Args[1] once, then performs 1000 passes over the VCF at that path. For
// every variant in a pass it issues 100 identical lookups at the variant's
// chromosome and start position, counts the newline-terminated lines
// returned, and aborts if any lookup yields no lines. After each pass it logs
// the number of lines seen and the seconds spent inside the lookup loops.
func main2() {

	/*
		f, err := os.Create("q.pprof")
		if err != nil {
			panic(err)
		}
		pprof.StartCPUProfile(f)
		defer pprof.StopCPUProfile()
	*/

	// Fail with a usage message instead of an index-out-of-range panic.
	if len(os.Args) < 2 {
		log.Fatalf("usage: %s <file>", os.Args[0])
	}

	t, err := cgotbx.New(os.Args[1])
	if err != nil {
		log.Fatal(err)
	}
	var rdr io.Reader
	for i := 0; i < 1000; i++ {
		tot := 0
		f, err := xopen.Ropen(os.Args[1])
		check(err)
		vcf, err := vcfgo.NewReader(f, true)
		check(err)
		// Accumulate as a time.Duration (int64) so the sum cannot overflow
		// a 32-bit int on platforms where int is 32 bits wide.
		var elapsed time.Duration
		for {
			// A nil variant ends this pass.
			v := vcf.Read()
			if v == nil {
				break
			}

			ts := time.Now()
			for k := 0; k < 100; k++ {
				rdr, err = t.Get(v.Chrom(), int(v.Start()), int(v.Start())+1)
				check(err)
				brdr := bufio.NewReader(rdr)
				j := 0
				// Only complete (newline-terminated) lines are counted.
				for _, err := brdr.ReadString('\n'); err == nil; _, err = brdr.ReadString('\n') {
					tot++
					j++
				}
				if j == 0 {
					log.Fatal("should have found something")
				}
			}
			elapsed += time.Since(ts)
		}
		// Release the handle now rather than holding 1000 open files; a
		// deferred close would not run until main2 returns.
		_ = f.Close()
		log.Println(tot, elapsed.Seconds())
	}
}
Esempio n. 5
0
// ReadJs returns the full contents of the file at path js as a string.
// An empty path yields the empty string without touching the filesystem.
// Failure to open or read the file is logged and terminates the process.
func ReadJs(js string) string {
	if js == "" {
		return ""
	}

	src, openErr := xopen.Ropen(js)
	if openErr != nil {
		log.Fatal(openErr)
	}

	contents, readErr := ioutil.ReadAll(src)
	if readErr != nil {
		log.Fatal(readErr)
	}
	return string(contents)
}
Esempio n. 6
0
// ReadLua returns the full contents of the file at path lua as a string.
// An empty path yields the empty string without touching the filesystem.
// Failure to open or read the file is logged and terminates the process.
func ReadLua(lua string) string {
	if lua == "" {
		return ""
	}

	src, openErr := xopen.Ropen(lua)
	if openErr != nil {
		log.Fatal(openErr)
	}

	contents, readErr := ioutil.ReadAll(src)
	if readErr != nil {
		log.Fatal(readErr)
	}
	return string(contents)
}
Esempio n. 7
0
// main is the vcfanno command-line entry point.
//
// It expects two positional arguments, a TOML config and a query VCF, and
// writes the annotated VCF to stdout. The steps are:
//  1. print a version banner to stderr and parse flags;
//  2. decode and validate the config (annotation and post-annotation
//     sections), rejecting "lua:" ops when no -lua file was given;
//  3. build the annotator, open the query VCF and set up the queryables;
//  4. stream the related intervals through the annotator and print each one
//     with the VCF writer;
//  5. report timing via printTime.
//
// Configuration or I/O failures terminate the process.
func main() {
	fmt.Fprintf(os.Stderr, `
=============================================
vcfanno version %s [built with %s]

see: https://github.com/brentp/vcfanno
=============================================
`, VERSION, runtime.Version())

	ends := flag.Bool("ends", false, "annotate the start and end as well as the interval itself.")
	notstrict := flag.Bool("permissive-overlap", false, "annotate with an overlapping variant even it doesn't"+
		" share the same ref and alt alleles. Default is to require exact match between variants.")
	// The help text previously said "javascript" although this flag feeds ReadLua.
	lua := flag.String("lua", "", "optional path to a file containing custom lua functions to be used as ops")
	base := flag.String("base-path", "", "optional base-path to prepend to annotation files in the config")
	procs := flag.Int("p", 2, "number of processes to use.")
	flag.Parse()
	inFiles := flag.Args()
	if len(inFiles) != 2 {
		fmt.Printf(`Usage:
%s config.toml input.vcf > annotated.vcf

`, os.Args[0])
		flag.PrintDefaults()
		return
	}
	queryFile := inFiles[1]
	if !(xopen.Exists(queryFile) || queryFile == "") {
		fmt.Fprintf(os.Stderr, "\nERROR: can't find query file: %s\n", queryFile)
		os.Exit(2)
	}
	runtime.GOMAXPROCS(*procs)

	var config Config
	if _, err := toml.DecodeFile(inFiles[0], &config); err != nil {
		// Add a hint for this decoder message before failing.
		if strings.Contains(err.Error(), "Expected value but found") {
			fmt.Fprintln(os.Stderr, "\nNOTE: you must quote values in the conf file, e.g. fields=['AC', 'AN'] instead of fields=[AC, AN]")
		}
		panic(err)
	}
	config.Base = *base
	for _, a := range config.Annotation {
		err := CheckAnno(&a)
		if err != nil {
			log.Fatal("CheckAnno err:", err)
		}
		for _, op := range a.Ops {
			if len(op) > 4 && op[:4] == "lua:" && *lua == "" {
				log.Fatal("ERROR: requested lua op without specifying -lua flag")
			}
		}
	}
	for i := range config.PostAnnotation {
		r := config.PostAnnotation[i]
		err := CheckPostAnno(&r)
		if err != nil {
			log.Fatalf("error in postannotaion section %s err: %s", r.Name, err)
		}
		if len(r.Op) > 4 && r.Op[:4] == "lua:" && *lua == "" {
			log.Fatal("ERROR: requested lua op without specifying -lua flag")
		}
	}

	sources, e := config.Sources()
	if e != nil {
		log.Fatal(e)
	}

	log.Printf("found %d sources from %d files\n", len(sources), len(config.Annotation))
	/*
		go func() {
			log.Println(http.ListenAndServe("localhost:6060", nil))
		}()
	*/

	luaString := ReadLua(*lua)
	strict := !*notstrict
	var a = NewAnnotator(sources, luaString, *ends, strict, config.PostAnnotation)

	var out io.Writer = os.Stdout
	defer os.Stdout.Close()

	var err error
	qrdr, err := xopen.Ropen(queryFile)
	if err != nil {
		log.Fatal(fmt.Errorf("error opening query file %s: %s", queryFile, err))
	}
	qstream, query, err := parsers.VCFIterator(qrdr)
	if err != nil {
		log.Fatal(fmt.Errorf("error parsing VCF query file %s: %s", queryFile, err))
	}

	queryables, err := a.Setup(query)
	if err != nil {
		log.Fatal(err)
	}
	aends := INTERVAL
	if *ends {
		aends = BOTH
	}

	// lastMsg is a 10-slot ring buffer of recently reported error strings so
	// that a repeated annotation error is logged once rather than per variant.
	lastMsg := struct {
		sync.RWMutex
		s [10]string
		i int
	}{}

	fn := func(v interfaces.Relatable) {
		e := a.AnnotateEnds(v, aends)
		if e != nil {
			lastMsg.RLock()
			em := e.Error()
			found := false
			for i := len(lastMsg.s) - 1; i >= 0; i-- {
				if em == lastMsg.s[i] {
					found = true
					break
				}
			}
			if !found {
				log.Println(e, ">> this error/warning may occur many times. reporting once here...")
				// Swap the read lock for the write lock before recording.
				lastMsg.RUnlock()
				lastMsg.Lock()
				lastMsg.s[lastMsg.i] = em
				// Wrap the write index back to slot 0 after the last slot.
				if lastMsg.i == len(lastMsg.s)-1 {
					lastMsg.i = -1
				}
				lastMsg.i++

				lastMsg.Unlock()
			} else {
				lastMsg.RUnlock()
			}
		}
	}

	maxGap := envGet("IRELATE_MAX_GAP", 20000)
	maxChunk := envGet("IRELATE_MAX_CHUNK", 8000)

	// make a new writer from the string header.
	out, err = vcfgo.NewWriter(out, query.Header)
	// Fail on a bad header before the relate stream is started.
	if err != nil {
		log.Fatal(err)
	}

	stream := irelate.PIRelate(maxChunk, maxGap, qstream, *ends, fn, queryables...)

	start := time.Now()
	n := 0

	/*
		if os.Getenv("IRELATE_PROFILE") == "TRUE" {
			log.Println("profiling to: irelate.pprof")
			f, err := os.Create("irelate.pprof")
			if err != nil {
				panic(err)
			}
			pprof.StartCPUProfile(f)
			defer pprof.StopCPUProfile()
		}
	*/

	for interval := range stream {
		//log.Printf("%v\n", interval)
		fmt.Fprintln(out, interval)
		n++
	}
	printTime(start, n)
}
Esempio n. 8
0
// main is the vcfanno command-line entry point.
//
// It expects two positional arguments, a TOML config and a query VCF, and
// writes the annotated VCF to stdout. The steps are:
//  1. print a version banner to stderr and parse flags;
//  2. decode and validate the config (annotation and post-annotation
//     sections);
//  3. build the annotator, open the query VCF and set up the queryables;
//  4. stream the related intervals through the annotator and print each one
//     with the VCF writer;
//  5. report timing via printTime.
//
// Environment variables read here:
//   - IRELATE_MAX_GAP: integer passed to irelate.PIRelate as the max gap
//     (default 20000; an unparsable value keeps the default).
//   - IRELATE_PROFILE: when exactly "TRUE", a CPU profile is written to
//     irelate.pprof.
//
// Configuration or I/O failures terminate the process.
func main() {
	fmt.Fprintf(os.Stderr, `
=============================================
vcfanno version %s [built with %s]

see: https://github.com/brentp/vcfanno
=============================================
`, VERSION, runtime.Version())

	ends := flag.Bool("ends", false, "annotate the start and end as well as the interval itself.")
	notstrict := flag.Bool("permissive-overlap", false, "annotate with an overlapping variant even it doesn't"+
		" share the same ref and alt alleles. Default is to require exact match between variants.")
	// The help text previously said "javascript" although this flag feeds ReadLua.
	lua := flag.String("lua", "", "optional path to a file containing custom lua functions to be used as ops")
	base := flag.String("base-path", "", "optional base-path to prepend to annotation files in the config")
	procs := flag.Int("p", 2, "number of processes to use. default is 2")
	flag.Parse()
	inFiles := flag.Args()
	if len(inFiles) != 2 {
		fmt.Printf(`Usage:
%s config.toml intput.vcf > annotated.vcf

`, os.Args[0])
		flag.PrintDefaults()
		return
	}
	queryFile := inFiles[1]
	if !(xopen.Exists(queryFile) || queryFile == "") {
		fmt.Fprintf(os.Stderr, "\nERROR: can't find query file: %s\n", queryFile)
		os.Exit(2)
	}
	runtime.GOMAXPROCS(*procs)

	var config Config
	if _, err := toml.DecodeFile(inFiles[0], &config); err != nil {
		panic(err)
	}
	config.Base = *base
	for _, a := range config.Annotation {
		err := CheckAnno(&a)
		if err != nil {
			log.Fatal("CheckAnno err:", err)
		}
	}
	for i := range config.PostAnnotation {
		r := config.PostAnnotation[i]
		err := CheckPostAnno(&r)
		if err != nil {
			log.Fatalf("error in postannotaion section %s err: %s", r.Name, err)
		}
	}

	sources, e := config.Sources()
	if e != nil {
		log.Fatal(e)
	}

	log.Printf("found %d sources from %d files\n", len(sources), len(config.Annotation))
	/*
		go func() {
			log.Println(http.ListenAndServe("localhost:6060", nil))
		}()
	*/

	luaString := ReadLua(*lua)
	strict := !*notstrict
	var a = NewAnnotator(sources, luaString, *ends, strict, config.PostAnnotation)

	var out io.Writer = os.Stdout
	defer os.Stdout.Close()

	var err error
	qrdr, err := xopen.Ropen(queryFile)
	if err != nil {
		log.Fatal(fmt.Errorf("error opening query file %s: %s", queryFile, err))
	}
	qstream, query, err := parsers.VCFIterator(qrdr)
	if err != nil {
		log.Fatal(fmt.Errorf("error parsing VCF query file %s: %s", queryFile, err))
	}

	queryables, err := a.Setup(query)
	if err != nil {
		log.Fatal(err)
	}
	aends := INTERVAL
	if *ends {
		aends = BOTH
	}

	fn := func(v interfaces.Relatable) {
		e := a.AnnotateEnds(v, aends)
		if e != nil {
			log.Println(e)
		}
	}

	maxGap := 20000
	if smaxGap := os.Getenv("IRELATE_MAX_GAP"); smaxGap != "" {
		// Parse into a temporary: strconv.Atoi returns 0 on failure, and
		// assigning that straight to maxGap would replace the default.
		parsed, perr := strconv.Atoi(smaxGap)
		if perr != nil {
			log.Printf("couldn't parse %s using %d\n", smaxGap, maxGap)
		} else {
			maxGap = parsed
			log.Printf("using maxGap of %d\n", maxGap)
		}
	}

	stream := irelate.PIRelate(8000, maxGap, qstream, *ends, fn, queryables...)

	// make a new writer from the string header.
	out, err = vcfgo.NewWriter(out, query.Header)

	if err != nil {
		log.Fatal(err)
	}

	start := time.Now()
	n := 0

	if os.Getenv("IRELATE_PROFILE") == "TRUE" {
		log.Println("profiling to: irelate.pprof")
		f, err := os.Create("irelate.pprof")
		if err != nil {
			panic(err)
		}
		// Deferred calls run last-in first-out, so the profile is stopped
		// (and flushed) before the file is closed.
		defer f.Close()
		if err := pprof.StartCPUProfile(f); err != nil {
			panic(err)
		}
		defer pprof.StopCPUProfile()
	}

	for interval := range stream {
		//log.Printf("%v\n", interval)
		fmt.Fprintln(out, interval)
		n++
	}
	printTime(start, n)
}