Example #1
0
// main relates the intervals of the first file named on the command line to
// the remaining files via irelate.PIRelate and writes one tab-separated line
// per interval to stdout: chrom, start, end and the number of related
// intervals. A CPU profile of the run is written to irelate.cpu.pprof.
func main() {
	files := os.Args[1:]
	if len(files) == 0 {
		// files[0] below would otherwise panic with an index error.
		log.Fatalf("usage: %s query-file [other-file ...]", os.Args[0])
	}

	prof, err := os.Create("irelate.cpu.pprof")
	if err != nil {
		panic(err)
	}
	// Deferred calls run last-in first-out: the profile is stopped (and its
	// data written) before the file is closed.
	defer prof.Close()
	if err := pprof.StartCPUProfile(prof); err != nil {
		panic(err)
	}
	defer pprof.StopCPUProfile()

	buf := bufio.NewWriter(os.Stdout)
	b, err := bix.New(files[0], 1)
	check(err)
	bx, err := b.Query(nil)
	check(err)

	queryables := make([]I.Queryable, len(files)-1)
	for i, name := range files[1:] {
		q, err := bix.New(name, 1)
		if err != nil {
			log.Fatal(err)
		}
		queryables[i] = q
	}

	for interval := range irelate.PIRelate(80000, 25000, bx, false, func(a I.Relatable) {}, queryables...) {
		fmt.Fprintf(buf, "%s\t%d\t%d\t%d\n", interval.Chrom(), interval.Start(), interval.End(), len(interval.Related()))
	}
	// A failed flush means output was lost; report it rather than exit silently.
	check(buf.Flush())
}
Example #2
0
// benchmarkAnno runs the annotation pipeline b.N times over
// example/query.vcf.gz, using the sources declared in example/conf.toml and
// the custom javascript in example/custom.js. Annotated intervals are written
// to a discarded buffer so only the annotation work is exercised.
func benchmarkAnno(b *testing.B) {
	var configs shared.Config
	if _, err := toml.DecodeFile("example/conf.toml", &configs); err != nil {
		panic(err)
	}

	out := bufio.NewWriter(ioutil.Discard)
	// Both errors used to be discarded; a missing or unreadable JS file
	// would then surface later as a nil dereference or empty script.
	js, err := xopen.Ropen("example/custom.js")
	if err != nil {
		log.Fatal(err)
	}
	jbytes, err := ioutil.ReadAll(js)
	if err != nil {
		log.Fatal(err)
	}
	jsString := string(jbytes)

	srcs, err := configs.Sources()
	if err != nil {
		log.Fatal(err)
	}
	a := api.NewAnnotator(srcs, jsString, false, true)
	files, err := a.SetupStreams()
	if err != nil {
		log.Fatal(err)
	}
	for n := 0; n < b.N; n++ {
		q, err := bix.New("example/query.vcf.gz", 1)
		if err != nil {
			log.Fatal(err)
		}
		bx, err := q.Query(nil)
		if err != nil {
			log.Fatal(err)
		}
		a.UpdateHeader(q)

		//files := []string{"example/cadd.sub.txt.gz", "example/query.vcf.gz", "example/fitcons.bed.gz"}
		//files := []string{"example/query.vcf.gz", "example/fitcons.bed.gz"}

		// Re-open every annotation file on each iteration, as the original
		// benchmark did, so each run starts from fresh readers.
		queryables := make([]interfaces.Queryable, len(files))
		for i, f := range files {
			db, err := bix.New(f, 1)
			if err != nil {
				log.Fatal(err)
			}
			queryables[i] = db
		}

		fn := func(v interfaces.Relatable) {
			a.AnnotateEnds(v, api.INTERVAL)
		}
		stream := irelate.PIRelate(4000, 20000, bx, false, fn, queryables...)

		for interval := range stream {
			fmt.Fprintf(out, "%s\n", interval)
		}
		out.Flush()
	}
}
Example #3
0
File: main.go Project: brentp/bix
// main queries a tabix-indexed file for a region and prints the first 40
// bytes (or fewer, for short records) of every record returned.
//
// Arguments: file chrom start end.
func main() {
	if len(os.Args) < 5 {
		fmt.Fprintf(os.Stderr, "usage: %s file chrom start end\n", os.Args[0])
		os.Exit(1)
	}

	f := os.Args[1]
	tbx, err := bix.New(f)
	check(err)
	defer tbx.Close()

	chrom := os.Args[2]

	s, err := strconv.Atoi(os.Args[3])
	check(err)

	e, err := strconv.Atoi(os.Args[4])
	check(err)

	// The query error used to be discarded, which left vals unusable on
	// failure.
	vals, err := tbx.Query(loc{chrom, s, e})
	check(err)

	for {
		v, err := vals.Next()
		if err != nil {
			// Any error, including end of input, ends the iteration.
			break
		}
		line := v.(interfaces.IVariant).String()
		// Slicing to a fixed 40 bytes panics on shorter records.
		if len(line) > 40 {
			line = line[:40]
		}
		fmt.Println(line)
	}
}
Example #4
0
// main reads every record of the VCF named by os.Args[1] and, for each one,
// asks the tabix index of that same file for a chunked reader covering the
// single position at the record's start. It counts the newline-terminated
// lines those readers yield, logs the total together with the elapsed
// seconds, and then hands over to main2.
func main() {
	fh, err := xopen.Ropen(os.Args[1])
	check(err)
	vr, err := vcfgo.NewReader(fh, true)
	check(err)
	index, err := bix.New(os.Args[1])
	check(err)

	var (
		chunk io.Reader
		lines int
	)
	began := time.Now()
	for variant := vr.Read(); variant != nil; variant = vr.Read() {
		pos := int(variant.Start())
		chunk, err = index.ChunkedReader(location{variant.Chrom(), pos, pos + 1})
		check(err)

		buffered := bufio.NewReader(chunk)
		for {
			// Stop at the first read error; a trailing fragment without a
			// newline is therefore not counted.
			if _, readErr := buffered.ReadString('\n'); readErr != nil {
				break
			}
			lines++
		}
	}
	log.Println(lines, time.Since(began).Seconds())

	main2()
}
Example #5
0
// benchmarkStreams opens data/test.bed.gz nStreams times, merges the
// resulting iterators with IRelate and drains the merged iterator. The whole
// procedure is repeated b.N times.
func benchmarkStreams(nStreams int, b *testing.B) {

	for n := 0; n < b.N; n++ {
		streams := make([]interfaces.RelatableIterator, 0, nStreams)
		f := "data/test.bed.gz"

		for i := 0; i < nStreams; i++ {
			s, e := bix.New(f)
			if e != nil {
				panic(e)
			}
			q, e := s.Query(nil)
			// This error used to be assigned and then ignored.
			if e != nil {
				panic(e)
			}
			streams = append(streams, q)
		}

		//for a := range IRelate(CheckRelatedByOverlap, 0, Less, streams...) {
		iter := IRelate(CheckOverlapPrefix, 0, LessPrefix, streams...)
		for {
			a, err := iter.Next()
			if err != nil {
				break
			}
			// Touch each result. This call used to sit after the break
			// above, where it could never execute.
			a.Start()
		}

	}
}
Example #6
0
// Setup opens every annotation file concurrently and sets up the Queryables.
//
// Files ending in ".bam" are opened with parsers.NewBamQueryable; everything
// else is opened with bix.New. Once all files are open, the header of query
// is updated for every source attached to each file and for every
// post-annotation. The returned slice is in the same order as the files
// reported by a.setupStreams.
//
// If any file cannot be opened, the first such error (in file order) is
// returned and no headers are modified.
func (a *Annotator) Setup(query HeaderUpdater) ([]interfaces.Queryable, error) {
	files, fmap, err := a.setupStreams()
	if err != nil {
		return nil, err
	}
	var wg sync.WaitGroup
	wg.Add(len(files))

	queryables := make([]interfaces.Queryable, len(files))
	// One slot per goroutine, so the workers never write to the same
	// element and no extra locking is needed.
	errs := make([]error, len(files))
	for i, file := range files {
		go func(idx int, file string) {
			defer wg.Done()
			var q interfaces.Queryable
			var err error
			if strings.HasSuffix(file, ".bam") {
				q, err = parsers.NewBamQueryable(file, 2)
			} else {
				q, err = bix.New(file, 1)
			}
			if err != nil {
				// Report through the return value instead of terminating
				// the whole process from inside a worker goroutine.
				errs[idx] = err
				return
			}
			queryables[idx] = q
		}(i, file)

	}
	wg.Wait()

	for _, err := range errs {
		if err != nil {
			return nil, err
		}
	}

	for i, file := range files {
		if q, ok := queryables[i].(*bix.Bix); ok {
			for _, src := range fmap[file] {
				src.UpdateHeader(query, a.Ends, q.GetHeaderType(src.Field), q.GetHeaderNumber(src.Field))
			}
		} else if _, ok := queryables[i].(*parsers.BamQueryable); ok {
			for _, src := range fmap[file] {
				// BAM sources have no header to consult: numeric sources are
				// declared as Float, everything else as String, with Number 1.
				htype := "String"
				if src.IsNumber() {
					htype = "Float"
				}
				src.UpdateHeader(query, a.Ends, htype, "1")
			}
		} else {
			log.Printf("type not known: %T\n", queryables[i])
		}
	}

	for _, post := range a.PostAnnos {
		query.AddInfoToHeader(post.Name, ".", post.Type, fmt.Sprintf("calculated field: %s", post.Name))
	}
	return queryables, nil
}
Example #7
0
// Setup opens each file reported by a.setupStreams with bix.New and returns
// the opened Queryables in that order. As each file is opened, the header of
// query is updated for every source mapped to it; afterwards one INFO entry
// per post-annotation is added. The first failure to open a file aborts the
// setup and is returned as the error.
func (a *Annotator) Setup(query HeaderUpdater) ([]interfaces.Queryable, error) {
	paths, sourcesByPath, err := a.setupStreams()
	if err != nil {
		return nil, err
	}

	opened := make([]interfaces.Queryable, 0)
	for _, path := range paths {
		tbx, openErr := bix.New(path, 1)
		if openErr != nil {
			return nil, openErr
		}
		opened = append(opened, tbx)

		// Header updates happen right after each file is opened, so files
		// processed before a later failure have already been applied.
		for _, source := range sourcesByPath[path] {
			htype := tbx.GetHeaderType(source.Field)
			source.UpdateHeader(query, a.Ends, htype)
		}
	}

	for _, p := range a.PostAnnos {
		desc := fmt.Sprintf("calculated field: %s", p.Name)
		query.AddInfoToHeader(p.Name, ".", p.Type, desc)
	}
	return opened, nil
}
Example #8
0
// AsQueryable opens f with bix.New and returns it as an interfaces.Queryable.
//
// On failure the returned Queryable is a true nil interface. Passing the
// pointer from bix.New straight through would wrap a nil pointer in a non-nil
// interface value, so a caller's `q != nil` check would wrongly succeed.
func AsQueryable(f string) (interfaces.Queryable, error) {
	q, err := bix.New(f, 1)
	if err != nil {
		return nil, err
	}
	return q, nil
}
Example #9
0
// main is the vcfanno command-line entry point.
//
// It expects two positional arguments, a TOML config and a query file with a
// ".tbi" index next to it. It validates the annotations in the config, opens
// the query and annotation files, streams the query through irelate.PIRelate
// with an annotating callback, and writes the annotated records to stdout.
// A banner and timing information go to stderr/the logger. When the
// IRELATE_PROFILE environment variable is "TRUE", a CPU profile is written to
// irelate.pprof.
func main() {
	fmt.Fprintf(os.Stderr, `
=============================================
vcfanno version %s [built with %s]

see: https://github.com/brentp/vcfanno
=============================================
`, VERSION, runtime.Version())

	ends := flag.Bool("ends", false, "annotate the start and end as well as the interval itself.")
	notstrict := flag.Bool("permissive-overlap", false, "annotate with an overlapping variant even it doesn't"+
		" share the same ref and alt alleles. Default is to require exact match between variants.")
	js := flag.String("js", "", "optional path to a file containing custom javascript functions to be used as ops")
	base := flag.String("base-path", "", "optional base-path to prepend to annotation files in the config")
	procs := flag.Int("p", 2, "number of processes to use. default is 2")
	flag.Parse()
	inFiles := flag.Args()
	if len(inFiles) != 2 {
		fmt.Printf(`Usage:
%s config.toml intput.vcf > annotated.vcf

To run a server:

%s server

`, os.Args[0], os.Args[0])
		flag.PrintDefaults()
		return
	}
	queryFile := inFiles[1]
	if !(xopen.Exists(queryFile)) {
		fmt.Fprintf(os.Stderr, "\nERROR: can't find query file: %s\n", queryFile)
		os.Exit(2)
	}
	if !(xopen.Exists(queryFile + ".tbi")) {
		fmt.Fprintf(os.Stderr, "\nERROR: can't find index for query file: %s\n", queryFile)
		os.Exit(2)
	}
	runtime.GOMAXPROCS(*procs)

	var config Config
	if _, err := toml.DecodeFile(inFiles[0], &config); err != nil {
		panic(err)
	}
	config.Base = *base
	// Index into the slice so CheckAnno receives a pointer to the stored
	// element. Ranging by value would hand it a per-iteration copy, and
	// anything it changed through the pointer would be thrown away.
	for i := range config.Annotation {
		if err := CheckAnno(&config.Annotation[i]); err != nil {
			log.Fatal("CheckAnno err:", err)
		}
	}
	sources, e := config.Sources()
	if e != nil {
		log.Fatal(e)
	}

	log.Printf("found %d sources from %d files\n", len(sources), len(config.Annotation))
	// Serve the default HTTP mux on localhost for the lifetime of the run.
	go func() {
		log.Println(http.ListenAndServe("localhost:6060", nil))
	}()

	jsString := ReadJs(*js)
	strict := !*notstrict
	var a = NewAnnotator(sources, jsString, *ends, strict)

	var out io.Writer = os.Stdout
	defer os.Stdout.Close()

	var bx interfaces.RelatableIterator
	b, err := bix.New(queryFile, 1)
	if err != nil {
		log.Fatal(fmt.Errorf("error opening query file and/or index: %s ... %s", queryFile, err))
	}

	a.UpdateHeader(b)

	bx, err = b.Query(nil)
	if err != nil {
		log.Fatal(err)
	}

	files, err := a.SetupStreams()
	if err != nil {
		log.Fatal(err)
	}
	aends := INTERVAL
	if *ends {
		aends = BOTH
	}

	fn := func(v interfaces.Relatable) {
		a.AnnotateEnds(v, aends)
	}

	queryables := make([]interfaces.Queryable, len(files))
	for i, f := range files {
		q, err := bix.New(f, 1)
		if err != nil {
			log.Fatal(err)
		}
		queryables[i] = q
	}
	stream := irelate.PIRelate(4000, 20000, bx, *ends, fn, queryables...)

	// make a new writer from the string header.
	out, err = vcfgo.NewWriter(out, b.VReader.Header)

	if err != nil {
		log.Fatal(err)
	}

	start := time.Now()
	n := 0

	if os.Getenv("IRELATE_PROFILE") == "TRUE" {
		log.Println("profiling to: irelate.pprof")
		f, err := os.Create("irelate.pprof")
		if err != nil {
			panic(err)
		}
		// Deferred calls run last-in first-out: the profile is stopped
		// before its file is closed.
		defer f.Close()
		if err := pprof.StartCPUProfile(f); err != nil {
			panic(err)
		}
		defer pprof.StopCPUProfile()
	}

	for interval := range stream {
		fmt.Fprintf(out, "%s\n", interval)
		n++
	}
	printTime(start, n)
}