func main() { f, err := os.Create("irelate.cpu.pprof") if err != nil { panic(err) } pprof.StartCPUProfile(f) defer pprof.StopCPUProfile() files := os.Args[1:] buf := bufio.NewWriter(os.Stdout) b, err := bix.New(files[0], 1) check(err) bx, err := b.Query(nil) check(err) queryables := make([]I.Queryable, len(files)-1) for i, f := range files[1:] { q, err := bix.New(f, 1) if err != nil { log.Fatal(err) } queryables[i] = q } for interval := range irelate.PIRelate(80000, 25000, bx, false, func(a I.Relatable) {}, queryables...) { fmt.Fprintf(buf, "%s\t%d\t%d\t%d\n", interval.Chrom(), interval.Start(), interval.End(), len(interval.Related())) } buf.Flush() }
func benchmarkAnno(b *testing.B) { var configs shared.Config if _, err := toml.DecodeFile("example/conf.toml", &configs); err != nil { panic(err) } out := bufio.NewWriter(ioutil.Discard) Js, _ := xopen.Ropen("example/custom.js") jbytes, _ := ioutil.ReadAll(Js) js_string := string(jbytes) srcs, err := configs.Sources() if err != nil { log.Fatal(err) } a := api.NewAnnotator(srcs, js_string, false, true) files, err := a.SetupStreams() if err != nil { log.Fatal(err) } for n := 0; n < b.N; n++ { q, err := bix.New("example/query.vcf.gz", 1) if err != nil { log.Fatal(err) } bx, err := q.Query(nil) if err != nil { log.Fatal(err) } a.UpdateHeader(q) //files := []string{"example/cadd.sub.txt.gz", "example/query.vcf.gz", "example/fitcons.bed.gz"} //files := []string{"example/query.vcf.gz", "example/fitcons.bed.gz"} queryables := make([]interfaces.Queryable, len(files)) for i, f := range files { q, err := bix.New(f, 1) if err != nil { log.Fatal(err) } queryables[i] = q } fn := func(v interfaces.Relatable) { a.AnnotateEnds(v, api.INTERVAL) } stream := irelate.PIRelate(4000, 20000, bx, false, fn, queryables...) for interval := range stream { fmt.Fprintf(out, "%s\n", interval) } out.Flush() } }
func main() { f := os.Args[1] tbx, err := bix.New(f) check(err) chrom := os.Args[2] s, err := strconv.Atoi(os.Args[3]) check(err) e, err := strconv.Atoi(os.Args[4]) check(err) vals, _ := tbx.Query(loc{chrom, s, e}) i := 0 for { v, err := vals.Next() if err != nil { break } fmt.Println(v.(interfaces.IVariant).String()[:40]) i++ } tbx.Close() }
func main() { f, err := xopen.Ropen(os.Args[1]) check(err) vcf, err := vcfgo.NewReader(f, true) check(err) tbx, err := bix.New(os.Args[1]) check(err) var rdr io.Reader tot := 0 t0 := time.Now() for { v := vcf.Read() if v == nil { break } rdr, err = tbx.ChunkedReader(location{v.Chrom(), int(v.Start()), int(v.Start()) + 1}) check(err) brdr := bufio.NewReader(rdr) for _, err := brdr.ReadString('\n'); err == nil; _, err = brdr.ReadString('\n') { tot += 1 } } log.Println(tot, time.Since(t0).Seconds()) main2() }
func benchmarkStreams(nStreams int, b *testing.B) { for n := 0; n < b.N; n++ { streams := make([]interfaces.RelatableIterator, 0) f := "data/test.bed.gz" for i := 0; i < nStreams; i++ { s, e := bix.New(f) if e != nil { panic(e) } q, e := s.Query(nil) streams = append(streams, q) } //for a := range IRelate(CheckRelatedByOverlap, 0, Less, streams...) { iter := IRelate(CheckOverlapPrefix, 0, LessPrefix, streams...) for { a, err := iter.Next() if err != nil { break a.Start() } } } }
// Setup reads all the tabix indexes and setups up the Queryables func (a *Annotator) Setup(query HeaderUpdater) ([]interfaces.Queryable, error) { files, fmap, err := a.setupStreams() if err != nil { return nil, err } var wg sync.WaitGroup wg.Add(len(files)) queryables := make([]interfaces.Queryable, len(files)) for i, file := range files { go func(idx int, file string) { var q interfaces.Queryable var err error if strings.HasSuffix(file, ".bam") { q, err = parsers.NewBamQueryable(file, 2) } else { q, err = bix.New(file, 1) } if err != nil { log.Fatal(err) } queryables[idx] = q wg.Done() }(i, file) } wg.Wait() for i, file := range files { if q, ok := queryables[i].(*bix.Bix); ok { for _, src := range fmap[file] { src.UpdateHeader(query, a.Ends, q.GetHeaderType(src.Field), q.GetHeaderNumber(src.Field)) } } else if _, ok := queryables[i].(*parsers.BamQueryable); ok { for _, src := range fmap[file] { htype := "String" if src.IsNumber() { htype = "Float" } src.UpdateHeader(query, a.Ends, htype, "1") } } else { log.Printf("type not known: %T\n", queryables[i]) } } for _, post := range a.PostAnnos { query.AddInfoToHeader(post.Name, ".", post.Type, fmt.Sprintf("calculated field: %s", post.Name)) } return queryables, nil }
func (a *Annotator) Setup(query HeaderUpdater) ([]interfaces.Queryable, error) { queryables := make([]interfaces.Queryable, 0) files, fmap, err := a.setupStreams() if err != nil { return nil, err } for _, file := range files { q, err := bix.New(file, 1) if err != nil { return nil, err } queryables = append(queryables, q) for _, src := range fmap[file] { src.UpdateHeader(query, a.Ends, q.GetHeaderType(src.Field)) } } for _, post := range a.PostAnnos { query.AddInfoToHeader(post.Name, ".", post.Type, fmt.Sprintf("calculated field: %s", post.Name)) } return queryables, nil }
// AsQueryable opens the file f with bix.New and returns it as an
// interfaces.Queryable.
//
// On failure the returned Queryable is a true nil interface. Returning the
// result of bix.New directly would wrap its concrete pointer result — nil
// or not — in a non-nil interface value, so a caller comparing the
// Queryable against nil would be misled.
func AsQueryable(f string) (interfaces.Queryable, error) {
	q, err := bix.New(f, 1)
	if err != nil {
		return nil, err
	}
	return q, nil
}
func main() { fmt.Fprintf(os.Stderr, ` ============================================= vcfanno version %s [built with %s] see: https://github.com/brentp/vcfanno ============================================= `, VERSION, runtime.Version()) ends := flag.Bool("ends", false, "annotate the start and end as well as the interval itself.") notstrict := flag.Bool("permissive-overlap", false, "annotate with an overlapping variant even it doesn't"+ " share the same ref and alt alleles. Default is to require exact match between variants.") js := flag.String("js", "", "optional path to a file containing custom javascript functions to be used as ops") base := flag.String("base-path", "", "optional base-path to prepend to annotation files in the config") procs := flag.Int("p", 2, "number of processes to use. default is 2") flag.Parse() inFiles := flag.Args() if len(inFiles) != 2 { fmt.Printf(`Usage: %s config.toml intput.vcf > annotated.vcf To run a server: %s server `, os.Args[0], os.Args[0]) flag.PrintDefaults() return } queryFile := inFiles[1] if !(xopen.Exists(queryFile)) { fmt.Fprintf(os.Stderr, "\nERROR: can't find query file: %s\n", queryFile) os.Exit(2) } if !(xopen.Exists(queryFile + ".tbi")) { fmt.Fprintf(os.Stderr, "\nERROR: can't find index for query file: %s\n", queryFile) os.Exit(2) } runtime.GOMAXPROCS(*procs) var config Config if _, err := toml.DecodeFile(inFiles[0], &config); err != nil { panic(err) } config.Base = *base for _, a := range config.Annotation { err := CheckAnno(&a) if err != nil { log.Fatal("CheckAnno err:", err) } } sources, e := config.Sources() if e != nil { log.Fatal(e) } log.Printf("found %d sources from %d files\n", len(sources), len(config.Annotation)) go func() { log.Println(http.ListenAndServe("localhost:6060", nil)) }() jsString := ReadJs(*js) strict := !*notstrict var a = NewAnnotator(sources, jsString, *ends, strict) var out io.Writer = os.Stdout defer os.Stdout.Close() var err error var bx interfaces.RelatableIterator b, err := 
bix.New(queryFile, 1) if err != nil { log.Fatal(fmt.Errorf("error opening query file and/or index: %s ... %s", queryFile, err)) } a.UpdateHeader(b) if err != nil { log.Fatal(err) } bx, err = b.Query(nil) if err != nil { log.Fatal(err) } files, err := a.SetupStreams() if err != nil { log.Fatal(err) } aends := INTERVAL if *ends { aends = BOTH } fn := func(v interfaces.Relatable) { a.AnnotateEnds(v, aends) } queryables := make([]interfaces.Queryable, len(files)) for i, f := range files { q, err := bix.New(f, 1) if err != nil { log.Fatal(err) } queryables[i] = q } stream := irelate.PIRelate(4000, 20000, bx, *ends, fn, queryables...) // make a new writer from the string header. out, err = vcfgo.NewWriter(out, b.VReader.Header) if err != nil { log.Fatal(err) } start := time.Now() n := 0 if os.Getenv("IRELATE_PROFILE") == "TRUE" { log.Println("profiling to: irelate.pprof") f, err := os.Create("irelate.pprof") if err != nil { panic(err) } pprof.StartCPUProfile(f) defer pprof.StopCPUProfile() } for interval := range stream { fmt.Fprintf(out, "%s\n", interval) n++ } printTime(start, n) }