func New(path string, n ...int) (*Tbx, error) { if !(xopen.Exists(path) && (xopen.Exists(path+".tbi") || xopen.Exists(path+".csi"))) { return nil, fmt.Errorf("need gz file and .tbi for %s", path) } size := 20 if len(n) > 0 { size = n[0] } t := &Tbx{path: path} cs, mode := C.CString(t.path), C.char('r') defer C.free(unsafe.Pointer(cs)) t.htfs = make(chan *C.htsFile, size) for i := 0; i < cap(t.htfs); i++ { t.htfs <- C.hts_open(cs, &mode) } t.kCache = make(chan C.kstring_t, size*2) for i := 0; i < cap(t.kCache); i++ { t.kCache <- C.kstring_t{} } t.chromCache = make(map[string]C.int) if xopen.Exists(path + ".csi") { csi := C.CString(path + ".csi") defer C.free(unsafe.Pointer(csi)) t.tbx = C.tbx_index_load2(cs, csi) } else { t.tbx = C.tbx_index_load(cs) } runtime.SetFinalizer(t, _close) return t, nil }
// Flatten turns an annotation into a slice of Sources. Pass in the index of the file. // having it as a Source makes the code cleaner, but it's simpler for the user to // specify multiple ops per file in the toml config. func (a *Annotation) Flatten(index int, basepath string) ([]*Source, error) { if len(a.Ops) == 0 { if !strings.HasSuffix(a.File, ".bam") { return nil, fmt.Errorf("no ops specified for %s\n", a.File) } // auto-fill bam to count. a.Ops = make([]string, len(a.Names)) for i := range a.Names { a.Ops[i] = "count" } } if len(a.Columns) == 0 && len(a.Fields) == 0 { if !strings.HasSuffix(a.File, ".bam") { return nil, fmt.Errorf("no columns or fields specified for %s\n", a.File) } // auto-fill bam to count. if len(a.Fields) == 0 { a.Columns = make([]int, len(a.Names)) for i := range a.Names { a.Columns[i] = 1 } } } if !(xopen.Exists(a.File) || a.File == "-") { if basepath != "" { a.File = basepath + "/" + a.File } if !(xopen.Exists(a.File) || a.File == "-") { return nil, fmt.Errorf("[Flatten] unable to open file: %s in %s\n", a.File, basepath) } } n := len(a.Ops) sources := make([]*Source, n) for i := 0; i < n; i++ { isjs := strings.HasPrefix(a.Ops[i], "js:") if !isjs { if _, ok := Reducers[a.Ops[i]]; !ok { return nil, fmt.Errorf("requested op not found: %s for %s\n", a.Ops[i], a.File) } } op := a.Ops[i] if len(a.Names) == 0 { a.Names = a.Fields } sources[i] = &Source{File: a.File, Op: op, Name: a.Names[i], Index: index} if nil != a.Fields { sources[i].Field = a.Fields[i] sources[i].Column = -1 } else { sources[i].Column = a.Columns[i] } } return sources, nil }
// New takes a path to a bgziped (and tabixed file) and returns the tabix struct. func New(path string) (*Tabix, error) { if !(xopen.Exists(path) && xopen.Exists(path+".tbi")) { return nil, fmt.Errorf("need gz file and .tbi for %s", path) } t := &Tabix{path: path} cs := C.CString(t.path) defer C.free(unsafe.Pointer(cs)) mode := C.char('r') t.htf = C.hts_open(cs, &mode) t.tbx = C.tbx_index_load(cs) runtime.SetFinalizer(t, tabixCloser) if strings.HasSuffix(path, ".bed.gz") { t.typ = BED } else if strings.HasSuffix(path, ".vcf.gz") { t.hdr = C.bcf_hdr_read(t.htf) t.typ = VCF } else { t.typ = OTHER } t.original_header = true return t, nil }
func (c Config) Sources() ([]*Source, error) { annos := c.Annotation for i, a := range annos { if !xopen.Exists(a.File) && a.File != "-" { a.File = c.Base + "/" + a.File annos[i] = a } } s := make([]*Source, 0) for i, a := range annos { flats, err := a.Flatten(i, c.Base) if err != nil { return nil, err } s = append(s, flats...) } return s, nil }
func main() { fmt.Fprintf(os.Stderr, ` ============================================= vcfanno version %s [built with %s] see: https://github.com/brentp/vcfanno ============================================= `, VERSION, runtime.Version()) ends := flag.Bool("ends", false, "annotate the start and end as well as the interval itself.") notstrict := flag.Bool("permissive-overlap", false, "annotate with an overlapping variant even it doesn't"+ " share the same ref and alt alleles. Default is to require exact match between variants.") lua := flag.String("lua", "", "optional path to a file containing custom javascript functions to be used as ops") base := flag.String("base-path", "", "optional base-path to prepend to annotation files in the config") procs := flag.Int("p", 2, "number of processes to use.") flag.Parse() inFiles := flag.Args() if len(inFiles) != 2 { fmt.Printf(`Usage: %s config.toml input.vcf > annotated.vcf `, os.Args[0]) flag.PrintDefaults() return } queryFile := inFiles[1] if !(xopen.Exists(queryFile) || queryFile == "") { fmt.Fprintf(os.Stderr, "\nERROR: can't find query file: %s\n", queryFile) os.Exit(2) } runtime.GOMAXPROCS(*procs) var config Config if _, err := toml.DecodeFile(inFiles[0], &config); err != nil { if strings.Contains(err.Error(), "Expected value but found") { fmt.Fprintln(os.Stderr, "\nNOTE: you must quote values in the conf file, e.g. fields=['AC', 'AN'] instead of fields=[AC, AN]") } panic(err) } config.Base = *base for _, a := range config.Annotation { err := CheckAnno(&a) if err != nil { log.Fatal("CheckAnno err:", err) } for _, op := range a.Ops { if len(op) > 4 && op[:4] == "lua:" && *lua == "" { log.Fatal("ERROR: requested lua op without specifying -lua flag") } } } for i := range config.PostAnnotation { r := config.PostAnnotation[i] err := CheckPostAnno(&r) if err != nil { log.Fatal(fmt.Sprintf("error in postannotaion section %s err: %s", r.Name, err)) } if len(r.Op) > 4 && r.Op[:4] == "lua:" && *lua == "" { log.Fatal("ERROR: requested lua op without specifying -lua flag") } } sources, e := config.Sources() if e != nil { log.Fatal(e) } log.Printf("found %d sources from %d files\n", len(sources), len(config.Annotation)) /* go func() { log.Println(http.ListenAndServe("localhost:6060", nil)) }() */ luaString := ReadLua(*lua) strict := !*notstrict var a = NewAnnotator(sources, luaString, *ends, strict, config.PostAnnotation) var out io.Writer = os.Stdout defer os.Stdout.Close() var err error qrdr, err := xopen.Ropen(queryFile) if err != nil { log.Fatal(fmt.Errorf("error opening query file %s: %s", queryFile, err)) } qstream, query, err := parsers.VCFIterator(qrdr) if err != nil { log.Fatal(fmt.Errorf("error parsing VCF query file %s: %s", queryFile, err)) } queryables, err := a.Setup(query) if err != nil { log.Fatal(err) } aends := INTERVAL if *ends { aends = BOTH } lastMsg := struct { sync.RWMutex s [10]string i int }{} fn := func(v interfaces.Relatable) { e := a.AnnotateEnds(v, aends) if e != nil { lastMsg.RLock() em := e.Error() found := false for i := len(lastMsg.s) - 1; i >= 0; i-- { if em == lastMsg.s[i] { found = true break } } if !found { log.Println(e, ">> this error/warning may occur many times. reporting once here...") lastMsg.RUnlock() lastMsg.Lock() lastMsg.s[lastMsg.i] = em if lastMsg.i == len(lastMsg.s)-1 { lastMsg.i = -1 } lastMsg.i++ lastMsg.Unlock() } else { lastMsg.RUnlock() } } } maxGap := envGet("IRELATE_MAX_GAP", 20000) maxChunk := envGet("IRELATE_MAX_CHUNK", 8000) // make a new writer from the string header. out, err = vcfgo.NewWriter(out, query.Header) stream := irelate.PIRelate(maxChunk, maxGap, qstream, *ends, fn, queryables...) if err != nil { log.Fatal(err) } start := time.Now() n := 0 /* if os.Getenv("IRELATE_PROFILE") == "TRUE" { log.Println("profiling to: irelate.pprof") f, err := os.Create("irelate.pprof") if err != nil { panic(err) } pprof.StartCPUProfile(f) defer pprof.StopCPUProfile() } */ for interval := range stream { //log.Printf("%v\n", interval) fmt.Fprintln(out, interval) n++ } printTime(start, n) }
func main() { fmt.Fprintf(os.Stderr, ` ============================================= vcfanno version %s [built with %s] see: https://github.com/brentp/vcfanno ============================================= `, VERSION, runtime.Version()) ends := flag.Bool("ends", false, "annotate the start and end as well as the interval itself.") notstrict := flag.Bool("permissive-overlap", false, "annotate with an overlapping variant even it doesn't"+ " share the same ref and alt alleles. Default is to require exact match between variants.") lua := flag.String("lua", "", "optional path to a file containing custom javascript functions to be used as ops") base := flag.String("base-path", "", "optional base-path to prepend to annotation files in the config") procs := flag.Int("p", 2, "number of processes to use. default is 2") flag.Parse() inFiles := flag.Args() if len(inFiles) != 2 { fmt.Printf(`Usage: %s config.toml intput.vcf > annotated.vcf `, os.Args[0]) flag.PrintDefaults() return } queryFile := inFiles[1] if !(xopen.Exists(queryFile) || queryFile == "") { fmt.Fprintf(os.Stderr, "\nERROR: can't find query file: %s\n", queryFile) os.Exit(2) } runtime.GOMAXPROCS(*procs) var config Config if _, err := toml.DecodeFile(inFiles[0], &config); err != nil { panic(err) } config.Base = *base for _, a := range config.Annotation { err := CheckAnno(&a) if err != nil { log.Fatal("CheckAnno err:", err) } } for i := range config.PostAnnotation { r := config.PostAnnotation[i] err := CheckPostAnno(&r) if err != nil { log.Fatal(fmt.Sprintf("error in postannotaion section %s err: %s", r.Name, err)) } } sources, e := config.Sources() if e != nil { log.Fatal(e) } log.Printf("found %d sources from %d files\n", len(sources), len(config.Annotation)) /* go func() { log.Println(http.ListenAndServe("localhost:6060", nil)) }() */ luaString := ReadLua(*lua) strict := !*notstrict var a = NewAnnotator(sources, luaString, *ends, strict, config.PostAnnotation) var out io.Writer = os.Stdout defer os.Stdout.Close() var err error qrdr, err := xopen.Ropen(queryFile) if err != nil { log.Fatal(fmt.Errorf("error opening query file %s: %s", queryFile, err)) } qstream, query, err := parsers.VCFIterator(qrdr) if err != nil { log.Fatal(fmt.Errorf("error parsing VCF query file %s: %s", queryFile, err)) } queryables, err := a.Setup(query) if err != nil { log.Fatal(err) } aends := INTERVAL if *ends { aends = BOTH } fn := func(v interfaces.Relatable) { e := a.AnnotateEnds(v, aends) if e != nil { log.Println(e) } } smaxGap := os.Getenv("IRELATE_MAX_GAP") maxGap := 20000 if smaxGap != "" { maxGap, err = strconv.Atoi(smaxGap) if err != nil { log.Printf("couldn't parse %s using %d\n", smaxGap, maxGap) } else { log.Printf("using maxGap of %d\n", maxGap) } } stream := irelate.PIRelate(8000, maxGap, qstream, *ends, fn, queryables...) // make a new writer from the string header. out, err = vcfgo.NewWriter(out, query.Header) if err != nil { log.Fatal(err) } start := time.Now() n := 0 if os.Getenv("IRELATE_PROFILE") == "TRUE" { log.Println("profiling to: irelate.pprof") f, err := os.Create("irelate.pprof") if err != nil { panic(err) } pprof.StartCPUProfile(f) defer pprof.StopCPUProfile() } for interval := range stream { //log.Printf("%v\n", interval) fmt.Fprintln(out, interval) n++ } printTime(start, n) }
func main() { fmt.Fprintf(os.Stderr, ` ============================================= vcfanno version %s [built with %s] see: https://github.com/brentp/vcfanno ============================================= `, VERSION, runtime.Version()) ends := flag.Bool("ends", false, "annotate the start and end as well as the interval itself.") notstrict := flag.Bool("permissive-overlap", false, "annotate with an overlapping variant even it doesn't"+ " share the same ref and alt alleles. Default is to require exact match between variants.") js := flag.String("js", "", "optional path to a file containing custom javascript functions to be used as ops") base := flag.String("base-path", "", "optional base-path to prepend to annotation files in the config") procs := flag.Int("p", 2, "number of processes to use. default is 2") flag.Parse() inFiles := flag.Args() if len(inFiles) != 2 { fmt.Printf(`Usage: %s config.toml intput.vcf > annotated.vcf To run a server: %s server `, os.Args[0], os.Args[0]) flag.PrintDefaults() return } queryFile := inFiles[1] if !(xopen.Exists(queryFile)) { fmt.Fprintf(os.Stderr, "\nERROR: can't find query file: %s\n", queryFile) os.Exit(2) } if !(xopen.Exists(queryFile + ".tbi")) { fmt.Fprintf(os.Stderr, "\nERROR: can't find index for query file: %s\n", queryFile) os.Exit(2) } runtime.GOMAXPROCS(*procs) var config Config if _, err := toml.DecodeFile(inFiles[0], &config); err != nil { panic(err) } config.Base = *base for _, a := range config.Annotation { err := CheckAnno(&a) if err != nil { log.Fatal("CheckAnno err:", err) } } sources, e := config.Sources() if e != nil { log.Fatal(e) } log.Printf("found %d sources from %d files\n", len(sources), len(config.Annotation)) go func() { log.Println(http.ListenAndServe("localhost:6060", nil)) }() jsString := ReadJs(*js) strict := !*notstrict var a = NewAnnotator(sources, jsString, *ends, strict) var out io.Writer = os.Stdout defer os.Stdout.Close() var err error var bx interfaces.RelatableIterator b, err := bix.New(queryFile, 1) if err != nil { log.Fatal(fmt.Errorf("error opening query file and/or index: %s ... %s", queryFile, err)) } a.UpdateHeader(b) if err != nil { log.Fatal(err) } bx, err = b.Query(nil) if err != nil { log.Fatal(err) } files, err := a.SetupStreams() if err != nil { log.Fatal(err) } aends := INTERVAL if *ends { aends = BOTH } fn := func(v interfaces.Relatable) { a.AnnotateEnds(v, aends) } queryables := make([]interfaces.Queryable, len(files)) for i, f := range files { q, err := bix.New(f, 1) if err != nil { log.Fatal(err) } queryables[i] = q } stream := irelate.PIRelate(4000, 20000, bx, *ends, fn, queryables...) // make a new writer from the string header. out, err = vcfgo.NewWriter(out, b.VReader.Header) if err != nil { log.Fatal(err) } start := time.Now() n := 0 if os.Getenv("IRELATE_PROFILE") == "TRUE" { log.Println("profiling to: irelate.pprof") f, err := os.Create("irelate.pprof") if err != nil { panic(err) } pprof.StartCPUProfile(f) defer pprof.StopCPUProfile() } for interval := range stream { fmt.Fprintf(out, "%s\n", interval) n++ } printTime(start, n) }