Ejemplo n.º 1
0
// New opens the bgzip-compressed file at path together with its index and
// returns a Tbx backed by a pool of htslib file handles.
//
// An index must exist next to the file as either path+".tbi" or path+".csi";
// when a .csi file is present it is the one that gets loaded. The optional
// first value of n sets the number of pooled file handles (default 20); a
// value below 1 falls back to the default. The kstring cache holds twice as
// many entries as the handle pool.
//
// An error is returned when the file or index is missing, when htslib cannot
// open the file, or when the index cannot be loaded. In the latter two cases
// every handle opened so far is closed before returning.
func New(path string, n ...int) (*Tbx, error) {
	if !(xopen.Exists(path) && (xopen.Exists(path+".tbi") || xopen.Exists(path+".csi"))) {
		return nil, fmt.Errorf("need gz file and .tbi for %s", path)
	}
	size := 20
	// A non-positive size would panic in make() or leave an empty pool.
	if len(n) > 0 && n[0] > 0 {
		size = n[0]
	}

	t := &Tbx{path: path}
	cs := C.CString(t.path)
	defer C.free(unsafe.Pointer(cs))
	// hts_open reads the mode as a C string, so it must be NUL-terminated;
	// a pointer to a lone C.char is not.
	mode := C.CString("r")
	defer C.free(unsafe.Pointer(mode))

	// closePool releases every handle currently sitting in the pool. It is
	// only used on the error paths below, before t is handed to anyone else.
	closePool := func() {
		for len(t.htfs) > 0 {
			htf := <-t.htfs
			C.hts_close(htf)
		}
	}

	t.htfs = make(chan *C.htsFile, size)
	for i := 0; i < size; i++ {
		htf := C.hts_open(cs, mode)
		if htf == nil {
			closePool()
			return nil, fmt.Errorf("unable to open %s", path)
		}
		t.htfs <- htf
	}

	t.kCache = make(chan C.kstring_t, size*2)
	for i := 0; i < cap(t.kCache); i++ {
		t.kCache <- C.kstring_t{}
	}
	t.chromCache = make(map[string]C.int)

	if xopen.Exists(path + ".csi") {
		csi := C.CString(path + ".csi")
		defer C.free(unsafe.Pointer(csi))
		t.tbx = C.tbx_index_load2(cs, csi)
	} else {
		t.tbx = C.tbx_index_load(cs)
	}
	if t.tbx == nil {
		closePool()
		return nil, fmt.Errorf("unable to load index for %s", path)
	}
	// Only a fully initialised Tbx gets the finalizer, so the manual cleanup
	// above never races with it.
	runtime.SetFinalizer(t, _close)

	return t, nil
}
Ejemplo n.º 2
0
// Flatten turns an annotation into a slice of Sources, one per op. Pass in the
// index of the file. Having each op as its own Source makes the code cleaner,
// but it's simpler for the user to specify multiple ops per file in the toml
// config.
//
// For files ending in ".bam", missing ops are filled with "count" and missing
// columns with 1 (one entry per name). For any other file, missing ops or
// missing columns/fields are an error. If a.File does not exist and is not
// "-", basepath (when non-empty) is prepended and the check is repeated.
//
// Flatten mutates the receiver: a.Ops, a.Columns, a.Names and a.File may all
// be rewritten. It returns an error when an op is neither a "js:" op nor a key
// of Reducers, or when names, fields or columns have fewer entries than ops.
func (a *Annotation) Flatten(index int, basepath string) ([]*Source, error) {
	isBam := strings.HasSuffix(a.File, ".bam")
	if len(a.Ops) == 0 {
		if !isBam {
			return nil, fmt.Errorf("no ops specified for %s\n", a.File)
		}
		// auto-fill bam to count.
		a.Ops = make([]string, len(a.Names))
		for i := range a.Ops {
			a.Ops[i] = "count"
		}
	}
	if len(a.Columns) == 0 && len(a.Fields) == 0 {
		if !isBam {
			return nil, fmt.Errorf("no columns or fields specified for %s\n", a.File)
		}
		// auto-fill bam to count.
		a.Columns = make([]int, len(a.Names))
		for i := range a.Columns {
			a.Columns[i] = 1
		}
	}
	if !(xopen.Exists(a.File) || a.File == "-") {
		if basepath != "" {
			a.File = basepath + "/" + a.File
		}
		if !(xopen.Exists(a.File) || a.File == "-") {
			return nil, fmt.Errorf("[Flatten] unable to open file: %s in %s\n", a.File, basepath)
		}
	}

	n := len(a.Ops)
	// Names default to the field names when none were given.
	if n > 0 && len(a.Names) == 0 {
		a.Names = a.Fields
	}
	// An empty (but non-nil) Fields slice means columns are in use.
	useFields := len(a.Fields) > 0

	// Every op needs a name and either a field or a column; reject short
	// lists here instead of panicking on an out-of-range index below.
	if len(a.Names) < n {
		return nil, fmt.Errorf("found %d names for %d ops in %s", len(a.Names), n, a.File)
	}
	if useFields && len(a.Fields) < n {
		return nil, fmt.Errorf("found %d fields for %d ops in %s", len(a.Fields), n, a.File)
	}
	if !useFields && len(a.Columns) < n {
		return nil, fmt.Errorf("found %d columns for %d ops in %s", len(a.Columns), n, a.File)
	}

	sources := make([]*Source, n)
	for i, op := range a.Ops {
		if !strings.HasPrefix(op, "js:") {
			if _, ok := Reducers[op]; !ok {
				return nil, fmt.Errorf("requested op not found: %s for %s\n", op, a.File)
			}
		}
		src := &Source{File: a.File, Op: op, Name: a.Names[i], Index: index}
		if useFields {
			src.Field = a.Fields[i]
			// Column -1 marks a source that is addressed by field name.
			src.Column = -1
		} else {
			src.Column = a.Columns[i]
		}
		sources[i] = src
	}
	return sources, nil
}
Ejemplo n.º 3
0
// New takes a path to a bgzipped (and tabixed) file and returns the tabix struct.
//
// Both path and path+".tbi" must exist. The file type is chosen from the
// suffix: ".bed.gz" is BED, ".vcf.gz" is VCF (its header is read through
// bcf_hdr_read), anything else is OTHER.
//
// An error is returned when either file is missing, when htslib cannot open
// the file, or when the index cannot be loaded.
func New(path string) (*Tabix, error) {
	if !(xopen.Exists(path) && xopen.Exists(path+".tbi")) {
		return nil, fmt.Errorf("need gz file and .tbi for %s", path)
	}

	t := &Tabix{path: path}
	cs := C.CString(t.path)
	defer C.free(unsafe.Pointer(cs))
	// hts_open reads the mode as a C string, so it must be NUL-terminated;
	// a pointer to a lone C.char is not.
	mode := C.CString("r")
	defer C.free(unsafe.Pointer(mode))

	t.htf = C.hts_open(cs, mode)
	if t.htf == nil {
		return nil, fmt.Errorf("unable to open %s", path)
	}
	t.tbx = C.tbx_index_load(cs)
	if t.tbx == nil {
		// The finalizer is not installed yet, so release the handle here.
		C.hts_close(t.htf)
		return nil, fmt.Errorf("unable to load index for %s", path)
	}
	runtime.SetFinalizer(t, tabixCloser)

	switch {
	case strings.HasSuffix(path, ".bed.gz"):
		t.typ = BED
	case strings.HasSuffix(path, ".vcf.gz"):
		// NOTE(review): the header result is not checked for NULL here —
		// confirm how later users of t.hdr cope with an unreadable header.
		t.hdr = C.bcf_hdr_read(t.htf)
		t.typ = VCF
	default:
		t.typ = OTHER
	}
	t.original_header = true
	return t, nil
}
Ejemplo n.º 4
0
// Sources flattens every annotation in the config into a single list of
// Sources, in config order. The position of each annotation in c.Annotation
// is passed to Flatten as the file index.
//
// When c.Base is set, any annotation file that does not exist (and is not
// "-") gets c.Base prepended; the updated entry is written back into
// c.Annotation. The first error from Flatten aborts and is returned.
func (c Config) Sources() ([]*Source, error) {
	annos := c.Annotation
	// Without a base path there is nothing to prepend; joining an empty base
	// would turn a relative name into a path rooted at "/".
	if c.Base != "" {
		for i, a := range annos {
			if !xopen.Exists(a.File) && a.File != "-" {
				a.File = c.Base + "/" + a.File
				annos[i] = a
			}
		}
	}
	s := make([]*Source, 0, len(annos))
	for i, a := range annos {
		flats, err := a.Flatten(i, c.Base)
		if err != nil {
			return nil, err
		}
		s = append(s, flats...)
	}
	return s, nil
}
Ejemplo n.º 5
0
// main is the vcfanno command-line entry point.
//
// It expects exactly two positional arguments, a TOML config and a query VCF,
// and writes the annotated VCF to stdout. The banner, warnings and errors go
// to stderr or the logger. The steps are:
//
//  1. parse flags and check that the query file exists;
//  2. decode and validate the config (annotations and post-annotations),
//     rejecting "lua:" ops when no -lua file was given;
//  3. build the annotator, open the query as a VCF stream and set up the
//     annotation queryables;
//  4. create the VCF writer, then stream the query through irelate.PIRelate,
//     printing every interval it yields.
//
// IRELATE_MAX_GAP and IRELATE_MAX_CHUNK are read through envGet with fallback
// values 20000 and 8000.
func main() {
	fmt.Fprintf(os.Stderr, `
=============================================
vcfanno version %s [built with %s]

see: https://github.com/brentp/vcfanno
=============================================
`, VERSION, runtime.Version())

	ends := flag.Bool("ends", false, "annotate the start and end as well as the interval itself.")
	notstrict := flag.Bool("permissive-overlap", false, "annotate with an overlapping variant even it doesn't"+
		" share the same ref and alt alleles. Default is to require exact match between variants.")
	lua := flag.String("lua", "", "optional path to a file containing custom lua functions to be used as ops")
	base := flag.String("base-path", "", "optional base-path to prepend to annotation files in the config")
	procs := flag.Int("p", 2, "number of processes to use.")
	flag.Parse()
	inFiles := flag.Args()
	if len(inFiles) != 2 {
		fmt.Printf(`Usage:
%s config.toml input.vcf > annotated.vcf

`, os.Args[0])
		flag.PrintDefaults()
		return
	}
	queryFile := inFiles[1]
	if !(xopen.Exists(queryFile) || queryFile == "") {
		fmt.Fprintf(os.Stderr, "\nERROR: can't find query file: %s\n", queryFile)
		os.Exit(2)
	}
	runtime.GOMAXPROCS(*procs)

	var config Config
	if _, err := toml.DecodeFile(inFiles[0], &config); err != nil {
		// This decoder message usually means an unquoted string in the config.
		if strings.Contains(err.Error(), "Expected value but found") {
			fmt.Fprintln(os.Stderr, "\nNOTE: you must quote values in the conf file, e.g. fields=['AC', 'AN'] instead of fields=[AC, AN]")
		}
		panic(err)
	}
	config.Base = *base
	for _, a := range config.Annotation {
		err := CheckAnno(&a)
		if err != nil {
			log.Fatal("CheckAnno err:", err)
		}
		for _, op := range a.Ops {
			if len(op) > 4 && op[:4] == "lua:" && *lua == "" {
				log.Fatal("ERROR: requested lua op without specifying -lua flag")
			}
		}
	}
	for i := range config.PostAnnotation {
		r := config.PostAnnotation[i]
		err := CheckPostAnno(&r)
		if err != nil {
			log.Fatal(fmt.Sprintf("error in postannotaion section %s err: %s", r.Name, err))
		}
		if len(r.Op) > 4 && r.Op[:4] == "lua:" && *lua == "" {
			log.Fatal("ERROR: requested lua op without specifying -lua flag")
		}
	}

	sources, e := config.Sources()
	if e != nil {
		log.Fatal(e)
	}

	log.Printf("found %d sources from %d files\n", len(sources), len(config.Annotation))
	/*
		go func() {
			log.Println(http.ListenAndServe("localhost:6060", nil))
		}()
	*/

	luaString := ReadLua(*lua)
	strict := !*notstrict
	var a = NewAnnotator(sources, luaString, *ends, strict, config.PostAnnotation)

	var out io.Writer = os.Stdout
	defer os.Stdout.Close()

	var err error
	qrdr, err := xopen.Ropen(queryFile)
	if err != nil {
		log.Fatal(fmt.Errorf("error opening query file %s: %s", queryFile, err))
	}
	qstream, query, err := parsers.VCFIterator(qrdr)
	if err != nil {
		log.Fatal(fmt.Errorf("error parsing VCF query file %s: %s", queryFile, err))
	}

	queryables, err := a.Setup(query)
	if err != nil {
		log.Fatal(err)
	}
	aends := INTERVAL
	if *ends {
		aends = BOTH
	}

	// lastMsg is a 10-slot ring buffer of recently reported error messages,
	// used so that a repeated message is logged once rather than per variant.
	lastMsg := struct {
		sync.RWMutex
		s [10]string
		i int
	}{}

	// fn annotates one variant and reports any error it has not seen among
	// the remembered messages. It takes the read lock for the lookup and
	// switches to the write lock only when a new message must be recorded.
	fn := func(v interfaces.Relatable) {
		e := a.AnnotateEnds(v, aends)
		if e != nil {
			lastMsg.RLock()
			em := e.Error()
			found := false
			for i := len(lastMsg.s) - 1; i >= 0; i-- {
				if em == lastMsg.s[i] {
					found = true
					break
				}
			}
			if !found {
				log.Println(e, ">> this error/warning may occur many times. reporting once here...")
				lastMsg.RUnlock()
				lastMsg.Lock()
				lastMsg.s[lastMsg.i] = em
				// Wrap the write index back to slot 0 after the last slot.
				if lastMsg.i == len(lastMsg.s)-1 {
					lastMsg.i = -1
				}
				lastMsg.i++

				lastMsg.Unlock()
			} else {
				lastMsg.RUnlock()
			}
		}
	}

	maxGap := envGet("IRELATE_MAX_GAP", 20000)
	maxChunk := envGet("IRELATE_MAX_CHUNK", 8000)

	// make a new writer from the string header. Fail before any annotation
	// work is started if the writer cannot be created.
	out, err = vcfgo.NewWriter(out, query.Header)
	if err != nil {
		log.Fatal(err)
	}

	stream := irelate.PIRelate(maxChunk, maxGap, qstream, *ends, fn, queryables...)

	start := time.Now()
	n := 0

	/*
		if os.Getenv("IRELATE_PROFILE") == "TRUE" {
			log.Println("profiling to: irelate.pprof")
			f, err := os.Create("irelate.pprof")
			if err != nil {
				panic(err)
			}
			pprof.StartCPUProfile(f)
			defer pprof.StopCPUProfile()
		}
	*/

	for interval := range stream {
		//log.Printf("%v\n", interval)
		fmt.Fprintln(out, interval)
		n++
	}
	printTime(start, n)
}
Ejemplo n.º 6
0
// main is the vcfanno command-line entry point.
//
// It expects exactly two positional arguments, a TOML config and a query VCF,
// and writes the annotated VCF to stdout. The banner, warnings and errors go
// to stderr or the logger. The steps are:
//
//  1. parse flags and check that the query file exists;
//  2. decode and validate the config (annotations and post-annotations);
//  3. build the annotator, open the query as a VCF stream and set up the
//     annotation queryables;
//  4. stream the query through irelate.PIRelate and print every interval it
//     yields through a VCF writer.
//
// Environment: IRELATE_MAX_GAP overrides the default max gap of 20000 when it
// parses as an integer; IRELATE_PROFILE=TRUE writes a CPU profile to
// irelate.pprof.
func main() {
	fmt.Fprintf(os.Stderr, `
=============================================
vcfanno version %s [built with %s]

see: https://github.com/brentp/vcfanno
=============================================
`, VERSION, runtime.Version())

	ends := flag.Bool("ends", false, "annotate the start and end as well as the interval itself.")
	notstrict := flag.Bool("permissive-overlap", false, "annotate with an overlapping variant even it doesn't"+
		" share the same ref and alt alleles. Default is to require exact match between variants.")
	lua := flag.String("lua", "", "optional path to a file containing custom lua functions to be used as ops")
	base := flag.String("base-path", "", "optional base-path to prepend to annotation files in the config")
	procs := flag.Int("p", 2, "number of processes to use. default is 2")
	flag.Parse()
	inFiles := flag.Args()
	if len(inFiles) != 2 {
		fmt.Printf(`Usage:
%s config.toml input.vcf > annotated.vcf

`, os.Args[0])
		flag.PrintDefaults()
		return
	}
	queryFile := inFiles[1]
	if !(xopen.Exists(queryFile) || queryFile == "") {
		fmt.Fprintf(os.Stderr, "\nERROR: can't find query file: %s\n", queryFile)
		os.Exit(2)
	}
	runtime.GOMAXPROCS(*procs)

	var config Config
	if _, err := toml.DecodeFile(inFiles[0], &config); err != nil {
		panic(err)
	}
	config.Base = *base
	for _, a := range config.Annotation {
		err := CheckAnno(&a)
		if err != nil {
			log.Fatal("CheckAnno err:", err)
		}
	}
	for i := range config.PostAnnotation {
		r := config.PostAnnotation[i]
		err := CheckPostAnno(&r)
		if err != nil {
			log.Fatal(fmt.Sprintf("error in postannotaion section %s err: %s", r.Name, err))
		}
	}

	sources, e := config.Sources()
	if e != nil {
		log.Fatal(e)
	}

	log.Printf("found %d sources from %d files\n", len(sources), len(config.Annotation))
	/*
		go func() {
			log.Println(http.ListenAndServe("localhost:6060", nil))
		}()
	*/

	luaString := ReadLua(*lua)
	strict := !*notstrict
	var a = NewAnnotator(sources, luaString, *ends, strict, config.PostAnnotation)

	var out io.Writer = os.Stdout
	defer os.Stdout.Close()

	var err error
	qrdr, err := xopen.Ropen(queryFile)
	if err != nil {
		log.Fatal(fmt.Errorf("error opening query file %s: %s", queryFile, err))
	}
	qstream, query, err := parsers.VCFIterator(qrdr)
	if err != nil {
		log.Fatal(fmt.Errorf("error parsing VCF query file %s: %s", queryFile, err))
	}

	queryables, err := a.Setup(query)
	if err != nil {
		log.Fatal(err)
	}
	aends := INTERVAL
	if *ends {
		aends = BOTH
	}

	// fn annotates one variant; annotation errors are logged, not fatal.
	fn := func(v interfaces.Relatable) {
		e := a.AnnotateEnds(v, aends)
		if e != nil {
			log.Println(e)
		}
	}

	smaxGap := os.Getenv("IRELATE_MAX_GAP")
	maxGap := 20000
	if smaxGap != "" {
		// Parse into a scratch variable: Atoi returns 0 on failure, which
		// must not replace the default that the log line promises to use.
		if parsed, perr := strconv.Atoi(smaxGap); perr != nil {
			log.Printf("couldn't parse %s using %d\n", smaxGap, maxGap)
		} else {
			maxGap = parsed
			log.Printf("using maxGap of %d\n", maxGap)
		}
	}

	stream := irelate.PIRelate(8000, maxGap, qstream, *ends, fn, queryables...)

	// make a new writer from the string header.
	out, err = vcfgo.NewWriter(out, query.Header)
	if err != nil {
		log.Fatal(err)
	}

	start := time.Now()
	n := 0

	if os.Getenv("IRELATE_PROFILE") == "TRUE" {
		log.Println("profiling to: irelate.pprof")
		f, err := os.Create("irelate.pprof")
		if err != nil {
			panic(err)
		}
		// Deferred calls run last-in first-out: the profile is stopped
		// (and flushed) before the file is closed.
		defer f.Close()
		if err := pprof.StartCPUProfile(f); err != nil {
			panic(err)
		}
		defer pprof.StopCPUProfile()
	}

	for interval := range stream {
		//log.Printf("%v\n", interval)
		fmt.Fprintln(out, interval)
		n++
	}
	printTime(start, n)
}
Ejemplo n.º 7
0
// main is the vcfanno command-line entry point.
//
// It expects exactly two positional arguments, a TOML config and a query VCF
// that has a ".tbi" index next to it, and writes the annotated VCF to stdout.
// The banner, warnings and errors go to stderr or the logger. The steps are:
//
//  1. parse flags and check that the query file and its index exist;
//  2. decode and validate the config and flatten it into sources;
//  3. open the query through bix, update its header from the annotator and
//     open one bix handle per annotation file;
//  4. stream the query through irelate.PIRelate and print every interval it
//     yields through a VCF writer.
//
// An HTTP listener on localhost:6060 is started unconditionally in the
// background. IRELATE_PROFILE=TRUE writes a CPU profile to irelate.pprof.
func main() {
	fmt.Fprintf(os.Stderr, `
=============================================
vcfanno version %s [built with %s]

see: https://github.com/brentp/vcfanno
=============================================
`, VERSION, runtime.Version())

	ends := flag.Bool("ends", false, "annotate the start and end as well as the interval itself.")
	notstrict := flag.Bool("permissive-overlap", false, "annotate with an overlapping variant even it doesn't"+
		" share the same ref and alt alleles. Default is to require exact match between variants.")
	js := flag.String("js", "", "optional path to a file containing custom javascript functions to be used as ops")
	base := flag.String("base-path", "", "optional base-path to prepend to annotation files in the config")
	procs := flag.Int("p", 2, "number of processes to use. default is 2")
	flag.Parse()
	inFiles := flag.Args()
	if len(inFiles) != 2 {
		fmt.Printf(`Usage:
%s config.toml input.vcf > annotated.vcf

To run a server:

%s server

`, os.Args[0], os.Args[0])
		flag.PrintDefaults()
		return
	}
	queryFile := inFiles[1]
	if !(xopen.Exists(queryFile)) {
		fmt.Fprintf(os.Stderr, "\nERROR: can't find query file: %s\n", queryFile)
		os.Exit(2)
	}
	if !(xopen.Exists(queryFile + ".tbi")) {
		fmt.Fprintf(os.Stderr, "\nERROR: can't find index for query file: %s\n", queryFile)
		os.Exit(2)
	}
	runtime.GOMAXPROCS(*procs)

	var config Config
	if _, err := toml.DecodeFile(inFiles[0], &config); err != nil {
		panic(err)
	}
	config.Base = *base
	for _, a := range config.Annotation {
		err := CheckAnno(&a)
		if err != nil {
			log.Fatal("CheckAnno err:", err)
		}
	}
	sources, e := config.Sources()
	if e != nil {
		log.Fatal(e)
	}

	log.Printf("found %d sources from %d files\n", len(sources), len(config.Annotation))
	// NOTE(review): presumably serves the pprof debug endpoints — confirm that
	// net/http/pprof is imported by this file.
	go func() {
		log.Println(http.ListenAndServe("localhost:6060", nil))
	}()

	jsString := ReadJs(*js)
	strict := !*notstrict
	var a = NewAnnotator(sources, jsString, *ends, strict)

	var out io.Writer = os.Stdout
	defer os.Stdout.Close()

	var err error
	var bx interfaces.RelatableIterator
	b, err := bix.New(queryFile, 1)
	if err != nil {
		log.Fatal(fmt.Errorf("error opening query file and/or index: %s ... %s", queryFile, err))
	}

	a.UpdateHeader(b)

	// A nil query asks bix for the whole file.
	// NOTE(review): inferred from the call site — confirm against bix.
	bx, err = b.Query(nil)
	if err != nil {
		log.Fatal(err)
	}

	files, err := a.SetupStreams()
	if err != nil {
		log.Fatal(err)
	}
	aends := INTERVAL
	if *ends {
		aends = BOTH
	}

	fn := func(v interfaces.Relatable) {
		a.AnnotateEnds(v, aends)
	}

	queryables := make([]interfaces.Queryable, len(files))
	for i, f := range files {
		q, err := bix.New(f, 1)
		if err != nil {
			log.Fatal(err)
		}
		queryables[i] = q
	}
	stream := irelate.PIRelate(4000, 20000, bx, *ends, fn, queryables...)

	// make a new writer from the string header.
	out, err = vcfgo.NewWriter(out, b.VReader.Header)
	if err != nil {
		log.Fatal(err)
	}

	start := time.Now()
	n := 0

	if os.Getenv("IRELATE_PROFILE") == "TRUE" {
		log.Println("profiling to: irelate.pprof")
		f, err := os.Create("irelate.pprof")
		if err != nil {
			panic(err)
		}
		// Deferred calls run last-in first-out: the profile is stopped
		// (and flushed) before the file is closed.
		defer f.Close()
		if err := pprof.StartCPUProfile(f); err != nil {
			panic(err)
		}
		defer pprof.StopCPUProfile()
	}

	for interval := range stream {
		fmt.Fprintf(out, "%s\n", interval)
		n++
	}
	printTime(start, n)
}