示例#1
0
func benchmarkAnno(b *testing.B) {
	var configs shared.Config
	if _, err := toml.DecodeFile("example/conf.toml", &configs); err != nil {
		panic(err)
	}

	out := bufio.NewWriter(ioutil.Discard)
	Lua, _ := xopen.Ropen("example/custom.lua")
	lbytes, _ := ioutil.ReadAll(Lua)
	l_string := string(lbytes)

	srcs, err := configs.Sources()
	if err != nil {
		log.Fatal(err)
	}
	empty := make([]api.PostAnnotation, 0)
	for n := 0; n < b.N; n++ {
		if err != nil {
			log.Fatal(err)
		}
		a := api.NewAnnotator(srcs, l_string, false, true, empty)
		qrdr, err := xopen.Ropen("example/query.vcf.gz")
		if err != nil {
			log.Fatal(err)
		}
		qstream, query, err := parsers.VCFIterator(qrdr)
		queryables, err := a.Setup(query)
		if err != nil {
			log.Fatal(err)
		}

		//files := []string{"example/cadd.sub.txt.gz", "example/query.vcf.gz", "example/fitcons.bed.gz"}
		//files := []string{"example/query.vcf.gz", "example/fitcons.bed.gz"}

		fn := func(v interfaces.Relatable) {
			a.AnnotateEnds(v, api.INTERVAL)
		}
		stream := irelate.PIRelate(6000, 20000, qstream, false, fn, queryables...)

		for interval := range stream {
			fmt.Fprintf(out, "%s\n", interval)
		}
		out.Flush()
	}
}
示例#2
0
func main() {
	fmt.Fprintf(os.Stderr, `
=============================================
vcfanno version %s [built with %s]

see: https://github.com/brentp/vcfanno
=============================================
`, VERSION, runtime.Version())

	ends := flag.Bool("ends", false, "annotate the start and end as well as the interval itself.")
	notstrict := flag.Bool("permissive-overlap", false, "annotate with an overlapping variant even it doesn't"+
		" share the same ref and alt alleles. Default is to require exact match between variants.")
	lua := flag.String("lua", "", "optional path to a file containing custom javascript functions to be used as ops")
	base := flag.String("base-path", "", "optional base-path to prepend to annotation files in the config")
	procs := flag.Int("p", 2, "number of processes to use.")
	flag.Parse()
	inFiles := flag.Args()
	if len(inFiles) != 2 {
		fmt.Printf(`Usage:
%s config.toml input.vcf > annotated.vcf

`, os.Args[0])
		flag.PrintDefaults()
		return
	}
	queryFile := inFiles[1]
	if !(xopen.Exists(queryFile) || queryFile == "") {
		fmt.Fprintf(os.Stderr, "\nERROR: can't find query file: %s\n", queryFile)
		os.Exit(2)
	}
	runtime.GOMAXPROCS(*procs)

	var config Config
	if _, err := toml.DecodeFile(inFiles[0], &config); err != nil {
		if strings.Contains(err.Error(), "Expected value but found") {
			fmt.Fprintln(os.Stderr, "\nNOTE: you must quote values in the conf file, e.g. fields=['AC', 'AN'] instead of fields=[AC, AN]")
		}
		panic(err)
	}
	config.Base = *base
	for _, a := range config.Annotation {
		err := CheckAnno(&a)
		if err != nil {
			log.Fatal("CheckAnno err:", err)
		}
		for _, op := range a.Ops {
			if len(op) > 4 && op[:4] == "lua:" && *lua == "" {
				log.Fatal("ERROR: requested lua op without specifying -lua flag")
			}
		}
	}
	for i := range config.PostAnnotation {
		r := config.PostAnnotation[i]
		err := CheckPostAnno(&r)
		if err != nil {
			log.Fatal(fmt.Sprintf("error in postannotaion section %s err: %s", r.Name, err))
		}
		if len(r.Op) > 4 && r.Op[:4] == "lua:" && *lua == "" {
			log.Fatal("ERROR: requested lua op without specifying -lua flag")
		}
	}

	sources, e := config.Sources()
	if e != nil {
		log.Fatal(e)
	}

	log.Printf("found %d sources from %d files\n", len(sources), len(config.Annotation))
	/*
		go func() {
			log.Println(http.ListenAndServe("localhost:6060", nil))
		}()
	*/

	luaString := ReadLua(*lua)
	strict := !*notstrict
	var a = NewAnnotator(sources, luaString, *ends, strict, config.PostAnnotation)

	var out io.Writer = os.Stdout
	defer os.Stdout.Close()

	var err error
	qrdr, err := xopen.Ropen(queryFile)
	if err != nil {
		log.Fatal(fmt.Errorf("error opening query file %s: %s", queryFile, err))
	}
	qstream, query, err := parsers.VCFIterator(qrdr)
	if err != nil {
		log.Fatal(fmt.Errorf("error parsing VCF query file %s: %s", queryFile, err))
	}

	queryables, err := a.Setup(query)
	if err != nil {
		log.Fatal(err)
	}
	aends := INTERVAL
	if *ends {
		aends = BOTH
	}

	lastMsg := struct {
		sync.RWMutex
		s [10]string
		i int
	}{}

	fn := func(v interfaces.Relatable) {
		e := a.AnnotateEnds(v, aends)
		if e != nil {
			lastMsg.RLock()
			em := e.Error()
			found := false
			for i := len(lastMsg.s) - 1; i >= 0; i-- {
				if em == lastMsg.s[i] {
					found = true
					break
				}
			}
			if !found {
				log.Println(e, ">> this error/warning may occur many times. reporting once here...")
				lastMsg.RUnlock()
				lastMsg.Lock()
				lastMsg.s[lastMsg.i] = em
				if lastMsg.i == len(lastMsg.s)-1 {
					lastMsg.i = -1
				}
				lastMsg.i++

				lastMsg.Unlock()
			} else {
				lastMsg.RUnlock()
			}
		}
	}

	maxGap := envGet("IRELATE_MAX_GAP", 20000)
	maxChunk := envGet("IRELATE_MAX_CHUNK", 8000)

	// make a new writer from the string header.
	out, err = vcfgo.NewWriter(out, query.Header)

	stream := irelate.PIRelate(maxChunk, maxGap, qstream, *ends, fn, queryables...)

	if err != nil {
		log.Fatal(err)
	}

	start := time.Now()
	n := 0

	/*
		if os.Getenv("IRELATE_PROFILE") == "TRUE" {
			log.Println("profiling to: irelate.pprof")
			f, err := os.Create("irelate.pprof")
			if err != nil {
				panic(err)
			}
			pprof.StartCPUProfile(f)
			defer pprof.StopCPUProfile()
		}
	*/

	for interval := range stream {
		//log.Printf("%v\n", interval)
		fmt.Fprintln(out, interval)
		n++
	}
	printTime(start, n)
}
示例#3
0
func main() {
	fmt.Fprintf(os.Stderr, `
=============================================
vcfanno version %s [built with %s]

see: https://github.com/brentp/vcfanno
=============================================
`, VERSION, runtime.Version())

	ends := flag.Bool("ends", false, "annotate the start and end as well as the interval itself.")
	notstrict := flag.Bool("permissive-overlap", false, "annotate with an overlapping variant even it doesn't"+
		" share the same ref and alt alleles. Default is to require exact match between variants.")
	lua := flag.String("lua", "", "optional path to a file containing custom javascript functions to be used as ops")
	base := flag.String("base-path", "", "optional base-path to prepend to annotation files in the config")
	procs := flag.Int("p", 2, "number of processes to use. default is 2")
	flag.Parse()
	inFiles := flag.Args()
	if len(inFiles) != 2 {
		fmt.Printf(`Usage:
%s config.toml intput.vcf > annotated.vcf

`, os.Args[0])
		flag.PrintDefaults()
		return
	}
	queryFile := inFiles[1]
	if !(xopen.Exists(queryFile) || queryFile == "") {
		fmt.Fprintf(os.Stderr, "\nERROR: can't find query file: %s\n", queryFile)
		os.Exit(2)
	}
	runtime.GOMAXPROCS(*procs)

	var config Config
	if _, err := toml.DecodeFile(inFiles[0], &config); err != nil {
		panic(err)
	}
	config.Base = *base
	for _, a := range config.Annotation {
		err := CheckAnno(&a)
		if err != nil {
			log.Fatal("CheckAnno err:", err)
		}
	}
	for i := range config.PostAnnotation {
		r := config.PostAnnotation[i]
		err := CheckPostAnno(&r)
		if err != nil {
			log.Fatal(fmt.Sprintf("error in postannotaion section %s err: %s", r.Name, err))
		}
	}

	sources, e := config.Sources()
	if e != nil {
		log.Fatal(e)
	}

	log.Printf("found %d sources from %d files\n", len(sources), len(config.Annotation))
	/*
		go func() {
			log.Println(http.ListenAndServe("localhost:6060", nil))
		}()
	*/

	luaString := ReadLua(*lua)
	strict := !*notstrict
	var a = NewAnnotator(sources, luaString, *ends, strict, config.PostAnnotation)

	var out io.Writer = os.Stdout
	defer os.Stdout.Close()

	var err error
	qrdr, err := xopen.Ropen(queryFile)
	if err != nil {
		log.Fatal(fmt.Errorf("error opening query file %s: %s", queryFile, err))
	}
	qstream, query, err := parsers.VCFIterator(qrdr)
	if err != nil {
		log.Fatal(fmt.Errorf("error parsing VCF query file %s: %s", queryFile, err))
	}

	queryables, err := a.Setup(query)
	if err != nil {
		log.Fatal(err)
	}
	aends := INTERVAL
	if *ends {
		aends = BOTH
	}

	fn := func(v interfaces.Relatable) {
		e := a.AnnotateEnds(v, aends)
		if e != nil {
			log.Println(e)
		}
	}

	smaxGap := os.Getenv("IRELATE_MAX_GAP")
	maxGap := 20000
	if smaxGap != "" {
		maxGap, err = strconv.Atoi(smaxGap)
		if err != nil {
			log.Printf("couldn't parse %s using %d\n", smaxGap, maxGap)
		} else {
			log.Printf("using maxGap of %d\n", maxGap)
		}
	}

	stream := irelate.PIRelate(8000, maxGap, qstream, *ends, fn, queryables...)

	// make a new writer from the string header.
	out, err = vcfgo.NewWriter(out, query.Header)

	if err != nil {
		log.Fatal(err)
	}

	start := time.Now()
	n := 0

	if os.Getenv("IRELATE_PROFILE") == "TRUE" {
		log.Println("profiling to: irelate.pprof")
		f, err := os.Create("irelate.pprof")
		if err != nil {
			panic(err)
		}
		pprof.StartCPUProfile(f)
		defer pprof.StopCPUProfile()
	}

	for interval := range stream {
		//log.Printf("%v\n", interval)
		fmt.Fprintln(out, interval)
		n++
	}
	printTime(start, n)
}