func (s *VariantSuite) TestVariantGetInt(c *C) { rdr, err := vcfgo.NewReader(s.reader, true) c.Assert(err, IsNil) v := rdr.Read().(*vcfgo.Variant) ns, err := v.Info_.Get("NS") c.Assert(err, IsNil) c.Assert(ns, Equals, 3) dp, err := v.Info_.Get("DP") c.Assert(dp, Equals, 14) c.Assert(err, IsNil) nsf, err := v.Info_.Get("NS") c.Assert(err, IsNil) c.Assert(nsf, Equals, int(3)) dpf, err := v.Info_.Get("DP") c.Assert(err, IsNil) c.Assert(dpf, Equals, int(14)) hqs, err := v.Info_.Get("AF") c.Assert(hqs, DeepEquals, []float32{0.5}) c.Assert(err, IsNil) dpfs, err := v.Info_.Get("DP") c.Assert(err, IsNil) c.Assert(dpfs, DeepEquals, 14) }
func (s *VariantSuite) TestInfoField(c *C) { rdr, err := vcfgo.NewReader(s.reader, false) c.Assert(err, IsNil) v := rdr.Read().(*vcfgo.Variant) vstr := fmt.Sprintf("%s", v.Info_) c.Assert(vstr, Equals, "NS=3;DP=14;AF=0.5;DB;H2") }
func main() { f, err := xopen.Ropen(os.Args[1]) check(err) vcf, err := vcfgo.NewReader(f, true) check(err) tbx, err := bix.New(os.Args[1]) check(err) var rdr io.Reader tot := 0 t0 := time.Now() for { v := vcf.Read() if v == nil { break } rdr, err = tbx.ChunkedReader(location{v.Chrom(), int(v.Start()), int(v.Start()) + 1}) check(err) brdr := bufio.NewReader(rdr) for _, err := brdr.ReadString('\n'); err == nil; _, err = brdr.ReadString('\n') { tot += 1 } } log.Println(tot, time.Since(t0).Seconds()) main2() }
func (s *VariantSuite) TestStartEnd(c *C) { rdr, err := vcfgo.NewReader(s.reader, false) c.Assert(err, IsNil) v := rdr.Read() c.Assert(int(v.Start()), Equals, 14369) c.Assert(int(v.End()), Equals, 14370) }
func (s *HeaderSuite) TestReaderHeaderParseSample(c *C) { r, err := vcfgo.NewReader(s.reader, false) c.Assert(err, IsNil) v := r.Read().(*vcfgo.Variant) c.Assert(r.Error(), IsNil) fmt := v.Format c.Assert(fmt, DeepEquals, []string{"GT", "GQ", "DP", "HQ"}) }
func (s *RegressionSuite) TestRegr2(c *C) { rdr, err := vcfgo.NewReader(s.reader, false) c.Assert(err, IsNil) v := rdr.Read().(*vcfgo.Variant) v = rdr.Read().(*vcfgo.Variant) str := fmt.Sprintf("%s", v) c.Assert(str, Equals, "1\t98685\t.\tG\tA\t610.5\t.\tAB=0;ABP=0") }
func (s *CNVSuite) TestDupIns(c *C) { r, err := vcfgo.NewReader(s.reader, false) c.Assert(err, IsNil) var v *vcfgo.Variant v = r.Read().(*vcfgo.Variant) c.Assert(int(v.End()), Equals, 321887) left, right, ok := v.CIPos() c.Assert(ok, Equals, true) c.Assert(int(left), Equals, 321682-56-1) c.Assert(int(right), Equals, 321682+20) eleft, eright, ok := v.CIEnd() c.Assert(ok, Equals, true) c.Assert(int(eleft), Equals, 321887-10-1) c.Assert(int(eright), Equals, 321887+62) v = r.Read().(*vcfgo.Variant) c.Assert(int(v.End()), Equals, 14477381) v = r.Read().(*vcfgo.Variant) c.Assert(int(v.Start()), Equals, 9425915) c.Assert(int(v.End()), Equals, 9425916) v = r.Read().(*vcfgo.Variant) c.Assert(int(v.End()), Equals, 12686200) v = r.Read().(*vcfgo.Variant) c.Assert(int(v.End()), Equals, 18665204) v = r.Read().(*vcfgo.Variant) // INS c.Assert(int(v.End()), Equals, 18665204) v = r.Read().(*vcfgo.Variant) // CNV c.Assert(int(v.End()), Equals, 18665204) v = r.Read().(*vcfgo.Variant) // CNV c.Assert(int(v.End()), Equals, 43266825) left, right, ok = v.CIPos() c.Assert(left, Equals, v.Start()) c.Assert(right, Equals, v.Start()+uint32(1)) c.Assert(ok, Equals, false) eleft, eright, ok = v.CIEnd() c.Assert(eleft, Equals, v.End()-1) c.Assert(right, Equals, v.End()) c.Assert(ok, Equals, false) v = r.Read().(*vcfgo.Variant) // BND c.Assert(int(v.Start()), Equals, 755891) c.Assert(int(v.End()), Equals, 755891+1) }
func (s *RegressionSuite) TestRegr1(c *C) { rdr, err := vcfgo.NewReader(s.reader, false) c.Assert(err, IsNil) v := rdr.Read().(*vcfgo.Variant) snp, err := v.Info().Get("TYPE") c.Assert(err, IsNil) c.Assert(snp, DeepEquals, []string{"snp"}) str := fmt.Sprintf("%s", v) c.Assert(str, Equals, "1\t98683\t.\tG\tA\t610.5\t.\tAB=0.282443;ABP=56.8661;AC=11;AF=0.34375;AN=32;AO=45;CIGAR=1X;TYPE=snp;XX=0.44,0.88") }
func main2() { /* f, err := os.Create("q.pprof") if err != nil { panic(err) } pprof.StartCPUProfile(f) defer pprof.StopCPUProfile() */ t, err := cgotbx.New(os.Args[1]) if err != nil { log.Fatal(err) } var rdr io.Reader for i := 0; i < 1000; i++ { tot := 0 f, err := xopen.Ropen(os.Args[1]) check(err) vcf, err := vcfgo.NewReader(f, true) check(err) t0 := 0 for { v := vcf.Read() if v == nil { break } ts := time.Now() for k := 0; k < 100; k++ { rdr, err = t.Get(v.Chrom(), int(v.Start()), int(v.Start())+1) check(err) brdr := bufio.NewReader(rdr) //fmt.Fprintln(os.Stderr, v.Chrom(), v.Start(), v.Start()+1) j := 0 for l, err := brdr.ReadString('\n'); err == nil; l, err = brdr.ReadString('\n') { //fmt.Fprintln(os.Stderr, "...", l[:20]) _ = l tot += 1 j += 1 } if j == 0 { log.Fatal("should have found something") } } t0 += int(time.Since(ts).Nanoseconds()) } log.Println(tot, float64(t0)*1e-9) } }
func Example() { f, _ := os.Open("examples/test.auto_dom.no_parents.vcf") rdr, err := vcfgo.NewReader(f, false) if err != nil { panic(err) } for { variant := rdr.Read().(*vcfgo.Variant) if variant == nil { break } fmt.Printf("%s\t%d\t%s\t%s\n", variant.Chromosome, variant.Pos, variant.Ref, variant.Alt) dp, _ := variant.Info().Get("DP") fmt.Printf("%v", dp.(int) > 10) // Output: asdf } // Print all accumulated errors to stderr fmt.Fprintln(os.Stderr, rdr.Error()) }
func (s *VariantSuite) TestInfoMap(c *C) { rdr, err := vcfgo.NewReader(s.reader, false) c.Assert(err, IsNil) v := rdr.Read().(*vcfgo.Variant) vstr := fmt.Sprintf("%s", v) c.Assert(vstr, Equals, "20\t14370\trs6054257\tG\tA\t29.0\tPASS\tNS=3;DP=14;AF=0.5;DB;H2\tGT:GQ:DP:HQ\t0|0:48:1:51,51\t1|0:48:8:51,51\t1/1:43:5:.,.") v.Info_.Set("asdf", 123) v.Info_.Set("float", 123.2001) has, err := v.Info_.Get("asdf") c.Assert(has, Equals, 123) val, err := v.Info_.Get("float") vv, ok := val.(float64) c.Assert(ok, Equals, true) c.Assert(vv-123.2001 < 1e-4 || 123.2001-vv < 1e-4, Equals, true) c.Assert(err, IsNil) c.Assert(fmt.Sprintf("%s", v.Info_), Equals, "NS=3;DP=14;AF=0.5;DB;H2;asdf=123;float=123.2001") rdr.Clear() }
func (s *HeaderSuite) TestSamples(c *C) { r, err := vcfgo.NewReader(s.reader, false) c.Assert(err, IsNil) v := r.Read().(*vcfgo.Variant) samp := v.Samples[0] c.Assert(samp.DP, Equals, 1) c.Assert(samp.GQ, Equals, 48) f, err := v.GetGenotypeField(samp, "HQ", -1) c.Assert(err, IsNil) c.Assert(f, DeepEquals, []int{51, 51}) samp2 := v.Samples[2] f, err = v.GetGenotypeField(samp2, "HQ", -1) c.Assert(err, IsNil) c.Assert(f, DeepEquals, []int{-1, -1}) c.Assert(samp.GT, DeepEquals, []int{0, 0}) c.Assert(samp.Phased, DeepEquals, true) c.Assert(samp2.GT, DeepEquals, []int{1, 1}) c.Assert(samp2.Phased, DeepEquals, false) var lastV *vcfgo.Variant var vv interfaces.IVariant for vv = r.Read(); vv != nil; vv = r.Read() { v := vv.(*vcfgo.Variant) if v == nil { break } c.Assert(v.Chromosome, Equals, "20") lastV = v } c.Assert(int(lastV.Pos), Equals, int(1234567)) c.Assert(lastV.Filter, Equals, "PASS") }
func (b *BadVcfSuite) TestReaderHeaderParseSample(c *C) { r, err := vcfgo.NewReader(b.reader, false) c.Assert(r, IsNil) c.Assert(err, NotNil) }
// New returns a &Bix func New(path string, workers ...int) (*Bix, error) { f, err := os.Open(path + ".tbi") if err != nil { return nil, err } defer f.Close() gz, err := gzip.NewReader(f) if err != nil { return nil, err } defer gz.Close() idx, err := tabix.ReadFrom(gz) if err != nil { return nil, err } n := 1 if len(workers) > 0 { n = workers[0] } b, err := os.Open(path) if err != nil { return nil, err } bgz, err := bgzf.NewReader(b, n) if err != nil { return nil, err } var h []string tbx := &Bix{bgzf: bgz, path: path, file: b} buf := bufio.NewReader(bgz) l, err := buf.ReadString('\n') if err != nil { return tbx, err } for i := 0; i < int(idx.Skip) || rune(l[0]) == idx.MetaChar; i++ { h = append(h, l) l, err = buf.ReadString('\n') if err != nil { return tbx, err } } header := strings.Join(h, "") if len(h) > 0 && strings.HasSuffix(tbx.path, ".vcf.gz") { var err error h := strings.NewReader(header) tbx.VReader, err = vcfgo.NewReader(h, true) if err != nil { return nil, err } } else if len(h) > 0 { htab := strings.Split(strings.TrimSpace(h[len(h)-1]), "\t") // try to find ref and alternate columns to make an IREFALT for i, hdr := range htab { if l := strings.ToLower(hdr); l == "ref" || l == "reference" { tbx.refalt = append(tbx.refalt, i) break } } for i, hdr := range htab { if l := strings.ToLower(hdr); l == "alt" || l == "alternate" { tbx.refalt = append(tbx.refalt, i) break } } if len(tbx.refalt) != 2 { tbx.refalt = nil } } tbx.buf = buf tbx.Index = idx return tbx, nil }
func Vopen(rdr io.Reader, hdr *vcfgo.Header) (*vcfgo.Reader, error) { if hdr == nil { return vcfgo.NewReader(rdr, true) } return vcfgo.NewWithHeader(rdr, hdr, true) }
func main() { flag.Parse() files := flag.Args() f, err := os.Open(files[0]) r := io.Reader(f) vr, err := vcfgo.NewReader(r, false) if err != nil { panic(err) } fmt.Printf("%v\n", vr) variant := vr.Read() fmt.Println(vr.Error()) fmt.Println("variant:", variant) if len(variant.Samples) > 0 { if _, ok := vr.Header.SampleFormats["PL"]; ok { if vr.Header.SampleFormats["PL"].Type == "Integer" { fmt.Println(variant.GetGenotypeField(variant.Samples[0], "PL", int(-1))) } else { fmt.Println(variant.GetGenotypeField(variant.Samples[0], "PL", float32(-1))) } } } fmt.Println(vr.Error()) vr.Clear() for { variant = vr.Read() if variant == nil { if e := vr.Error(); e != io.EOF && e != nil { vr.Clear() } break } if vr.Error() != nil { fmt.Println(vr.Error()) } vr.Clear() if len(variant.Samples) > 0 { var pl interface{} if _, ok := vr.Header.SampleFormats["PL"]; ok { if vr.Header.SampleFormats["PL"].Type == "Integer" { pl, err = variant.GetGenotypeField(variant.Samples[0], "PL", int(-1)) } else { pl, err = variant.GetGenotypeField(variant.Samples[0], "PL", float32(-1)) } } fmt.Println("ERR:", err) fmt.Println(variant.Samples[0]) if err != nil && variant.Samples[0] != nil { log.Println("BBBBBBBBBBBBBBBBBBB") if _, ok := vr.Header.SampleFormats["PL"]; ok { fmt.Println("\n") fmt.Println(variant.Samples[0]) log.Println("DDDDDDDDDDDDDDDDD") log.Fatal(err) } } if variant.Samples[0] != nil { fmt.Println("PL:", pl, "GQ:", variant.Samples[0].GQ, "DP:", variant.Samples[0].DP) } } } fmt.Println("OK") }