// BenchmarkRoundtripFileBoom times a full read/write round trip with boom:
// on every iteration the BAM file named by *file is opened, each record read
// from it is written to a BAM writer backed by /dev/null, and the reader and
// the /dev/null handle are closed. The benchmark is skipped when *file is
// empty.
func BenchmarkRoundtripFileBoom(b *testing.B) {
	if *file == "" {
		b.Skip("no file specified")
	}

	for n := 0; n < b.N; n++ {
		src, err := boom.OpenBAM(*file)
		if err != nil {
			b.Fatalf("Open failed: %v", err)
		}
		sink, err := os.OpenFile("/dev/null", os.O_APPEND|os.O_RDWR, 0666)
		if err != nil {
			b.Fatalf("Open failed: %v", err)
		}
		dst, err := boom.OpenBAMFile(sink, "bw", src.Header())
		if err != nil {
			b.Fatalf("NewWriter failed: %v", err)
		}

		// Copy records until the reader returns any error; the error itself
		// is not inspected, so end-of-file and read failures both stop the copy.
		for rec, _, rerr := src.Read(); rerr == nil; rec, _, rerr = src.Read() {
			if _, werr := dst.Write(rec); werr != nil {
				b.Fatalf("Write failed: %v", werr)
			}
		}

		src.Close()
		sink.Close()
	}
}
Beispiel #2
0
// filterBam copies records from the BAM file in to a new BAM file out, which
// is created with the header of the input. A record is written only when its
// Score is at least mapQb and qualOk(record, minId, minQ, minAvQ) reports
// true. It returns nil once the input reports io.EOF, and otherwise the first
// error returned by opening, creating, reading or writing.
func filterBam(in, out string, minId, minQ int, mapQb byte, minAvQ float64) error {
	src, err := boom.OpenBAM(in)
	if err != nil {
		return err
	}
	defer src.Close()

	dst, err := boom.CreateBAM(out, src.Header(), true)
	if err != nil {
		return err
	}
	defer dst.Close()

	for {
		rec, _, err := src.Read()
		switch {
		case err == io.EOF:
			// End of input: everything that qualified has been written.
			return nil
		case err != nil:
			return err
		}

		// Skip records that fail either the score or the quality test.
		if rec.Score() < mapQb || !qualOk(rec, minId, minQ, minAvQ) {
			continue
		}
		if _, err := dst.Write(rec); err != nil {
			return err
		}
	}
}
Beispiel #3
0
// main scans the BAM file named by in and prints, on one line, the file name,
// the number of counted reads, the number of candidate reads and their ratio.
//
// A candidate read is one that is not flagged Unmapped and passes qualOk. A
// candidate is counted when its Score is at least mapQb and is not 0xff; if
// annot is non-empty, the candidate must additionally have at least one match
// in the class filter tree for its reference. Any error is printed to stderr
// and terminates the program with status 1.
func main() {
	// die reports err on stderr and exits; it never returns.
	die := func(err error) {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	bf, err := boom.OpenBAM(in)
	if err != nil {
		die(err)
	}
	defer bf.Close()
	names := bf.RefNames()

	// classFilt stays nil when no annotation was given, which disables
	// the per-reference filtering below.
	var classFilt []interval.IntTree
	if annot != "" {
		classFilt, err = filterFeats(annot, classes, names, thresh)
		if err != nil {
			die(err)
		}
	}

	var (
		reads  int
		totals int
	)
	for {
		r, _, err := bf.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			die(err)
		}

		if r.Flags()&boom.Unmapped != 0 {
			continue
		}
		if !qualOk(r, minId, minQ, minAvQ) {
			continue
		}
		totals++

		if classFilt != nil {
			hits := classFilt[r.RefID()].Get(intBam{r})
			if len(hits) == 0 {
				continue
			}
		}
		if s := r.Score(); s >= mapQb && s != 0xff {
			reads++
		}
	}

	fmt.Println(in, reads, totals, float64(reads)/float64(totals))
}
// BenchmarkReadFileBoom times reading a BAM file with boom: on every
// iteration the file named by *file is opened, records are read and discarded
// until Read returns an error, and the reader is closed. The benchmark is
// skipped when *file is empty.
func BenchmarkReadFileBoom(b *testing.B) {
	if *file == "" {
		b.Skip("no file specified")
	}

	for n := 0; n < b.N; n++ {
		src, err := boom.OpenBAM(*file)
		if err != nil {
			b.Fatalf("Open failed: %v", err)
		}

		// Drain the reader; the first error of any kind ends the pass.
		for {
			if _, _, rerr := src.Read(); rerr != nil {
				break
			}
		}

		src.Close()
	}
}
Beispiel #5
0
// checkNames opens each BAM file in files in turn and verifies that all of
// them declare the same reference names in the same order.
//
// It returns the reference names of the last file opened (nil when files is
// empty). It returns the error from boom.OpenBAM if a file cannot be opened,
// and a "header mismatch" error if any file's reference names differ from
// those of the file before it, either in count or in content. Every file
// that was opened is closed before checkNames returns.
func checkNames(files set) ([]string, error) {
	var (
		names []string
		have  bool // true once names holds the list from a previous file
	)
	for _, in := range files {
		bf, err := boom.OpenBAM(in)
		if err != nil {
			return nil, err
		}
		// Take the names and release the handle immediately so that no
		// return path below can leak it.
		cur := bf.RefNames()
		bf.Close()

		if have {
			// Compare lengths first: indexing names with a longer list would
			// panic, and a shorter list would otherwise pass as a prefix.
			if len(cur) != len(names) {
				return nil, errors.New("header mismatch")
			}
			for i, n := range cur {
				if names[i] != n {
					return nil, errors.New("header mismatch")
				}
			}
		}
		names, have = cur, true
	}
	return names, nil
}
Beispiel #6
0
// main accumulates read coverage on annotated features and writes the scored
// features as GFF to stdout.
//
// The reference names shared by all files in reads are obtained with
// checkNames and used to build per-reference feature trees with annotFeats.
// Every record of every file is counted; for each record not flagged
// Unmapped, the record's Len is added to the FeatScore of every matching
// feature in the tree for the record's reference. Afterwards each feature
// that received a score has it divided by (total records * feature length),
// and the scored features are sorted and written out. Any error is printed
// to stderr and terminates the program with status 1.
func main() {
	// die reports err on stderr and exits; it never returns.
	die := func(err error) {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	names, err := checkNames(reads)
	if err != nil {
		die(err)
	}

	feats, err := annotFeats(annot, classes, names)
	if err != nil {
		die(err)
	}

	var total int
	for _, path := range reads {
		fmt.Fprintf(os.Stderr, "Reading %q\n", path)
		bf, err := boom.OpenBAM(path)
		if err != nil {
			die(err)
		}

		// id numbers the records of the current file from zero and is
		// passed along with the record as the query interval.
		var id uintptr
		for {
			r, _, err := bf.Read()
			if err == io.EOF {
				break
			}
			if err != nil {
				die(err)
			}
			total++

			if r.Flags()&boom.Unmapped == 0 {
				feats[r.RefID()].DoMatching(func(iv interval.IntInterface) bool {
					f := iv.(intGff)
					// Allocate the score lazily so that features never hit
					// by a read keep a nil FeatScore.
					if f.FeatScore == nil {
						f.FeatScore = new(float64)
					}
					*f.FeatScore += float64(r.Len())
					return false
				}, intBam{r, id})
			}
			id++
		}

		bf.Close()
	}

	var exp gffFeatures
	for _, tree := range feats {
		tree.Do(func(iv interval.IntInterface) bool {
			f := iv.(intGff)
			// Only features that were hit at least once are reported.
			if f.FeatScore != nil {
				*f.FeatScore /= float64(total) * float64(f.Len())
				exp = append(exp, f.Feature)
			}
			return false
		})
	}
	sort.Sort(exp)

	out := gff.NewWriter(os.Stdout, 60, false)
	for _, f := range exp {
		out.Write(f)
	}
}
Beispiel #7
0
// main builds per-repeat-type position counts from the features matched by
// reads and prints them as tab-separated lines to stdout.
//
// The reference names shared by all files in reads are obtained with
// checkNames and used to build per-reference feature trees with annotFeats.
// Records are selected according to filter (all, primary or secondary, with
// strict additionally excluding records that satisfy the opposite predicate);
// any other filter value panics. For each selected record not flagged
// Unmapped, every matching feature is visited, but each distinct feature is
// processed at most once over the whole run. For a feature of type "repeat"
// with a non-empty "repeat" attribute, the attribute is split on whitespace:
// field 0 keys the count vector, field 1 is stored alongside it, and fields 2
// and 3 are parsed with mustAtoi as an inclusive position range whose every
// position is incremented. Any error is printed to stderr and terminates the
// program with status 1.
func main() {
	// die reports err on stderr and exits; it never returns.
	die := func(err error) {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	names, err := checkNames(reads)
	if err != nil {
		die(err)
	}

	feats, err := annotFeats(annot, classes, names)
	if err != nil {
		die(err)
	}

	var (
		vm   = make(map[string]*vector)
		fm   = make(map[string]string)
		seen = make(map[*gff.Feature]struct{})
	)

	for _, path := range reads {
		fmt.Fprintf(os.Stderr, "Reading %q\n", path)
		bf, err := boom.OpenBAM(path)
		if err != nil {
			die(err)
		}

		for {
			r, _, err := bf.Read()
			if err == io.EOF {
				break
			}
			if err != nil {
				die(err)
			}

			switch filter {
			case all:
				// Every record is kept.
			case primary:
				if !isPrimary(r) || (strict && isSecondary(r)) {
					continue
				}
			case secondary:
				if !isSecondary(r) || (strict && isPrimary(r)) {
					continue
				}
			default:
				panic(fmt.Errorf("illegal filter %d", filter))
			}

			if r.Flags()&boom.Unmapped != 0 {
				continue
			}
			feats[r.RefID()].DoMatching(func(iv interval.IntInterface) bool {
				f := iv.(intGff)

				// Mark the feature as seen before any further checks so it
				// is never revisited, whatever its type.
				if _, dup := seen[f.Feature]; dup {
					return false
				}
				seen[f.Feature] = struct{}{}

				if f.Feature.Feature != "repeat" {
					return false
				}
				att := f.FeatAttributes.Get("repeat")
				if att == "" {
					return false
				}
				fields := strings.Fields(att)

				lo := mustAtoi(fields[2])
				hi := mustAtoi(fields[3])
				key := fields[0]
				vec, known := vm[key]
				if !known {
					vec = &vector{}
					vm[key] = vec
					fm[key] = fields[1]
				}
				// Walk from the high end down to the low end, inclusive.
				for i := hi; i >= lo; i-- {
					vec.inc(i)
				}
				return false
			}, intBam{r})
		}

		bf.Close()
	}

	for typ, vec := range vm {
		for pos, val := range *vec {
			fmt.Printf("%s\t%s\t%d\t%d\n", fm[typ], typ, pos, val)
		}
	}
}
Beispiel #8
0
// main builds stranded per-repeat-type position counts from the reads that
// match annotated features and prints the non-zero counts as tab-separated
// lines to stdout.
//
// The reference names shared by all files in reads are obtained with
// checkNames and used to build per-reference feature trees with annotFeats.
// Records are selected according to filter (all, primary or secondary, with
// strict additionally excluding records that satisfy the opposite predicate);
// any other filter value panics. For each selected record not flagged
// Unmapped, every matching feature with a non-empty "repeat" attribute is
// processed: the attribute is split on whitespace, field 0 keys the count
// vector, field 1 is stored alongside it, and field 2 is parsed as an integer
// offset (a parse failure panics). Each position from the record's Start up
// to but excluding its End is translated into a location relative to the
// feature according to the feature strand and incremented on the feature's
// strand, negated when the record carries the Reverse flag. Features whose
// strand is neither seq.Plus nor seq.Minus contribute nothing. Any error is
// printed to stderr and terminates the program with status 1.
func main() {
	// die reports err on stderr and exits; it never returns.
	die := func(err error) {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	names, err := checkNames(reads)
	if err != nil {
		die(err)
	}

	feats, err := annotFeats(annot, classes, names)
	if err != nil {
		die(err)
	}

	var (
		vm = make(map[string]*vector)
		fm = make(map[string]string)
	)

	for _, path := range reads {
		fmt.Fprintf(os.Stderr, "Reading %q\n", path)
		bf, err := boom.OpenBAM(path)
		if err != nil {
			die(err)
		}

		for {
			r, _, err := bf.Read()
			if err == io.EOF {
				break
			}
			if err != nil {
				die(err)
			}

			switch filter {
			case all:
				// Every record is kept.
			case primary:
				if !isPrimary(r) || (strict && isSecondary(r)) {
					continue
				}
			case secondary:
				if !isSecondary(r) || (strict && isPrimary(r)) {
					continue
				}
			default:
				panic(fmt.Errorf("illegal filter %d", filter))
			}

			if r.Flags()&boom.Unmapped != 0 {
				continue
			}
			feats[r.RefID()].DoMatching(func(iv interval.IntInterface) bool {
				f := iv.(intGff)
				rep := strings.Fields(f.FeatAttributes.Get("repeat"))
				if len(rep) == 0 {
					return false
				}
				from, err := strconv.Atoi(rep[2])
				if err != nil {
					panic(err)
				}
				key := rep[0]
				vec, known := vm[key]
				if !known {
					vec = &vector{}
					vm[key] = vec
					fm[key] = rep[1]
				}
				for pos := r.Start(); pos < r.End(); pos++ {
					var loc int
					switch f.FeatStrand {
					case seq.Plus:
						loc = pos - f.FeatStart + from
					case seq.Minus:
						loc = f.FeatEnd - pos + from
					default:
						// Feature has no usable strand; nothing to count.
						return false
					}
					// A reverse-flagged record is counted on the strand
					// opposite to the feature's.
					st := f.FeatStrand
					if r.Flags()&boom.Reverse != 0 {
						st = -f.FeatStrand
					}
					vec.inc(loc, st)
				}
				return false
			}, intBam{r})
		}

		bf.Close()
	}

	for typ, vec := range vm {
		for pos, val := range *vec {
			if val[0] != 0 {
				fmt.Printf("%s\t%s\tminus\t%d\t%d\n", fm[typ], typ, pos, val[0])
			}
			if val[1] != 0 {
				fmt.Printf("%s\t%s\tplus\t%d\t%d\n", fm[typ], typ, pos, val[1])
			}
		}
	}
}