Beispiel #1
0
// writeResults writes out the results of the analysis in a format similar to the
// Pacific Biosciences bridgemapper program (29 tab separated fields). It also writes
// candidate discordances to the discords gff.Writer if it is not nil. Flanks less than
// flank long are not considered and primay mappings less than length long are omitted.
func writeResults(core, left, right hitSet, out io.Writer, length, flank int, discords *gff.Writer) error {
	for id, c := range core {
		if c.qEnd-c.qStart < length {
			continue
		}
		l, ok := left[id]
		if ok && abs(l.tEnd-l.tStart) < flank {
			l = nil
		}
		r, ok := right[id]
		if ok && abs(r.tEnd-r.tStart) < flank {
			r = nil
		}
		if l == nil && r == nil {
			continue
		}
		_, err := fmt.Fprintf(out, "%s\t%d\t%v\t%v\t%v\n", id, c.qLen, l, c, r)
		if err != nil {
			return err
		}
		if discords != nil {
			for _, f := range [2]*blasrHit{l, r} {
				if f == nil {
					continue
				}
				if f.tName != c.tName {
					_, err = discords.Write(&gff.Feature{
						SeqName:    f.tName,
						Feature:    "flank",
						Source:     "loopy",
						FeatStart:  f.tStart,
						FeatEnd:    f.tEnd,
						FeatScore:  floatPtr(float64(f.score)),
						FeatStrand: f.qStrand,
						FeatFrame:  gff.NoFrame,
					})
					if err != nil {
						return err
					}
				} else if f.tStrand == c.tStrand {
					for _, g := range gapOrOverlap(f, c, flank) {
						_, err = discords.Write(g)
						if err != nil {
							return err
						}
					}
				}
			}
		}
	}
	return nil
}
Beispiel #2
0
func main() {
	flag.Parse()
	if *exclude == "" {
		flag.Usage()
		os.Exit(1)
	}

	nameSet := make(map[string]struct{})
	f, err := os.Open(*exclude)
	if err != nil {
		log.Fatalf("failed to open exclude file %q: %v", *exclude, err)
	}
	ls := bufio.NewScanner(f)
	for ls.Scan() {
		nameSet[ls.Text()] = struct{}{}
	}
	err = ls.Err()
	if err != nil {
		log.Fatalf("failed to read exclude file: %v", err)
	}

	w := gff.NewWriter(os.Stdout, 60, true)
	var excl *gff.Writer
	if *retain {
		excl = gff.NewWriter(os.Stderr, 60, true)
	}
	sc := featio.NewScanner(gff.NewReader(os.Stdin))
	for sc.Next() {
		f := sc.Feat().(*gff.Feature)
		n := f.FeatAttributes.Get("Read")
		if _, ok := nameSet[n]; ok {
			if excl != nil {
				_, err := excl.Write(f)
				if err != nil {
					log.Fatalf("failed to write feature: %v", err)
				}
			}
			continue
		}
		_, err := w.Write(f)
		if err != nil {
			log.Fatalf("failed to write feature: %v", err)
		}
	}
	if err := sc.Error(); err != nil {
		log.Fatalf("error during gff read: %v", err)
	}
}
Beispiel #3
0
// deletions analyses *sam.Records from mapping reads to the given reference
// using the suffix array file if provided. If run is false, blasr is not
// run and the existing blasr output is used to provide the *sam.Records.
// procs specifies the number of blasr threads to use.
func deletions(reads, ref, suff, ext string, procs int, run bool, window, min int, br *refiner, w *gff.Writer) error {
	base := filepath.Base(reads)
	b := blasr.BLASR{
		Cmd: *blasrPath,

		Reads: reads, Genome: ref, SuffixArray: suff,
		BestN: 1,

		SAM:           true,
		Clipping:      "soft",
		SAMQV:         true,
		CIGARSeqMatch: true,

		Aligned:   base + ".blasr." + ext,
		Unaligned: base + ".blasr.unmapped.fasta",

		Procs: procs,
	}
	if run {
		cmd, err := b.BuildCommand()
		if err != nil {
			return err
		}
		cmd.Stdout = errStream
		cmd.Stderr = errStream
		err = cmd.Run()
		if err != nil {
			return err
		}
	}

	f, err := os.Open(b.Aligned)
	if err != nil {
		return err
	}
	defer f.Close()

	cost := [...]float64{
		sam.CigarInsertion: -2,
		sam.CigarDeletion:  -2,
		sam.CigarEqual:     1,
		sam.CigarMismatch:  -1,

		// Included for explicitness
		sam.CigarSoftClipped: 0,

		// Included to ensure no bounds panic.
		// All CIGAR operations not listed above
		// are given a zero cost.
		sam.CigarBack: 0,
	}

	_, err = w.WriteComment(fmt.Sprintf("smoothing window=%d", window))
	if err != nil {
		return nil
	}
	_, err = w.WriteComment(fmt.Sprintf("minimum feature length=%d", min))
	if err != nil {
		return nil
	}
	gf := &gff.Feature{
		Source:         "reefer",
		Feature:        "discordance",
		FeatFrame:      gff.NoFrame,
		FeatAttributes: gff.Attributes{{Tag: "Read"}, {Tag: "Dup"}},
	}
	var sr interface {
		Read() (*sam.Record, error)
	}
	switch ext {
	case "sam":
		sr, err = sam.NewReader(f)
		if err != nil {
			return err
		}
	case "bam":
		var br *bam.Reader
		br, err = bam.NewReader(f, 0)
		if err != nil {
			return err
		}
		defer br.Close()
		sr = br
	default:
		panic("reefer: invalid extension")
	}
	for {
		r, err := sr.Read()
		if err != nil {
			if err != io.EOF {
				return err
			}
			break
		}

		var (
			scores []costPos
			ref    = r.Start()
			query  int
		)
		for _, co := range r.Cigar {
			for i := 0; i < co.Len(); i++ {
				scores = append(scores, costPos{
					ref:   ref,
					query: query,
					cost:  cost[co.Type()],
				})
				consume := co.Type().Consumes()
				ref += consume.Reference
				query += consume.Query
			}
		}
		if len(scores) <= window {
			continue
		}
		smoothed := make([]costPos, len(scores)-window)
		for i := range scores[:len(scores)-window] {
			smoothed[i] = mean(scores[i : i+window])
		}

		var d deletion
		for i, v := range smoothed[1:] {
			switch {
			case d.record == nil && v.cost < 0 && smoothed[i].cost >= 0:
				d = deletion{record: r, rstart: v.ref + 1, qstart: v.query + 1}
			case d.record != nil && v.cost >= 0 && smoothed[i].cost < 0:
				d.rend = v.ref
				d.qend = v.query
				if d.rend-d.rstart >= min || d.qend-d.qstart >= min {
					gf.SeqName = d.record.Ref.Name()
					gf.FeatStrand = strandFor(d.record)
					if gf.FeatStrand == seq.Minus {
						len := d.record.Seq.Length
						d.qstart, d.qend = len-d.qend, len-d.qstart
					}

					// Adjust ends based on paired SW alignments.
					var refined bool
					d, refined, err = br.adjust(d)
					if err != nil && *verbose {
						log.Printf("failed alignment %s: %v", d.record.Name, err)
					}

					gf.FeatStart = d.rstart
					gf.FeatEnd = d.rend
					if gf.FeatStart == gf.FeatEnd {
						// This is disgusting garbage resulting from
						// GFF not allowing zero length features.
						gf.FeatEnd++
					}

					if refined {
						gf.FeatAttributes = gf.FeatAttributes[:2]
						gf.FeatAttributes[1].Value = strconv.Itoa(d.dup)
					} else {
						gf.FeatAttributes = gf.FeatAttributes[:1]
					}
					gf.FeatAttributes[0].Value = fmt.Sprintf("%s %d %d", d.record.Name, feat.ZeroToOne(d.qstart), d.qend)
					_, err = w.Write(gf)
					if err != nil {
						return err
					}
				}
				d.record = nil
			}
		}
	}
	return nil
}