Beispiel #1
0
// main filters GFF features read from standard input by read name.
//
// The file named by the exclude flag is read line by line and every line is
// added to a set of names. A feature whose "Read" attribute is in that set is
// withheld from standard output; when the retain flag is set it is written to
// standard error instead. Every other feature is written to standard output.
// Any I/O failure terminates the program via log.Fatalf.
func main() {
	flag.Parse()
	if *exclude == "" {
		flag.Usage()
		os.Exit(1)
	}

	nameSet := make(map[string]struct{})
	f, err := os.Open(*exclude)
	if err != nil {
		log.Fatalf("failed to open exclude file %q: %v", *exclude, err)
	}
	ls := bufio.NewScanner(f)
	for ls.Scan() {
		nameSet[ls.Text()] = struct{}{}
	}
	err = ls.Err()
	// The name set is complete (or reading failed), so release the file
	// before the filtering pass. The close error is not interesting for a
	// file that was only read from.
	f.Close()
	if err != nil {
		log.Fatalf("failed to read exclude file: %v", err)
	}

	w := gff.NewWriter(os.Stdout, 60, true)
	// excl stays nil unless excluded features are to be retained.
	var excl *gff.Writer
	if *retain {
		excl = gff.NewWriter(os.Stderr, 60, true)
	}
	sc := featio.NewScanner(gff.NewReader(os.Stdin))
	for sc.Next() {
		f := sc.Feat().(*gff.Feature)
		n := f.FeatAttributes.Get("Read")
		if _, ok := nameSet[n]; ok {
			if excl != nil {
				_, err := excl.Write(f)
				if err != nil {
					log.Fatalf("failed to write feature: %v", err)
				}
			}
			continue
		}
		_, err := w.Write(f)
		if err != nil {
			log.Fatalf("failed to write feature: %v", err)
		}
	}
	if err := sc.Error(); err != nil {
		log.Fatalf("error during gff read: %v", err)
	}
}
Beispiel #2
0
// main copies GFF features from standard input to standard output, keeping
// only those whose "Repeat" attribute describes a length of at least *thresh.
//
// The Repeat attribute is split on white space; the fourth and fifth fields
// are parsed as integers and their sum is compared with the threshold. A
// malformed attribute, or any read or write failure, terminates the program.
func main() {
	flag.Parse()

	w := gff.NewWriter(os.Stdout, 60, false)
	sc := featio.NewScanner(gff.NewReader(os.Stdin))
	for sc.Next() {
		f := sc.Feat().(*gff.Feature)
		r := f.FeatAttributes.Get("Repeat")
		fields := strings.Fields(r)
		// Both fields[3] and fields[4] are read below, so five fields are
		// required; checking for only four would panic on fields[4].
		if len(fields) < 5 {
			log.Fatal("invalid repeat attribute")
		}
		end, err := strconv.Atoi(fields[3])
		if err != nil {
			log.Fatalf("failed to parse end coordinate: %v", err)
		}
		remainder, err := strconv.Atoi(fields[4])
		if err != nil {
			log.Fatalf("failed to parse remains coordinate: %v", err)
		}
		length := end + remainder
		if length < *thresh {
			continue
		}
		if _, err := w.Write(f); err != nil {
			log.Fatalf("failed to write feature: %v", err)
		}
	}
	if err := sc.Error(); err != nil {
		log.Fatalf("error during gff read: %v", err)
	}
}
Beispiel #3
0
// NewWriter returns a new PALS writer that writes PALS alignment features to
// the io.Writer w.
//
// The width and header arguments are passed through to gff.NewWriter, and prec
// is stored in the Precision field of the resulting GFF writer. The returned
// Writer's t field is initialised with a feature whose Source is "pals" and
// whose Feature is "hit".
func NewWriter(w io.Writer, prec, width int, header bool) *Writer {
	pw := &Writer{
		w: gff.NewWriter(w, width, header),
		t: &gff.Feature{Source: "pals", Feature: "hit"},
	}
	pw.w.Precision = prec
	return pw
}
Beispiel #4
0
// main finds alignments for the reads named by the reads flag and writes the
// resulting features as GFF to "<base name of reads>.gff" in the current
// directory.
//
// When the errFile flag is set, log output is redirected to that file. When
// the refine flag is set, a breakpoint refiner is built from the reference
// sequences and the alignment parameters given by the align flag. The actual
// work is delegated to deletions; any failure terminates the program via
// log.Fatalf.
func main() {
	flag.Var(&alnmat, "align", "specify the match, mismatch and gap parameters for breakpoint refinement")
	flag.Parse()
	if *reads == "" || (*ref == "" && *run) {
		fmt.Fprintln(os.Stderr, "invalid argument: must have reads, reference and block size set")
		flag.Usage()
		os.Exit(1)
	}

	var err error
	if *errFile != "" {
		errStream, err = os.Create(*errFile)
		if err != nil {
			// Oh, the irony.
			log.Fatalf("failed to create log file: %v", err)
		}
		defer errStream.Close()
		log.SetOutput(errStream)
	}

	// Set up breakpoint refiner. It stays nil unless refinement is requested.
	var br *refiner
	if *refine {
		refSeq, err := readContigs(*ref)
		if err != nil {
			log.Fatalf("failed to read reference sequences: %v", err)
		}
		br = &refiner{
			refWindow:   *refWindow,
			queryWindow: *queryWindow,
			minQueryGap: *minQueryGap,
			minRefFlank: *minRefFlank,
			ref:         refSeq,
			sw:          makeTable(alnmat),
		}
	}

	out := filepath.Base(*reads)
	f, err := os.Create(out + ".gff")
	if err != nil {
		// Report the underlying cause as well as the file name.
		log.Fatalf("failed to create GFF outfile %q: %v", out+".gff", err)
	}
	defer f.Close()
	w := gff.NewWriter(f, 60, true)
	log.Printf("finding alignments for reads in %q", *reads)
	// The bam extension is only used when bam is requested and the mapping
	// step is not being run.
	ext := "sam"
	if *useBam && !*run {
		ext = "bam"
	}
	err = deletions(*reads, *ref, *suff, ext, *procs, *run, *window, *minSize, br, w)
	if err != nil {
		log.Fatalf("failed mapping: %v", err)
	}
}
Beispiel #5
0
// main accumulates read coverage over annotated features from a set of BAM
// files and writes the scored features as GFF to standard output.
//
// For every mapped read, the read length is added to the score of each
// annotation feature that matches the read in the interval tree for the
// read's reference. Once all files are read, each scored feature's score is
// divided by the product of the total number of reads seen (mapped or not)
// and the feature length. Features that never received a score are omitted.
// Any error is printed to standard error and the program exits with status 1.
func main() {
	names, err := checkNames(reads)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	feats, err := annotFeats(annot, classes, names)
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	// rc counts every read seen across all input files.
	var rc int
	for _, in := range reads {
		fmt.Fprintf(os.Stderr, "Reading %q\n", in)
		bf, err := boom.OpenBAM(in)
		if err != nil {
			fmt.Fprintln(os.Stderr, err)
			os.Exit(1)
		}

		// id restarts at zero for each file and is attached to each read's
		// interval wrapper.
		for id := uintptr(0); ; id++ {
			r, _, err := bf.Read()
			if err != nil {
				if err == io.EOF {
					break
				}
				fmt.Fprintln(os.Stderr, err)
				os.Exit(1)
			}
			rc++
			if r.Flags()&boom.Unmapped == 0 {
				feats[r.RefID()].DoMatching(func(iv interval.IntInterface) (done bool) {
					f := iv.(intGff)
					if f.FeatScore == nil {
						f.FeatScore = new(float64)
					}
					*f.FeatScore += float64(r.Len())
					return
				}, intBam{r, id})
			}
		}

		bf.Close()
	}

	var exp gffFeatures
	for _, chr := range feats {
		chr.Do(func(iv interval.IntInterface) (done bool) {
			f := iv.(intGff)
			if f.FeatScore == nil {
				return
			}
			*f.FeatScore /= float64(rc) * float64(f.Len())
			exp = append(exp, f.Feature)
			return
		})
	}
	sort.Sort(exp)

	w := gff.NewWriter(os.Stdout, 60, false)
	for _, f := range exp {
		// A failed write would otherwise silently truncate the output.
		if _, err := w.Write(f); err != nil {
			fmt.Fprintln(os.Stderr, err)
			os.Exit(1)
		}
	}
}
Beispiel #6
0
// main filters the GFF features in the file named by the in flag and writes
// the survivors to standard output.
//
// A feature is dropped when any of the following holds:
//   - within reports that it is too close to a read end,
//   - it has no "Repeat" attribute,
//   - it lies within *buf of the start or end of the contig it maps to,
//   - the reference annotation overlapping its contig region contains a
//     repeat whose second Repeat field equals the feature's second Repeat
//     field.
//
// Sequence names are expected to have the form "<name>//<rest>", where
// <name> is a key of the mapping read from the mapfile flag. Unexpected
// names, malformed Repeat attributes and I/O failures terminate the program
// via log.Fatalf.
func main() {
	flag.Parse()
	if *in == "" || *ref == "" || *mapfile == "" || *contigs == "" {
		flag.Usage()
		os.Exit(0)
	}

	refTrees, err := readAnnotations(*ref)
	if err != nil {
		log.Fatalf("failed to read annotation trees: %v", err)
	}
	mapping, err := readMappings(*mapfile)
	if err != nil {
		log.Fatalf("failed to read mapping file: %v", err)
	}
	contigLength, err := readContigs(*contigs)
	if err != nil {
		log.Fatalf("failed to read contig file: %v", err)
	}

	f, err := os.Open(*in)
	if err != nil {
		log.Fatalf("failed to open %q: %v", *in, err)
	}
	defer f.Close()

	w := gff.NewWriter(os.Stdout, 60, true)

	sc := featio.NewScanner(gff.NewReader(f))
	for sc.Next() {
		f := sc.Feat().(*gff.Feature)
		ok, err := within(*buf, f.SeqName)
		if err != nil {
			log.Fatalf("failed to parse sequence name: %s: %v", f.SeqName, err)
		}
		if !ok {
			log.Printf("too close to read end: excluding %+v", f)
			continue
		}

		repeat := f.FeatAttributes.Get("Repeat")
		if repeat == "" {
			continue
		}
		fields := strings.Fields(repeat)

		name := strings.Split(f.SeqName, "//")
		if len(name) != 2 {
			log.Fatalf("unexpected sequence name in input: %q", f.SeqName)
		}
		contigSide, ok := mapping[name[0]]
		if !ok {
			log.Fatalf("unexpected sequence name in input: %q", f.SeqName)
		}
		if contigSide.FeatStart+f.FeatStart < *buf {
			log.Printf("too close to contig start:\n\texcluding %#v\n\tcontig %#v\n\n%d < %d", f, contigSide, contigSide.FeatStart, *buf)
			continue
		}
		length, ok := contigLength[contigSide.SeqName]
		if !ok {
			log.Fatalf("unexpected sequence name in contig mapping: %q", contigSide.SeqName)
		}
		if length-((contigSide.FeatEnd-contigSide.FeatStart)+f.FeatEnd) < *buf {
			log.Printf("too close to contig end:\n\texcluding %#v\n\tcontig %#v", f, contigSide)
			continue
		}
		t, ok := refTrees[contigSide.SeqName]
		if !ok {
			log.Fatalf("no tree for %v mapped by %v", contigSide.SeqName, f.SeqName)
		}

		// Count reference annotations overlapping the contig region whose
		// second Repeat field matches that of the input feature.
		var n int
		hits := t.Get(gffInterval{Feature: contigSide})
		for _, h := range hits {
			hit := h.(gffInterval)
			hitRepeat := hit.FeatAttributes.Get("Repeat")
			if hitRepeat == "" {
				continue
			}
			// The second field of each attribute is compared, so both
			// must have at least two fields. The checks are made here,
			// only when the comparison is actually needed, so that
			// features which never reach it are handled as before.
			hitFields := strings.Fields(hitRepeat)
			if len(hitFields) < 2 {
				log.Fatalf("invalid repeat attribute in annotation: %q", hitRepeat)
			}
			if len(fields) < 2 {
				log.Fatalf("invalid repeat attribute in input: %q", repeat)
			}
			if fields[1] == hitFields[1] {
				n++
			}
		}
		if n != 0 {
			log.Printf("too many hits: excluding %+v", f)
			for _, h := range hits {
				log.Printf("\t%+v", h.(gffInterval).Feature)
			}
			continue
		}
		if _, err := w.Write(f); err != nil {
			log.Fatalf("failed to write feature: %v", err)
		}
	}
	err = sc.Error()
	if err != nil {
		log.Fatalf("error during GFF read: %v", err)
	}
}
Beispiel #7
0
// main maps a set of reads, extracts the sequence flanking each hit, remaps
// the left and right flanks separately and writes the combined results.
//
// Flank sequences are written to "<base>.left.in.fa" and "<base>.right.in.fa",
// where <base> is the base name of the reads flag. When the errFile flag is
// set, log output goes to that file; when the outFile flag is set, results go
// to that file. When the discords flag is set, a GFF writer on "<base>.gff"
// is also passed to writeResults; otherwise that writer is nil. Any failure
// terminates the program via log.Fatalf.
func main() {
	flag.Parse()
	if *reads == "" || *ref == "" {
		fmt.Fprintln(os.Stderr, "invalid argument: must have reads, reference and block size set")
		flag.Usage()
		os.Exit(1)
	}

	var err error
	if *errFile != "" {
		errStream, err = os.Create(*errFile)
		if err != nil {
			// Oh, the irony.
			log.Fatalf("failed to create log file: %v", err)
		}
		defer errStream.Close()
		log.SetOutput(errStream)
	}
	if *outFile != "" {
		outStream, err = os.Create(*outFile)
		if err != nil {
			log.Fatalf("failed to create out file: %v", err)
		}
		defer outStream.Close()
	}

	log.Printf("finding flanks of reads in %q", *reads)
	core, err := hitSetFrom(*reads, *ref, *suff, *procs, *run)
	if err != nil {
		log.Fatalf("failed initial mapping: %v", err)
	}

	// Prepare flank sequences and remap them.
	out := filepath.Base(*reads)
	leftSeqs := out + ".left.in.fa"
	rightSeqs := out + ".right.in.fa"

	log.Printf("writing flanks to %q and %q", leftSeqs, rightSeqs)
	err = writeFlankSeqs(*reads, core, *flank, leftSeqs, rightSeqs)
	if err != nil {
		log.Fatalf("failed to write flanks: %v", err)
	}

	log.Printf("remapping left flanks of reads from %q", leftSeqs)
	left, err := hitSetFrom(leftSeqs, *ref, *suff, *procs, *run)
	if err != nil {
		log.Fatalf("failed left flank remapping: %v", err)
	}

	log.Printf("remapping right flanks of reads from %q", rightSeqs)
	right, err := hitSetFrom(rightSeqs, *ref, *suff, *procs, *run)
	if err != nil {
		log.Fatalf("failed right flank remapping: %v", err)
	}

	// w stays nil unless discordant features were requested.
	var w *gff.Writer
	if *discords {
		f, err := os.Create(out + ".gff")
		if err != nil {
			// Report the underlying cause as well as the file name.
			log.Fatalf("failed to create GFF outfile %q: %v", out+".gff", err)
		}
		defer f.Close()
		w = gff.NewWriter(f, 60, true)
	}
	err = writeResults(core, left, right, outStream, *length, *flank, w)
	if err != nil {
		log.Fatalf("failed to write results: %v", err)
	}
}
Beispiel #8
0
// main looks for sequence shared between the two sides of each event in a
// GFF file and writes the events that have it, annotated with a "TSD"
// attribute, to standard output.
//
// Events are read from the file named by the in flag and grouped by the
// first field of their "Read" attribute, which must have exactly three
// white-space separated fields: a sequence name, a start and an end. Each
// remaining command line argument names a FASTA file; for every sequence in
// those files, each event recorded under the sequence's name is processed:
//
//   - if the fastaOut flag is set, the [start,end) portion of the sequence
//     is written to that file,
//   - a left and a right window are chosen, either from the event's "Dup"
//     attribute when present or as *window-sized regions centred on start
//     and end,
//   - the two windows are aligned and, if both aligned segments contain at
//     least *thresh non-gap letters, the event is written with the TSD
//     attribute appended.
//
// Malformed records and I/O failures terminate the program via log.Fatalf.
func main() {
	flag.Var(&alnmat, "align", "specify the match, mismatch and gap parameters")
	flag.Parse()
	if *in == "" {
		flag.Usage()
		os.Exit(1)
	}

	f, err := os.Open(*in)
	if err != nil {
		log.Fatalf("failed to open %q: %v", *in, err)
	}
	// events maps a sequence name to the events recorded for it.
	events := make(map[string][]*gff.Feature)
	fsc := featio.NewScanner(gff.NewReader(f))
	for fsc.Next() {
		f := fsc.Feat().(*gff.Feature)
		fields := strings.Fields(f.FeatAttributes.Get("Read"))
		if len(fields) != 3 {
			log.Fatalf("bad record: %+v", f)
		}
		events[fields[0]] = append(events[fields[0]], f)
	}
	if err := fsc.Error(); err != nil {
		log.Fatalf("error during gff read: %v", err)
	}
	f.Close()

	w := gff.NewWriter(os.Stdout, 60, true)
	w.WriteComment("Right coordinates (field 5) and strand (field 7) are hypothetical.")

	// out stays nil unless insertion sequences were requested.
	var out *os.File
	if *fastaOut != "" {
		out, err = os.Create(*fastaOut)
		if err != nil {
			log.Fatalf("failed to create fasta insertion output file %q: %v", *fastaOut, err)
		}
		defer out.Close()
	}

	hw := *window / 2
	sw := makeTable(alphabet.DNAgapped, alnmat)
	for _, ref := range flag.Args() {
		f, err = os.Open(ref)
		if err != nil {
			log.Fatalf("failed to open reference %q: %v", ref, err)
		}
		ssc := seqio.NewScanner(fasta.NewReader(f, linear.NewSeq("", nil, alphabet.DNAgapped)))
		// NOTE(review): the label below is on the sequence loop, so each
		// "continue loop" abandons the remaining events for the current
		// sequence as well as the current event - confirm this is intended.
	loop:
		for ssc.Next() {
			seq := ssc.Seq().(*linear.Seq)
			for _, f := range events[seq.Name()] {
				fields := strings.Fields(f.FeatAttributes.Get("Read"))
				if len(fields) != 3 {
					log.Fatalf("bad record: %+v", f)
				}
				start, err := strconv.Atoi(fields[1])
				if err != nil {
					log.Fatalf("failed to get start coordinate: %v", err)
				}
				end, err := strconv.Atoi(fields[2])
				if err != nil {
					log.Fatalf("failed to get end coordinate: %v", err)
				}

				if out != nil {
					// Work on a shallow copy so the description and
					// slice changes do not affect seq itself.
					insert := *seq
					if insert.Desc != "" {
						insert.Desc += " "
					}
					insert.Desc += fmt.Sprintf("[%d,%d)", start, end)
					insert.Seq = insert.Seq[start:end]
					fmt.Fprintf(out, "%60a\n", &insert)
				}

				var lOff, lEnd, rOff, rEnd int
				// If we have refined ends, use them.
				if dup := f.FeatAttributes.Get("Dup"); dup != "" {
					d, err := strconv.Atoi(dup)
					if err != nil {
						log.Fatalf("failed to get duplication length: %v", err)
					}
					lOff = max(0, start-d)
					lEnd = start
					rOff = end
					rEnd = min(len(seq.Seq), end+d)
				} else {
					lOff = max(0, start-hw)
					lEnd = min(len(seq.Seq), start+hw)
					rOff = max(0, end-hw)
					rEnd = min(len(seq.Seq), end+hw)

					// Ensure windows don't overlap.
					if lEnd > rOff {
						lEnd = (lEnd + rOff) / 2
						rOff = lEnd
					}
				}

				if lEnd-lOff < *thresh || rEnd-rOff < *thresh {
					// Don't do fruitless work.
					continue loop
				}

				left := *seq
				left.ID = "prefix"
				left.Seq = left.Seq[lOff:lEnd]
				right := *seq
				right.ID = "postfix"
				right.Seq = right.Seq[rOff:rEnd]

				aln, err := sw.Align(&right, &left)
				if err != nil {
					log.Fatal(err)
				}

				// Require at least *thresh non-gap letters in each
				// aligned segment.
				fa := align.Format(&right, &left, aln, '-')
				for _, seg := range fa {
					var n int
					for _, l := range seg.(alphabet.Letters) {
						if l != '-' {
							n++
						}
					}
					if n < *thresh {
						continue loop
					}
				}

				// Sum the scores of the individual alignment segments.
				var sc int
				for _, seg := range aln {
					type scorer interface {
						Score() int
					}
					sc += seg.(scorer).Score()
				}
				f.FeatAttributes = append(f.FeatAttributes, gff.Attribute{
					Tag: "TSD", Value: fmt.Sprintf(`%v %d %d %v "%v" %d`,
						fa[0], aln[len(aln)-1].Features()[0].End()+lOff,
						aln[0].Features()[1].Start()+rOff, fa[1],
						aln, sc),
				})
				// A failed write would otherwise silently lose events.
				if _, err := w.Write(f); err != nil {
					log.Fatalf("failed to write feature: %v", err)
				}
			}
		}
		if err := ssc.Error(); err != nil {
			log.Fatalf("error during fasta read: %v", err)
		}
		f.Close()
	}
}