func main() { flag.Parse() if *exclude == "" { flag.Usage() os.Exit(1) } nameSet := make(map[string]struct{}) f, err := os.Open(*exclude) if err != nil { log.Fatalf("failed to open exclude file %q: %v", *exclude, err) } ls := bufio.NewScanner(f) for ls.Scan() { nameSet[ls.Text()] = struct{}{} } err = ls.Err() if err != nil { log.Fatalf("failed to read exclude file: %v", err) } w := gff.NewWriter(os.Stdout, 60, true) var excl *gff.Writer if *retain { excl = gff.NewWriter(os.Stderr, 60, true) } sc := featio.NewScanner(gff.NewReader(os.Stdin)) for sc.Next() { f := sc.Feat().(*gff.Feature) n := f.FeatAttributes.Get("Read") if _, ok := nameSet[n]; ok { if excl != nil { _, err := excl.Write(f) if err != nil { log.Fatalf("failed to write feature: %v", err) } } continue } _, err := w.Write(f) if err != nil { log.Fatalf("failed to write feature: %v", err) } } if err := sc.Error(); err != nil { log.Fatalf("error during gff read: %v", err) } }
func main() { flag.Parse() w := gff.NewWriter(os.Stdout, 60, false) sc := featio.NewScanner(gff.NewReader(os.Stdin)) for sc.Next() { f := sc.Feat().(*gff.Feature) r := f.FeatAttributes.Get("Repeat") fields := strings.Fields(r) if len(fields) < 4 { log.Fatal("invalid repeat attribute") } end, err := strconv.Atoi(fields[3]) if err != nil { log.Fatalf("failed to parse end coordinate: %v", err) } remainder, err := strconv.Atoi(fields[4]) if err != nil { log.Fatalf("failed to parse remains coordinate: %v", err) } length := end + remainder if length < *thresh { continue } w.Write(f) } if err := sc.Error(); err != nil { log.Fatalf("error during gff read: %v", err) } }
// NewWriter returns a new PALS writer that write PALS alignment features to the io.Writer w. func NewWriter(w io.Writer, prec, width int, header bool) *Writer { gw := gff.NewWriter(w, width, header) gw.Precision = prec return &Writer{ w: gw, t: &gff.Feature{Source: "pals", Feature: "hit"}, } }
func main() { flag.Var(&alnmat, "align", "specify the match, mismatch and gap parameters for breakpoint refinement") flag.Parse() if *reads == "" || (*ref == "" && *run) { fmt.Fprintln(os.Stderr, "invalid argument: must have reads, reference and block size set") flag.Usage() os.Exit(1) } var err error if *errFile != "" { errStream, err = os.Create(*errFile) if err != nil { // Oh, the irony. log.Fatalf("failed to create log file: %v", err) } defer errStream.Close() log.SetOutput(errStream) } // Set up breakpoint refiner. var br *refiner if *refine { refSeq, err := readContigs(*ref) if err != nil { log.Fatalf("failed to read reference sequences: %v", err) } br = &refiner{ refWindow: *refWindow, queryWindow: *queryWindow, minQueryGap: *minQueryGap, minRefFlank: *minRefFlank, ref: refSeq, sw: makeTable(alnmat), } } out := filepath.Base(*reads) f, err := os.Create(out + ".gff") if err != nil { log.Fatalf("failed to create GFF outfile: %q", out+".gff") } w := gff.NewWriter(f, 60, true) defer f.Close() log.Printf("finding alignments for reads in %q", *reads) ext := "sam" if *useBam && !*run { ext = "bam" } err = deletions(*reads, *ref, *suff, ext, *procs, *run, *window, *minSize, br, w) if err != nil { log.Fatalf("failed mapping: %v", err) } }
func main() { names, err := checkNames(reads) if err != nil { fmt.Fprintln(os.Stderr, err) os.Exit(1) } feats, err := annotFeats(annot, classes, names) if err != nil { fmt.Fprintln(os.Stderr, err) os.Exit(1) } var rc int for _, in := range reads { fmt.Fprintf(os.Stderr, "Reading %q\n", in) bf, err := boom.OpenBAM(in) if err != nil { fmt.Fprintln(os.Stderr, err) os.Exit(1) } for id := uintptr(0); ; id++ { r, _, err := bf.Read() if err != nil { if err == io.EOF { break } fmt.Fprintln(os.Stderr, err) os.Exit(1) } rc++ if r.Flags()&boom.Unmapped == 0 { feats[r.RefID()].DoMatching(func(iv interval.IntInterface) (done bool) { f := iv.(intGff) if f.FeatScore == nil { f.FeatScore = new(float64) } *f.FeatScore += float64(r.Len()) return }, intBam{r, id}) } } bf.Close() } var exp gffFeatures for _, chr := range feats { chr.Do(func(iv interval.IntInterface) (done bool) { f := iv.(intGff) if f.FeatScore == nil { return } *f.FeatScore /= float64(rc) * float64(f.Len()) exp = append(exp, f.Feature) return }) } sort.Sort(exp) w := gff.NewWriter(os.Stdout, 60, false) for _, f := range exp { w.Write(f) } }
func main() { flag.Parse() if *in == "" || *ref == "" || *mapfile == "" || *contigs == "" { flag.Usage() os.Exit(0) } refTrees, err := readAnnotations(*ref) if err != nil { log.Fatalf("failed to read annotation trees: %v", err) } mapping, err := readMappings(*mapfile) if err != nil { log.Fatalf("failed to read mapping file: %v", err) } contigLength, err := readContigs(*contigs) if err != nil { log.Fatalf("failed to read contig file: %v", err) } f, err := os.Open(*in) if err != nil { log.Fatalf("failed to open %q: %v", *in, err) } w := gff.NewWriter(os.Stdout, 60, true) sc := featio.NewScanner(gff.NewReader(f)) for sc.Next() { f := sc.Feat().(*gff.Feature) ok, err := within(*buf, f.SeqName) if err != nil { log.Fatalf("failed to parse sequence name: %s: %v", f.SeqName, err) } if !ok { log.Printf("too close to read end: excluding %+v", f) continue } repeat := f.FeatAttributes.Get("Repeat") if repeat == "" { continue } fields := strings.Fields(repeat) name := strings.Split(f.SeqName, "//") if len(name) != 2 { log.Fatalf("unexpected sequence name in input: %q", f.SeqName) } contigSide, ok := mapping[name[0]] if !ok { log.Fatalf("unexpected sequence name in input: %q", f.SeqName) } if contigSide.FeatStart+f.FeatStart < *buf { log.Printf("too close to contig start:\n\texcluding %#v\n\tcontig %#v\n\n%d < %d", f, contigSide, contigSide.FeatStart, *buf) continue } length, ok := contigLength[contigSide.SeqName] if !ok { log.Fatalf("unexpected sequence name in contig mapping: %q", contigSide.SeqName) } if length-((contigSide.FeatEnd-contigSide.FeatStart)+f.FeatEnd) < *buf { log.Printf("too close to contig end:\n\texcluding %#v\n\tcontig %#v", f, contigSide) continue } t, ok := refTrees[contigSide.SeqName] if !ok { log.Fatalf("no tree for %v mapped by %v", contigSide.SeqName, f.SeqName) } var n int hits := t.Get(gffInterval{Feature: contigSide}) for _, h := range hits { f := h.(gffInterval) repeat := f.FeatAttributes.Get("Repeat") if repeat == "" { continue } hitClass := strings.Fields(repeat)[1] if fields[1] == hitClass { n++ } } if n != 0 { log.Printf("too many hits: excluding %+v", f) for _, h := range hits { log.Printf("\t%+v", h.(gffInterval).Feature) } continue } w.Write(f) } err = sc.Error() if err != nil { log.Fatalf("error during GFF read: %v", err) } }
func main() { flag.Parse() if *reads == "" || *ref == "" { fmt.Fprintln(os.Stderr, "invalid argument: must have reads, reference and block size set") flag.Usage() os.Exit(1) } var err error if *errFile != "" { errStream, err = os.Create(*errFile) if err != nil { // Oh, the irony. log.Fatalf("failed to create log file: %v", err) } defer errStream.Close() log.SetOutput(errStream) } if *outFile != "" { outStream, err = os.Create(*outFile) if err != nil { log.Fatalf("failed to create out file: %v", err) } defer outStream.Close() } log.Printf("finding flanks of reads in %q", *reads) core, err := hitSetFrom(*reads, *ref, *suff, *procs, *run) if err != nil { log.Fatalf("failed initial mapping: %v", err) } // Prepare flank sequences and remap them. out := filepath.Base(*reads) leftSeqs := out + ".left.in.fa" rightSeqs := out + ".right.in.fa" log.Printf("writing flanks to %q and %q", leftSeqs, rightSeqs) err = writeFlankSeqs(*reads, core, *flank, leftSeqs, rightSeqs) if err != nil { log.Fatalf("failed to write flanks: %v", err) } log.Printf("remapping left flanks of reads from %q", leftSeqs) left, err := hitSetFrom(leftSeqs, *ref, *suff, *procs, *run) if err != nil { log.Fatalf("failed left flank remapping: %v", err) } log.Printf("remapping right flanks of reads from %q", rightSeqs) right, err := hitSetFrom(rightSeqs, *ref, *suff, *procs, *run) if err != nil { log.Fatalf("failed right flank remapping: %v", err) } var w *gff.Writer if *discords { f, err := os.Create(out + ".gff") if err != nil { log.Fatalf("failed to create GFF outfile: %q", out+".gff") } w = gff.NewWriter(f, 60, true) defer f.Close() } err = writeResults(core, left, right, outStream, *length, *flank, w) if err != nil { log.Fatalf("failed to write results: %v", err) } }
func main() { flag.Var(&alnmat, "align", "specify the match, mismatch and gap parameters") flag.Parse() if *in == "" { flag.Usage() os.Exit(1) } f, err := os.Open(*in) if err != nil { log.Fatalf("failed to open %q: %v", *in, err) } events := make(map[string][]*gff.Feature) fsc := featio.NewScanner(gff.NewReader(f)) for fsc.Next() { f := fsc.Feat().(*gff.Feature) fields := strings.Fields(f.FeatAttributes.Get("Read")) if len(fields) != 3 { log.Fatalf("bad record: %+v", f) } events[fields[0]] = append(events[fields[0]], f) } if err := fsc.Error(); err != nil { log.Fatalf("error during gff read: %v", err) } f.Close() w := gff.NewWriter(os.Stdout, 60, true) w.WriteComment("Right coordinates (field 5) and strand (field 7) are hypothetical.") var out *os.File if *fastaOut != "" { out, err = os.Create(*fastaOut) if err != nil { log.Fatalf("failed to create fasta insertion output file %q: %v", *fastaOut, err) } defer out.Close() } hw := *window / 2 sw := makeTable(alphabet.DNAgapped, alnmat) for _, ref := range flag.Args() { f, err = os.Open(ref) if err != nil { log.Fatalf("failed to open reference %q: %v", ref, err) } ssc := seqio.NewScanner(fasta.NewReader(f, linear.NewSeq("", nil, alphabet.DNAgapped))) loop: for ssc.Next() { seq := ssc.Seq().(*linear.Seq) for _, f := range events[seq.Name()] { fields := strings.Fields(f.FeatAttributes.Get("Read")) if len(fields) != 3 { log.Fatalf("bad record: %+v", f) } start, err := strconv.Atoi(fields[1]) if err != nil { log.Fatalf("failed to get start coordinate: %v", err) } end, err := strconv.Atoi(fields[2]) if err != nil { log.Fatalf("failed to get end coordinate: %v", err) } if out != nil { insert := *seq if insert.Desc != "" { insert.Desc += " " } insert.Desc += fmt.Sprintf("[%d,%d)", start, end) insert.Seq = insert.Seq[start:end] fmt.Fprintf(out, "%60a\n", &insert) } var lOff, lEnd, rOff, rEnd int // If we have refined ends, use them. if dup := f.FeatAttributes.Get("Dup"); dup != "" { d, err := strconv.Atoi(dup) if err != nil { log.Fatalf("failed to get duplication length: %v", err) } lOff = max(0, start-d) lEnd = start rOff = end rEnd = min(len(seq.Seq), end+d) } else { lOff = max(0, start-hw) lEnd = min(len(seq.Seq), start+hw) rOff = max(0, end-hw) rEnd = min(len(seq.Seq), end+hw) // Ensure windows don't overlap. if lEnd > rOff { lEnd = (lEnd + rOff) / 2 rOff = lEnd } } if lEnd-lOff < *thresh || rEnd-rOff < *thresh { // Don't do fruitless work. continue loop } left := *seq left.ID = "prefix" left.Seq = left.Seq[lOff:lEnd] right := *seq right.ID = "postfix" right.Seq = right.Seq[rOff:rEnd] aln, err := sw.Align(&right, &left) if err != nil { log.Fatal(err) } fa := align.Format(&right, &left, aln, '-') for _, seg := range fa { var n int for _, l := range seg.(alphabet.Letters) { if l != '-' { n++ } } if n < *thresh { continue loop } } var sc int for _, seg := range aln { type scorer interface { Score() int } sc += seg.(scorer).Score() } f.FeatAttributes = append(f.FeatAttributes, gff.Attribute{ Tag: "TSD", Value: fmt.Sprintf(`%v %d %d %v "%v" %d`, fa[0], aln[len(aln)-1].Features()[0].End()+lOff, aln[0].Features()[1].Start()+rOff, fa[1], aln, sc), }) w.Write(f) } } if err := ssc.Error(); err != nil { log.Fatalf("error during fasta read: %v", err) } f.Close() } }