func alinha(seq_aa, seqpdb_aa seq.Sequence, surf []float64) (string, string) { nw := align.NWAffine{ Matrix: matrix.MATCH, GapOpen: -1, } aln, err := nw.Align(seq_aa, seqpdb_aa) var f_aa [2]alphabet.Slice if err == nil { f_aa = align.Format(seq_aa, seqpdb_aa, aln, '_') // f_ss = align.Format(seq_aa, ss_ss, aln, '_') } else { fmt.Println("O ERRO E:", err) } return fmt.Sprint(f_aa[0]), fmt.Sprint(f_aa[1]) }
func (s *S) TestAlignment(c *check.C) { l := [...]byte{'A', 'C', 'G', 'T'} Q := len(l) a := &linear.Seq{Seq: make(alphabet.Letters, 0, util.Pow(Q, k))} a.Alpha = alphabet.DNA for _, i := range util.DeBruijn(byte(Q), k) { a.Seq = append(a.Seq, alphabet.Letter(l[i])) } b := &linear.Seq{Seq: make(alphabet.Letters, 0, util.Pow(Q, k-1))} b.Alpha = alphabet.DNA for _, i := range util.DeBruijn(byte(Q), k-1) { b.Seq = append(b.Seq, alphabet.Letter(l[i])) } aligner := NewAligner(a, b, int(k), 50, 0.80) aligner.Costs = &Costs{ MaxIGap: maxIGap, DiffCost: diffCost, SameCost: sameCost, MatchCost: matchCost, BlockCost: blockCost, RMatchCost: rMatchCost, } hits := aligner.AlignTraps(T) c.Check(hits, check.DeepEquals, H) la, lb, err := hits.Sum() c.Check(la, check.Equals, 791) c.Check(lb, check.Equals, 664) c.Check(err, check.Equals, nil) for _, h := range H { sa, sb := &linear.Seq{Seq: a.Seq[h.Abpos:h.Aepos]}, &linear.Seq{Seq: b.Seq[h.Bbpos:h.Bepos]} sa.Alpha = alphabet.DNAgapped sb.Alpha = alphabet.DNAgapped smith := align.SW{ {0, -1, -1, -1, -1}, {-1, 2, -1, -1, -1}, {-1, -1, 2, -1, -1}, {-1, -1, -1, 2, -1}, {-1, -1, -1, -1, 2}, } swa, _ := smith.Align(sa, sb) fa := align.Format(sa, sb, swa, sa.Alpha.Gap()) c.Logf("%v\n", swa) c.Logf("%s\n%s\n", fa[0], fa[1]) } }
func main() { flag.Var(&alnmat, "align", "specify the match, mismatch and gap parameters") flag.Parse() if *in == "" { flag.Usage() os.Exit(1) } f, err := os.Open(*in) if err != nil { log.Fatalf("failed to open %q: %v", *in, err) } events := make(map[string][]*gff.Feature) fsc := featio.NewScanner(gff.NewReader(f)) for fsc.Next() { f := fsc.Feat().(*gff.Feature) fields := strings.Fields(f.FeatAttributes.Get("Read")) if len(fields) != 3 { log.Fatalf("bad record: %+v", f) } events[fields[0]] = append(events[fields[0]], f) } if err := fsc.Error(); err != nil { log.Fatalf("error during gff read: %v", err) } f.Close() w := gff.NewWriter(os.Stdout, 60, true) w.WriteComment("Right coordinates (field 5) and strand (field 7) are hypothetical.") var out *os.File if *fastaOut != "" { out, err = os.Create(*fastaOut) if err != nil { log.Fatalf("failed to create fasta insertion output file %q: %v", *fastaOut, err) } defer out.Close() } hw := *window / 2 sw := makeTable(alphabet.DNAgapped, alnmat) for _, ref := range flag.Args() { f, err = os.Open(ref) if err != nil { log.Fatalf("failed to open reference %q: %v", ref, err) } ssc := seqio.NewScanner(fasta.NewReader(f, linear.NewSeq("", nil, alphabet.DNAgapped))) loop: for ssc.Next() { seq := ssc.Seq().(*linear.Seq) for _, f := range events[seq.Name()] { fields := strings.Fields(f.FeatAttributes.Get("Read")) if len(fields) != 3 { log.Fatalf("bad record: %+v", f) } start, err := strconv.Atoi(fields[1]) if err != nil { log.Fatalf("failed to get start coordinate: %v", err) } end, err := strconv.Atoi(fields[2]) if err != nil { log.Fatalf("failed to get end coordinate: %v", err) } if out != nil { insert := *seq if insert.Desc != "" { insert.Desc += " " } insert.Desc += fmt.Sprintf("[%d,%d)", start, end) insert.Seq = insert.Seq[start:end] fmt.Fprintf(out, "%60a\n", &insert) } var lOff, lEnd, rOff, rEnd int // If we have refined ends, use them. if dup := f.FeatAttributes.Get("Dup"); dup != "" { d, err := strconv.Atoi(dup) if err != nil { log.Fatalf("failed to get duplication length: %v", err) } lOff = max(0, start-d) lEnd = start rOff = end rEnd = min(len(seq.Seq), end+d) } else { lOff = max(0, start-hw) lEnd = min(len(seq.Seq), start+hw) rOff = max(0, end-hw) rEnd = min(len(seq.Seq), end+hw) // Ensure windows don't overlap. if lEnd > rOff { lEnd = (lEnd + rOff) / 2 rOff = lEnd } } if lEnd-lOff < *thresh || rEnd-rOff < *thresh { // Don't do fruitless work. continue loop } left := *seq left.ID = "prefix" left.Seq = left.Seq[lOff:lEnd] right := *seq right.ID = "postfix" right.Seq = right.Seq[rOff:rEnd] aln, err := sw.Align(&right, &left) if err != nil { log.Fatal(err) } fa := align.Format(&right, &left, aln, '-') for _, seg := range fa { var n int for _, l := range seg.(alphabet.Letters) { if l != '-' { n++ } } if n < *thresh { continue loop } } var sc int for _, seg := range aln { type scorer interface { Score() int } sc += seg.(scorer).Score() } f.FeatAttributes = append(f.FeatAttributes, gff.Attribute{ Tag: "TSD", Value: fmt.Sprintf(`%v %d %d %v "%v" %d`, fa[0], aln[len(aln)-1].Features()[0].End()+lOff, aln[0].Features()[1].Start()+rOff, fa[1], aln, sc), }) w.Write(f) } } if err := ssc.Error(); err != nil { log.Fatalf("error during fasta read: %v", err) } f.Close() } }