Esempio n. 1
0
// alinha aligns seq_aa against seqpdb_aa using an align.NWAffine aligner
// configured with matrix.MATCH and a gap-open value of -1, and returns the
// two formatted alignment rows as strings. '_' is passed to align.Format as
// the gap character.
//
// surf is accepted but is not used by this function.
//
// If the alignment fails, the error is printed to standard output and two
// empty strings are returned.
func alinha(seq_aa, seqpdb_aa seq.Sequence, surf []float64) (string, string) {
	nw := align.NWAffine{
		Matrix:  matrix.MATCH,
		GapOpen: -1,
	}

	aln, err := nw.Align(seq_aa, seqpdb_aa)
	if err != nil {
		fmt.Println("O ERRO E:", err)
		// There is no alignment to format. Returning here avoids passing the
		// zero-valued format result to fmt.Sprint, which would hand callers
		// the literal text "<nil>" instead of an empty result.
		return "", ""
	}

	aligned := align.Format(seq_aa, seqpdb_aa, aln, '_')
	return fmt.Sprint(aligned[0]), fmt.Sprint(aligned[1])
}
Esempio n. 2
0
// TestAlignment builds two DNA sequences from the output of util.DeBruijn
// (with k and k-1) over the letters A, C, G and T, runs AlignTraps on an
// aligner constructed from them, and checks the hits against the expected
// set H and the values returned by hits.Sum.
//
// Each region described by H is then aligned with an align.SW table and the
// alignment and its formatted rows are logged.
func (s *S) TestAlignment(c *check.C) {
	l := [...]byte{'A', 'C', 'G', 'T'}
	Q := len(l)

	a := &linear.Seq{Seq: make(alphabet.Letters, 0, util.Pow(Q, k))}
	a.Alpha = alphabet.DNA
	for _, i := range util.DeBruijn(byte(Q), k) {
		a.Seq = append(a.Seq, alphabet.Letter(l[i]))
	}

	b := &linear.Seq{Seq: make(alphabet.Letters, 0, util.Pow(Q, k-1))}
	b.Alpha = alphabet.DNA
	for _, i := range util.DeBruijn(byte(Q), k-1) {
		b.Seq = append(b.Seq, alphabet.Letter(l[i]))
	}

	aligner := NewAligner(a, b, int(k), 50, 0.80)
	aligner.Costs = &Costs{
		MaxIGap:    maxIGap,
		DiffCost:   diffCost,
		SameCost:   sameCost,
		MatchCost:  matchCost,
		BlockCost:  blockCost,
		RMatchCost: rMatchCost,
	}

	hits := aligner.AlignTraps(T)
	c.Check(hits, check.DeepEquals, H)
	la, lb, err := hits.Sum()
	c.Check(err, check.Equals, nil)
	c.Check(la, check.Equals, 791)
	c.Check(lb, check.Equals, 664)

	// The scoring table is the same for every hit, so build it once.
	// It scores 2 on the diagonal for indices 1-4, 0 at [0][0] and -1
	// everywhere else.
	// NOTE(review): index 0 is presumably the gap symbol of
	// alphabet.DNAgapped - confirm against the alphabet definition.
	smith := align.SW{
		{0, -1, -1, -1, -1},
		{-1, 2, -1, -1, -1},
		{-1, -1, 2, -1, -1},
		{-1, -1, -1, 2, -1},
		{-1, -1, -1, -1, 2},
	}
	for _, h := range H {
		sa := &linear.Seq{Seq: a.Seq[h.Abpos:h.Aepos]}
		sb := &linear.Seq{Seq: b.Seq[h.Bbpos:h.Bepos]}
		sa.Alpha = alphabet.DNAgapped
		sb.Alpha = alphabet.DNAgapped

		swa, err := smith.Align(sa, sb)
		// Report a failed alignment instead of silently formatting and
		// logging an empty result.
		if !c.Check(err, check.Equals, nil) {
			continue
		}
		fa := align.Format(sa, sb, swa, sa.Alpha.Gap())
		c.Logf("%v\n", swa)
		c.Logf("%s\n%s\n", fa[0], fa[1])
	}
}
Esempio n. 3
0
// main is the program entry point.
//
// It reads GFF features from the file named by the in flag and groups them by
// the first field of their "Read" attribute. Each positional argument is then
// opened as a FASTA file; for every sequence, the features grouped under the
// sequence's name are processed as follows:
//
//   - the second and third fields of the "Read" attribute are parsed as the
//     start and end of a half-open interval on the sequence;
//   - if the fastaOut flag is set, the sequence slice [start,end) is written
//     to that file;
//   - a left and a right flanking window are chosen, sized by the "Dup"
//     attribute when present and otherwise by half of the window flag;
//   - the right flank is aligned against the left flank using the table
//     returned by makeTable, and, provided both flanks and both aligned rows
//     hold at least thresh letters, a "TSD" attribute describing the
//     alignment is appended and the feature is written as GFF to standard
//     output.
//
// Any I/O or parse failure terminates the program via log.Fatal.
func main() {
	flag.Var(&alnmat, "align", "specify the match, mismatch and gap parameters")
	flag.Parse()
	if *in == "" {
		flag.Usage()
		os.Exit(1)
	}

	f, err := os.Open(*in)
	if err != nil {
		log.Fatalf("failed to open %q: %v", *in, err)
	}
	// Group the features by the first field of their "Read" attribute; this
	// key is matched against sequence names below.
	events := make(map[string][]*gff.Feature)
	fsc := featio.NewScanner(gff.NewReader(f))
	for fsc.Next() {
		ev := fsc.Feat().(*gff.Feature)
		fields := strings.Fields(ev.FeatAttributes.Get("Read"))
		if len(fields) != 3 {
			log.Fatalf("bad record: %+v", ev)
		}
		events[fields[0]] = append(events[fields[0]], ev)
	}
	if err := fsc.Error(); err != nil {
		log.Fatalf("error during gff read: %v", err)
	}
	f.Close()

	w := gff.NewWriter(os.Stdout, 60, true)
	if _, err := w.WriteComment("Right coordinates (field 5) and strand (field 7) are hypothetical."); err != nil {
		log.Fatalf("failed to write gff comment: %v", err)
	}

	// out stays nil unless a fasta output file was requested. It is closed
	// explicitly at the end of main so that a failed close is reported.
	var out *os.File
	if *fastaOut != "" {
		out, err = os.Create(*fastaOut)
		if err != nil {
			log.Fatalf("failed to create fasta insertion output file %q: %v", *fastaOut, err)
		}
	}

	// scorer is the part of an alignment segment needed to total the score.
	type scorer interface {
		Score() int
	}

	hw := *window / 2
	sw := makeTable(alphabet.DNAgapped, alnmat)
	for _, ref := range flag.Args() {
		f, err = os.Open(ref)
		if err != nil {
			log.Fatalf("failed to open reference %q: %v", ref, err)
		}
		ssc := seqio.NewScanner(fasta.NewReader(f, linear.NewSeq("", nil, alphabet.DNAgapped)))
		for ssc.Next() {
			seq := ssc.Seq().(*linear.Seq)
			// The label is on the event loop so that rejecting one event
			// moves on to the next event of the same sequence rather than
			// discarding all of the sequence's remaining events.
		eventLoop:
			for _, ev := range events[seq.Name()] {
				fields := strings.Fields(ev.FeatAttributes.Get("Read"))
				if len(fields) != 3 {
					log.Fatalf("bad record: %+v", ev)
				}
				start, err := strconv.Atoi(fields[1])
				if err != nil {
					log.Fatalf("failed to get start coordinate: %v", err)
				}
				end, err := strconv.Atoi(fields[2])
				if err != nil {
					log.Fatalf("failed to get end coordinate: %v", err)
				}

				if out != nil {
					// Report bad coordinates instead of panicking on, or
					// reading past the end of, the sequence slice.
					if start < 0 || end < start || end > len(seq.Seq) {
						log.Fatalf("insertion coordinates [%d,%d) out of range for %q (length %d)",
							start, end, seq.Name(), len(seq.Seq))
					}
					insert := *seq
					if insert.Desc != "" {
						insert.Desc += " "
					}
					insert.Desc += fmt.Sprintf("[%d,%d)", start, end)
					insert.Seq = insert.Seq[start:end]
					if _, err := fmt.Fprintf(out, "%60a\n", &insert); err != nil {
						log.Fatalf("failed to write fasta insertion to %q: %v", *fastaOut, err)
					}
				}

				var lOff, lEnd, rOff, rEnd int
				// If we have refined ends, use them.
				if dup := ev.FeatAttributes.Get("Dup"); dup != "" {
					d, err := strconv.Atoi(dup)
					if err != nil {
						log.Fatalf("failed to get duplication length: %v", err)
					}
					lOff = max(0, start-d)
					lEnd = start
					rOff = end
					rEnd = min(len(seq.Seq), end+d)
				} else {
					lOff = max(0, start-hw)
					lEnd = min(len(seq.Seq), start+hw)
					rOff = max(0, end-hw)
					rEnd = min(len(seq.Seq), end+hw)

					// Ensure windows don't overlap.
					if lEnd > rOff {
						lEnd = (lEnd + rOff) / 2
						rOff = lEnd
					}
				}

				if lEnd-lOff < *thresh || rEnd-rOff < *thresh {
					// Don't do fruitless work.
					continue eventLoop
				}

				// Anything that gets here with inconsistent bounds would
				// otherwise fail in the slice expressions below.
				if lOff < 0 || lOff > lEnd || lEnd > len(seq.Seq) ||
					rOff < 0 || rOff > rEnd || rEnd > len(seq.Seq) {
					log.Fatalf("flank coordinates out of range for %q (length %d): left [%d,%d) right [%d,%d)",
						seq.Name(), len(seq.Seq), lOff, lEnd, rOff, rEnd)
				}

				left := *seq
				left.ID = "prefix"
				left.Seq = left.Seq[lOff:lEnd]
				right := *seq
				right.ID = "postfix"
				right.Seq = right.Seq[rOff:rEnd]

				aln, err := sw.Align(&right, &left)
				if err != nil {
					log.Fatal(err)
				}
				if len(aln) == 0 {
					// No aligned segments: there is nothing to report, and
					// indexing aln below would panic.
					continue eventLoop
				}

				// Require at least thresh non-gap letters in each aligned row.
				fa := align.Format(&right, &left, aln, '-')
				for _, row := range fa {
					var n int
					for _, l := range row.(alphabet.Letters) {
						if l != '-' {
							n++
						}
					}
					if n < *thresh {
						continue eventLoop
					}
				}

				var sc int
				for _, seg := range aln {
					sc += seg.(scorer).Score()
				}
				// NOTE(review): Features()[0] is offset by lOff and
				// Features()[1] by rOff although the alignment was made as
				// Align(&right, &left) - confirm which feature belongs to
				// which flank before relying on these coordinates.
				ev.FeatAttributes = append(ev.FeatAttributes, gff.Attribute{
					Tag: "TSD", Value: fmt.Sprintf(`%v %d %d %v "%v" %d`,
						fa[0], aln[len(aln)-1].Features()[0].End()+lOff,
						aln[0].Features()[1].Start()+rOff, fa[1],
						aln, sc),
				})
				if _, err := w.Write(ev); err != nil {
					log.Fatalf("failed to write gff feature: %v", err)
				}
			}
		}
		if err := ssc.Error(); err != nil {
			log.Fatalf("error during fasta read: %v", err)
		}
		f.Close()
	}

	if out != nil {
		if err := out.Close(); err != nil {
			log.Fatalf("failed to close fasta insertion output file %q: %v", *fastaOut, err)
		}
	}
}