Beispiel #1
0
// TestAlignment builds two sequences from util.DeBruijn output over the
// letters A, C, G and T (orders k and k-1), aligns them within the trapezoids
// T and checks the resulting hits, and their summed lengths, against the
// expected values. Each expected hit in H is then realigned with an align.SW
// aligner and the formatted alignment is logged for inspection.
func (s *S) TestAlignment(c *check.C) {
	l := [...]byte{'A', 'C', 'G', 'T'}
	Q := len(l)
	a := &linear.Seq{Seq: make(alphabet.Letters, 0, util.Pow(Q, k))}
	a.Alpha = alphabet.DNA
	for _, i := range util.DeBruijn(byte(Q), k) {
		a.Seq = append(a.Seq, alphabet.Letter(l[i]))
	}
	b := &linear.Seq{Seq: make(alphabet.Letters, 0, util.Pow(Q, k-1))}
	b.Alpha = alphabet.DNA
	for _, i := range util.DeBruijn(byte(Q), k-1) {
		b.Seq = append(b.Seq, alphabet.Letter(l[i]))
	}
	aligner := NewAligner(a, b, int(k), 50, 0.80)
	aligner.Costs = &Costs{
		MaxIGap:    maxIGap,
		DiffCost:   diffCost,
		SameCost:   sameCost,
		MatchCost:  matchCost,
		BlockCost:  blockCost,
		RMatchCost: rMatchCost,
	}
	hits := aligner.AlignTraps(T)
	c.Check(hits, check.DeepEquals, H)
	la, lb, err := hits.Sum()
	c.Check(la, check.Equals, 791)
	c.Check(lb, check.Equals, 664)
	c.Check(err, check.Equals, nil)

	// The scoring matrix is the same for every hit, so build it once.
	smith := align.SW{
		{0, -1, -1, -1, -1},
		{-1, 2, -1, -1, -1},
		{-1, -1, 2, -1, -1},
		{-1, -1, -1, 2, -1},
		{-1, -1, -1, -1, 2},
	}
	for _, h := range H {
		sa, sb := &linear.Seq{Seq: a.Seq[h.Abpos:h.Aepos]}, &linear.Seq{Seq: b.Seq[h.Bbpos:h.Bepos]}
		sa.Alpha = alphabet.DNAgapped
		sb.Alpha = alphabet.DNAgapped
		swa, err := smith.Align(sa, sb)
		c.Check(err, check.Equals, nil)
		if err != nil {
			// Nothing meaningful to format when the alignment failed.
			continue
		}
		fa := align.Format(sa, sb, swa, sa.Alpha.Gap())
		c.Logf("%v\n", swa)
		c.Logf("%s\n%s\n", fa[0], fa[1])
	}
}
Beispiel #2
0
// FinalisePack finalises the sequence packing. The contigs held by the
// Packer are concatenated in order into a single letter slice, with a run of
// 'N' letters inserted before every contig after the first. The length of
// each run is derived from the length of the preceding contig: the distance
// to the next multiple of binSize, extended by a further binSize when that
// distance is less than minPadding. No run is appended after the last contig.
// The joined letters are stored in the packed sequence, which is returned.
func (pa *Packer) FinalisePack() *Packed {
	joined := make(alphabet.Letters, 0, pa.length)
	var gap int // Padding owed by the previously appended contig.
	for _, contig := range pa.packed.seqMap.contigs {
		joined = append(joined, alphabet.Letter('N').Repeat(gap)...)
		joined = append(joined, contig.Seq.Seq...)

		// Work out the padding that must precede the next contig.
		gap = binSize - contig.Len()%binSize
		if gap < minPadding {
			gap += binSize
		}
	}
	pa.packed.Seq.Seq = joined

	return pa.packed
}
Beispiel #3
0
// TestPack packs one sequence built from util.DeBruijn output for every
// order from 1 to maxk, logging the diagnostic value returned by each Pack
// call, and then checks the length of the finalised packed sequence.
func (s *S) TestPack(c *check.C) {
	p := NewPacker("")
	for k := byte(1); k <= maxk; k++ {
		a := &linear.Seq{
			Annotation: seq.Annotation{
				ID: fmt.Sprintf("deBruijn%d", k),
			},
			Seq: make(alphabet.Letters, 0, util.Pow(Q, k)),
		}
		for _, i := range util.DeBruijn(byte(Q), k) {
			a.Seq = append(a.Seq, alphabet.Letter(l[i]))
		}
		info, err := p.Pack(a)
		// A failed pack would make the final length check misleading, so
		// report it where it happens.
		c.Check(err, check.Equals, nil)
		c.Logf("%d: %s", k, info)
	}
	c.Check(p.FinalisePack().Len(), check.Equals, 94208)
}
Beispiel #4
0
// SetUpSuite prepares the package-level packed sequence ps. It packs one
// sequence built from util.DeBruijn output for every order from 1 to maxk,
// each located on a Contig named after its ID, and stores the finalised
// result in ps. The suite is aborted if any sequence cannot be packed.
func (s *S) SetUpSuite(c *check.C) {
	p := NewPacker("")
	for k := byte(1); k <= maxk; k++ {
		a := &linear.Seq{
			Annotation: seq.Annotation{
				ID: fmt.Sprintf("deBruijn%d", k),
			},
			Seq: make(alphabet.Letters, 0, util.Pow(Q, k)),
		}
		for _, i := range util.DeBruijn(byte(Q), k) {
			a.Seq = append(a.Seq, alphabet.Letter(l[i]))
		}
		a.Loc = Contig(a.ID)
		// An incomplete fixture would invalidate every test that uses ps,
		// so stop immediately rather than ignore the failure.
		if _, err := p.Pack(a); err != nil {
			c.Fatalf("Failed to pack %s: %v", a.ID, err)
		}
	}
	ps = p.FinalisePack()
}
Beispiel #5
0
// TestFilterAndMerge indexes a sequence built from util.DeBruijn output of
// order k, filters a second such sequence of order k-1 against that index
// into a morass sorter, and checks the number, content and ordering of the
// filter hits. The hits are then merged and the resulting trapezoids, and
// their summed lengths, are checked against the expected values.
func (s *S) TestFilterAndMerge(c *check.C) {
	l := [...]byte{'A', 'C', 'G', 'T'}
	Q := len(l)
	a := &linear.Seq{Seq: make(alphabet.Letters, 0, util.Pow(Q, k))}
	a.Alpha = alphabet.DNA
	for _, i := range util.DeBruijn(byte(Q), k) {
		a.Seq = append(a.Seq, alphabet.Letter(l[i]))
	}
	b := &linear.Seq{Seq: make(alphabet.Letters, 0, util.Pow(Q, k-1))}
	// b.Alpha = alphabet.DNA // Not actually required for this use.
	for _, i := range util.DeBruijn(byte(Q), k-1) {
		b.Seq = append(b.Seq, alphabet.Letter(l[i]))
	}
	i, err := kmerindex.New(int(k), a)
	if err != nil {
		c.Fatalf("Failed to create kmerindex: %v", err)
	}
	i.Build()
	p := &Params{WordSize: int(k), MinMatch: 50, MaxError: 4, TubeOffset: 32}
	f := New(i, p)
	var sorter *morass.Morass
	if sorter, err = morass.New(Hit{}, "", "", 2<<20, false); err != nil {
		c.Fatalf("Failed to create morass: %v", err)
	}
	f.Filter(b, false, false, sorter)
	c.Check(sorter.Len(), check.Equals, int64(12))

	// r always carries one spare trailing element for Pull to fill. The
	// spare is trimmed off once Pull reports an error, which is taken here
	// to mean that the sorter has no more hits to give.
	r := make([]Hit, 1, sorter.Len())
	for {
		err = sorter.Pull(&r[len(r)-1])
		if err != nil {
			r = r[:len(r)-1]
			break
		}
		r = append(r, Hit{})
	}
	want := map[Hit]bool{
		Hit{From: 0, To: 163, Diagonal: 32}:      true,
		Hit{From: 141, To: 247, Diagonal: 64}:    true,
		Hit{From: 237, To: 433, Diagonal: 1120}:  true,
		Hit{From: 241, To: 347, Diagonal: 96}:    true,
		Hit{From: 341, To: 452, Diagonal: 128}:   true,
		Hit{From: 447, To: 565, Diagonal: 1952}:  true,
		Hit{From: 542, To: 628, Diagonal: 1984}:  true,
		Hit{From: 627, To: 814, Diagonal: 2592}:  true,
		Hit{From: 786, To: 898, Diagonal: 2624}:  true,
		Hit{From: 868, To: 939, Diagonal: 2880}:  true,
		Hit{From: 938, To: 997, Diagonal: 3040}:  true,
		Hit{From: 938, To: 1024, Diagonal: 3072}: true,
	}
	got := make(map[Hit]bool)
	for _, h := range r {
		got[h] = true
	}
	c.Check(got, check.DeepEquals, want)
	c.Check(sort.IsSorted(hits(r)), check.Equals, true)
	m := NewMerger(i, b, p, 5, false)
	// Pass the address of the slice element rather than of the range
	// variable, which is a single reused variable before Go 1.22.
	for j := range r {
		m.MergeFilterHit(&r[j])
	}
	t := m.FinaliseMerge()
	sorter.CleanUp()
	c.Check(len(t), check.Equals, 6)
	la, lb := t.Sum()
	c.Check(la, check.Equals, 1257)
	c.Check(lb, check.Equals, 402)
	c.Check(t, check.DeepEquals, Trapezoids{
		{Top: 452, Bottom: 0, Left: -128, Right: 3},
		{Top: 433, Bottom: 237, Left: -1120, Right: -1085},
		{Top: 628, Bottom: 447, Left: -1984, Right: -1917},
		{Top: 898, Bottom: 627, Left: -2624, Right: -2557},
		{Top: 939, Bottom: 868, Left: -2880, Right: -2845},
		{Top: 1024, Bottom: 938, Left: -3072, Right: -3005},
	})
}
Beispiel #6
0
// Read reads a single sequence and returns it and potentially an error. Note
// that a non-nil returned error may be associated with a valid sequence, so it
// is the responsibility of the caller to examine the error to determine
// whether the read was successful.
// Note that if the Reader's template type returns different non-nil error
// values from calls to SetName and SetDescription, a new error string will be
// returned on each call to Read. So to allow direct error comparison these
// methods should return the same error.
// TODO: Does not read multi-line fastq.
func (r *Reader) Read() (seq.Sequence, error) {
	// Parser states, named for the kind of line expected next.
	const (
		id1 = iota
		letters
		id2
		quality
	)

	var (
		buff, line, label []byte
		isPrefix          bool

		seqBuff []alphabet.QLetter
		t       seqio.SequenceAppender

		state int
		err   error

		// hdrErr holds the error returned by header parsing. It is kept
		// apart from err because err is overwritten by every ReadLine call,
		// which would otherwise discard it before Read returns.
		hdrErr error
	)

loop:
	for {
		buff, isPrefix, err = r.r.ReadLine()
		if err != nil {
			if t != nil && state == quality && err == io.EOF {
				// Input ended while a quality line was expected; let the
				// length check below decide whether the record is complete.
				break
			}
			return nil, err
		}
		// ReadLine may deliver a long line in pieces; accumulate them.
		line = append(line, buff...)
		if isPrefix {
			continue
		}

		line = bytes.TrimSpace(line)
		switch {
		case state == id1 && maybeID1(line):
			state = letters
			t, hdrErr = r.readHeader(line)
			// Keep a copy of the header; line's backing array is reused.
			label = append([]byte(nil), line...)
		case state == id2 && maybeID2(line):
			state = quality
			if len(label) == 0 {
				return nil, errors.New("fastq: no header line parsed before +line in fastq format")
			}
			if len(line) != 1 && !bytes.Equal(label[1:], line[1:]) {
				return nil, errors.New("fastq: quality header does not match sequence header")
			}
		case state == letters && len(line) > 0:
			// A +line directly after the header means the record has no
			// sequence letters.
			if maybeID2(line) && (len(line) == 1 || bytes.Equal(label[1:], line[1:])) {
				state = quality
				break
			}
			state = id2
			seqBuff = make([]alphabet.QLetter, len(line))
			var i int
			for _, l := range line {
				if isSpace(l) {
					continue
				}
				seqBuff[i].L = alphabet.Letter(l)
				i++
			}
			seqBuff = seqBuff[:i]
		case state == quality:
			// Skip blank lines while quality scores are still expected.
			if len(line) == 0 && len(seqBuff) != 0 {
				continue
			}
			break loop
		}
		line = line[:0]
	}

	// line now holds the quality scores; drop any embedded whitespace.
	line = bytes.Join(bytes.Fields(line), nil)
	if len(line) != len(seqBuff) {
		return nil, errors.New("fastq: sequence/quality length mismatch")
	}
	for i := range line {
		seqBuff[i].Q = r.enc.DecodeToQphred(line[i])
	}
	t.AppendQLetters(seqBuff...)

	return t, hdrErr
}