Beispiel #1
0
// AlignFrom aligns one strand of the query against the target, seeding the
// dynamic programming aligner with the supplied filter trapezoids rather than
// running the filter itself.
//
// When complement is true a reverse-complemented clone of the query is
// aligned; the stored query is not modified. If a previous operation left an
// error in p.err, that error is returned immediately and no work is done.
func (p *PALS) AlignFrom(traps filter.Trapezoids, complement bool) (dp.Hits, error) {
	if p.err != nil {
		return nil, p.err
	}

	// Use the stored query directly unless the opposite strand was requested,
	// in which case work on a private reverse-complemented copy.
	strand := p.query
	if complement {
		p.notify("Complementing query")
		strand = p.query.Clone().(*linear.Seq)
		strand.RevComp()
		p.notify("Complemented query")
	}

	p.notify("Aligning")
	aligner := dp.NewAligner(
		p.target, strand,
		p.FilterParams.WordSize, p.DPParams.MinHitLength, p.DPParams.MinId,
	)
	aligner.Costs = &p.Costs

	hits := aligner.AlignTraps(traps)
	coverA, coverB, err := hits.Sum()
	if err != nil {
		return nil, err
	}
	p.notifyf("Aligned %d hits covering %d x %d", len(hits), coverA, coverB)

	return hits, nil
}
Beispiel #2
0
// NewMerger returns a Merger using the provided kmerindex, query sequence,
// filter parameters and maximum inter-segment gap length.
// If selfCompare is true only the upper diagonal of the comparison matrix is examined.
func NewMerger(ki *kmerindex.Index, query *linear.Seq, filterParams *Params, maxIGap int, selfCompare bool) *Merger {
	// A bin is one narrower than a tube, which is TubeOffset + MaxError wide.
	binWidth := filterParams.TubeOffset + filterParams.MaxError - 1
	leftPadding := diagonalPadding + binWidth

	// The terminal trapezoid is positioned one past the end of the query.
	// It is installed as both eoTerm and the initial trapOrder.
	pastEnd := query.Len() + 1
	terminal := &trapezoid{Trapezoid: Trapezoid{
		Left:   pastEnd + leftPadding,
		Right:  pastEnd,
		Bottom: -1,
		Top:    pastEnd,
	}}

	m := new(Merger)
	m.target = ki.Seq()
	m.filterParams = filterParams
	m.maxIGap = maxIGap
	m.query = query
	m.selfComparison = selfCompare
	m.bottomPadding = ki.K() + 2
	m.leftPadding = leftPadding
	m.binWidth = binWidth
	m.eoTerm = terminal
	m.trapOrder = terminal
	m.valueToCode = ki.Seq().Alpha.LetterIndex()

	return m
}
Beispiel #3
0
// Pack adds seq to the Packed sequence and returns a diagnostic string holding
// the sequence ID (truncated to at most 20 bytes), the sequence length and the
// range of bin indices assigned to it.
//
// The first sequence packed fixes the alphabet of the Packed sequence; packing
// a sequence with a different alphabet afterwards returns an error and leaves
// the Packer unchanged.
func (pa *Packer) Pack(seq *linear.Seq) (string, error) {
	switch {
	case pa.packed.Alpha == nil:
		pa.packed.Alpha = seq.Alpha
	case pa.packed.Alpha != seq.Alpha:
		return "", errors.New("pals: alphabet mismatch")
	}

	seqLen := seq.Len()

	// Pad out to the next bin boundary, adding a further whole bin when that
	// leaves less than minPadding.
	pad := binSize - seqLen%binSize
	if pad < minPadding {
		pad += binSize
	}

	// The contig starts after the padding that follows the previous contig;
	// this contig's own padding is only accounted for by the next Pack call.
	start := pa.length + pa.lastPad
	pa.length = start + seqLen
	pa.lastPad = pad

	// Map every bin spanned by the sequence and its padding to the index the
	// new contig is about to take in m.contigs.
	m := &pa.packed.seqMap
	contigIndex := len(m.contigs)
	bins := make([]int, (pad+seqLen)/binSize)
	for i := range bins {
		bins[i] = contigIndex
	}
	m.binMap = append(m.binMap, bins...)
	m.contigs = append(m.contigs, contig{Seq: seq, from: start})

	shortID := seq.ID[:util.Min(20, len(seq.ID))]
	lastBin := len(m.binMap) - 1
	firstBin := len(m.binMap) - len(bins)

	return fmt.Sprintf("%20s\t%10d\t%7d-%-d", shortID, seqLen, firstBin, lastBin), nil
}
Beispiel #4
0
// Align runs the full pipeline for one strand of the query: filtering against
// the index, merging filter hits into trapezoids, and aligning within those
// trapezoids. The merged trapezoids are retained in p.trapezoids.
//
// When complement is true a reverse-complemented clone of the query is used;
// the stored query is not modified. If a previous operation left an error in
// p.err, that error is returned immediately. An error from clearing the morass
// does not abort the current call: it is stored in p.err and so is returned by
// the next call that checks it.
func (p *PALS) Align(complement bool) (dp.Hits, error) {
	if p.err != nil {
		return nil, p.err
	}

	// Use the stored query directly unless the opposite strand was requested,
	// in which case work on a private reverse-complemented copy.
	strand := p.query
	if complement {
		p.notify("Complementing query")
		strand = p.query.Clone().(*linear.Seq)
		strand.RevComp()
		p.notify("Complemented query")
	}

	p.notify("Filtering")
	if err := p.hitFilter.Filter(strand, p.selfCompare, complement, p.morass); err != nil {
		return nil, err
	}
	p.notifyf("Identified %d filter hits", p.morass.Len())

	p.notify("Merging")
	merger := filter.NewMerger(p.index, strand, p.FilterParams, p.MaxIGap, p.selfCompare)
	var hit filter.Hit
	for {
		pullErr := p.morass.Pull(&hit)
		if pullErr == io.EOF {
			// The morass is exhausted; every hit has been merged.
			break
		}
		if pullErr != nil {
			return nil, pullErr
		}
		merger.MergeFilterHit(&hit)
	}
	p.err = p.morass.Clear()
	p.trapezoids = merger.FinaliseMerge()
	coverT, coverQ := p.trapezoids.Sum()
	p.notifyf("Merged %d trapezoids covering %d x %d", len(p.trapezoids), coverT, coverQ)

	p.notify("Aligning")
	aligner := dp.NewAligner(
		p.target, strand,
		p.FilterParams.WordSize, p.DPParams.MinHitLength, p.DPParams.MinId,
	)
	aligner.Costs = &p.Costs

	hits := aligner.AlignTraps(p.trapezoids)
	coverA, coverB, err := hits.Sum()
	if err != nil {
		return nil, err
	}
	p.notifyf("Aligned %d hits covering %d x %d", len(hits), coverA, coverB)

	return hits, nil
}
Beispiel #5
0
// New returns a k-mer Index with word size k built over the sequence s.
//
// It returns ErrKTooLarge or ErrKTooSmall when k lies outside
// [MinKmerLen, MaxKmerLen], ErrShortSeq when s is not longer than k, and
// ErrBadAlphabet when the alphabet of s does not have exactly four letters.
// The checks are made in that order.
func New(k int, s *linear.Seq) (*Index, error) {
	if k > MaxKmerLen {
		return nil, ErrKTooLarge
	}
	if k < MinKmerLen {
		return nil, ErrKTooSmall
	}
	if s.Len() < k+1 {
		return nil, ErrShortSeq
	}
	if s.Alpha.Len() != 4 {
		return nil, ErrBadAlphabet
	}

	words := util.Pow4(k)

	// The finger table needs a Tn+1 position so that Tn can be recognised.
	// The indexed field is left at its zero value, false.
	ki := &Index{
		finger: make([]Kmer, words+1),
		k:      k,
		kMask:  Kmer(words - 1),
		seq:    s,
		lookUp: s.Alpha.LetterIndex(),
	}
	ki.buildKmerTable()

	return ki, nil
}
Beispiel #6
0
// Filter searches the query sequence against the stored index. If query and the target
// are the same sequence, selfAlign can be used to avoid double searching - behavior is
// undefined if the sequences are not the same.
// A morass is used to store and sort individual filter hits.
//
// An error is returned when the filter's TubeOffset is smaller than its MaxError
// or is not positive, when k-mer iteration or flushing a tube fails, or when
// finalising the morass fails. The selfAlign, complement and morass arguments
// are stored on the Filter before any validation takes place.
func (f *Filter) Filter(query *linear.Seq, selfAlign, complement bool, morass *morass.Morass) error {
	f.selfAlign = selfAlign
	f.complement = complement
	f.morass = morass
	f.k = f.ki.K()

	// Ukkonen's Lemma
	f.minKmersPerHit = MinWordsPerFilterHit(f.minMatch, f.k, f.maxError)

	// Maximum distance between SeqQ positions of two k-mers in a match
	// (More stringent bounds may be possible, but not a big problem
	// if two adjacent matches get merged).
	f.maxKmerDist = f.minMatch - f.k

	tubeWidth := f.tubeOffset + f.maxError

	if f.tubeOffset < f.maxError {
		return errors.New("filter: TubeOffset < MaxError")
	}
	// tubeOffset is used as a divisor below; zero would panic with a division
	// by zero and a negative value gives a meaningless tube count.
	if f.tubeOffset <= 0 {
		return errors.New("filter: TubeOffset must be positive")
	}

	maxActiveTubes := (f.target.Len()+tubeWidth-1)/f.tubeOffset + 1
	f.tubes = make([]tubeState, maxActiveTubes)

	// Ticker tracks cycling of circular list of active tubes.
	ticker := tubeWidth

	err := f.ki.ForEachKmerOf(query, 0, query.Len(), func(ki *kmerindex.Index, position, kmer int) {
		// The index positions for this k-mer lie in the finger range
		// [FingerAt(kmer-1), FingerAt(kmer)), starting from 0 for the first k-mer.
		from := 0
		if kmer > 0 {
			from = ki.FingerAt(kmer - 1)
		}
		to := ki.FingerAt(kmer)
		for i := from; i < to; i++ {
			f.commonKmer(ki.PosAt(i), position)
		}

		if ticker--; ticker == 0 {
			if e := f.tubeEnd(position); e != nil {
				// The callback cannot return an error, so panic instead. This
				// relies on ForEachKmerOf recovering the panic and returning
				// it as its error (not visible in this function).
				panic(e)
			}
			ticker = f.tubeOffset
		}
	})
	if err != nil {
		return err
	}

	err = f.tubeEnd(query.Len() - 1)
	if err != nil {
		return err
	}

	// Flush every tube between the diagonals at the two ends of the target for
	// the last query position, widened by one tube width on each side.
	diagFrom := f.diagIndex(f.target.Len()-1, query.Len()-1) - tubeWidth
	diagTo := f.diagIndex(0, query.Len()-1) + tubeWidth

	tubeFrom := f.tubeIndex(diagFrom)
	if tubeFrom < 0 {
		tubeFrom = 0
	}

	tubeTo := f.tubeIndex(diagTo)

	for tubeIndex := tubeFrom; tubeIndex <= tubeTo; tubeIndex++ {
		err = f.tubeFlush(tubeIndex)
		if err != nil {
			return err
		}
	}

	// Drop the tube state now that it is no longer needed.
	f.tubes = nil

	return f.morass.Finalise()
}