// Pack a sequence into the Packed sequence. Returns a string giving diagnostic information. func (pa *Packer) Pack(seq *linear.Seq) (string, error) { if pa.packed.Alpha == nil { pa.packed.Alpha = seq.Alpha } else if pa.packed.Alpha != seq.Alpha { return "", errors.New("pals: alphabet mismatch") } c := contig{Seq: seq} padding := binSize - seq.Len()%binSize if padding < minPadding { padding += binSize } pa.length += pa.lastPad c.from = pa.length pa.length += seq.Len() pa.lastPad = padding m := &pa.packed.seqMap bins := make([]int, (padding+seq.Len())/binSize) for i := 0; i < len(bins); i++ { bins[i] = len(m.contigs) } m.binMap = append(m.binMap, bins...) m.contigs = append(m.contigs, c) return fmt.Sprintf("%20s\t%10d\t%7d-%-d", seq.ID[:util.Min(20, len(seq.ID))], seq.Len(), len(m.binMap)-len(bins), len(m.binMap)-1), nil }
// Create a new Merger using the provided kmerindex, query sequence, filter parameters and maximum inter-segment gap length. // If selfCompare is true only the upper diagonal of the comparison matrix is examined. func NewMerger(ki *kmerindex.Index, query *linear.Seq, filterParams *Params, maxIGap int, selfCompare bool) *Merger { tubeWidth := filterParams.TubeOffset + filterParams.MaxError binWidth := tubeWidth - 1 leftPadding := diagonalPadding + binWidth eoTerm := &trapezoid{Trapezoid: Trapezoid{ Left: query.Len() + 1 + leftPadding, Right: query.Len() + 1, Bottom: -1, Top: query.Len() + 1, }} return &Merger{ target: ki.Seq(), filterParams: filterParams, maxIGap: maxIGap, query: query, selfComparison: selfCompare, bottomPadding: ki.K() + 2, leftPadding: leftPadding, binWidth: binWidth, eoTerm: eoTerm, trapOrder: eoTerm, valueToCode: ki.Seq().Alpha.LetterIndex(), } }
// Create a new Kmer Index with a word size k based on sequence func New(k int, s *linear.Seq) (*Index, error) { switch { case k > MaxKmerLen: return nil, ErrKTooLarge case k < MinKmerLen: return nil, ErrKTooSmall case k+1 > s.Len(): return nil, ErrShortSeq case s.Alpha.Len() != 4: return nil, ErrBadAlphabet } ki := &Index{ finger: make([]Kmer, util.Pow4(k)+1), // Need a Tn+1 finger position so that Tn can be recognised k: k, kMask: Kmer(util.Pow4(k) - 1), seq: s, lookUp: s.Alpha.LetterIndex(), indexed: false, } ki.buildKmerTable() return ki, nil }
// Filter a query sequence against the stored index. If query and the target are the same sequence, // selfAlign can be used to avoid double seaching - behavior is undefined if the the sequences are not the same. // A morass is used to store and sort individual filter hits. func (f *Filter) Filter(query *linear.Seq, selfAlign, complement bool, morass *morass.Morass) error { f.selfAlign = selfAlign f.complement = complement f.morass = morass f.k = f.ki.K() // Ukonnen's Lemma f.minKmersPerHit = MinWordsPerFilterHit(f.minMatch, f.k, f.maxError) // Maximum distance between SeqQ positions of two k-mers in a match // (More stringent bounds may be possible, but not a big problem // if two adjacent matches get merged). f.maxKmerDist = f.minMatch - f.k tubeWidth := f.tubeOffset + f.maxError if f.tubeOffset < f.maxError { return errors.New("filter: TubeOffset < MaxError") } maxActiveTubes := (f.target.Len()+tubeWidth-1)/f.tubeOffset + 1 f.tubes = make([]tubeState, maxActiveTubes) // Ticker tracks cycling of circular list of active tubes. ticker := tubeWidth var err error err = f.ki.ForEachKmerOf(query, 0, query.Len(), func(ki *kmerindex.Index, position, kmer int) { from := 0 if kmer > 0 { from = ki.FingerAt(kmer - 1) } to := ki.FingerAt(kmer) for i := from; i < to; i++ { f.commonKmer(ki.PosAt(i), position) } if ticker--; ticker == 0 { if e := f.tubeEnd(position); e != nil { panic(e) // Caught by fastkmerindex.ForEachKmerOf and returned } ticker = f.tubeOffset } }) if err != nil { return err } err = f.tubeEnd(query.Len() - 1) if err != nil { return err } diagFrom := f.diagIndex(f.target.Len()-1, query.Len()-1) - tubeWidth diagTo := f.diagIndex(0, query.Len()-1) + tubeWidth tubeFrom := f.tubeIndex(diagFrom) if tubeFrom < 0 { tubeFrom = 0 } tubeTo := f.tubeIndex(diagTo) for tubeIndex := tubeFrom; tubeIndex <= tubeTo; tubeIndex++ { err = f.tubeFlush(tubeIndex) if err != nil { return err } } f.tubes = nil return f.morass.Finalise() }