// Stitch provides a function that may be used by polymer types to implement Stitcher. // It makes use of reflection and so may be slower than type-specific implementations. // This is the reference implementation and should be used to compare type-specific // implementation against in testing. func Stitch(pol interface{}, offset int, f feat.FeatureSet) (s interface{}, err error) { t := interval.NewTree() var i *interval.Interval for _, feature := range f { i, err = interval.New(emptyString, feature.Start, feature.End, 0, nil) if err != nil { return } else { t.Insert(i) } } pv := reflect.ValueOf(pol) pLen := pv.Len() end := pLen + offset span, err := interval.New(emptyString, offset, end, 0, nil) if err != nil { panic("Sequence.End() < Sequence.Start()") } fs, _ := t.Flatten(span, 0, 0) l := 0 for _, seg := range fs { l += util.Min(seg.End(), end) - util.Max(seg.Start(), offset) } tv := reflect.MakeSlice(pv.Type(), 0, l) for _, seg := range fs { tv = reflect.AppendSlice(tv, pv.Slice(util.Max(seg.Start()-offset, 0), util.Min(seg.End()-offset, pLen))) } return tv.Interface(), nil }
// Join segments of the sequence, returning any error. func (self *Seq) Compose(f feat.FeatureSet) (err error) { l := 0 for _, seg := range f { if seg.End < seg.Start { return bio.NewError("Feature end < start", 0, seg) } l += util.Min(seg.End, self.End()) - util.Max(seg.Start, self.Start()) } t := &Seq{} *t = *self t.S = &Packing{Letters: make([]alphabet.Pack, 0, (l+3)/4)} var tseg seq.Sequence for _, seg := range f { tseg, err = self.Subseq(util.Max(seg.Start, self.Start()), util.Min(seg.End, self.End())) if err != nil { return } tseg := tseg.(*Seq) if seg.Strand == -1 { tseg.RevComp() } tseg.S.Align(seq.Start) t.S.Align(seq.End) t.S.Letters = append(t.S.Letters, tseg.S.Letters...) t.S.RightPad = tseg.S.RightPad } *self = *t return }
func (self *Interval) adjustRange() { if self.left != nil && self.right != nil { self.minStart = util.Min(self.start, self.left.minStart) self.maxEnd = util.Max(self.end, self.left.maxEnd, self.right.maxEnd) } else if self.left != nil { self.minStart = util.Min(self.start, self.left.minStart) self.maxEnd = util.Max(self.end, self.left.maxEnd) } else if self.right != nil { self.minStart = util.Min(self.start, self.right.minStart) self.maxEnd = util.Max(self.end, self.right.maxEnd) } }
func (self *Multi) Stitch(f feat.FeatureSet) (err error) { tr := interval.NewTree() var i *interval.Interval for _, feature := range f { i, err = interval.New(emptyString, feature.Start, feature.End, 0, nil) if err != nil { return } else { tr.Insert(i) } } span, err := interval.New(emptyString, self.Start(), self.End(), 0, nil) if err != nil { panic("Sequence.End() < Sequence.Start()") } fs, _ := tr.Flatten(span, 0, 0) ff := feat.FeatureSet{} for _, seg := range fs { ff = append(ff, &feat.Feature{ Start: util.Max(seg.Start(), self.Start()), End: util.Min(seg.End(), self.End()), }) } return self.Compose(ff) }
func (self *Seq) stitch(f []*interval.Interval) (ts []byte) { for _, seg := range f { ts = append(ts, self.Seq[util.Max(seg.Start()-self.Offset, 0):util.Min(seg.End()-self.Offset, len(self.Seq))]...) } return }
// Merge a range of intervals provided by r. Returns merged intervals in a slice and // intervals contributing to merged intervals groups in a slice of slices. func (self *Interval) flatten(r chan *Interval, tolerance int) (flat []*Interval, rich [][]*Interval) { flat = []*Interval{} rich = [][]*Interval{{}} min, max := util.MaxInt, util.MinInt var last *Interval for current := range r { if last != nil && current.start-tolerance > max { n, _ := New(current.seg, min, max, 0, nil) flat = append(flat, n) min = current.start max = current.end rich = append(rich, []*Interval{}) } else { min = util.Min(min, current.start) max = util.Max(max, current.end) } rich[len(rich)-1] = append(rich[len(rich)-1], current) last = current } n, _ := New(last.seg, min, max, 0, nil) flat = append(flat, n) return }
func (self *Quality) stitch(fs []*interval.Interval) (tq []Qsanger) { for _, seg := range fs { tq = append(tq, self.Qual[util.Max(seg.Start()-self.Offset, 0):util.Min(seg.End()-self.Offset, len(self.Qual))]...) } return }
// Join sequentially order disjunct segments of the sequence, returning any error. func (self *Seq) Stitch(f feat.FeatureSet) (err error) { tr := interval.NewTree() var i *interval.Interval for _, feature := range f { i, err = interval.New(emptyString, feature.Start, feature.End, 0, nil) if err != nil { return } else { tr.Insert(i) } } span, err := interval.New(emptyString, self.offset, self.End(), 0, nil) if err != nil { panic("packed: Sequence.End() < Sequence.Start()") } fs, _ := tr.Flatten(span, 0, 0) l := 0 for _, seg := range fs { l += util.Min(seg.End(), self.End()) - util.Max(seg.Start(), self.Start()) } t := &Seq{} *t = *self t.S = &Packing{Letters: make([]alphabet.Pack, 0, (l+3)/4)} var tseg seq.Sequence for _, seg := range fs { tseg, err = self.Subseq(util.Max(seg.Start(), self.Start()), util.Min(seg.End(), self.End())) if err != nil { return } s := tseg.(*Seq).S s.Align(seq.Start) t.S.Align(seq.End) t.S.Letters = append(t.S.Letters, s.Letters...) t.S.RightPad = s.RightPad } *self = *t return }
// Compose provides a function that may be used by polymer types to implement Composer. // It makes use of reflection and so may be slower than type-specific implementations. // This is the reference implementation and should be used to compare type-specific // implementation against in testing. func Compose(pol interface{}, offset int, f feat.FeatureSet) (s []interface{}, err error) { pv := reflect.ValueOf(pol) pLen := pv.Len() end := pLen + offset tv := make([]reflect.Value, len(f)) for i, seg := range f { if seg.End < seg.Start { return nil, bio.NewError("Feature End < Start", 0, f) } l := util.Min(seg.End, end) - util.Max(seg.Start, offset) tv[i] = reflect.MakeSlice(pv.Type(), l, l) reflect.Copy(tv[i], pv.Slice(util.Max(seg.Start-offset, 0), util.Min(seg.End-offset, pLen))) } s = make([]interface{}, len(tv)) for i := range tv { s[i] = tv[i].Interface() } return }
func (self *Interval) merge(i *Interval, overlap int) (inserted *Interval, removed []*Interval) { r := make(chan *Interval) removed = []*Interval{} wait := make(chan struct{}) go func() { defer close(wait) min, max := util.MaxInt, util.MinInt for old := range r { min, max = util.Min(min, old.start), util.Max(max, old.end) removed = append(removed, old) } i.start, i.end = util.Min(i.start, min), util.Max(i.end, max) inserted = i // TODO: Do something sensible when only one interval is found and the only action is to extend or ignore }() self.intersect(i, overlap, r) close(r) <-wait return }
// Create a new KmerRainbow defined by the rectangle r, kmerindex index and background color. func NewKmerRainbow(r image.Rectangle, index *kmerindex.Index, background color.HSVA) *KmerRainbow { // should generalise the BG color h := r.Dy() kmers := make([]int, h) kmask := util.Pow4(index.GetK()) kmaskf := float64(kmask) f := func(index *kmerindex.Index, _, kmer int) { kmers[int(float64(kmer)*float64(h)/kmaskf)]++ } index.ForEachKmerOf(index.Seq, 0, index.Seq.Len(), f) max := util.Max(kmers...) return &KmerRainbow{ RGBA: image.NewRGBA(r), Index: index, Max: max, BackGround: background, } }
// Method to align two sequences using the Smith-Waterman algorithm. Returns an alignment or an error // if the scoring matrix is not square. func (a *Aligner) Align(reference, query *seq.Seq) (aln seq.Alignment, err error) { gap := len(a.Matrix) - 1 for _, row := range a.Matrix { if len(row) != gap+1 { return nil, bio.NewError("Scoring matrix is not square.", 0, a.Matrix) } } r, c := reference.Len()+1, query.Len()+1 table := make([][]int, r) for i := range table { table[i] = make([]int, c) } max, maxI, maxJ := 0, 0, 0 var ( score int scores [3]int ) for i := 1; i < r; i++ { for j := 1; j < c; j++ { if rVal, qVal := a.LookUp.ValueToCode[reference.Seq[i-1]], a.LookUp.ValueToCode[query.Seq[j-1]]; rVal < 0 || qVal < 0 { continue } else { scores[diag] = table[i-1][j-1] + a.Matrix[rVal][qVal] scores[up] = table[i-1][j] + a.Matrix[rVal][gap] scores[left] = table[i][j-1] + a.Matrix[gap][qVal] score = util.Max(scores[:]...) if score < 0 { score = 0 } if score >= max { // greedy so make farthest down and right max, maxI, maxJ = score, i, j } table[i][j] = score } } } refAln := &seq.Seq{ID: reference.ID, Seq: make([]byte, 0, reference.Len())} queryAln := &seq.Seq{ID: query.ID, Seq: make([]byte, 0, query.Len())} for i, j := maxI, maxJ; table[i][j] != 0 && i > 0 && j > 0; { if rVal, qVal := a.LookUp.ValueToCode[reference.Seq[i-1]], a.LookUp.ValueToCode[query.Seq[j-1]]; rVal < 0 || qVal < 0 { continue } else { scores[diag] = table[i-1][j-1] + a.Matrix[rVal][qVal] scores[up] = table[i-1][j] + a.Matrix[gap][qVal] scores[left] = table[i][j-1] + a.Matrix[rVal][gap] switch d := maxIndex(scores[:]); d { case diag: i-- j-- refAln.Seq = append(refAln.Seq, reference.Seq[i]) queryAln.Seq = append(queryAln.Seq, query.Seq[j]) case up: i-- refAln.Seq = append(refAln.Seq, reference.Seq[i]) queryAln.Seq = append(queryAln.Seq, a.GapChar) case left: j-- refAln.Seq = append(refAln.Seq, a.GapChar) queryAln.Seq = append(queryAln.Seq, query.Seq[j]) } } } for i, j := 0, len(refAln.Seq)-1; i < j; i, j = i+1, j-1 { refAln.Seq[i], refAln.Seq[j] = refAln.Seq[j], refAln.Seq[i] } for i, j := 0, len(queryAln.Seq)-1; i < j; i, j = i+1, j-1 { queryAln.Seq[i], queryAln.Seq[j] = queryAln.Seq[j], queryAln.Seq[i] } aln = seq.Alignment{refAln, queryAln} return }