func checkPackedAlpha(alpha alphabet.Nucleic) error { if alpha.Len() != 4 { return bio.NewError("Cannot create packed sequence with alphabet length != 4", 0, alpha) } for _, v := range alphabet.BytesToLetters([]byte(alpha.String())) { if c, ok := alpha.Complement(v); ok && alpha.IndexOf(v) != alpha.IndexOf(c)^0x3 { // TODO: Resolution to the following problem: // Normal nucleotide alphabets (ACGT/ACGU) are safe with this in either case sensitive or // insensitive. Other alphabets may not be, in this case specify case sensitive. return bio.NewError("alphabet order not consistent with bit operations for packed.", 0, alpha) } } return nil }
// Pack bytes that conform to a into a slice of alphabet.Pack. Panics if a byte in s does not conform. func PackLetters(a alphabet.Nucleic, s ...alphabet.Letter) (p *Packing) { p = &Packing{ Letters: make([]alphabet.Pack, (len(s)+3)/4), RightPad: int8(4-len(s)&3) & 3, } for i, c := range s { if !a.IsValid(c) { panic("packed: invalid letter") } p.Letters[i/4] <<= 2 p.Letters[i/4] |= alphabet.Pack(a.IndexOf(c)) } if sc := uint(len(s)) & 3; sc != 0 { p.Letters[len(p.Letters)-1] <<= (4 - sc) << 1 } return }
// Create a new Seq with the given id, letter sequence and alphabet. func NewSeq(id string, b []alphabet.Letter, alpha alphabet.Nucleic) (p *Seq, err error) { defer func() { if r := recover(); r != nil { _, pos := alpha.AllValid(b) err = bio.NewError(fmt.Sprintf("Encoding error: %s %q at position %d.", r, b[pos], pos), 1, b) } }() err = checkPackedAlpha(alpha) if err != nil { return } p = &Seq{ ID: id, S: PackLetters(alpha, b...), alphabet: alpha, Strand: 1, Stringify: Stringify, } return }