func probStr(p seq.Prob) string { if p.IsMin() { return "*" } scaled := int(-seq.Prob(hmmScale) * p) return fmt.Sprintf("%d", scaled) }
// readNeff reads a diversity value. func readNeff(fstr string) (seq.Prob, error) { f, err := seq.NewProb(fstr) if err != nil { return f, fmt.Errorf("Error reading neff '%s': %s", fstr, err) } if f.IsMin() { return f, nil } return f / seq.Prob(hmmScale), nil }
// AlignmentProb computes the probability of the sequence `s` aligning // with the profile in `frag`. The sequence must have length equivalent // to the fragment size. func (lib *sequenceProfile) AlignmentProb(fragi int, s seq.Sequence) seq.Prob { frag := lib.Fragments[fragi] if s.Len() != frag.Len() { panic(fmt.Sprintf("Sequence length %d != fragment size %d", s.Len(), frag.Len())) } prob := seq.Prob(0.0) for c := 0; c < s.Len(); c++ { prob += frag.Emissions[c].Lookup(s.Residues[c]) } return prob }
// Slice dices up an entire HHM file. The slice indices should be in terms of // the number of match/delete states in the underlying HMM. // All secondary structure annotations are also sliced. // The multiple sequence alignment is also sliced. // The NEFF for the HHM is also re-computed as the average of all NeffM scores // in each HMM column. func (hhm *HHM) Slice(start, end int) *HHM { hmm := hhm.HMM.Slice(start, end) meta := hhm.Meta meta.Neff = 0 for _, node := range hmm.Nodes { meta.Neff += node.NeffM } meta.Neff /= seq.Prob(len(hmm.Nodes)) return &HHM{ Meta: meta, Secondary: hhm.Secondary.Slice(start, end), MSA: hhm.MSA.Slice(start, end), HMM: hmm, } }
func readMeta(buf *bytes.Buffer) (*HHR, error) { hhr := &HHR{} for { line, err := buf.ReadBytes('\n') if err == io.EOF && len(line) == 0 { break } if err != nil && err != io.EOF { return nil, err } line = trim(line) switch { case hasPrefix(line, "Query"): hhr.Query = str(line[5:]) case hasPrefix(line, "Match_columns"): hhr.MatchColumns, err = strconv.Atoi(str(line[13:])) if err != nil { return nil, err } case hasPrefix(line, "No_of_seqs"): hhr.NumSeqs = str(line[10:]) case hasPrefix(line, "Neff"): f, err := strconv.ParseFloat(str(line[4:]), 64) if err != nil { return nil, err } hhr.Neff = seq.Prob(f) case hasPrefix(line, "Searched_HMMs"): hhr.SearchedHMMs, err = strconv.Atoi(str(line[13:])) if err != nil { return nil, err } case hasPrefix(line, "Date"): hhr.Date = str(line[4:]) case hasPrefix(line, "Command"): hhr.Command = str(line[7:]) } } return hhr, nil }
func readMeta(buf *bytes.Buffer) (Meta, error) { meta := Meta{} for { line, err := buf.ReadBytes('\n') if err == io.EOF && len(line) == 0 { break } if err != nil && err != io.EOF { return Meta{}, err } line = trim(line) switch { case hasPrefix(line, "HH"): meta.FormatVersion = str(line) case hasPrefix(line, "NAME"): meta.Name = str(line[4:]) case hasPrefix(line, "FAM"): meta.Fam = str(line[3:]) case hasPrefix(line, "FILE"): meta.File = str(line[4:]) case hasPrefix(line, "LENG"): meta.Leng = str(line[4:]) case hasPrefix(line, "FILT"): meta.Filt = str(line[4:]) case hasPrefix(line, "NEFF"): // You'd think we could use readNeff here, but does the HHM // format store all Neff values equally? NOOOOOOOOOOOOOOOOOOOO. f, err := strconv.ParseFloat(str(line[4:]), 64) if err != nil { return Meta{}, err } meta.Neff = seq.Prob(f) case hasPrefix(line, "EVD"): fields := bytes.Fields(bytes.TrimSpace(line[3:])) if len(fields) != 2 { return Meta{}, fmt.Errorf("Invalid EVD format: '%s'", line) } lambda, err := strconv.ParseFloat(string(fields[0]), 64) if err != nil { return Meta{}, fmt.Errorf("Error EVD lambda '%s': %s", string(fields[0]), err) } meta.EvdLambda = lambda mu, err := strconv.ParseFloat(string(fields[1]), 64) if err != nil { return Meta{}, fmt.Errorf("Error EVD mu '%s': %s", string(fields[1]), err) } meta.EvdMu = mu case hasPrefix(line, "PCT"): meta.Pct = true case hasPrefix(line, "DESC"): meta.Desc = str(line[4:]) case hasPrefix(line, "COM"): meta.Com = str(line[3:]) case hasPrefix(line, "DATE"): meta.Date = str(line[4:]) } } return meta, nil }
func neffStr(p seq.Prob) string { scaled := int(seq.Prob(hmmScale) * p) return fmt.Sprintf("%d", scaled) }