/* KullbackLeiblerDivergence comput and return the divergence of two string based on their character probabability. */ func KullbackLeiblerDivergence(a, b string) (divergence float64) { aCharsd, aValuesd := tekstus.CountAlnumDistribution(a) bCharsd, bValuesd := tekstus.CountAlnumDistribution(b) sumValuesA := numerus.IntsSum(aValuesd) sumValuesB := numerus.IntsSum(bValuesd) charsDiff := tekstus.RunesDiff(aCharsd, bCharsd) aMin, _, _ := numerus.IntsFindMin(aValuesd) bMin, _, _ := numerus.IntsFindMin(bValuesd) min := aMin if bMin < aMin { min = bMin } epsilon := float64(min) * 0.001 gamma := 1.0 - (float64(len(charsDiff)) * epsilon) // Check if sum of a up to 1. var sum float64 for _, v := range aValuesd { sum += float64(v) / float64(sumValuesA) } sumDiff := 1 - math.Abs(sum) if sumDiff > 0.000009 { return 0 } sum = 0 for _, v := range bValuesd { sum += float64(v) / float64(sumValuesB) } sumDiff = 1 - math.Abs(sum) if sumDiff > 0.000009 { return 0 } for x, v := range aCharsd { probA := float64(aValuesd[x]) / float64(sumValuesA) probB := epsilon contain, atIdx := tekstus.RunesContain(bCharsd, v) if contain { probB = gamma * (float64(bValuesd[atIdx]) / float64(sumValuesB)) } divergence += (probA - probB) * math.Log(probA/probB) } return divergence }
func doRunesDiff(t *testing.T, l, r, exp []rune) { got := tekstus.RunesDiff(l, r) assert(t, string(exp), string(got), true) }