// Here we use all the TF samples for the class and convert
// them to TF-IDF https://en.wikipedia.org/wiki/Tf%E2%80%93idf
// once we have finished learning all the classes and have the totals.
func (c *Classifier) ConvertTermsFreqToTfIdf() {
	if c.DidConvertTfIdf {
		panic("Cannot call ConvertTermsFreqToTfIdf more than once. Reset and relearn to reconvert.")
	}

	for className := range c.datas {
		for wIndex := range c.datas[className].FreqTfs {
			tfIdfAdder := float64(0)

			for tfSampleIndex := range c.datas[className].FreqTfs[wIndex] {
				// we always want a positive TF-IDF score.
				tf := c.datas[className].FreqTfs[wIndex][tfSampleIndex]
				c.datas[className].FreqTfs[wIndex][tfSampleIndex] = math.Log1p(tf) * math.Log1p(float64(c.learned)/float64(c.datas[className].Total))
				tfIdfAdder += c.datas[className].FreqTfs[wIndex][tfSampleIndex]
			}
			// convert the 'counts' to TF-IDFs
			c.datas[className].Freqs[wIndex] = tfIdfAdder
		}
	}

	// mark as converted so the guard above fires on a second call
	c.DidConvertTfIdf = true
}
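// A minimal standalone sketch of the weighting applied above, with
// hypothetical counts: tf = 3 occurrences in one sample, learned = 100
// documents seen in total, and a class total of 10. Both factors go
// through math.Log1p, so zero counts map to zero weight and the score
// stays positive.
package main

import (
	"fmt"
	"math"
)

func main() {
	tf, learned, total := 3.0, 100.0, 10.0
	weight := math.Log1p(tf) * math.Log1p(learned/total)
	fmt.Printf("tf-idf weight: %.4f\n", weight) // log1p(3)*log1p(10) ≈ 3.32
}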
// logaddexp computes log(exp(x) + exp(y)) in a numerically stable way.
func logaddexp(x, y float64) float64 {
	tmp := x - y
	if tmp > 0 {
		return x + math.Log1p(math.Exp(-tmp))
	} else if tmp <= 0 {
		return y + math.Log1p(math.Exp(tmp))
	} else {
		// tmp is NaN (NaN inputs, or infinities of the same sign),
		// so both comparisons above were false.
		log.Printf("logaddexp %f %f", x, y)
		return x + y
	}
}
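// A minimal sketch (hypothetical values) of why the stable form
// matters: the naive expression overflows for large inputs, while the
// rearranged form, shown here inlined for x = y = 1000 as in the
// tmp <= 0 branch above, returns 1000 + ln 2.
package main

import (
	"fmt"
	"math"
)

func main() {
	naive := math.Log(math.Exp(1000) + math.Exp(1000)) // +Inf: math.Exp(1000) overflows
	stable := 1000 + math.Log1p(math.Exp(1000-1000))   // ≈ 1000.6931
	fmt.Println(naive, stable)
}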
// Random gamma variable when shape < 1.
// See Kundu and Gupta 2007,
// "A convenient way of generating gamma random variables using generalized exponential distribution"
func (rg RandGen) rgamma2(shape float64) float64 {
	if shape <= 0.0 || shape >= 1.0 {
		panic("Illegal parameter. Shape must be strictly between zero and one")
	}

	d := 1.0334 - 0.0766*math.Exp(2.2942*shape) // constants from the paper
	a := math.Exp2(shape) * math.Pow(-math.Expm1(-d/2), shape)
	pdsh := math.Pow(d, shape-1.0)
	b := shape * pdsh * math.Exp(-d)
	c := a + b
start:
	u := rg.Float64()
	var x float64
	if u <= a/c {
		x = -2.0 * math.Log1p(-math.Pow(c*u, 1.0/shape)/2.0)
	} else {
		x = -math.Log(c * (1.0 - u) / (shape * pdsh))
	}
	v := rg.Float64()
	if x <= d {
		p := math.Pow(x, shape-1.0) * math.Exp(-x/2.0) /
			(math.Exp2(shape-1.0) * math.Pow(-math.Expm1(-x/2.0), shape-1.0))
		if v > p {
			goto start
		}
	} else {
		if v > math.Pow(d/x, 1.0-shape) {
			goto start
		}
	}
	return x
}
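// A hedged usage sketch: the excerpt does not show how RandGen is
// constructed, so newRandGen below stands in for whatever constructor
// the package provides. Gamma(shape, 1) has mean equal to shape, so
// the sample mean of many draws should land near 0.3.
func checkRgamma2(newRandGen func(seed int64) RandGen) {
	rg := newRandGen(1)
	const n = 200000
	sum := 0.0
	for i := 0; i < n; i++ {
		sum += rg.rgamma2(0.3)
	}
	fmt.Printf("sample mean %.3f (expected ≈ 0.3)\n", sum/float64(n))
}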
func (node *Node) recalc() {
	if node.Visits == 0 {
		// unvisited node: give it a high, slightly randomized
		// first-play value so it gets explored (and so the
		// divisions below cannot be 0/0).
		node.value = 1 + 0.1*rand.Float64()
		return
	}
	node.Mean = node.Wins / node.Visits
	node.blendedMean = node.Mean
	rave := node.config.AMAF || node.config.Neighbors || node.config.Ancestor
	if rave {
		beta := math.Sqrt(node.config.RAVE / (3*node.Visits + node.config.RAVE))
		if beta > 0 {
			node.amafMean = node.amafWins / node.amafVisits
			node.ancestorMean = node.ancestorWins / node.ancestorVisits
			if math.IsNaN(node.Mean) {
				node.Mean = 0
			}
			if math.IsNaN(node.amafMean) {
				node.amafMean = 0
			}
			if math.IsNaN(node.ancestorMean) {
				node.ancestorMean = 0
			}
			estimatedMean := 0.0
			samples := 0.0
			if node.config.AMAF {
				estimatedMean += node.amafMean
				samples++
			}
			if node.config.Neighbors {
				neighborWins := 0.0
				neighborVisits := 0.0
				for sibling := node.parent.Child; sibling != nil; sibling = sibling.Sibling {
					if sibling.Vertex != node.Vertex {
						neighborWins += sibling.Wins
						neighborVisits += sibling.Visits
					}
				}
				estimatedMean += neighborWins / neighborVisits
				samples++
			}
			if node.config.Ancestor {
				estimatedMean += node.ancestorMean
				samples++
			}
			estimatedMean /= samples
			if math.IsNaN(estimatedMean) {
				estimatedMean = 0
			}
			node.blendedMean = beta*estimatedMean + (1-beta)*node.Mean
		}
	}
	r := math.Log1p(node.parent.Visits) / node.Visits
	v := 1.0
	if node.config.Var {
		v = math.Min(0.25, node.blendedMean-(node.blendedMean*node.blendedMean)+math.Sqrt(2*r))
	}
	node.value = node.blendedMean + node.config.Explore*math.Sqrt(r*v)
}
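// Design note: the last few lines implement a UCB1-Tuned style
// selection value, value = blendedMean + Explore*sqrt(r*v), where
// r = ln(1+parentVisits)/visits and, when config.Var is set, v is the
// empirical Bernoulli variance m - m^2 (inflated by sqrt(2r)) capped
// at 1/4, its theoretical maximum. Using math.Log1p rather than
// math.Log keeps r finite and non-negative even when the parent has
// zero visits.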
// LnΓ is the natural logarithm of the Gamma function, computed with a
// Lanczos series approximation; logsqrt2pi is the package constant
// log(sqrt(2*pi)).
func LnΓ(x float64) (res float64) {
	res = (x-0.5)*math.Log(x+4.5) - (x + 4.5)
	res += logsqrt2pi
	res += math.Log1p(
		76.1800917300/(x+0) -
			86.5053203300/(x+1) +
			24.0140982200/(x+2) -
			1.23173951600/(x+3) +
			0.00120858003/(x+4) -
			0.00000536382/(x+5))
	return
}
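// A quick sanity sketch, assuming LnΓ above is in scope: compare it
// against the standard library's math.Lgamma, which computes the same
// quantity (the test points here are arbitrary).
func checkLnΓ() {
	for _, x := range []float64{0.5, 1, 2.5, 10} {
		want, _ := math.Lgamma(x)
		fmt.Printf("x=%4.1f LnΓ=%.10f math.Lgamma=%.10f\n", x, LnΓ(x), want)
	}
}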
// expm1 returns exp(x) - 1, computed accurately even for small x.
func expm1(x float64) float64 {
	var y float64
	a := math.Abs(x)
	if a < math.SmallestNonzeroFloat64 {
		y = x
	} else if a > 0.697 {
		y = exp(x) - 1 // negligible cancellation
	} else {
		if a > 1e-8 {
			y = exp(x) - 1
		} else {
			// Taylor expansion, more accurate in this range
			y = (x/2 + 1) * x
		}
		// Newton step for solving log(1 + y) = x for y
		// WARNING: does not work for y ~ -1: bug in 1.5.0
		y -= (1 + y) * (math.Log1p(y) - x)
	}
	return y
}
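// A minimal sketch of why a dedicated expm1 exists: for tiny x,
// computing exp(x) - 1 directly loses nearly all significant digits to
// cancellation, while an expm1-style routine (the standard library's
// math.Expm1 shown here) keeps them.
package main

import (
	"fmt"
	"math"
)

func main() {
	x := 1e-12
	fmt.Println(math.Exp(x) - 1) // ≈ 1.000089e-12: wrong digits from cancellation
	fmt.Println(math.Expm1(x))   // ≈ 1.0000000000005e-12: accurate
}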
// Idf is the inverse document frequency of tf-idf.
// word: the term to score; doc_list: the documents searched;
// log: which logarithm to apply ("log", "log10", "nolog", "log1p", "log2").
func Idf(word string, doc_list []string, log string) (idf float64) {
	// set val for reuse; +1 so we don't get +Inf values
	val := float64(len(doc_list)+1) / (NumDocsContain(word, doc_list) + 1)
	switch log {
	case "log":
		idf = math.Log(val) // Log returns the natural logarithm of x.
	case "log10":
		idf = math.Log10(val) // Log10 returns the decimal logarithm of x.
	case "nolog":
		idf = val // no logarithm
	case "log1p":
		idf = math.Log1p(val) // Log1p returns the natural logarithm of 1 plus its argument x.
	case "log2":
		idf = math.Log2(val) // Log2 returns the binary logarithm of x.
	default:
		idf = math.Log(val)
	}
	return
}
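// A hedged usage sketch, assuming Idf above is in scope and that
// NumDocsContain(word, docs) counts the documents containing word (its
// definition is not shown in the excerpt). With "cat" in 2 of 3
// documents, Idf returns log((3+1)/(2+1)) = log(4/3) ≈ 0.288.
func exampleIdf() {
	docs := []string{"the cat sat", "the dog ran", "a cat and a dog"}
	fmt.Println(Idf("cat", docs, "log"))   // ≈ 0.2877
	fmt.Println(Idf("cat", docs, "log1p")) // log1p(4/3) ≈ 0.8473
}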
// LogAddExp returns log(exp(a) + exp(b)), evaluated in a numerically
// stable way.
func LogAddExp(a, b float64) float64 {
	if b > a {
		a, b = b, a
	}
	return a + math.Log1p(math.Exp(b-a))
}
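// Design note: the swap guarantees b <= a, so math.Exp(b-a) <= 1 can
// never overflow, and math.Log1p stays accurate when exp(b-a) is tiny.
// For example, LogAddExp(1000, 1000) returns 1000 + ln 2 ≈ 1000.6931,
// whereas the naive math.Log(math.Exp(1000)+math.Exp(1000)) overflows
// to +Inf.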
// ext۰math۰Log1p adapts math.Log1p for an interpreter's table of
// external functions: args[0] carries the float64 argument as a
// generic Value.
func ext۰math۰Log1p(fr *Frame, args []Value) Value {
	return math.Log1p(args[0].(float64))
}
// Log1p is a float32 version of math.Log1p.
func Log1p(x float32) float32 {
	return float32(math.Log1p(float64(x)))
}
// The exponential distribution has the form
//
//	p(x) dx = exp(-x/mu) dx/mu
//
// for 0 <= x < +infty.
func ExponentialRand(mu float64) (x float64) {
	u := rand.Float64()
	x = -mu * math.Log1p(-u)
	return
}
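// A minimal sketch of the inverse-transform sampling used above: if U
// is Uniform[0,1), then -mu*log(1-U) is Exponential with mean mu.
// math.Log1p(-u) equals log(1-u) but avoids cancellation for small u,
// and since rand.Float64 returns values in [0, 1), 1-u never reaches
// zero. The sample mean of many draws should approach mu (here 2.0).
package main

import (
	"fmt"
	"math"
	"math/rand"
)

func main() {
	const mu, n = 2.0, 200000
	sum := 0.0
	for i := 0; i < n; i++ {
		sum += -mu * math.Log1p(-rand.Float64())
	}
	fmt.Printf("sample mean %.3f (expected ≈ %.1f)\n", sum/float64(n), mu)
}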
// Eval applies math.Log1p to the value of the wrapped operand u.C.
func (u *Log) Eval(t float64, x, c, s []float64) float64 {
	return math.Log1p(u.C.Eval(t, x, c, s))
}