/*
Beta function

Special cases:
	z < 0: NaN
	w < 0: NaN
*/
func Beta(z float64, w float64) float64 {
	if z < 0 || w < 0 {
		return math.NaN()
	}
	a, _ := math.Lgamma(z)
	b, _ := math.Lgamma(w)
	c, _ := math.Lgamma(z + w)
	return math.Exp(a + b - c)
}
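// Hypothetical usage sketch (not from the original source): for integer
// arguments, Beta(z, w) = (z-1)!(w-1)!/(z+w-1)!, so Beta(2, 3) should be
// 1*2/24 = 1/12. Assumes the Beta function above is in scope.
func checkBeta() {
	got := Beta(2, 3)
	want := 1.0 / 12.0
	fmt.Printf("Beta(2, 3) = %g (want %g, diff %g)\n", got, want, math.Abs(got-want))
}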
func lfastchoose2(n, k float64) (float64, int) {
	// mathematically the same as lfastchoose(),
	// typically less stable, but useful if n-k+1 < 0
	// r := lgammafn_sign(n-k+1, s_choose)
	r, s_choose := math.Lgamma(n - k + 1)
	p, _ := math.Lgamma(n + 1)
	q, _ := math.Lgamma(k + 1)
	return p - q - r, s_choose
}
func (sampler *Sampler) updateComponentE(targetEvent int) (lgamma float64) {
	var tPositive, tNegative, tNormalize float64
	for k := 0; k < numTop; k++ {
		tPositive, _ = math.Lgamma(float64(sampler.Model.Eventtype_histogram[k]) + sampler.eventPrior)
		tNegative, _ = math.Lgamma(float64(sampler.Model.NumESDs-sampler.Model.Eventtype_histogram[k]) + sampler.eventPrior)
		tNormalize, _ = math.Lgamma(float64(sampler.Model.NumESDs) + 2*sampler.eventPrior)
		lgamma += (tPositive + tNegative) - tNormalize
	}
	return lgamma
}
func (sampler *Sampler) updateComponentP(participantID, eventID int) (lgamma float64) {
	// for each alternative participant type
	var pPositive, pNegative, pNormalize float64
	for i := 0; i < numPar; i++ {
		pPositive, _ = math.Lgamma(float64(sampler.Model.Participanttype_eventtype_histogram[i][eventID]) + sampler.participantPrior)
		pNegative, _ = math.Lgamma(float64(sampler.Model.Eventtype_histogram[eventID]-sampler.Model.Participanttype_eventtype_histogram[i][eventID]) + sampler.participantPrior)
		pNormalize, _ = math.Lgamma(float64(sampler.Model.Eventtype_histogram[eventID]) + 2*sampler.participantPrior)
		lgamma += (pPositive + pNegative) - pNormalize
	}
	return
}
// Calculates Ix(a, b). Borrowed from
// NUMERICAL RECIPES IN FORTRAN 77: THE ART OF SCIENTIFIC COMPUTING
// ISBN 0-521-43064-X
func betaInc(a, b, x float64) float64 {
	lgab, _ := math.Lgamma(a + b)
	lga, _ := math.Lgamma(a)
	lgb, _ := math.Lgamma(b)
	exp := lgab - lga - lgb + a*math.Log(x) + b*math.Log(1.0-x)
	bt := math.Exp(exp)
	if x < (a+1.0)/(a+b+2.0) {
		return bt * betaCF(a, b, x) / a
	}
	return 1.0 - bt*betaCF(b, a, 1.0-x)/b
}
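// Hypothetical sanity check (not from the original source): for a = b = 1 the
// regularized incomplete beta reduces to I_x(1, 1) = x. Assumes betaInc and
// its betaCF continued-fraction helper from the same source are in scope.
func checkBetaInc() {
	for _, x := range []float64{0.1, 0.25, 0.5, 0.9} {
		fmt.Printf("betaInc(1, 1, %g) = %g (want %g)\n", x, betaInc(1, 1, x), x)
	}
}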
// LogGeneralizedBinomial returns the log of the generalized binomial coefficient.
// See GeneralizedBinomial for more information.
func LogGeneralizedBinomial(n, k float64) float64 {
	if n < 0 || k < 0 {
		panic(badNegInput)
	}
	if n < k {
		panic(badSetSize)
	}
	a, _ := math.Lgamma(n + 1)
	b, _ := math.Lgamma(k + 1)
	c, _ := math.Lgamma(n - k + 1)
	return a - b - c
}
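// Hypothetical example (not from the original source): C(5, 2) = 10, so
// LogGeneralizedBinomial(5, 2) should match math.Log(10) to within rounding.
func checkLogGeneralizedBinomial() {
	fmt.Println(LogGeneralizedBinomial(5, 2), math.Log(10))
}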
func main() {
	r := bufio.NewReader(os.Stdin)
	for {
		s, err := r.ReadString('\n')
		if err == io.EOF {
			break
		}
		s = strings.TrimRight(s, "\n")
		a := strings.Split(s, " ")
		f := a[0]
		x, _ := strconv.ParseFloat(a[1], 64)
		switch f {
		case "erf":
			fmt.Println(math.Erf(x))
		case "expm1":
			fmt.Println(math.Expm1(x))
		case "phi":
			fmt.Println(phi.Phi(x))
		case "NormalCDFInverse":
			fmt.Println(normal_cdf_inverse.NormalCDFInverse(x))
		case "Gamma":
			fmt.Println(math.Gamma(x))
		case "LogGamma":
			lg, _ := math.Lgamma(x)
			fmt.Println(lg)
		case "LogFactorial":
			fmt.Println(log_factorial.LogFactorial(int(x)))
		default:
			fmt.Println("Unknown function: " + f)
			return
		}
	}
}
// Lower incomplete gamma.
func lgamma(x, s float64, regularized bool) float64 {
	if x == 0 {
		return 0
	}
	if x < 0 || s <= 0 {
		return math.NaN()
	}
	if x > 1.1 && x > s {
		if regularized {
			return 1.0 - ugamma(x, s, regularized)
		}
		return math.Gamma(s) - ugamma(x, s, regularized)
	}
	var ft float64
	r := s
	c := 1.0
	pws := 1.0
	if regularized {
		logg, _ := math.Lgamma(s)
		ft = s*math.Log(x) - x - logg
	} else {
		ft = s*math.Log(x) - x
	}
	ft = math.Exp(ft)
	for c/pws > eps {
		r++
		c *= x / r
		pws += c
	}
	return pws * ft / s
}
// Upper incomplete gamma.
func ugamma(x, s float64, regularized bool) float64 {
	if x <= 1.1 || x <= s {
		if regularized {
			return 1 - lgamma(x, s, regularized)
		}
		return math.Gamma(s) - lgamma(x, s, regularized)
	}
	f := 1.0 + x - s
	C := f
	D := 0.0
	var a, b, chg float64
	for i := 1; i < 10000; i++ {
		a = float64(i) * (s - float64(i))
		b = float64(i<<1) + 1.0 + x - s
		D = b + a*D
		C = b + a/C
		D = 1.0 / D
		chg = C * D
		f *= chg
		if math.Abs(chg-1) < eps {
			break
		}
	}
	if regularized {
		logg, _ := math.Lgamma(s)
		return math.Exp(s*math.Log(x) - x - logg - math.Log(f))
	}
	return math.Exp(s*math.Log(x) - x - math.Log(f))
}
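// Hypothetical spot check (not from the original source): for s = 1 the
// regularized lower incomplete gamma reduces to P(1, x) = 1 - exp(-x).
// Assumes the lgamma/ugamma pair above (and their eps constant) are in scope;
// x values on both sides of 1.1 exercise both the series and the continued fraction.
func checkIncompleteGamma() {
	for _, x := range []float64{0.5, 1, 2} {
		fmt.Printf("P(1, %g) = %g (want %g)\n", x, lgamma(x, 1, true), 1-math.Exp(-x))
	}
}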
// Skewness returns the skewness of the distribution.
func (w Weibull) Skewness() float64 {
	stdDev := w.StdDev()
	firstGamma, firstGammaSign := math.Lgamma(1 + 3/w.K)
	logFirst := firstGamma + 3*(math.Log(w.Lambda)-math.Log(stdDev))
	logSecond := math.Log(3) + math.Log(w.Mean()) + 2*math.Log(stdDev) - 3*math.Log(stdDev)
	logThird := 3 * (math.Log(w.Mean()) - math.Log(stdDev))
	return float64(firstGammaSign)*math.Exp(logFirst) - math.Exp(logSecond) - math.Exp(logThird)
}
/*
Normalized Gamma Function (or Complementary Incomplete Gamma Function),
equal to Gamma(a, x)/Gamma(a), evaluated by Legendre's continued fraction.

Special cases (x == 0):
	G(0, 0)   = Infinity
	G(a>0, 0) = 1.0
	G(a<0, 0) = 1/|a|
*/
func G(a float64, x float64) float64 {
	if x == 0 {
		if a == 0 {
			return math.Inf(1)
		} else if a > 0 {
			return 1.0
		} else if a < 0 {
			return 1.0 / math.Abs(a)
		}
	}
	// Evaluate Legendre's continued fraction using Lentz's algorithm
	// with the shift from Thompson and Barnett.
	// Continued fraction:
	// http://functions.wolfram.com/GammaBetaErf/GammaRegularized/10/0003/
	b0 := x + 1.0 - a
	C := 1.0 / (10E-30)
	D := 1.0 / b0
	if b0 == 0 {
		D = 10E30
	}
	f := D
	// numerator
	an := func(n int) float64 {
		if n == 0 {
			return 1.0
		}
		return -1.0 * float64(n) * (float64(n) - a)
	}
	// denominator
	bn := func(n int) float64 {
		return x + float64(2*n+1) - a
	}
	// Lentz's algorithm until machine precision or 1,000 iterations
	for j := 1; j < 1000; j++ {
		D = bn(j) + an(j)*D
		if math.Abs(D) < 10E-20 {
			D = 10E-30
		}
		C = bn(j) + an(j)/C
		if math.Abs(C) < 10E-20 {
			C = 10E-30
		}
		D = 1.0 / D
		del := D * C
		f *= del
		if math.Abs(del-1.0) < 10E-15 {
			break
		}
	}
	lnGa, _ := math.Lgamma(a)
	return f * math.Exp(-x+a*math.Log(x)-lnGa)
}
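// Hypothetical spot check (not from the original source): for a = 1 the
// regularized upper incomplete gamma is Q(1, x) = exp(-x), so G(1, x) should
// track math.Exp(-x) wherever the continued fraction converges.
func checkG() {
	for _, x := range []float64{1, 2, 5} {
		fmt.Printf("G(1, %g) = %g (want %g)\n", x, G(1, x), math.Exp(-x))
	}
}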
// compute document likelihood of the events in the current esd
// all participant labelings will stay constant -> no need to compute them!
func (sampler *Sampler) documentLikelihood(label Label) float64 {
	var wordTypeFactor, wordFactor, wordNorm float64
	var typeWordTotal int
	documentLikelihood := 0.0
	// iterate over event types
	for k := 0; k < numTop; k++ {
		wordFactor = 0.0
		typeWordTotal = 0
		// iterate over terms in the event vocabulary
		for term, histogram := range sampler.Model.Word_eventtype_histogram {
			typeWordTotal += histogram[k]
			// compute LGamma(N(word,event) + prior + update)
			wordTypeFactor, _ = math.Lgamma(float64(histogram[k]) + sampler.EventlmPriors[k][term])
			wordFactor += wordTypeFactor
		}
		// normalize: LGamma(N(words_by_event) + V*prior + total_update)
		wordNorm, _ = math.Lgamma(float64(typeWordTotal) + sum(sampler.EventlmPriors[k]))
		documentLikelihood += wordFactor - wordNorm
	}
	return documentLikelihood
}
// compute document likelihood of the participant realization in question, given the proposed label
// all event doc likelihoods will stay constant w.r.t. the change -> no need to compute them!
func (sampler *Sampler) documentLikelihoodP(event int, participant int, label Label) float64 {
	var wordTypeFactor, wordFactor, wordNorm float64
	var typeWordTotal /*, update*/ int
	documentLikelihood := 0.0
	// iterate over participant types
	for i := 0; i < numPar; i++ {
		wordFactor = 0.0
		typeWordTotal = 0
		// iterate over terms in the participant vocabulary
		for term, histogram := range sampler.Model.Word_participanttype_histogram {
			typeWordTotal += histogram[i]
			// set 'update' according to the number of times the term is present in the current participant description
			// compute LGamma(N(word,part) + prior + update)
			wordTypeFactor, _ = math.Lgamma(float64(histogram[i]) + sampler.ParticipantlmPriors[i][term])
			wordFactor += wordTypeFactor
		}
		// normalize
		wordNorm, _ = math.Lgamma(float64(typeWordTotal) + sum(sampler.ParticipantlmPriors[i]))
		documentLikelihood += wordFactor - wordNorm
	}
	return documentLikelihood
}
// Gamma distribution PDF
func (g *Gamma) PDF(x float64) float64 {
	if x <= 0 {
		return math.NaN()
	}
	if math.IsInf(x, 1) {
		return 0.0 // the density vanishes as x -> +Inf
	}
	lga, _ := math.Lgamma(g.alpha)
	logp := g.alpha*math.Log(g.beta) + (g.alpha-1.0)*math.Log(x) - x*g.beta - lga
	return math.Exp(logp)
}
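// Hypothetical spot check (not from the original source): with alpha = beta = 1
// the distribution is Exponential(1), so PDF(x) should equal exp(-x). Assumes
// same-package access to the unexported alpha/beta fields.
func checkGammaPDF() {
	g := &Gamma{alpha: 1, beta: 1}
	for _, x := range []float64{0.5, 1, 2} {
		fmt.Printf("PDF(%g) = %g (want %g)\n", x, g.PDF(x), math.Exp(-x))
	}
}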
func TestLnGamma(t *testing.T) {
	acc := 0.0000001
	check := func(x, y float64) bool {
		return math.Abs(x-y) < acc
	}
	for i := 0; i < 100; i++ {
		x := NextGamma(10, 10)
		g1 := LnΓ(x)
		g2, _ := math.Lgamma(x)
		if !check(g1, g2) {
			t.Errorf("For %v: %v vs %v", x, g1, g2)
		}
	}
	Seed(10)
	start := time.Now()
	for i := 0; i < 1e6; i++ {
		x := NextGamma(10, 10)
		math.Lgamma(x)
	}
	duration2 := float64(time.Now().Sub(start)) / 1e9
	Seed(10)
	start = time.Now()
	for i := 0; i < 1e6; i++ {
		x := NextGamma(10, 10)
		LnΓ(x)
	}
	duration1 := float64(time.Now().Sub(start)) / 1e9
	fmt.Printf("Mine was %f\nTheirs was %f\n", duration1, duration2)
}
// incGammaSeries computes the incomplete gamma function via the
// series sum:
//
//	gamma(a, x) = exp(-x) x^a sum_n (Gamma(a) / Gamma(a + 1 + n)) x^n.
func incGammaSeries(a, x float64) float64 {
	an := a
	termVal := 1 / an
	sum := 1 / an
	for n := 0; n < ConvergenceIters; n++ {
		an++
		termVal *= x / an
		sum += termVal
		if CloseEnough(sum, termVal) {
			// Note that x^a = exp(a * log(x)).
			lg, _ := math.Lgamma(a)
			return sum * math.Exp(-x+a*math.Log(x)-lg)
		}
	}
	fmtStr := "incGammaSeries(%g, %g) failed to converge"
	panic(fmt.Sprintf(fmtStr, a, x))
}
// incGammaContinuedFraction computes the incomplete gamma function
// via the continued fraction:
//
//	exp(-x) x^a (1 / (x + 1 - a - (1 (1 - a)) / (x + 3 - a - ...))).
//
// This is implemented via the modified Lentz method.
func incGammaContinuedFraction(a, x float64) float64 {
	minValue := math.SmallestNonzeroFloat64
	// Start computing terms at n = 1.
	// c_n = A_n / A_n-1. Note that A_0 = a_0 = 0.
	c := math.MaxFloat64
	// d_n = B_n-1 / B_n. Note that B_0 = b_0 = 1.
	b := x + 1 - a
	d := 1 / b
	fracEst := d
	for n := 1; n <= ConvergenceIters; n++ {
		an := -float64(n) * (float64(n) - a)
		b += 2.0
		d = an*d + b
		if math.Abs(d) < minValue {
			d = minValue
		}
		c = an/c + b
		if math.Abs(c) < minValue {
			c = minValue
		}
		d = 1 / d
		diff := d * c
		fracEst *= diff
		if CloseEnough(1, diff-1) {
			lg, _ := math.Lgamma(a)
			return fracEst * math.Exp(-x+a*math.Log(x)-lg)
		}
	}
	fmtStr := "incGammaContinuedFraction(%g, %g) failed to converge"
	panic(fmt.Sprintf(fmtStr, a, x))
}
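// Hypothetical cross-check (not from the original source): the two routines
// above return the regularized forms P(a, x) and Q(a, x), so P + Q should be 1
// wherever both converge (e.g. x comfortably above a + 1 for the fraction).
func checkIncGammaPair() {
	a, x := 2.0, 4.0
	p := incGammaSeries(a, x)
	q := incGammaContinuedFraction(a, x)
	fmt.Printf("P(%g, %g) + Q(%g, %g) = %g (want 1)\n", a, x, a, x, p+q)
}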
// Based on similar code in rf-ace (Apache 2.0, Timo Erkkilä)
func lgamma(x float64) float64 {
	v, _ := math.Lgamma(x)
	// v := math.Log(math.Abs(math.Gamma(x)))
	return v
}
func lchoose(n, k int) float64 {
	a, _ := math.Lgamma(float64(n + 1))
	b, _ := math.Lgamma(float64(k + 1))
	c, _ := math.Lgamma(float64(n - k + 1))
	return a - b - c
}
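// Hypothetical example (not from the original source): C(5, 2) = 10, so
// lchoose(5, 2) should agree with math.Log(10).
func checkLchoose() {
	fmt.Println(lchoose(5, 2), math.Log(10))
}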
// Lgamma computes the log gamma function, but ignores domain errors.
func Lgamma(x float64) (y float64) {
	y, _ = math.Lgamma(x)
	return
}
func Lgamma(x float64) float64 {
	val, _ := math.Lgamma(x)
	return val
}
func (l *LdaModeler) model_one_pass(tokenss_ptr *[]util.Document, seed int, initial_topicss [][]int) *ModelResults {
	tokenss := *tokenss_ptr
	ntopics := l.ntopics
	ndocuments := len(tokenss)
	range_ntopics := util.PyRange(ntopics)
	range_ndocuments := util.PyRange(ndocuments)

	// set up a random number generator
	randomizer := rand.New(rand.NewSource(int64(len(tokenss) + seed)))

	// topicss is a document-token matrix where the values are topic assignments.
	topicss := make([][]int, len(tokenss))
	if initial_topicss == nil {
		// set up a new, randomly initialized matrix
		for i, doc := range tokenss {
			doc_topics := make([]int, len(doc))
			for j := range doc {
				// randomly assign each token in doc to a random topic
				doc_topics[j] = randomizer.Intn(ntopics)
			}
			topicss[i] = doc_topics
		}
	} else {
		// we were passed a topic matrix; set up our own local copy.
		// NOTE: We *cannot* just say topicss[i] = doc -- a shallow copy shares
		// backing arrays, and here there be possibilities for threading havoc!
		for i, doc := range initial_topicss {
			topicss[i] = make([]int, len(doc))
			copy(topicss[i], doc)
		}
	}

	// compute doc-topic representation counts and topic-word representation counts
	document_tokens_counts := make([]int, len(tokenss))
	for idx, doc := range tokenss {
		document_tokens_counts[idx] = len(doc)
	}
	document_topics_counts := make([][]int, len(tokenss))
	// Maps in Go return the zero value for a missing key (plus an optional second
	// return value indicating presence): http://golang.org/doc/effective_go.html#maps
	topic_words_counts := make([]map[util.Token]int, ntopics)
	// n.b.: the values in a "fresh" just-made slice are the zero value for that type.
	total_topic_counts := make([]int, ntopics)

	for i, tokens := range tokenss { // for each document
		topics := topicss[i]
		counts := make([]int, ntopics)
		for j, token := range tokens { // for each token
			topic := topics[j]
			counts[topic] += 1 // count of topic mentions in this document
			if topic_words_counts[topic] == nil {
				topic_words_counts[topic] = map[util.Token]int{}
			}
			topic_words_counts[topic][token] += 1 // count of token mentions for this topic
			total_topic_counts[topic] += 1        // total count of this topic
		}
		document_topics_counts[i] = counts
	}

	all_keys := make([][]util.Token, len(range_ntopics)) // a set of per-topic vocab lists
	for i := range range_ntopics {
		all_keys[i] = util.KeysFromMap(topic_words_counts[i])
	}
	all_tokens := util.SetFromLists(all_keys)

	// Dirichlet smoothing parameters
	alpha := l.alpha
	beta := l.beta
	W := len(all_tokens) // vocabulary size: number of possible unique words in each topic
	T := ntopics         // number of topics
	betaW := beta * float64(W)
	alphaT := alpha * float64(T)

	// loop over all docs and all tokens, resampling topic assignments & adjusting counts
	proportional_probabilities := make([]float64, ntopics) // probability of each topic
	fixups := make([]int, ntopics)                         // which topics need count adjustment for the current token
	for iteration := 0; iteration < l.iterations; iteration++ {
		change_count := 0
		for t_idx, tokens := range tokenss { // for each document
			topics := topicss[t_idx]
			document_index := range_ndocuments[t_idx]
			current_document_topics_counts := document_topics_counts[document_index]
			current_document_tokens_count := document_tokens_counts[document_index]
			n_di_minus_i := float64(current_document_tokens_count - 1)
			for token_index, token := range tokens { // for each token
				// Based on:
				// Griffiths TL, Steyvers M. Finding scientific topics.
				// Proceedings of the National Academy of Sciences of the United States of America.
				// 2004;101(Suppl 1):5228-5235.

				// get the topic assignment for the current token
				topic := topics[token_index]
				// compute conditional probabilities for each topic;
				// the "fixups" list is an optimization to avoid branching.
				fixups[topic] = 1
				total_proportional_probabilities := 0.0
				for _, j := range range_ntopics { // for each topic
					fixup := fixups[j]
					n_wi_minus_i_j := float64(topic_words_counts[j][token] - fixup) // most of the time, fixup will be zero
					n_di_minus_i_j := float64(current_document_topics_counts[j] - fixup) // ditto
					n_dot_minus_i_j := float64(total_topic_counts[j] - fixup)
					// eq. 5 from the paper above
					p_token_topic := (n_wi_minus_i_j + beta) / (n_dot_minus_i_j + betaW)
					p_topic_document := (n_di_minus_i_j + alpha) / (n_di_minus_i + alphaT)
					p := p_topic_document * p_token_topic
					proportional_probabilities[j] = p
					total_proportional_probabilities += p
				} // end for topics
				fixups[topic] = 0
				// resample the current token's topic; randomizer.Float64() is
				// uniform on [0.0, 1.0), like Python's random()
				new_topic := l.resample(randomizer.Float64(), proportional_probabilities, total_proportional_probabilities)
				// update assignments & counts
				if new_topic != topic {
					// update the topic label for this token
					topics[token_index] = new_topic
					// update total topic counts
					total_topic_counts[topic] -= 1
					total_topic_counts[new_topic] += 1
					// update document-topic counts
					current_document_topics_counts[topic] -= 1
					current_document_topics_counts[new_topic] += 1
					topic_words_counts[topic][token] -= 1
					topic_words_counts[new_topic][token] += 1
					// count changes for this pass
					change_count += 1
				}
			} // end for tokens
		} // end for documents
		// log.Printf("LDA - iteration %d resulted in %d changes.\n", iteration, change_count)
		if iteration%100 == 0 {
			log.Printf("LDA - iteration %d of %d.\n", iteration, l.iterations)
		}
	} // end for iterations

	// document-topic assignments (theta_hat_d_j)
	theta_hat_ds := make([][]float64, ndocuments)
	for document_index := 0; document_index < ndocuments; document_index++ {
		document_token_count := document_tokens_counts[document_index]
		theta_hat_d := make([]float64, ntopics)
		document_topics_count := document_topics_counts[document_index]
		if document_token_count > 0 {
			for j := 0; j < ntopics; j++ {
				theta_hat_d[j] = (float64(document_topics_count[j]) + alpha) / (float64(document_token_count) + alphaT)
			}
		} else {
			// degenerate document with no tokens: equal probability for all topics
			for j := range range_ntopics {
				theta_hat_d[j] = 1.0 / float64(ntopics)
			}
		}
		theta_hat_ds[document_index] = theta_hat_d
	}

	// compute topic-token assignments (phi_hat_w_j in the paper)
	phi_hats := make([]util.TokenProbMap, ntopics)
	for t := 0; t < ntopics; t++ { // for each topic
		dx := util.TokenProbMap{}
		for token, top_tok_count := range topic_words_counts[t] { // for each token
			dx[token] = util.Probability((float64(top_tok_count) + beta) / (float64(total_topic_counts[t]) + betaW))
		}
		phi_hats[t] = dx
	}

	// compute the log-likelihood of the tokens given the topic model; Eq. 2 in the Steyvers paper.
	// Note that Lgamma also returns a sign indicator, which we don't care about here.
	part_1, _ := math.Lgamma(float64(W) * beta)
	part_2, _ := math.Lgamma(beta)
	log_likelihood := float64(T) * (part_1 - float64(W)*part_2)
	for t := 0; t < ntopics; t++ {
		for _, w := range all_tokens {
			n_t_w := topic_words_counts[t][w]
			ntw_gamma, _ := math.Lgamma(float64(n_t_w) + beta)
			log_likelihood += ntw_gamma
		}
		n_dot_t := total_topic_counts[t]
		ndt_gamma, _ := math.Lgamma(float64(n_dot_t) + betaW)
		log_likelihood -= ndt_gamma
	}
	log.Printf("LDA - log-likelihood of data given model: %0.8e\n", log_likelihood)

	// Mean sample token perplexity, summed over samples. See the definition in:
	// Chemudugunta C, Smyth P, Steyvers M. Modeling General and Specific Aspects of
	// Documents with a Probabilistic Topic Model. In: Advances in Neural Information
	// Processing Systems 19: Proceedings of the 2006 Conference. MIT Press; 2007. p. 241.
	//
	// Modified implementation: add the logs of the P(token)'s rather than multiplying
	// the P(token)'s and then taking the log, in order to avoid underflowing to zero.
	perplexity := 0.0
	ntokens := 0
	for doc_idx, tokens := range tokenss { // each document
		theta_hat_d := theta_hat_ds[doc_idx]
		for _, w := range tokens { // each token
			temp_phi_hat_theta_hat := make([]float64, len(range_ntopics))
			for j, z := range range_ntopics { // each topic
				temp_phi_hat_theta_hat[j] = float64(phi_hats[z][w]) * theta_hat_d[z]
			}
			perplexity += math.Log2(util.SumFloat(temp_phi_hat_theta_hat))
			ntokens += 1
		}
	}
	perplexity = math.Pow(2.0, -perplexity/float64(ntokens))
	log.Printf("LDA - mean sample token perplexity of data given model: %0.4f\n", perplexity)

	// save a final snapshot of the token-topic assignments
	if l.last_model_token_topic_sample_assignments == nil {
		l.last_model_token_topic_sample_assignments = topicss
	}

	// return the results sample
	return &ModelResults{&theta_hat_ds, &phi_hats, log_likelihood, perplexity}
}
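// l.resample is called above but not shown in this snippet. A minimal sketch of
// what such a roulette-wheel sampler might look like (an assumption, not the
// original implementation): scale u from [0, 1) into [0, total) and walk the
// unnormalized weights until the cumulative sum passes it.
func resampleSketch(u float64, weights []float64, total float64) int {
	target := u * total
	cumulative := 0.0
	for i, w := range weights {
		cumulative += w
		if target < cumulative {
			return i
		}
	}
	return len(weights) - 1 // guard against floating-point round-off
}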
func Lgamma(f float32) (lgamma float32, sign int) {
	lgamma64, sign := math.Lgamma(float64(f))
	lgamma = float32(lgamma64)
	return
}
func lgamma(x float64) float64 {
	y, _ := math.Lgamma(x)
	return y
}