func TestRandomDatasetHasExpectedStatistics(t *testing.T) {
	tests := []struct {
		numSamples  int
		probability float64
	}{
		{100000, 0.02},
		{100000, 0.5},
		{100000, 0.9},
	}
	for _, tt := range tests {
		d := randomDataset(tt.numSamples, tt.probability)
		t.Log()
		t.Log(d.String())
		if !near(d.Calibration(), 1.0) {
			t.Errorf("Calibration: expected %v, had %v", 1.0, d.Calibration())
		}
		expectedLogScore := tt.probability*math.Log2(tt.probability) + (1-tt.probability)*math.Log2(1-tt.probability)
		if !near(d.LogScore(), expectedLogScore) {
			t.Errorf("LogScore: expected %v, had %v", expectedLogScore, d.LogScore())
		}
		if !near(d.NormalizedEntropy(), 1.0) {
			t.Errorf("Entropy: expected %v, had %v", 1.0, d.NormalizedEntropy())
		}
		if !near(d.ROC(), 0.5) {
			t.Errorf("ROC: expected %v, had %v", 0.5, d.ROC())
		}
	}
}
// Calculates the entropy of each symbol, based on the counts of each symbol.
// The result is similar to the result of CalculateBitLengths, but with the
// actual theoretical bit lengths according to the entropy. Since the resulting
// values are fractional, they cannot be used to encode the tree specified by
// DEFLATE.
func CalculateEntropy(count []float64) (bitLengths []float64) {
	var sum, log2sum float64
	n := len(count)
	for i := 0; i < n; i++ {
		sum += count[i]
	}
	if sum == 0 {
		log2sum = math.Log2(float64(n))
	} else {
		log2sum = math.Log2(sum)
	}
	bitLengths = make([]float64, n)
	for i := 0; i < n; i++ {
		// When the count of the symbol is 0, but its cost is requested anyway,
		// it means the symbol will appear at least once anyway, so give it the
		// cost as if its count is 1.
		if count[i] == 0 {
			bitLengths[i] = log2sum
		} else {
			bitLengths[i] = math.Log2(sum / count[i])
		}
		if !(bitLengths[i] >= 0) {
			panic("bit length is negative or NaN")
		}
	}
	return bitLengths
}
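// A minimal usage sketch for CalculateEntropy (the counts below are made up
// for illustration): three symbols with counts 2, 1, 1 sum to 4, so the ideal
// code lengths are log2(4/2) = 1 bit and log2(4/1) = 2 bits.
func ExampleCalculateEntropy() {
	bits := CalculateEntropy([]float64{2, 1, 1})
	fmt.Println(bits) // [1 2 2]
}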
// Js computes a Jensen-Shannon-style divergence between two bags of words:
// KL(P||M) + KL(Q||M) with M = (P+Q)/2, i.e. twice the usual
// Jensen-Shannon divergence.
func Js(words_left map[string]int, words_right map[string]int) float64 {
	len_left, len_right := 0, 0
	for _, val := range words_left {
		len_left += val
	}
	for _, val := range words_right {
		len_right += val
	}
	dist := 0.0
	for key, val := range words_left {
		p := float64(val) / float64(len_left)
		q := 0.0
		if len_right > 0 {
			q = float64(words_right[key]) / float64(len_right)
		}
		dist += p * math.Log2(2*p/(p+q))
	}
	for key, val := range words_right {
		p := float64(val) / float64(len_right)
		q := 0.0
		if len_left > 0 {
			q = float64(words_left[key]) / float64(len_left)
		}
		dist += p * math.Log2(2*p/(p+q))
	}
	return dist
}
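// Hypothetical usage sketch for Js: identical distributions score 0 (every
// term is p·log2(1)), and the score grows as the word counts diverge.
func ExampleJs() {
	same := map[string]int{"go": 2, "fun": 1}
	fmt.Println(Js(same, same)) // 0
	other := map[string]int{"go": 1}
	fmt.Println(Js(same, other) > 0) // true
}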
/**
  Used to build a rank directory from the given input string.

  @param data A javascript string containing the data, as readable using
    the BitString object.
  @param numBits The number of bits to index.
  @param l1Size The number of bits that each entry in the Level 1 table
    summarizes. This should be a multiple of l2Size.
  @param l2Size The number of bits that each entry in the Level 2 table
    summarizes.
*/
func CreateRankDirectory(data string, numBits, l1Size, l2Size uint) RankDirectory {
	bits := BitString{}
	bits.Init(data)
	var p, i uint = 0, 0
	var count1, count2 uint = 0, 0
	l1bits := uint(math.Ceil(math.Log2(float64(numBits))))
	l2bits := uint(math.Ceil(math.Log2(float64(l1Size))))
	directory := BitWriter{}
	for p+l2Size <= numBits {
		count2 += bits.Count(p, l2Size)
		i += l2Size
		p += l2Size
		if i == l1Size {
			count1 += count2
			directory.Write(count1, l1bits)
			count2 = 0
			i = 0
		} else {
			directory.Write(count2, l2bits)
		}
	}
	rd := RankDirectory{}
	rd.Init(directory.GetData(), data, numBits, l1Size, l2Size)
	return rd
}
// expTables prepares the exp-tables described in section 6.4 of the EIDMA
// report by F.M.J. Willems and Tj. J. Tjalkens, "Complexity Reduction of the
// Context-Tree Weighting Algorithm: A Study for KPN Research", Technical
// University of Eindhoven, EIDMA Report RS.97.01.
func expTables() ([]uint64, []uint64) {
	var pow2f float64 = 1 << f
	A := make([]uint64, int(pow2f)+1)
	for i := 1; i <= int(pow2f); i++ {
		A[i] = uint64(pow2f*math.Exp2(-float64(i)/pow2f) + 0.5)
	}
	// B entries for j in [1<<(f-1), (1<<f)-1].
	B := make([]uint64, int(pow2f))
	for j := 1 << (f - 1); j <= (1<<f)-1; j++ {
		B[j] = uint64(-pow2f*math.Log2(float64(j)/pow2f) + 0.5)
	}
	// B entries for j in [1, (1<<(f-1))-1].
	for j := 1; j < (1 << (f - 1)); j++ {
		k := math.Ceil(float64(f) - 1 - math.Log2(float64(j)))
		b2kj := B[int(math.Exp2(k))*j]
		if b2kj == 0 {
			panic("expTables: B[(1<<k)*j] has not been computed")
		}
		B[j] = b2kj + uint64(k*pow2f)
	}
	return A, B
}
// MutualInformation returns the mutual information, in bits, conveyed by the
// items in a sequence.
func (corpus *Corpus) MutualInformation(seq []int) (I float64) {
	I = math.Log2(corpus.Probability(seq))
	for i := 0; i < len(seq); i++ {
		I -= math.Log2(corpus.Probability(seq[i : i+1]))
	}
	return I
}
func solve(n float64) int {
	for digits := int(math.Log10(n)) + 2; ; digits++ {
		low := int(math.Log2(n) + float64(digits)*log2_10)
		high := int(math.Log2(n+1) + float64(digits)*log2_10)
		if low+1 == high {
			return high
		}
	}
}
// InitOneLayer
func (qt *QuadTree) InitOneLayer(xmin, xmax, ymin, ymax, z int64, resx, resy, resz float64) *QuadTree {
	qt = new(QuadTree)
	dimx := xmax - xmin + 1
	dimy := ymax - ymin + 1
	var depthx, depthy int
	if dimx > qtW {
		depthx = int(math.Log2(float64(dimx))+0.5) - int(math.Log2(float64(qtW))) + 1
	} else {
		depthx = 1
	}
	if dimy > qtH {
		depthy = int(math.Log2(float64(dimy))+0.5) - int(math.Log2(float64(qtH))) + 1
	} else {
		depthy = 1
	}
	depth := int(math.Max(float64(depthx), float64(depthy)))
	tasks := qt.TaskLoad(depth)
	fmt.Printf("The depth of this quadtree is %v with %v tasks assigned\n", depth, tasks)
	for i := 1; i < depth; i++ {
		resx *= 2.0
		resy *= 2.0
	}
	ch := make(chan bool)
	wg.Add(1)
	go qt.Construct(nil, 0, depth, -1, xmin, ymin, z, xmax, ymax, resx, resy, resz, 0, 0, 0, qtW, qtH, 1, ch, &wg)
	go func() {
		wg.Wait()
		<-ch
	}()
	fmt.Printf("~~~current tile %v children %v %v %v %v %v %v\n", qt, qt.TL, qt.TR, qt.BL, qt.BR, resx, resy)
	qt.TraverseTree()
	return qt
}
// pushPullScale is used to scale the time interval at which push/pull
// syncs take place. It is used to prevent network saturation as the
// cluster size grows.
func pushPullScale(interval time.Duration, n int) time.Duration {
	// Don't scale until we cross the threshold
	if n <= pushPullScaleThreshold {
		return interval
	}
	multiplier := math.Ceil(math.Log2(float64(n))-math.Log2(float64(pushPullScaleThreshold))) + 1.0
	return time.Duration(multiplier) * interval
}
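// Worked example of the scaling above (assuming pushPullScaleThreshold = 32,
// the value used by HashiCorp's memberlist): a 30-second interval stays 30s
// up to 32 nodes, becomes 60s at 64 nodes and 90s at 128 — each doubling of
// the cluster adds one extra interval instead of doubling sync traffic.
func ExamplePushPullScale() {
	for _, n := range []int{16, 64, 128} {
		fmt.Println(n, pushPullScale(30*time.Second, n))
	}
	// 16 30s
	// 64 1m0s
	// 128 1m30s
}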
func init() {
	flag.BoolVar(&sequenceMode, "s", false, "sequence mode")
	lg3 = math.Log2(3)
	lg5 = math.Log2(5)
	front = [3]cursor{
		{0, 0, 1},   // 2
		{1, 0, lg3}, // 3
		{2, 0, lg5}, // 5
	}
}
func (rd *RankDirectory) Init(directoryData, bitData string, numBits, l1Size, l2Size uint) {
	rd.directory.Init(directoryData)
	rd.data.Init(bitData)
	rd.l1Size = l1Size
	rd.l2Size = l2Size
	rd.l1Bits = uint(math.Ceil(math.Log2(float64(numBits))))
	rd.l2Bits = uint(math.Ceil(math.Log2(float64(l1Size))))
	rd.sectionBits = (l1Size/l2Size-1)*rd.l2Bits + rd.l1Bits
	rd.numBits = numBits
}
func (l labelledPredictions) NormalizedEntropy() float64 {
	numPositives := 0
	for _, e := range l {
		if e.Label {
			numPositives++
		}
	}
	p := float64(numPositives) / float64(l.Len())
	return l.LogScore() / (p*math.Log2(p) + (1-p)*math.Log2(1-p))
}
func human_scale(value float64, base float64, unit string) string {
	exp := []string{"y", "z", "a", "f", "p", "n", "µ", "m", "", "k", "M", "G", "T", "P", "E", "Z", "Y"}
	s := math.Floor(math.Log2(value) / math.Log2(base))
	h_v := value / math.Pow(base, s)
	if s > -9 && s < 9 {
		return strconv.FormatFloat(h_v, 'f', 2, 64) + " " + exp[int(s)+8] + unit
	}
	return strconv.FormatFloat(value, 'E', 6, 64) + " " + unit
}
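// Hypothetical usage of human_scale: with base 1000 the ratio of logs picks
// the SI prefix, e.g. floor(log2(1234567)/log2(1000)) = 2, so the value is
// divided by 1000^2 and lands in the "M" slot.
func ExampleHumanScale() {
	fmt.Println(human_scale(1234567, 1000, "B")) // 1.23 MB
}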
func (l labelledPredictions) LogScore() float64 {
	cumulativeLogLoss := 0.0
	for _, e := range l {
		if e.Label {
			cumulativeLogLoss += math.Log2(e.Prediction)
		} else {
			cumulativeLogLoss += math.Log2(1 - e.Prediction)
		}
	}
	return cumulativeLogLoss / float64(l.Len())
}
// From http://rosettacode.org/wiki/Entropy#Go
func entropy(s string) float64 {
	m := map[rune]float64{}
	for _, r := range s {
		m[r]++
	}
	hm := 0.
	for _, c := range m {
		hm += c * math.Log2(c)
	}
	l := float64(len(s))
	return math.Log2(l) - hm/l
}
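// The return expression is Shannon entropy rearranged to avoid a second pass
// over the counts: -Σ (c/l)·log2(c/l) = log2(l) - (Σ c·log2(c))/l. A quick
// sanity check with the classic Rosetta Code test string:
func ExampleEntropy() {
	fmt.Println(entropy("1223334444")) // 1.8464393446710154
}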
// Returns a new Bloom filter. Parameters are the expected number of elements
// in the set and the desired false positive probability. Optimal size and
// number of hashes are calculated based on these numbers.
//
// p = false positive rate of the form 1/p, powers of two preferred
// optimal number of hashes k = (m/n)ln(2)
func NewBloomFilter(capacity, probability int) *BloomFilter {
	bitSize := int64(math.Abs(math.Ceil(float64(capacity) * math.Log2(math.E) * math.Log2(1/float64(probability)))))
	numHashes := int(math.Floor(float64(bitSize/int64(capacity)) * math.Log(2)))
	numBuckets := bitSize / 32
	return &BloomFilter{
		Capacity:          capacity,
		FalsePositiveRate: probability,
		NumHashes:         numHashes,
		BitSize:           bitSize,
		numBuckets:        numBuckets,
		state:             make([]uint32, uint(numBuckets)),
	}
}
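// Worked example of the sizing math above, with hypothetical inputs: for
// capacity n = 1000 and probability = 1024 (a 1/1024 false-positive rate),
// m = |ceil(1000 · log2(e) · log2(1/1024))| = 14426 bits. Note that the hash
// count divides as integers first, so k = floor(14426/1000 · ln 2) =
// floor(14 · 0.693) = 9.
func ExampleNewBloomFilter() {
	bf := NewBloomFilter(1000, 1024)
	fmt.Println(bf.BitSize, bf.NumHashes) // 14426 9
}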
// SDCG returns the sdcg score as a float64.
func SDCG(run Ranking, depth, R uint) float64 {
	dcg := float64(0)
	weight := float64(0)
	for i := uint(0); i < depth; i++ {
		score := run & (1 << i)
		if score != 0 {
			// This is a relevant document: DCG gain (2^rel - 1) with rel = 1.
			dcg += (math.Pow(2, 1) - 1) / math.Log2(float64(i+2))
		}
		weight += float64(1) / math.Log2(float64(i+2))
	}
	return dcg / weight
}
// New returns a new Histogram instance capable of tracking values in the given
// range and with the given amount of precision.
func New(minValue, maxValue int64, sigfigs int) *Histogram {
	if sigfigs < 1 || 5 < sigfigs {
		panic(fmt.Errorf("sigfigs must be [1,5] (was %d)", sigfigs))
	}

	largestValueWithSingleUnitResolution := 2 * math.Pow10(sigfigs)
	subBucketCountMagnitude := int32(math.Ceil(math.Log2(float64(largestValueWithSingleUnitResolution))))

	subBucketHalfCountMagnitude := subBucketCountMagnitude
	if subBucketHalfCountMagnitude < 1 {
		subBucketHalfCountMagnitude = 1
	}
	subBucketHalfCountMagnitude--

	unitMagnitude := int32(math.Floor(math.Log2(float64(minValue))))
	if unitMagnitude < 0 {
		unitMagnitude = 0
	}

	subBucketCount := int32(math.Pow(2, float64(subBucketHalfCountMagnitude)+1))
	subBucketHalfCount := subBucketCount / 2
	subBucketMask := int64(subBucketCount-1) << uint(unitMagnitude)

	// determine exponent range needed to support the trackable value with no
	// overflow:
	smallestUntrackableValue := int64(subBucketCount) << uint(unitMagnitude)
	bucketsNeeded := int32(1)
	for smallestUntrackableValue < maxValue {
		smallestUntrackableValue <<= 1
		bucketsNeeded++
	}

	bucketCount := bucketsNeeded
	countsLen := (bucketCount + 1) * (subBucketCount / 2)

	return &Histogram{
		lowestTrackableValue:        minValue,
		highestTrackableValue:       maxValue,
		unitMagnitude:               int64(unitMagnitude),
		significantFigures:          int64(sigfigs),
		subBucketHalfCountMagnitude: subBucketHalfCountMagnitude,
		subBucketHalfCount:          subBucketHalfCount,
		subBucketMask:               subBucketMask,
		subBucketCount:              subBucketCount,
		bucketCount:                 bucketCount,
		countsLen:                   countsLen,
		totalCount:                  0,
		counts:                      make([]int64, countsLen),
	}
}
// sdcg returns the sdcg score as a float64.
func sdcg(run Ranking, depth, R uint) float64 {
	log.Fatal("Unimplemented")
	dcg := float64(0)
	weight := float64(0)
	for i := uint(0); i < depth; i++ {
		score := (uint(run) >> (i * 2)) & uint(3)
		if score != 0 {
			// This is a relevant document
			dcg += (math.Pow(2, 1) - 1) / math.Log2(float64(i+2))
		}
		weight += float64(1) / math.Log2(float64(i+2))
	}
	return dcg / weight
}
// The weight is the base-2 logarithm of the wait time plus a manually assigned
// priority; the larger the final weight, the sooner the task is scheduled.
// At equal priority, even tiny differences in wait time are reflected.
// With a priority gap of 1, the wait time must double before the final
// weights become equal.
func (this *CrawlTaskSorter) defaultLessBy(t1, t2 *types.CrawlTask) bool {
	var waitTime int64 = this.Now - t1.LastCrawlTime
	if waitTime == 0 {
		waitTime = 1
	}
	var w1 float64 = math.Log2(float64(waitTime)) + float64(t1.Priority)
	waitTime = this.Now - t2.LastCrawlTime
	if waitTime == 0 {
		waitTime = 1
	}
	var w2 float64 = math.Log2(float64(waitTime)) + float64(t2.Priority)
	return w1 < w2
}
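// Worked example of the weighting (hypothetical numbers): a priority-0 task
// that has waited 1024s scores log2(1024) + 0 = 10, exactly matching a
// priority-1 task that has waited 512s (log2(512) + 1 = 10) — doubling the
// wait compensates for one priority level, as the comment above claims.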
func main() {
	flag.Parse()
	file, err := os.Open(*Dictionary)
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()
	scanner := bufio.NewScanner(file)
	totalWords := 0
	words := make([]string, 0)
	for scanner.Scan() {
		if WordRegexp.MatchString(scanner.Text()) {
			words = append(words, scanner.Text())
		}
		totalWords++
	}
	numWords := len(words)
	numWordsBig := big.NewInt(int64(numWords))
	bitsPerWord := math.Log2(float64(numWords))
	bitsPerPhrase := bitsPerWord * float64(*WordsPerPhrase)
	totalBits := bitsPerPhrase - math.Log2(float64(*NumPhrases))
	if !*Quiet {
		fmt.Printf("%d possible words (of %d in %s).\n", numWords, totalWords, *Dictionary)
		fmt.Printf("%d random words per phrase.\n", *WordsPerPhrase)
		fmt.Printf("∴ %f bits of entropy per word.\n", bitsPerWord)
		fmt.Printf("∴ %f bits of entropy per phrase.\n", bitsPerPhrase)
		fmt.Printf("%d phrases to choose from.\n", *NumPhrases)
		fmt.Printf("∴ %f bits if you pick one phrase from this list.\n", totalBits)
		fmt.Println("---------------------------------------------------")
	}
	for i := 0; i < *NumPhrases; i++ {
		phrase := make([]string, 0, *WordsPerPhrase)
		for j := 0; j < *WordsPerPhrase; j++ {
			randBig, err := rand.Int(rand.Reader, numWordsBig)
			if err != nil {
				log.Fatal(err)
			}
			phrase = append(phrase, words[randBig.Int64()])
		}
		fmt.Println(strings.Join(phrase, " "))
	}
}
// NextPowerOf2 returns the next power of 2 >= x.
func NextPowerOf2(x int) int {
	if IsPowerOf2(x) {
		return x
	}
	return int(math.Pow(2, math.Ceil(math.Log2(float64(x)))))
}
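// The math.Log2 round-trip above can drift for very large ints because
// float64 has only 53 bits of mantissa. A float-free sketch (hypothetical
// helper, not part of the original API, assuming x >= 1 and a 64-bit int):
func nextPowerOf2Bits(x int) int {
	// Smear the highest set bit of x-1 into every lower position, then add 1.
	n := x - 1
	n |= n >> 1
	n |= n >> 2
	n |= n >> 4
	n |= n >> 8
	n |= n >> 16
	n |= n >> 32
	return n + 1
}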
func isPowerOf2(n int) bool {
	t := math.Log2(float64(n))
	return t-float64(int(t)) == 0.0
}
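// The float check above relies on math.Log2 landing on exact integers; an
// integer-only sketch uses the classic trick that a power of two has a single
// set bit, so n & (n-1) clears it to zero (hypothetical helper for contrast):
func isPowerOf2Int(n int) bool {
	return n > 0 && n&(n-1) == 0
}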
// Fetch remote ips from REMOTE_URL.
//
// syntax:
//
//	apnic|CN|ipv4|1.94.0.0|131072|20100806|allocated
//	...
func FetchRemoteIps() (ips []Ip) {
	println("Fetching latest ip data from apnic.net, this may take a few minutes, please wait...")
	resp, err := http.Get(REMOTE_URL)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}
	re, _ := regexp.Compile(`apnic\|CN\|ipv4\|([\d\.]+)\|(\d+)\|`)
	rows := re.FindAllSubmatch(body, -1)
	for i := 0; i < len(rows); i++ {
		row := rows[i]
		ip := string(row[1])
		num_ip, _ := strconv.Atoi(string(row[2]))
		cidr := 32 - int(math.Log2(float64(num_ip)))
		cidrStr := strconv.Itoa(cidr)
		ips = append(ips, Ip{ip, cidrStr, cidr2mask(cidr)})
	}
	return ips
}
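// Worked example of the prefix arithmetic: the sample row in the comment
// above allocates 131072 addresses, and 32 - log2(131072) = 32 - 17 = 15,
// so 1.94.0.0 becomes the CIDR block 1.94.0.0/15.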
// convert2PhysicalPlan implements LogicalPlan convert2PhysicalPlan interface.
func (p *NewSort) convert2PhysicalPlan(prop requiredProperty) (*physicalPlanInfo, *physicalPlanInfo, uint64, error) {
	var err error
	sortedPlanInfo, unSortedPlanInfo, count := p.getPlanInfo(prop)
	if sortedPlanInfo != nil {
		return sortedPlanInfo, unSortedPlanInfo, count, nil
	}
	selfProp := make(requiredProperty, 0, len(p.ByItems))
	for _, by := range p.ByItems {
		if col, ok := by.Expr.(*expression.Column); ok {
			selfProp = append(selfProp, &columnProp{col: col, desc: by.Desc})
		} else {
			selfProp = nil
			break
		}
	}
	sortedPlanInfo, unSortedPlanInfo, count, err = p.GetChildByIndex(0).(LogicalPlan).convert2PhysicalPlan(selfProp)
	if err != nil {
		return nil, nil, 0, errors.Trace(err)
	}
	cnt := float64(count)
	sortCost := cnt*math.Log2(cnt)*cpuFactor + memoryFactor*cnt
	if len(selfProp) == 0 {
		sortedPlanInfo = addPlanToResponse(p, unSortedPlanInfo)
	} else if sortCost+unSortedPlanInfo.cost < sortedPlanInfo.cost {
		sortedPlanInfo.cost = sortCost + unSortedPlanInfo.cost
		sortedPlanInfo = addPlanToResponse(p, unSortedPlanInfo)
	}
	if matchProp(prop, selfProp) {
		return sortedPlanInfo, sortedPlanInfo, count, nil
	}
	unSortedPlanInfo = sortedPlanInfo
	sortedPlanInfo = &physicalPlanInfo{cost: math.MaxFloat64}
	p.storePlanInfo(prop, sortedPlanInfo, unSortedPlanInfo, count)
	return sortedPlanInfo, unSortedPlanInfo, count, nil
}
func (p *pIterator) next() bool {
	if !(p.countToIdx < p.h.totalCount) {
		if p.seenLastValue {
			return false
		}
		p.seenLastValue = true
		p.percentile = 100
		return true
	}
	if p.subBucketIdx == -1 && !p.iterator.next() {
		return false
	}
	var done = false
	for !done {
		currentPercentile := (100.0 * float64(p.countToIdx)) / float64(p.h.totalCount)
		if p.countAtIdx != 0 && p.percentileToIteratorTo <= currentPercentile {
			p.percentile = p.percentileToIteratorTo
			halfDistance := math.Trunc(math.Pow(2, math.Trunc(math.Log2(100.0/(100.0-p.percentileToIteratorTo)))+1))
			percentileReportingTicks := float64(p.ticksPerHalfDistance) * halfDistance
			p.percentileToIteratorTo += 100.0 / percentileReportingTicks
			return true
		}
		done = !p.iterator.next()
	}
	return true
}
func BenchmarkLog2Float64ToU64(b *testing.B) {
	for n := 0; n < b.N; n++ {
		for i := uint64(1); i < 64; i++ {
			_ = uint64(math.Log2(float64(uint64(1) << i)))
		}
	}
}
// Returns the entropy for the given distribution.
// The distribution does not have to sum up to 1, for it will be normalized
// anyway.
func Ent(distribution []float64) float64 {
	// Sum of the distribution.
	sum := Sum(distribution)
	// Go over each bucket.
	result := 0.0
	for _, v := range distribution {
		// Negative values are not allowed.
		if v < 0.0 {
			return math.NaN()
		}
		// Ignore zeros.
		if v == 0.0 {
			continue
		}
		// Probability.
		p := v / sum
		// Entropy.
		result -= p * math.Log2(p)
	}
	return result
}
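// Quick sanity check for Ent: a uniform distribution over four buckets has
// entropy log2(4) = 2 bits, and because Ent normalizes, {1,1,1,1} and
// {25,25,25,25} score the same.
func ExampleEnt() {
	fmt.Println(Ent([]float64{1, 1, 1, 1}))     // 2
	fmt.Println(Ent([]float64{25, 25, 25, 25})) // 2
}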
// FlagsSlice returns a slice containing each distinct flag that
// is currently set and included as a part of the current value
// (bitmask) of flags. Results are sorted in an ascending order.
// If there is an error, it will be of type *Error.
func (mgc *Magic) FlagsSlice() ([]int, error) {
	mgc.Lock()
	defer mgc.Unlock()

	runtime.KeepAlive(mgc.magic)

	if mgc.cookie == nil {
		return []int{}, mgc.error()
	}
	if mgc.flags == 0 {
		return []int{0}, nil
	}

	var n int
	var flags []int
	// Split current value (bitmask) into a list
	// of distinct flags (bits) currently set.
	for i := mgc.flags; i > 0; i = i - n {
		n = int(math.Log2(float64(i)))
		n = int(math.Pow(2, float64(n)))
		flags = append(flags, n)
	}
	sort.Ints(flags)
	return flags, nil
}
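// Worked example of the bit-splitting loop (hypothetical bitmask): for flags
// = 13 (0b1101), each pass takes 2^floor(log2(i)) — the highest set bit — so
// it peels off 8, then 4, then 1, and sorting yields [1 4 8].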
// GrayCode returns cnt reflected Gray codes of width log2(cnt) bits, computed
// as k = i XOR i2, where i2 lags behind i at i>>1.
func GrayCode(cnt int) []string {
	KeyWidth := int(math.Log2(float64(cnt)))
	i := big.NewInt(0)
	i2 := big.NewInt(0)
	one := big.NewInt(1)
	k := big.NewInt(0)
	keys := make([]string, cnt)
	indx := 0
	for j := 0; j < cnt/2; j++ {
		k.Xor(i, i2)
		keys[indx] = fmt.Sprintf("%0*b", KeyWidth, k)
		indx++
		i.Add(i, one)
		k.Xor(i, i2)
		keys[indx] = fmt.Sprintf("%0*b", KeyWidth, k)
		indx++
		i.Add(i, one)
		i2.Add(i2, one)
	}
	return keys
}
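// Quick check of GrayCode: for cnt = 8, XOR-ing i with i2 (which advances at
// half speed, i.e. i>>1) reproduces the standard reflected Gray sequence,
// each code differing from its neighbor by exactly one bit.
func ExampleGrayCode() {
	fmt.Println(GrayCode(8))
	// [000 001 011 010 110 111 101 100]
}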