// TestRandomDatasetHasExpectedStatistics builds a large random dataset for
// several probabilities and checks each summary statistic against the value
// this test expects: calibration 1, log score p*log2(p)+(1-p)*log2(1-p),
// normalized entropy 1 and ROC 0.5.
func TestRandomDatasetHasExpectedStatistics(t *testing.T) {
	cases := []struct {
		numSamples  int
		probability float64
	}{
		{100000, 0.02},
		{100000, 0.5},
		{100000, 0.9},
	}

	for _, c := range cases {
		data := randomDataset(c.numSamples, c.probability)
		t.Log()
		t.Log(data.String())

		if got := data.Calibration(); !near(got, 1.0) {
			t.Errorf("Calibration: expected %v, had %v", 1.0, got)
		}

		// Expected log score for probability p.
		p := c.probability
		wantLogScore := p*math.Log2(p) + (1-p)*math.Log2(1-p)
		if got := data.LogScore(); !near(got, wantLogScore) {
			t.Errorf("Logscore: expected %v, had %v", wantLogScore, got)
		}

		if got := data.NormalizedEntropy(); !near(got, 1.0) {
			t.Errorf("Entropy: expected %v, had %v", 1.0, got)
		}

		if got := data.ROC(); !near(got, 0.5) {
			t.Errorf("ROC: expected %v, had %v", 0.5, got)
		}
	}
}
Beispiel #2
0
// CalculateEntropy calculates the entropy of each symbol, based on the counts
// of each symbol: entry i of the result is log2(total / count[i]). The result
// is similar to the result of CalculateBitLengths, but with the actual
// theoretical bit lengths according to the entropy. Since the resulting values
// are fractional, they cannot be used to encode the tree specified by DEFLATE.
//
// A symbol whose count is 0 is given the cost it would have with a count of 1,
// i.e. log2(total); when the counts sum to 0, log2(len(count)) is used
// instead. It panics if a computed length is negative or NaN.
func CalculateEntropy(count []float64) (bitLengths []float64) {
	total := 0.0
	for _, c := range count {
		total += c
	}

	// Cost charged to symbols that never occurred.
	zeroCost := math.Log2(total)
	if total == 0 {
		zeroCost = math.Log2(float64(len(count)))
	}

	bitLengths = make([]float64, len(count))
	for i, c := range count {
		// When the count of the symbol is 0, but its cost is requested
		// anyway, the symbol will appear at least once, so it is costed as
		// if its count were 1.
		bits := zeroCost
		if c != 0 {
			bits = math.Log2(total / c)
		}
		// Phrased as a negated >= so that NaN is rejected as well.
		if !(bits >= 0) {
			panic("bit length is not positive")
		}
		bitLengths[i] = bits
	}
	return bitLengths
}
Beispiel #3
0
// Js returns a Jensen-Shannon-style divergence, in bits, between two word
// count tables.
//
// Each table maps a word to its occurrence count and is normalized by the sum
// of its own counts. With P and Q the two normalized distributions the result
// is
//
//	sum_w P(w)*log2(2*P(w)/(P(w)+Q(w))) + sum_w Q(w)*log2(2*Q(w)/(P(w)+Q(w)))
//
// Note that no factor 1/2 is applied: identical distributions give 0 and
// distributions with no word in common give 2.
//
// Words whose count is 0 contribute nothing (the usual 0*log(0) = 0
// convention); without this a single zero-count entry would turn the whole
// result into NaN.
func Js(words_left map[string]int, words_right map[string]int) float64 {
	total := func(words map[string]int) int {
		sum := 0
		for _, n := range words {
			sum += n
		}
		return sum
	}
	leftTotal, rightTotal := total(words_left), total(words_right)

	dist := 0.0
	// accumulate adds to dist the terms of one direction: every word of from,
	// weighted against its relative frequency in other.
	accumulate := func(from, other map[string]int, fromTotal, otherTotal int) {
		for word, n := range from {
			if n == 0 {
				// p would be 0 (or 0/0): defined to contribute nothing.
				continue
			}
			p := float64(n) / float64(fromTotal)
			q := 0.0
			if otherTotal > 0 {
				q = float64(other[word]) / float64(otherTotal)
			}
			dist += p * math.Log2(2*p/(p+q))
		}
	}
	accumulate(words_left, words_right, leftTotal, rightTotal)
	accumulate(words_right, words_left, rightTotal, leftTotal)

	return dist
}
// CreateRankDirectory builds a rank directory for the given bit data.
//
// data is the string holding the bits, in the form BitString.Init reads.
// numBits is the number of bits to index. l1Size is the number of bits that
// each entry in the Level 1 table summarizes; this should be a multiple of
// l2Size. l2Size is the number of bits that each entry in the Level 2 table
// summarizes.
//
// The data is walked in chunks of l2Size bits. After each chunk one entry is
// written: the count accumulated since the last Level 1 boundary (Level 2
// entry, ceil(log2(l1Size)) bits wide), or, when the chunk completes an
// l1Size block, the count accumulated since the start of the data (Level 1
// entry, ceil(log2(numBits)) bits wide).
func CreateRankDirectory(data string, numBits, l1Size, l2Size uint) RankDirectory {
	source := BitString{}
	source.Init(data)

	l1Width := uint(math.Ceil(math.Log2(float64(numBits))))
	l2Width := uint(math.Ceil(math.Log2(float64(l1Size))))

	out := BitWriter{}

	var (
		l1Total uint // set bits since the start of the data
		l2Total uint // set bits since the last Level 1 boundary
		inBlock uint // bits consumed since the last Level 1 boundary
	)
	for pos := uint(0); pos+l2Size <= numBits; pos += l2Size {
		l2Total += source.Count(pos, l2Size)
		inBlock += l2Size

		if inBlock != l1Size {
			out.Write(l2Total, l2Width)
			continue
		}

		// End of a Level 1 block: emit the running total and start over.
		l1Total += l2Total
		out.Write(l1Total, l1Width)
		l2Total, inBlock = 0, 0
	}

	result := RankDirectory{}
	result.Init(out.GetData(), data, numBits, l1Size, l2Size)
	return result
}
Beispiel #5
0
Datei: ac.go Projekt: postfix/ctw
// expTables prepares the exp-tables described in section 6.4 of the EIDMA
// report by F.M.J. Willems and Tj. J. Tjalkens: Complexity Reduction of the
// Context-Tree Weighting Algorithm: A Study for KPN Research, Technical
// University of Eindhoven, EIDMA Report RS.97.01.
//
// With F = 2^f it returns
//
//	A[i] = round(F * 2^(-i/F))    for i in [1, F]
//	B[j] = round(-F * log2(j/F))  for j in [F/2, F-1]
//	B[j] = B[2^k * j] + k*F       for j in [1, F/2-1], k = ceil(f-1-log2(j))
//
// A[0] and B[0] are left at zero.
func expTables() ([]uint64, []uint64) {
	const (
		size = 1 << f       // 2^f
		half = 1 << (f - 1) // 2^(f-1)
	)
	scale := float64(size)

	A := make([]uint64, size+1)
	for i := 1; i <= size; i++ {
		A[i] = uint64(scale*math.Exp2(-float64(i)/scale) + 0.5)
	}

	B := make([]uint64, size)

	// Upper half of B is computed directly.
	for j := half; j < size; j++ {
		B[j] = uint64(-scale*math.Log2(float64(j)/scale) + 0.5)
	}

	// Lower half of B is derived from the upper half: j is scaled up by 2^k
	// into the range already filled in, and k*F is added back.
	for j := 1; j < half; j++ {
		shift := math.Ceil(float64(f) - 1 - math.Log2(float64(j)))
		upper := B[int(math.Exp2(shift))*j]
		if upper == 0 {
			// Presumably guards against reading an entry that was never
			// filled in.
			panic("")
		}
		B[j] = upper + uint64(shift*scale)
	}

	return A, B
}
Beispiel #6
0
// MutualInformation returns the mutual information, in bits, conveyed by the
// items in a sequence: log2 of the corpus probability of the whole sequence
// minus the sum of log2 of the corpus probability of each single item.
func (corpus *Corpus) MutualInformation(seq []int) (I float64) {
	I = math.Log2(corpus.Probability(seq))
	for i := range seq {
		single := seq[i : i+1]
		I -= math.Log2(corpus.Probability(single))
	}
	return I
}
Beispiel #7
0
// solve searches, for an increasing number of extra decimal digits d starting
// at int(log10(n))+2, for the first d where the truncated values of
// log2(n)+d*log2_10 and log2(n+1)+d*log2_10 differ by exactly one, and returns
// the larger of the two.
//
// Presumably this finds an exponent e such that 2^e starts with the decimal
// digits of n — TODO confirm against the caller.
func solve(n float64) int {
	lower := math.Log2(n)
	upper := math.Log2(n + 1)

	digits := int(math.Log10(n)) + 2
	for {
		offset := float64(digits) * log2_10
		low := int(lower + offset)
		high := int(upper + offset)
		if high == low+1 {
			return high
		}
		digits++
	}
}
Beispiel #8
0
// InitOneLayer allocates a new QuadTree for the inclusive tile range
// [xmin,xmax] x [ymin,ymax] at layer z, starts its construction, prints some
// diagnostics, traverses the tree and returns it.
//
// The receiver's incoming value is not used: qt is immediately replaced by a
// freshly allocated QuadTree, and that new tree is what is returned.
//
// resx and resy are doubled once per level above the first before being
// handed to Construct; resz is passed through unchanged.
//
// NOTE(review): this function does not itself wait for the Construct
// goroutine (the direct wg.Wait() is commented out), so the final Printf and
// TraverseTree appear able to run while construction is still in progress —
// confirm whether that is intended.
func (qt *QuadTree) InitOneLayer(xmin, xmax, ymin, ymax, z int64, resx, resy, resz float64) *QuadTree {

	qt = new(QuadTree)

	// Inclusive extents of the requested range.
	dimx := xmax - xmin + 1
	dimy := ymax - ymin + 1

	var depthx, depthy int

	// Depth needed along x: 1 when the range fits in qtW, otherwise derived
	// from the rounded log2 of the extent relative to log2 of qtW.
	if dimx > qtW {
		depthx = int(math.Log2(float64(dimx))+0.5) - int(math.Log2(float64(qtW))) + 1
	} else {
		depthx = 1
	}

	// Same computation along y, relative to qtH.
	if dimy > qtH {
		depthy = int(math.Log2(float64(dimy))+0.5) - int(math.Log2(float64(qtH))) + 1
	} else {
		depthy = 1
	}

	// The tree must be deep enough for the larger of the two axes.
	depth := int(math.Max(float64(depthx), float64(depthy)))

	tasks := qt.TaskLoad(depth)

	fmt.Printf("The depth of this quadtree is %v with %v tasks assigned\n", depth, tasks)

	// Double the x/y resolution once for every level beyond the first.
	for i := 1; i < depth; i++ {
		resx *= 2.0
		resy *= 2.0
	}

	// ch is unbuffered; wg is not declared in this function (package-level).
	ch := make(chan bool)
	wg.Add(1)
	go qt.Construct(nil, 0, depth, -1, xmin, ymin, z, xmax, ymax, resx, resy, resz, 0, 0, 0, qtW, qtH, 1, ch, &wg)
	//<-ch

	// Background goroutine: waits for wg, then receives a single value from ch.
	go func() {
		wg.Wait()
		//close(ch)
		<-ch
	}()

	//close(ch)

	//	for i := range ch {
	//		fmt.Println("~~~channels ",i)
	//	}

	//wg.Wait()

	fmt.Printf("~~~current tile %v children %v %v %v %v %v %v\n", qt, qt.TL, qt.TR, qt.BL, qt.BR, resx, resy)

	qt.TraverseTree()

	return qt
}
Beispiel #9
0
// pushPullScale is used to scale the time interval at which push/pull
// syncs take place. It is used to prevent network saturation as the
// cluster size grows.
//
// Up to and including pushPullScaleThreshold nodes the interval is returned
// unchanged; above it the interval is multiplied by
// ceil(log2(n) - log2(pushPullScaleThreshold)) + 1.
func pushPullScale(interval time.Duration, n int) time.Duration {
	// Don't scale until we cross the threshold
	if n <= pushPullScaleThreshold {
		return interval
	}

	excess := math.Log2(float64(n)) - math.Log2(pushPullScaleThreshold)
	factor := math.Ceil(excess) + 1.0
	return time.Duration(factor) * interval
}
// init registers the -s ("sequence mode") flag, precomputes log2(3) and
// log2(5), and seeds front with one cursor per entry marked 2, 3 and 5.
func init() {
	flag.BoolVar(&sequenceMode, "s", false, "sequence mode")

	lg3, lg5 = math.Log2(3), math.Log2(5)

	// The third field of each cursor is log2 of the number in the trailing
	// comment (log2(2) == 1).
	front = [3]cursor{
		{0, 0, 1},   // 2
		{1, 0, lg3}, // 3
		{2, 0, lg5}, // 5
	}
}
// Init prepares rd for use with the given directory and bit data.
//
// directoryData and bitData are handed to the Init methods of rd.directory
// and rd.data. numBits, l1Size and l2Size are stored as given. Level 1
// entries are ceil(log2(numBits)) bits wide, Level 2 entries are
// ceil(log2(l1Size)) bits wide, and one section is l1Size/l2Size-1 Level 2
// entries plus one Level 1 entry.
func (rd *RankDirectory) Init(directoryData, bitData string, numBits, l1Size, l2Size uint) {
	rd.directory.Init(directoryData)
	rd.data.Init(bitData)

	l1Width := uint(math.Ceil(math.Log2(float64(numBits))))
	l2Width := uint(math.Ceil(math.Log2(float64(l1Size))))

	rd.l1Size, rd.l2Size = l1Size, l2Size
	rd.l1Bits, rd.l2Bits = l1Width, l2Width
	rd.sectionBits = (l1Size/l2Size-1)*l2Width + l1Width
	rd.numBits = numBits
}
// NormalizedEntropy returns the log score of l divided by
// p*log2(p) + (1-p)*log2(1-p), where p is the fraction of entries whose Label
// is true.
func (l labelledPredictions) NormalizedEntropy() float64 {
	positives := 0
	for _, e := range l {
		if e.Label {
			positives++
		}
	}

	rate := float64(positives) / float64(l.Len())
	baseline := rate*math.Log2(rate) + (1-rate)*math.Log2(1-rate)
	return l.LogScore() / baseline
}
Beispiel #13
0
// human_scale formats value with two decimals and an SI-style prefix followed
// by unit, e.g. human_scale(2048, 1024, "B") == "2.00 kB".
//
// The prefix step is floor(log2(value)/log2(base)); value is divided by base
// raised to that step. Steps from -8 ("y") to 8 ("Y") are supported. Anything
// else, including the non-finite steps produced by a zero or negative value,
// is printed unscaled in exponent notation with six decimals.
func human_scale(value float64, base float64, unit string) string {
	prefixes := []string{"y", "z", "a", "f", "p", "n", "µ", "m", "", "k", "M", "G", "T", "P", "E", "Z", "Y"}

	step := math.Floor(math.Log2(value) / math.Log2(base))

	// Written as a negated range test so that a NaN step takes the fallback.
	if !(step > -9 && step < 9) {
		return strconv.FormatFloat(value, 'E', 6, 64) + " " + unit
	}

	scaled := value / math.Pow(base, step)
	return strconv.FormatFloat(scaled, 'f', 2, 64) + " " + prefixes[int(step)+8] + unit
}
// LogScore returns the mean, over all entries of l, of log2 of the value the
// entry assigned to its actual label: Prediction when Label is true and
// 1 - Prediction otherwise.
func (l labelledPredictions) LogScore() float64 {
	total := 0.0
	for _, e := range l {
		assigned := 1 - e.Prediction
		if e.Label {
			assigned = e.Prediction
		}
		total += math.Log2(assigned)
	}
	return total / float64(l.Len())
}
// entropy returns the Shannon entropy of s in bits per symbol, where the
// symbols are the runes of s. The empty string has entropy 0.
//
// It is computed as log2(N) - (sum over distinct runes of c*log2(c)) / N,
// with c the number of occurrences of a rune and N the total number of runes.
// N is counted in runes, not bytes, so that it matches the per-rune counts
// for non-ASCII input.
//
// Adapted from http://rosettacode.org/wiki/Entropy#Go
func entropy(s string) float64 {
	if s == "" {
		// Avoid log2(0) and 0/0 below.
		return 0
	}

	counts := map[rune]float64{}
	total := 0.0
	for _, r := range s {
		counts[r]++
		total++
	}

	weighted := 0.
	for _, c := range counts {
		weighted += c * math.Log2(c)
	}
	return math.Log2(total) - weighted/total
}
Beispiel #16
0
// NewBloomFilter returns a new Bloom filter. Parameters are the expected
// number of elements in the set and the desired false positive probability.
// Optimal size and number of hashes are calculated based on these numbers.
//
// probability is the denominator of the false positive rate: the rate aimed
// for is 1/probability, powers of two preferred.
//
//	bits   m = |ceil(capacity * log2(e) * log2(1/probability))|
//	hashes k = floor((m / capacity) * ln(2)), with m / capacity an integer division
//
// A capacity of 0 panics on that integer division.
//
// NOTE(review): the state slice holds m/32 words (rounded down), so it covers
// fewer than m bits whenever m is not a multiple of 32 — confirm the indexing
// code accounts for that.
func NewBloomFilter(capacity, probability int) *BloomFilter {
	rawBits := float64(capacity) *
		math.Log2(math.E) * math.Log2(1/float64(probability))
	bitSize := int64(math.Abs(math.Ceil(rawBits)))

	bitsPerElement := bitSize / int64(capacity)
	numHashes := int(math.Floor(float64(bitsPerElement) * math.Log(2)))

	numBuckets := bitSize / 32

	filter := &BloomFilter{
		Capacity:          capacity,
		FalsePositiveRate: probability,
		NumHashes:         numHashes,
		BitSize:           bitSize,
		numBuckets:        numBuckets,
		state:             make([]uint32, uint(numBuckets)),
	}
	return filter
}
// SDCG returns the sdcg score as a float64.
//
// run is read as a bitmask: bit i set means the document at rank i is
// relevant. Each relevant document within the first depth ranks contributes
// (2^1 - 1) / log2(i + 2); the sum is divided by the sum of 1 / log2(i + 2)
// over all depth ranks. R is not used.
func SDCG(run Ranking, depth, R uint) float64 {
	gain := math.Pow(2, 1) - 1

	var dcg, norm float64
	for rank := uint(0); rank < depth; rank++ {
		if run&(1<<rank) != 0 {
			// This is a relevant document
			dcg += gain / (math.Log2(float64(rank + 2)))
		}
		norm += float64(1) / math.Log2(float64(rank+2))
	}
	return dcg / norm
}
Beispiel #18
0
// New returns a new Histogram instance capable of tracking values in the given
// range and with the given amount of precision.
//
// minValue and maxValue are stored as the lowest and highest trackable
// values. sigfigs is the number of significant decimal figures to maintain
// and must be in [1,5]; New panics otherwise.
//
// A minValue below 1 is given a unit magnitude of 0. The magnitude is only
// derived from log2(minValue) when minValue is positive, because converting
// the -Inf or NaN that log2 yields for other values to an integer is
// implementation-dependent in Go.
func New(minValue, maxValue int64, sigfigs int) *Histogram {
	if sigfigs < 1 || 5 < sigfigs {
		panic(fmt.Errorf("sigfigs must be [1,5] (was %d)", sigfigs))
	}

	largestValueWithSingleUnitResolution := 2 * math.Pow10(sigfigs)
	subBucketCountMagnitude := int32(math.Ceil(math.Log2(largestValueWithSingleUnitResolution)))

	subBucketHalfCountMagnitude := subBucketCountMagnitude
	if subBucketHalfCountMagnitude < 1 {
		subBucketHalfCountMagnitude = 1
	}
	subBucketHalfCountMagnitude--

	// floor(log2(minValue)) is >= 0 for every minValue >= 1; anything
	// smaller keeps the zero value.
	var unitMagnitude int32
	if minValue > 0 {
		unitMagnitude = int32(math.Floor(math.Log2(float64(minValue))))
	}

	subBucketCount := int32(math.Pow(2, float64(subBucketHalfCountMagnitude)+1))

	subBucketHalfCount := subBucketCount / 2
	subBucketMask := int64(subBucketCount-1) << uint(unitMagnitude)

	// determine exponent range needed to support the trackable value with no
	// overflow:
	smallestUntrackableValue := int64(subBucketCount) << uint(unitMagnitude)
	bucketsNeeded := int32(1)
	for smallestUntrackableValue < maxValue {
		if smallestUntrackableValue > math.MaxInt64/2 {
			// Doubling again would overflow int64 and the loop would never
			// terminate; one more bucket covers the rest of the range.
			bucketsNeeded++
			break
		}
		smallestUntrackableValue <<= 1
		bucketsNeeded++
	}

	bucketCount := bucketsNeeded
	countsLen := (bucketCount + 1) * (subBucketCount / 2)

	return &Histogram{
		lowestTrackableValue:        minValue,
		highestTrackableValue:       maxValue,
		unitMagnitude:               int64(unitMagnitude),
		significantFigures:          int64(sigfigs),
		subBucketHalfCountMagnitude: subBucketHalfCountMagnitude,
		subBucketHalfCount:          subBucketHalfCount,
		subBucketMask:               subBucketMask,
		subBucketCount:              subBucketCount,
		bucketCount:                 bucketCount,
		countsLen:                   countsLen,
		totalCount:                  0,
		counts:                      make([]int64, countsLen),
	}
}
// sdcg returns the sdcg score as a float64.
//
// It currently begins with log.Fatal("Unimplemented"), so the computation
// below it is not reached in practice. That computation reads run as packed
// 2-bit scores (rank i at bits 2i and 2i+1): each rank with a non-zero score
// within the first depth ranks contributes (2^1 - 1) / log2(i + 2), and the
// sum is divided by the sum of 1 / log2(i + 2) over all depth ranks. R is
// not used.
func sdcg(run Ranking, depth, R uint) float64 {
	log.Fatal("Unimplemented")

	gain := math.Pow(2, 1) - 1
	packed := uint(run)

	var dcg, norm float64
	for rank := uint(0); rank < depth; rank++ {
		if (packed>>(rank*2))&uint(3) != 0 {
			// This is a relevant document
			dcg += gain / (math.Log2(float64(rank + 2)))
		}
		norm += float64(1) / math.Log2(float64(rank+2))
	}
	return dcg / norm
}
Beispiel #20
0
// defaultLessBy reports whether t1 has a smaller scheduling weight than t2.
//
// The weight is the base-2 logarithm of the waiting time plus the manually
// assigned priority; the larger the final weight, the earlier the task is
// scheduled. With equal priorities, even a small difference in waiting time
// is reflected in the weight. When priorities differ by 1, the waiting time
// has to double for the final weights to be equal.
func (s *CrawlTaskSorter) defaultLessBy(t1, t2 *types.CrawlTask) bool {
	weight := func(task *types.CrawlTask) float64 {
		var waited int64 = s.Now - task.LastCrawlTime
		if waited == 0 {
			// Treat "no wait" as one unit so that the logarithm is 0.
			waited = 1
		}
		return math.Log2(float64(waited)) + float64(task.Priority)
	}

	w1 := weight(t1)
	w2 := weight(t2)
	return w1 < w2
}
Beispiel #21
0
// main reads the dictionary file named by the Dictionary flag, keeps the
// lines matching WordRegexp, prints entropy statistics (unless Quiet is set)
// and then prints NumPhrases phrases of WordsPerPhrase words each, chosen
// uniformly at random with crypto/rand.
//
// It exits with an error if the dictionary cannot be opened or read, or if
// no line of it matches WordRegexp.
func main() {
	flag.Parse()

	file, err := os.Open(*Dictionary)
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)

	totalWords := 0
	var words []string
	for scanner.Scan() {
		line := scanner.Text()
		if WordRegexp.MatchString(line) {
			words = append(words, line)
		}
		totalWords++
	}
	// Scan returns false on a read error as well as at end of file.
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}

	numWords := len(words)
	if numWords == 0 {
		// rand.Int panics when its upper bound is not positive.
		log.Fatalf("no usable words found in %s", *Dictionary)
	}
	numWordsBig := big.NewInt(int64(numWords))
	bitsPerWord := math.Log2(float64(numWords))
	bitsPerPhrase := bitsPerWord * float64(*WordsPerPhrase)
	totalBits := bitsPerPhrase - math.Log2(float64(*NumPhrases))

	if !*Quiet {
		fmt.Printf("%d possible words (of %d in %s).\n", numWords, totalWords, *Dictionary)
		fmt.Printf("%d random words per phrase.\n", *WordsPerPhrase)
		fmt.Printf("∴ %f bits of entropy per word.\n", bitsPerWord)
		fmt.Printf("∴ %f bits of entropy per phrase.\n", bitsPerPhrase)
		fmt.Printf("%d phrases to choose from.\n", *NumPhrases)
		fmt.Printf("∴ %f bits if you pick one phrase from this list.\n", totalBits)
		fmt.Println("---------------------------------------------------")
	}

	for i := 0; i < *NumPhrases; i++ {
		// Each phrase holds WordsPerPhrase words.
		phrase := make([]string, 0, *WordsPerPhrase)
		for j := 0; j < *WordsPerPhrase; j++ {
			randBig, err := rand.Int(rand.Reader, numWordsBig)
			if err != nil {
				log.Fatal(err)
			}
			phrase = append(phrase, words[randBig.Int64()])
		}
		fmt.Println(strings.Join(phrase, " "))
	}
}
Beispiel #22
0
// NextPowerOf2 returns the next power of 2 >= x.
//
// When IsPowerOf2 reports that x already is one, x is returned unchanged;
// otherwise the result is 2 raised to ceil(log2(x)).
func NextPowerOf2(x int) int {
	if !IsPowerOf2(x) {
		exponent := math.Ceil(math.Log2(float64(x)))
		return int(math.Pow(2, exponent))
	}
	return x
}
// isPowerOf2 reports whether n is a positive power of two (1, 2, 4, ...).
// Zero and negative numbers are not powers of two.
//
// The test is done on the integer itself: a power of two has exactly one bit
// set, so clearing its lowest set bit (n & (n-1)) leaves zero. A
// floating-point log2 test is not reliable for large n, because nearby
// non-powers such as 1<<53 + 1 round to an exact power of two.
func isPowerOf2(n int) bool {
	return n > 0 && n&(n-1) == 0
}
Beispiel #24
0
// FetchRemoteIps fetches remote ips from REMOTE_URL.
//
// The response is scanned for records of the form
//
//	apnic|CN|ipv4|1.94.0.0|131072|20100806|allocated
//	...
//
// and for each one an Ip is built from the start address, the prefix length
// 32 - floor(log2(count)) as a string, and cidr2mask of that prefix length.
//
// It panics if the request fails, the server answers with a status other
// than 200 OK, or the body cannot be read. Records whose address count is
// not a positive integer are skipped.
func FetchRemoteIps() (ips []Ip) {
	println("Fetching latest ip data from apnic.net, this may take a few minutes, please wait...")

	resp, err := http.Get(REMOTE_URL)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// An error page would otherwise be parsed and silently yield no records.
	if resp.StatusCode != http.StatusOK {
		panic("fetching " + REMOTE_URL + ": unexpected HTTP status " + resp.Status)
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}

	re := regexp.MustCompile(`apnic\|CN\|ipv4\|([\d\.]+)\|(\d+)\|`)
	for _, row := range re.FindAllSubmatch(body, -1) {
		ip := string(row[1])
		count, err := strconv.Atoi(string(row[2]))
		if err != nil || count <= 0 {
			// Out-of-range or zero count: log2 would not give a usable
			// prefix length.
			continue
		}
		cidr := 32 - int(math.Log2(float64(count)))
		cidrStr := strconv.Itoa(cidr)

		ips = append(ips, Ip{ip, cidrStr, cidr2mask(cidr)})
	}

	return ips
}
Beispiel #25
0
// convert2PhysicalPlan implements LogicalPlan convert2PhysicalPlan interface.
//
// It returns, in order, the plan info satisfying prop, the plan info without
// an ordering guarantee, the row count reported by the child, and an error.
// The cost of an explicit sort is modelled as
// count*log2(count)*cpuFactor + memoryFactor*count.
func (p *NewSort) convert2PhysicalPlan(prop requiredProperty) (*physicalPlanInfo, *physicalPlanInfo, uint64, error) {
	var err error
	// Reuse a result previously stored for this prop, if there is one
	// (see storePlanInfo at the end).
	sortedPlanInfo, unSortedPlanInfo, count := p.getPlanInfo(prop)
	if sortedPlanInfo != nil {
		return sortedPlanInfo, unSortedPlanInfo, count, nil
	}
	// Build the ordering this sort itself asks of its child. It can only be
	// expressed when every sort item is a plain column; otherwise selfProp is
	// reset to nil.
	selfProp := make(requiredProperty, 0, len(p.ByItems))
	for _, by := range p.ByItems {
		if col, ok := by.Expr.(*expression.Column); ok {
			selfProp = append(selfProp, &columnProp{col: col, desc: by.Desc})
		} else {
			selfProp = nil
			break
		}
	}
	sortedPlanInfo, unSortedPlanInfo, count, err = p.GetChildByIndex(0).(LogicalPlan).convert2PhysicalPlan(selfProp)
	if err != nil {
		return nil, nil, 0, errors.Trace(err)
	}
	cnt := float64(count)
	// NOTE(review): for count == 0 this evaluates 0 * log2(0) = 0 * -Inf,
	// which is NaN — confirm a zero row count cannot reach this point.
	sortCost := cnt*math.Log2(cnt)*cpuFactor + memoryFactor*cnt
	if len(selfProp) == 0 {
		// No ordering could be requested from the child: sort its unsorted
		// plan explicitly.
		sortedPlanInfo = addPlanToResponse(p, unSortedPlanInfo)
	} else if sortCost+unSortedPlanInfo.cost < sortedPlanInfo.cost {
		// Sorting the unsorted plan is cheaper than the child's sorted plan.
		// NOTE(review): the cost is written to the old sortedPlanInfo, which
		// the next line replaces — confirm the assignment is not lost.
		sortedPlanInfo.cost = sortCost + unSortedPlanInfo.cost
		sortedPlanInfo = addPlanToResponse(p, unSortedPlanInfo)
	}
	if matchProp(prop, selfProp) {
		// The sort's own ordering satisfies the caller: the sorted plan is
		// returned for both results.
		return sortedPlanInfo, sortedPlanInfo, count, nil
	}
	// Otherwise the sorted plan is only offered as the "unsorted" result, and
	// the sorted result is a placeholder with the maximum float64 cost.
	unSortedPlanInfo = sortedPlanInfo
	sortedPlanInfo = &physicalPlanInfo{cost: math.MaxFloat64}
	p.storePlanInfo(prop, sortedPlanInfo, unSortedPlanInfo, count)
	return sortedPlanInfo, unSortedPlanInfo, count, nil
}
Beispiel #26
0
// next advances the percentile iterator to the next reporting step and
// reports whether there is one. On a reporting step p.percentile holds the
// percentile being reported.
func (p *pIterator) next() bool {
	// All recorded counts have been covered: emit exactly one final step at
	// the 100th percentile, then stop.
	if !(p.countToIdx < p.h.totalCount) {
		if p.seenLastValue {
			return false
		}

		p.seenLastValue = true
		p.percentile = 100

		return true
	}

	// subBucketIdx == -1 presumably means the embedded iterator has not been
	// started yet — TODO confirm; advance it once and stop if it is empty.
	if p.subBucketIdx == -1 && !p.iterator.next() {
		return false
	}

	var done = false
	for !done {
		// Percentage of the total count covered up to the current position.
		currentPercentile := (100.0 * float64(p.countToIdx)) / float64(p.h.totalCount)
		if p.countAtIdx != 0 && p.percentileToIteratorTo <= currentPercentile {
			p.percentile = p.percentileToIteratorTo
			// Choose the next target: halfDistance is
			// 2^(trunc(log2(100/(100-target)))+1), so it doubles each time
			// the remaining distance to 100% halves, and the step to the
			// next target shrinks accordingly.
			halfDistance := math.Trunc(math.Pow(2, math.Trunc(math.Log2(100.0/(100.0-p.percentileToIteratorTo)))+1))
			percentileReportingTicks := float64(p.ticksPerHalfDistance) * halfDistance
			p.percentileToIteratorTo += 100.0 / percentileReportingTicks
			return true
		}
		// Target not reached here: move the embedded iterator on; the loop
		// ends when it is exhausted.
		done = !p.iterator.next()
	}

	return true
}
Beispiel #27
0
// BenchmarkLog2Float64ToU64 measures taking log2 of each power of two from
// 2^1 to 2^63 by converting to float64, calling math.Log2 and converting the
// result back to uint64.
func BenchmarkLog2Float64ToU64(b *testing.B) {
	for iter := 0; iter < b.N; iter++ {
		for shift := uint64(1); shift < 64; shift++ {
			power := uint64(1) << shift
			_ = uint64(math.Log2(float64(power)))
		}
	}
}
Beispiel #28
0
// Ent returns the entropy, in bits, for the given distribution.
// The distribution does not have to sum up to 1, for it will be normalized
// anyway: each bucket is divided by Sum(distribution).
//
// Buckets equal to zero are ignored. NaN is returned as soon as a negative
// bucket is encountered.
func Ent(distribution []float64) float64 {
	total := Sum(distribution)

	bits := 0.0
	for _, weight := range distribution {
		switch {
		case weight < 0.0:
			// Negative values are not allowed.
			return math.NaN()
		case weight == 0.0:
			// Zeros contribute nothing.
			continue
		}

		p := weight / total
		bits -= p * math.Log2(p)
	}

	return bits
}
Beispiel #29
0
// FlagsSlice returns a slice containing each distinct flag that
// is currently set and included as a part of the current value
// (bitmask) of flags.  Results are sorted in an ascending order.
// If there is an error, it will be of type *Error.
//
// When no flag is set the result is a slice holding the single value 0.
func (mgc *Magic) FlagsSlice() ([]int, error) {
	mgc.Lock()
	defer mgc.Unlock()
	runtime.KeepAlive(mgc.magic)

	switch {
	case mgc.cookie == nil:
		return []int{}, mgc.error()
	case mgc.flags == 0:
		return []int{0}, nil
	}

	// Split current value (bitmask) into a list of distinct flags (bits)
	// currently set, by repeatedly peeling off the highest power of two.
	var flags []int
	remaining := mgc.flags
	for remaining > 0 {
		exponent := int(math.Log2(float64(remaining)))
		highest := int(math.Pow(2, float64(exponent)))
		flags = append(flags, highest)
		remaining -= highest
	}
	sort.Ints(flags)
	return flags, nil
}
Beispiel #30
0
// GrayCode returns a slice of cnt strings holding the first cnt Gray codes
// (n XOR n>>1 for n = 0, 1, 2, ...) in binary, zero-padded to
// int(log2(cnt)) digits.
//
// Codes are produced two at a time, so for an odd cnt the last element is
// left as the empty string.
func GrayCode(cnt int) []string {
	width := int(math.Log2(float64(cnt)))

	var (
		unit    = big.NewInt(1)
		counter = big.NewInt(0) // n
		shifted = big.NewInt(0) // n >> 1, advanced once per pair
		code    = big.NewInt(0) // n XOR (n >> 1)
	)

	codes := make([]string, cnt)
	for pos := 0; pos+1 < cnt; pos += 2 {
		// Both members of a pair (2j, 2j+1) share the same n >> 1 == j.
		for _, slot := range [2]int{pos, pos + 1} {
			code.Xor(counter, shifted)
			codes[slot] = fmt.Sprintf("%0*b", width, code)
			counter.Add(counter, unit)
		}
		shifted.Add(shifted, unit)
	}

	return codes
}