예제 #1
0
// addHost records name as a node of the graph and links it to every entry of
// info.GossipPeerList: each peer is added to name's outbound set, and name is
// added to that peer's inbound set.
//
// Peers are resolved through hg.aliases using their lower-cased spelling.  A
// peer that has no alias yet is registered as its own canonical name (the
// lower-cased form), and the spelling as given is aliased to it too when the
// two differ.
func (hg *HostGraph) addHost(name string, info *SksNode) {
	// The host gets both edge sets up front, even if it lists no peers at all.
	if _, known := hg.outbound[name]; !known {
		hg.outbound[name] = btree.NewTree(btreeStringLess)
	}
	if _, known := hg.inbound[name]; !known {
		hg.inbound[name] = btree.NewTree(btreeStringLess)
	}

	for _, rawPeer := range info.GossipPeerList {
		key := strings.ToLower(rawPeer)
		peer, aliased := hg.aliases[key]
		if !aliased {
			// The peer is down and we have no node for it, but the outbound
			// link still exists, so it becomes its own canonical name.
			peer = key
			hg.aliases[key] = key
			if rawPeer != key {
				hg.aliases[rawPeer] = key
			}
		}

		hg.outbound[name].Insert(peer)
		if _, known := hg.inbound[peer]; !known {
			hg.inbound[peer] = btree.NewTree(btreeStringLess)
		}
		hg.inbound[peer].Insert(name)
	}
}
예제 #2
0
// NewCountrySet builds a CountrySet from a comma-separated list of countries
// such as "us,de".  Each item is upper-cased before being stored.
//
// Whitespace around an item is trimmed, and empty items (from an empty input,
// a doubled comma or a trailing comma) are skipped, so NewCountrySet("")
// returns an empty set rather than a set holding the empty string.
func NewCountrySet(s string) *CountrySet {
	cs := &CountrySet{ss: btree.NewTree(btreeStringLess)}
	for _, item := range strings.Split(s, ",") {
		country := strings.ToUpper(strings.TrimSpace(item))
		if country == "" {
			// strings.Split never returns an empty slice for a non-empty
			// separator: "" splits to [""], which is not a country.
			continue
		}
		cs.ss.Insert(country)
	}
	return cs
}
예제 #3
0
// fixOutbounds guarantees that every host mentioned in hg.inbound, whether as
// a key or as a member of one of the sets, also has an entry in hg.outbound,
// creating empty sets where one is missing.
//
// An inbound entry can exist without a matching outbound one because a server
// that is down is only known through the links other servers have to it.
// Filling the gaps here means code reading hg.outbound elsewhere does not
// have to deal with missing (nil) sets.
func (hg *HostGraph) fixOutbounds() {
	for target, sources := range hg.inbound {
		// The key is the host being linked to; it is the one that may never
		// have been registered as a node of its own.
		if _, ok := hg.outbound[target]; !ok {
			hg.outbound[target] = btree.NewTree(btreeStringLess)
		}
		// Also cover every host recorded as linking to target.
		for source := range sources.Data() {
			if _, ok := hg.outbound[source]; !ok {
				hg.outbound[source] = btree.NewTree(btreeStringLess)
			}
		}
	}
}
예제 #4
0
// AllPeersOf returns every host that name links to or is linked from, merged
// into one de-duplicated list.  The list is in the iteration order of a tree
// built with btreeHostLess (presumably sorted by that comparison).
//
// name is matched case-insensitively through hg.aliases; an unknown name
// yields an empty, non-nil slice.  A host that has an alias but no outbound
// or inbound set is treated as having no links in that direction.
func (hg *HostGraph) AllPeersOf(name string) []string {
	canonName, ok := hg.aliases[strings.ToLower(name)]
	if !ok {
		return []string{}
	}

	allPeers := btree.NewTree(btreeHostLess)
	// Use the two-value map lookup: a missing entry yields a zero value, and
	// calling Data on that would likely panic.
	if outs, ok := hg.outbound[canonName]; ok {
		for peer := range outs.Data() {
			allPeers.Insert(peer)
		}
	}
	if ins, ok := hg.inbound[canonName]; ok {
		for peer := range ins.Data() {
			allPeers.Insert(peer)
		}
	}

	sortedList := make([]string, 0, allPeers.Len())
	for peer := range allPeers.Data() {
		sortedList = append(sortedList, peer)
	}
	return sortedList
}
예제 #5
0
// apiIpValidPage is the HTTP handler that lists the server IP addresses
// currently considered valid, derived from the current persisted scan data.
//
// Recognised form parameters (the first four are switches; only their
// presence matters):
//
//	stats            include the statistics trace in the response
//	json             emit a JSON object instead of the line-based text format
//	textplain        with json: serve the JSON with the text/plain content type
//	proxies          drop servers that are not behind a web proxy
//	countries        comma-separated list; drop servers outside those countries
//	minimum_version  drop servers older than this version
//	threshold        positive integer overriding the computed key-count threshold
//
// The text format is a status line starting with "IP-Gen/1.1:" followed by
// key=value fields, then one IP per line, then a line holding a single ".".
// Any failure is reported in-band as status=INVALID with a reason, not through
// the HTTP status code (except for an unparsable form, which is a 400).
func apiIpValidPage(w http.ResponseWriter, req *http.Request) {
	if err := req.ParseForm(); err != nil {
		http.Error(w, "Failed to parse form information", http.StatusBadRequest)
		return
	}

	_, showStats := req.Form["stats"]
	_, emitJson := req.Form["json"]
	_, limitToProxies := req.Form["proxies"]
	var limitToCountries *CountrySet
	if _, ok := req.Form["countries"]; ok {
		limitToCountries = NewCountrySet(req.Form.Get("countries"))
	}

	// Stats lines are always collected; they are only emitted when showStats is set.
	statsList := make([]string, 0, 100)
	Statsf := func(s string, v ...interface{}) {
		statsList = append(statsList, fmt.Sprintf(s, v...))
	}

	var (
		abortMessage func(string)
		doShowStats  func()
		contentType  string
	)

	if emitJson {
		contentType = ContentTypeJson
		if _, ok := req.Form["textplain"]; ok {
			contentType = ContentTypeTextPlain
		}
		doShowStats = func() {
			b, err := json.Marshal(statsList)
			if err != nil {
				Log.Printf("Unable to JSON marshal stats: %s", err)
				return
			}
			fmt.Fprintf(w, "\"stats\": %s\n", b)
		}
		abortMessage = func(s string) {
			// The reason may be built from request-derived values (countries,
			// minimum version), so it is JSON-encoded rather than pasted
			// between quotes.  Marshalling a plain string cannot fail.
			reason, _ := json.Marshal(s)
			fmt.Fprintf(w, "{\n")
			if showStats {
				doShowStats()
				fmt.Fprintf(w, ", ")
			}
			fmt.Fprintf(w, `"status": { "status": "INVALID", "count": 0, "reason": %s }`, reason)
			fmt.Fprintf(w, "\n}\n")
		}
	} else {
		contentType = ContentTypeTextPlain
		doShowStats = func() {
			for _, l := range statsList {
				fmt.Fprintf(w, "STATS: %s\n", l)
			}
		}
		abortMessage = func(s string) {
			if showStats {
				doShowStats()
			}
			fmt.Fprintf(w, "IP-Gen/1.1: status=INVALID count=0 reason=%s\n.\n", s)
		}
	}
	w.Header().Set("Content-Type", contentType)

	persisted := GetCurrentPersisted()
	if persisted == nil {
		abortMessage("first_scan")
		return
	}

	var minimumVersion *SksVersion
	if mvReq := req.Form.Get("minimum_version"); mvReq != "" {
		// NOTE(review): if NewSksVersion returns nil for unparsable input the
		// version filter is silently disabled — confirm that is intended.
		minimumVersion = NewSksVersion(mvReq)
	}

	var (
		// for stats, we avoid double-weighting dual-stack boxes by working with
		// just one IP per box, but then later deal with all the IPs for filtering.
		ips_one_per_server = make(map[string]int, len(persisted.HostMap)*2)
		ips_all            = make(map[string]int, len(persisted.HostMap)*2)
	)

	var (
		count_servers_1010            int
		count_servers_too_old         int
		count_servers_unwanted_server int
		count_servers_wrong_country   int
		ips_skip_1010                 btree.SortedSet = btree.NewTree(btreeStringLess)
		ips_too_old                   btree.SortedSet = btree.NewTree(btreeStringLess)
		ips_unwanted_server           btree.SortedSet = btree.NewTree(btreeStringLess)
		ips_wrong_country             btree.SortedSet = btree.NewTree(btreeStringLess)
	)

	// Pass 1: classify every server.  Servers flagged here still contribute to
	// the statistics below; their IPs are only removed by the filters at the end.
	for _, name := range persisted.Sorted {
		node := persisted.HostMap[name]
		var (
			skip_this_1010     = false
			skip_this_age      = false
			skip_this_nonproxy = false
			skip_this_country  = false
		)
		if node.Keycount <= 1 {
			Statsf("dropping server <%s> with %d keys", name, node.Keycount)
			continue
		}

		if string(node.Version) == "1.0.10" {
			skip_this_1010 = true
			count_servers_1010++
		}

		if minimumVersion != nil {
			thisVersion := NewSksVersion(node.Version)
			if thisVersion == nil || !thisVersion.IsAtLeast(minimumVersion) {
				skip_this_age = true
				count_servers_too_old++
			}
		}

		if limitToProxies && node.ViaHeader == "" {
			// Only the product name before the first "/" of the Server header is compared.
			server := strings.ToLower(strings.SplitN(node.ServerHeader, "/", 2)[0])
			if _, ok := serverHeadersNative[server]; ok {
				skip_this_nonproxy = true
				count_servers_unwanted_server++
			}
		}

		if limitToCountries != nil {
			// A server is kept if any one of its IPs is in a wanted country.
			var keep bool
			for _, ip := range node.IpList {
				geo, ok := persisted.IPCountryMap[ip]
				if ok && limitToCountries.HasCountry(geo) {
					keep = true
					break
				}
			}
			if !keep {
				skip_this_country = true
				count_servers_wrong_country++
			}
		}

		if len(node.IpList) > 0 {
			ips_one_per_server[node.IpList[0]] = node.Keycount
			for _, ip := range node.IpList {
				ips_all[ip] = node.Keycount
				if skip_this_1010 {
					ips_skip_1010.Insert(ip)
				}
				if skip_this_age {
					ips_too_old.Insert(ip)
				}
				if skip_this_nonproxy {
					ips_unwanted_server.Insert(ip)
				}
				if skip_this_country {
					ips_wrong_country.Insert(ip)
				}
			}
		}
	}

	// We want to discard statistic-distorting outliers, then of what remains,
	// discard those too far away from "normal", but we really want the "best"
	// servers to be our guide, so 1 std-dev of the second-highest remaining
	// value should be safe; in fact, we'll hardcode a limit of how far below.
	// To discard, find mode size (knowing that value can be split across two
	// buckets) and discard more than five stddevs from mode.  The bucketing
	// should be larger than the distance from desired value so that the mode
	// is only split across two buckets, if we assume enough servers that a
	// small number will be down, most will be valid-if-large-enough, so that
	// splitting the count across two buckets won't let the third-best value win

	// This is barely-modified from Python, just enough to translate language, not idioms
	var buckets = make(map[int][]int, 40)
	for _, count := range ips_one_per_server {
		bucket := int(count / kBUCKET_SIZE)
		if _, ok := buckets[bucket]; !ok {
			buckets[bucket] = make([]int, 0, 20)
		}
		buckets[bucket] = append(buckets[bucket], count)
	}
	if len(buckets) == 0 {
		abortMessage("broken_no_buckets")
		return
	}

	// NOTE(review): when two buckets tie for the most entries, the winner
	// depends on map iteration order and so can vary between requests.
	var largest_bucket int
	var largest_bucket_len int
	for k := range buckets {
		if len(buckets[k]) > largest_bucket_len {
			largest_bucket = k
			largest_bucket_len = len(buckets[k])
		}
	}

	// First pass statistics: mean and (population) standard deviation of the
	// largest bucket, giving bounds of five standard deviations either side.
	first_n := len(buckets[largest_bucket])
	var first_sum int
	for _, v := range buckets[largest_bucket] {
		first_sum += v
	}
	first_mean := float64(first_sum) / float64(first_n)
	var first_sd float64
	for _, v := range buckets[largest_bucket] {
		d := float64(v) - first_mean
		first_sd += d * d
	}
	first_sd = math.Sqrt(first_sd / float64(first_n))
	first_bounds_min := int(first_mean - 5*first_sd)
	first_bounds_max := int(first_mean + 5*first_sd)

	first_ips_list := make([]string, 0, len(ips_one_per_server))
	for ip := range ips_one_per_server {
		if first_bounds_min <= ips_all[ip] && ips_all[ip] <= first_bounds_max {
			first_ips_list = append(first_ips_list, ip)
		}
	}
	first_ips_alllist := make([]string, 0, len(ips_all))
	for ip := range ips_all {
		if first_bounds_min <= ips_all[ip] && ips_all[ip] <= first_bounds_max {
			first_ips_alllist = append(first_ips_alllist, ip)
		}
	}

	// Second pass statistics: mean and standard deviation over the
	// one-IP-per-server entries that survived the first bounds.
	var second_mean, second_sd float64
	first_ips := make(map[string]int, len(first_ips_list))
	for _, ip := range first_ips_list {
		first_ips[ip] = ips_all[ip]
		second_mean += float64(ips_all[ip])
	}
	first_ips_all := make(map[string]int, len(first_ips_alllist))
	for _, ip := range first_ips_alllist {
		first_ips_all[ip] = ips_all[ip]
	}
	second_mean /= float64(len(first_ips_list))
	for _, v := range first_ips {
		d := float64(v) - second_mean
		second_sd += d * d
	}
	second_sd = math.Sqrt(second_sd / float64(len(first_ips_list)))

	if showStats {
		Statsf("have %d servers in %d buckets (%d ips total)", len(ips_one_per_server), len(buckets), len(ips_all))
		bucket_sizes := make([]int, 0, len(buckets))
		for k := range buckets {
			bucket_sizes = append(bucket_sizes, k)
		}
		sort.Ints(bucket_sizes)
		for _, b := range bucket_sizes {
			Statsf("%6d: %s", b, strings.Repeat("*", len(buckets[b])))
		}
		Statsf("largest bucket is %d with %d entries", largest_bucket, first_n)
		Statsf("bucket size %d means bucket %d is [%d, %d)", kBUCKET_SIZE, largest_bucket,
			kBUCKET_SIZE*largest_bucket, kBUCKET_SIZE*(largest_bucket+1))
		Statsf("largest bucket: mean=%f sd=%f", first_mean, first_sd)
		Statsf("first bounds: [%d, %d]", first_bounds_min, first_bounds_max)
		Statsf("have %d servers within bounds, mean value %f sd=%f", len(first_ips_list), second_mean, second_sd)
	}

	// With nothing inside the bounds the mean is NaN, which the sanity check
	// below does not catch (NaN < x is false), and indexing the empty
	// candidate list further down would panic.
	if len(first_ips) == 0 {
		Statsf("no servers within first bounds [%d, %d]", first_bounds_min, first_bounds_max)
		abortMessage("broken_data")
		return
	}

	if second_mean < float64(*flKeysSanityMin) {
		Statsf("mean %f < %d", second_mean, *flKeysSanityMin)
		abortMessage("broken_data")
		return
	}

	// The threshold is based on the second-largest key count within bounds
	// (or the only one, if there is just one), lowered by the daily jitter
	// allowance plus one standard deviation.
	threshold_base_index := len(first_ips) - 2
	if threshold_base_index < 0 {
		threshold_base_index = 0
	}
	threshold_candidates := make([]int, 0, len(first_ips))
	for _, count := range first_ips {
		threshold_candidates = append(threshold_candidates, count)
	}
	sort.Ints(threshold_candidates)
	threshold := threshold_candidates[threshold_base_index] - (*flKeysDailyJitter + int(second_sd))

	if showStats {
		Statsf("Second largest count within bounds: %d", threshold_candidates[threshold_base_index])
		Statsf("threshold: %d", threshold)
	}

	if nt, ok := req.Form["threshold"]; ok && len(nt) > 0 {
		// Non-numeric or non-positive overrides are ignored.
		if i, convErr := strconv.Atoi(nt[0]); convErr == nil && i > 0 {
			Statsf("Overriding threshold from CGI parameter; %d -> %d", threshold, i)
			threshold = i
		}
	}

	ips := make([]string, 0, len(first_ips_all))
	for ip, count := range first_ips_all {
		if count >= threshold {
			ips = append(ips, ip)
		}
	}
	if len(ips) == 0 {
		Statsf("No IPs above threshold %d", threshold)
		abortMessage("threshold_too_high")
		return
	}

	// filterOut returns the candidates that are not in eliminate and records a
	// stats line saying how many of the eliminated IPs were already absent
	// from candidates.
	filterOut := func(rationale string, eliminate btree.SortedSet, eliminate_server_count int, candidates []string) []string {
		alreadyDropped := btree.NewTree(btreeStringLess)
		for ip := range eliminate.Data() {
			alreadyDropped.Insert(ip)
		}
		for _, ip := range candidates {
			alreadyDropped.Remove(ip)
		}
		kept := make([]string, 0, len(candidates))
		for _, ip := range candidates {
			if !eliminate.Contains(ip) {
				kept = append(kept, ip)
			}
		}
		Statsf("dropping all %d servers %s, for %d possible IPs but %d of those already dropped",
			eliminate_server_count, rationale, eliminate.Len(), alreadyDropped.Len())
		return kept
	}

	ips = filterOut("running version v1.0.10", ips_skip_1010, count_servers_1010, ips)
	if len(ips) == 0 {
		abortMessage("No_servers_left_after_v1.0.10_filter")
		return
	}

	if minimumVersion != nil {
		ips = filterOut(fmt.Sprintf("running version < v%s", minimumVersion), ips_too_old, count_servers_too_old, ips)
		if len(ips) == 0 {
			abortMessage(fmt.Sprintf("No_servers_left_after_minimum_version_filter_(v%s)", minimumVersion))
			return
		}
	}

	if limitToCountries != nil {
		ips = filterOut(fmt.Sprintf("not in countries [%s]", limitToCountries), ips_wrong_country, count_servers_wrong_country, ips)
		if len(ips) == 0 {
			abortMessage(fmt.Sprintf("No_servers_left_after_country_filter_[%s]", limitToCountries))
			return
		}
	}

	if limitToProxies {
		ips = filterOut("not behind a web-proxy", ips_unwanted_server, count_servers_unwanted_server, ips)
		if len(ips) == 0 {
			abortMessage("No_servers_left_after_proxies_filter")
			return
		}
	}

	//TODO: change now to be the time the scan finished
	timestamp := time.Now().UTC().Format("2006-01-02T15:04:05") + "Z"
	count := len(ips)
	Log.Printf("ip-valid: Yielding %d of %d values", count, len(ips_all))

	// The tags are public statements; history:
	//   skip 1.0.10 -> skip_1010, because of lookup problems biting gnupg
	//   alg_1 used a fixed threshold (too small to deal with jitter)
	//   alg_2 used stddev+jitter
	//   alg_3 fixed maximum bucket selection (was a code bug)
	//   alg_4 stopped double-counting servers with multiple IP addresses
	//   alg_5 keep 1.0.10 servers for long enough to calculate stats, drop afterwards
	statusD := make(map[string]interface{}, 16)
	statusD["status"] = "COMPLETE"
	statusD["count"] = count
	statusD["tags"] = []string{"skip_1010", "alg_5"}
	if minimumVersion != nil {
		statusD["minimum_version"] = minimumVersion.String()
	}
	if limitToProxies {
		statusD["proxies"] = "1"
	}
	if limitToCountries != nil {
		statusD["countries"] = limitToCountries.String()
	}
	statusD["minimum"] = threshold
	statusD["collected"] = timestamp

	if emitJson {
		// Encode before writing anything, so a failure cannot leave a
		// half-written JSON object on the wire.
		bIps, err := json.Marshal(ips)
		if err != nil {
			Log.Printf("Unable to JSON marshal ips: %s", err)
			abortMessage("broken_json")
			return
		}
		bStatus, err := json.Marshal(statusD)
		if err != nil {
			Log.Printf("Unable to JSON marshal status: %s", err)
			abortMessage("broken_json")
			return
		}
		fmt.Fprintf(w, "{\n")
		if showStats {
			doShowStats()
			fmt.Fprintf(w, ", ")
		}
		fmt.Fprintf(w, "\"status\": %s,\n\"ips\": %s\n}\n", bStatus, bIps)
		return
	}

	if showStats {
		doShowStats()
	}
	// Map iteration order is random; sort the keys so the status line has the
	// same field order on every request.
	statusKeys := make([]string, 0, len(statusD))
	for k := range statusD {
		statusKeys = append(statusKeys, k)
	}
	sort.Strings(statusKeys)

	fmt.Fprintf(w, "IP-Gen/1.1:")
	for _, k := range statusKeys {
		var vstr string
		switch tv := statusD[k].(type) {
		case int:
			vstr = strconv.Itoa(tv)
		case []string:
			vstr = strings.Join(tv, ",")
		default:
			vstr = fmt.Sprintf("%s", tv)
		}
		fmt.Fprintf(w, " %s=%s", k, vstr)
	}
	fmt.Fprintf(w, "\n")
	for _, ip := range ips {
		fmt.Fprintf(w, "%s\n", ip)
	}
	fmt.Fprintf(w, ".\n")
}