// Reverse geocode func (g *GeoBed) ReverseGeocode(lat float64, lng float64) GeobedCity { c := GeobedCity{} gh := geohash.Encode(lat, lng) // This is produced with empty lat/lng values - don't look for anything. if gh == "7zzzzzzzzzzz" { return c } // Note: All geohashes are going to be 12 characters long. Even if the precision on the lat/lng isn't great. The geohash package will center things. // Obviously lat/lng like 37, -122 is a guess. That's no where near the resolution of a city. Though we're going to allow guesses. mostMatched := 0 matched := 0 for k, v := range g.c { // check first two characters to reduce the number of loops if v.Geohash[0] == gh[0] && v.Geohash[1] == gh[1] { matched = 2 for i := 2; i <= len(gh); i++ { //log.Println(gh[0:i]) if v.Geohash[0:i] == gh[0:i] { matched++ } } // tie breakers go to city with larger population (NOTE: There's still a chance that the next pass will uncover a better match) if matched == mostMatched && g.c[k].Population > c.Population { c = g.c[k] // log.Println("MATCHES") // log.Println(matched) // log.Println("CITY") // log.Println(c.City) // log.Println("POPULATION") // log.Println(c.Population) } if matched > mostMatched { c = g.c[k] mostMatched = matched } } } return c }
// Unzips the data sets and loads the data. func (g *GeoBed) loadDataSets() { locationDedupeIdx = make(map[string]bool) for _, f := range dataSetFiles { // This one is zipped if f["id"] == "geonamesCities1000" { rz, err := zip.OpenReader(f["path"]) if err != nil { log.Fatal(err) } defer rz.Close() for _, uF := range rz.File { fi, err := uF.Open() if err != nil { log.Fatal(err) } defer fi.Close() // Geonames uses a tab delineated format and it's not even consistent. No CSV reader that I've found for Go can understand this. // I'm not expecting any reader to either because it's an invalid CSV to be frank. However, we can still split up each row by \t scanner := bufio.NewScanner(fi) scanner.Split(bufio.ScanLines) i := 1 for scanner.Scan() { i++ // So regexp, sadly, must be used (well, unless I wanted parse each string byte by byte, pushing each into a buffer to append to a slice until a tab is reached, etc.). // But I'd have to also then put in a condition if the next byte was a \t rune, then append an empty string, etc. This just, for now, seems nicer (easier). // This is only an import/update, so it shouldn't be an issue for performance. If it is, then I'll look into other solutions. fields := regexp.MustCompile("\t").Split(scanner.Text(), 19) // NOTE: Now using a combined GeobedCity struct since not all data sets have the same fields. // Plus, the entire point was to geocode forward and reverse. Bonus information like elevation and such is just superfluous. // Leaving it here because it may be configurable... If options are passed to NewGeobed() then maybe Geobed can simply be a Geonames search. // Don't even load in MaxMind data...And if that's the case, maybe that bonus information is desired. if len(fields) == 19 { //id, _ := strconv.Atoi(fields[0]) lat, _ := strconv.ParseFloat(fields[4], 64) lng, _ := strconv.ParseFloat(fields[5], 64) pop, _ := strconv.Atoi(fields[14]) //elv, _ := strconv.Atoi(fields[15]) //dem, _ := strconv.Atoi(fields[16]) gh := geohash.Encode(lat, lng) // This is produced with empty lat/lng values - don't store it. if gh == "7zzzzzzzzzzz" { gh = "" } var c GeobedCity c.City = strings.Trim(string(fields[1]), " ") c.CityAlt = string(fields[3]) c.Country = string(fields[8]) c.Region = string(fields[10]) c.Latitude = lat c.Longitude = lng c.Population = int32(pop) c.Geohash = gh // Don't include entries without a city name. If we want to geocode the centers of countries and states, then we can do that faster through other means. if len(c.City) > 0 { g.c = append(g.c, c) } } } } } // ...And this one is Gzipped (and this one may have worked with the CSV package, but parse it the same way as the others line by line) if f["id"] == "maxmindWorldCities" { // It also has a lot of dupes maxMindCityDedupeIdx = make(map[string][]string) fi, err := os.Open(f["path"]) if err != nil { log.Println(err) } defer fi.Close() fz, err := gzip.NewReader(fi) if err != nil { log.Println(err) } defer fz.Close() scanner := bufio.NewScanner(fz) scanner.Split(bufio.ScanLines) i := 1 for scanner.Scan() { i++ t := scanner.Text() fields := strings.Split(t, ",") if len(fields) == 7 { var b bytes.Buffer b.WriteString(fields[0]) // country b.WriteString(fields[3]) // region b.WriteString(fields[1]) // city idx := b.String() b.Reset() maxMindCityDedupeIdx[idx] = fields } } // Loop the map of fields after dupes have been removed (about 1/5th less... 2.6m vs 3.1m inreases lookup performance). for _, fields := range maxMindCityDedupeIdx { if fields[0] != "" && fields[0] != "0" { if fields[2] != "AccentCity" { pop, _ := strconv.Atoi(fields[4]) lat, _ := strconv.ParseFloat(fields[5], 64) lng, _ := strconv.ParseFloat(fields[6], 64) // MaxMind's data set is a bit dirty. I've seen city names surrounded by parenthesis in a few places. cn := strings.Trim(string(fields[2]), " ") cn = strings.Trim(cn, "( )") // Don't take any city names with erroneous punctuation either. if strings.Contains(cn, "!") || strings.Contains(cn, "@") { continue } gh := geohash.Encode(lat, lng) // This is produced with empty lat/lng values - don't store it. if gh == "7zzzzzzzzzzz" { gh = "" } // If the geohash was seen before... _, ok := locationDedupeIdx[gh] if !ok { locationDedupeIdx[gh] = true var c GeobedCity c.City = cn c.Country = toUpper(string(fields[0])) c.Region = string(fields[3]) c.Latitude = lat c.Longitude = lng c.Population = int32(pop) c.Geohash = gh // Don't include entries without a city name. If we want to geocode the centers of countries and states, then we can do that faster through other means. if len(c.City) > 0 && len(c.Country) > 0 { g.c = append(g.c, c) } } } } } // Clear out the temrporary index (set to nil, it does get re-created) so that Go can garbage collect it at some point whenever it feels the need. maxMindCityDedupeIdx = nil locationDedupeIdx = nil } // ...And this one is just plain text if f["id"] == "geonamesCountryInfo" { fi, err := os.Open(f["path"]) if err != nil { log.Fatal(err) } defer fi.Close() scanner := bufio.NewScanner(fi) scanner.Split(bufio.ScanLines) i := 1 for scanner.Scan() { t := scanner.Text() // There are a bunch of lines in this file that are comments, they start with # if string(t[0]) != "#" { i++ fields := regexp.MustCompile("\t").Split(t, 19) if len(fields) == 19 { if fields[0] != "" && fields[0] != "0" { isoNumeric, _ := strconv.Atoi(fields[2]) area, _ := strconv.Atoi(fields[6]) pop, _ := strconv.Atoi(fields[7]) gid, _ := strconv.Atoi(fields[16]) var ci CountryInfo ci.ISO = string(fields[0]) ci.ISO3 = string(fields[1]) ci.ISONumeric = int16(isoNumeric) ci.Fips = string(fields[3]) ci.Country = string(fields[4]) ci.Capital = string(fields[5]) ci.Area = int32(area) ci.Population = int32(pop) ci.Continent = string(fields[8]) ci.Tld = string(fields[9]) ci.CurrencyCode = string(fields[10]) ci.CurrencyName = string(fields[11]) ci.Phone = string(fields[12]) ci.PostalCodeFormat = string(fields[13]) ci.PostalCodeRegex = string(fields[14]) ci.Languages = string(fields[15]) ci.GeonameId = int32(gid) ci.Neighbours = string(fields[17]) ci.EquivalentFipsCode = string(fields[18]) g.co = append(g.co, ci) } } } } } } // Sort []GeobedCity by city names to help with binary search (the City field is the most searched upon field and the matching names can be easily filtered down from there). sort.Sort(g.c) //debug //log.Println("TOTAL RECORDS:") //log.Println(len(g.c)) // Index the locations of city names in the g.c []GeoCity slice. This way when searching the range can be limited so it will be faster. cityNameIdx = make(map[string]int) for k, v := range g.c { // Get the index key for the first character of the city name. ik := toLower(string(v.City[0])) if val, ok := cityNameIdx[ik]; ok { // If this key number is greater than what was previously recorded, then set it as the new indexed key. if val < k { cityNameIdx[ik] = k } } else { // If the index key has not yet been set for this value, then set it. cityNameIdx[ik] = k } // Get the index key for the first two characters of the city name. // if len(v.CityLower) >= 2 { // ik2 := v.CityLower[0:2] // if val, ok := cityNameIdx[ik2]; ok { // // If this key number is greater than what was previously recorded, then set it as the new indexed key. // if val < k { // cityNameIdx[ik2] = k // } // } else { // // If the index key has not yet been set for this value, then set it. // cityNameIdx[ik2] = k // } // } } }