func createDb(sourceDir string) { var err error var dbConn *mgo.Session var dbIndex, lastIndex = 0, 0 var dbName, tmp string var dbNames []string var admins []*adminDiv var features = []string{} var timezones = []string{} var countries = []string{} fmt.Println("Connecting...") dbutil.Panic = true dbConn, err = dbutil.ConnectToGlobal() dbNames, err = dbConn.DatabaseNames() if err != nil { panic(err) } for _, dbName = range dbNames { if strings.HasPrefix(dbName, "gn_") { dbIndex++ } } for { dbName = fmt.Sprintf("gn_%d", dbIndex) if stringutil.InSliceAt(dbNames, dbName) < 0 { break } else { dbIndex++ } } createDbCollection(dbConn, dbName, path.Join(sourceDir, "timeZones.txt"), "t", true, false, func(index int, rec []string) bson.M { var n = strings.Replace(rec[0], "_", " ", -1) timezones = append(timezones, n) return bson.M{"_id": index - 1, "n": n, "g": stringutil.ToFloat32(rec[1]), "d": stringutil.ToFloat32(rec[2]), "r": stringutil.ToFloat32(rec[3])} }) createDbCollection(dbConn, dbName, path.Join(sourceDir, "featureCodes_en.txt"), "f", false, false, func(index int, rec []string) bson.M { features = append(features, rec[0]) return bson.M{"_id": index, "n": rec[0], "t": rec[1], "d": rec[2]} }) createDbCollection(dbConn, dbName, path.Join(sourceDir, "countryInfo.txt"), "c", false, false, func(index int, rec []string) bson.M { /* #ISO ISO3 ISO-Numeric fips Country Capital Area(in sq km) Population Continent tld CurrencyCode CurrencyName Phone Postal Code Format Postal Code Regex Languages geonameid neighbours EquivalentFipsCode AD AND 020 AN Andorra Andorra la Vella 468 84000 EU .ad EUR Euro 376 AD### ^(?:AD)*(\d{3})$ ca 3041565 ES,FR AE ARE 784 AE United Arab Emirates Abu Dhabi 82880 4975593 AS .ae AED Dirham 971 ar-AE,fa,en,hi,ur 290557 SA,OM */ countries = append(countries, rec[0]) return bson.M{"_id": index, "i": rec[0], "i3": rec[1], "f": rec[2], "t": rec[4], "ca": rec[5], "co": rec[8], "d": rec[9], "cc": rec[10], "cn": rec[11], "p": rec[12], "l": stringutil.Split(rec[15], ","), "g": stringutil.ToInt(rec[16]), "n": stringutil.Split(rec[17], ",")} }) createDbCollection(dbConn, dbName, path.Join(sourceDir, "zip_allCountries.txt"), "z", false, true, func(index int, rec []string) bson.M { var an = []string{stringutil.Title(rec[3]), stringutil.Title(rec[5]), stringutil.Title(rec[7])} var ac = []string{rec[4], rec[6], rec[8]} var ll, err = numutil.NewDvec2(rec[10], rec[9]) var n = rec[2] var r = bson.M{"_id": index, "c": stringutil.InSliceAt(countries, rec[0]), "z": rec[1], "n": n} var words []string if (err == nil) && (ll.X >= geoutil.LonMin) && (ll.X <= geoutil.LonMax) && (ll.Y >= geoutil.LatMin) && (ll.Y <= geoutil.LatMax) { r["l"] = ll } else { return nil } r["a"], admins = findAdminIndex(admins, rec[0], ac[0], ac[1], ac[2], an) if len(n) > 0 { if n == strings.ToUpper(n) { n = stringutil.Title(n) } if words = stringutil.Split(n, " "); len(words) > 1 { n = "" for i, w := range words { if stringutil.InSliceAt(words, w) == i { n = stringutil.Concat(n, w, " ") } } r["n"] = n[0 : len(n)-1] } else { r["n"] = n } } return r }) for i, ad := range admins { dbutil.Insert(dbConn, dbName, "a", fixupAdminDiv(bson.M{"_id": i, "a": stringutil.NonEmpties(true, ad.ac1, ad.ac2, ad.ac3, ad.ac4), "n": ad.n}, countries)) } stringutil.ForEach(func(i int, s string) { createDbCollection(dbConn, dbName, path.Join(sourceDir, s), "a", false, false, func(index int, rec []string) bson.M { var a = stringutil.Split(rec[0], ".") var n, na = rec[1], rec[2] var r bson.M for _, ad := range admins { if (ad.ac1 == a[0]) && (ad.ac2 == a[1]) && ((len(a) == 2) || (ad.ac3 == a[2])) { return nil } } index += len(admins) if i == 0 { lastIndex = index } else { index = index + 1 + lastIndex } r = bson.M{"_id": index, "a": a, "g": stringutil.ToInt(rec[3]), "n": n, "na": na} return fixupAdminDiv(r, countries) }) }, "admin1CodesASCII.txt", "admin2Codes.txt") lastIndex = 0 stringutil.ForEach(func(i int, s string) { createDbCollection(dbConn, dbName, path.Join(sourceDir, s), "n", false, true, func(index int, rec []string) bson.M { /* 0 geonameid : integer id of record in geonames database 1 name : name of geographical point (utf8) varchar(200) 2 asciiname : name of geographical point in plain ascii characters, varchar(200) 3 alternatenames : alternatenames, comma separated varchar(5000) 4 latitude : latitude in decimal degrees (wgs84) 5 longitude : longitude in decimal degrees (wgs84) 6 feature class : see http://www.geonames.org/export/codes.html, char(1) 7 feature code : see http://www.geonames.org/export/codes.html, varchar(10) 8 country code : ISO-3166 2-letter country code, 2 characters 9 cc2 : alternate country codes, comma separated, ISO-3166 2-letter country code, 60 characters 10 admin1 code : fipscode (subject to change to iso code), see exceptions below, see file admin1Codes.txt for display names of this code; varchar(20) 11 admin2 code : code for the second administrative division, a county in the US, see file admin2Codes.txt; varchar(80) 12 admin3 code : code for third level administrative division, varchar(20) 13 admin4 code : code for fourth level administrative division, varchar(20) 17 timezone : the timezone id (see file timeZone.txt) */ var pos, c = -1, -1 var ll, err = numutil.NewDvec2(rec[5], rec[4]) var n, na = rec[1], rec[2] var an = stringutil.Without(stringutil.Split(rec[3], ","), false, n, na, "") var tz = strings.Replace(rec[17], "_", " ", -1) var r = bson.M{"g": stringutil.ToInt(rec[0])} if (err == nil) && (ll.X >= geoutil.LonMin) && (ll.X <= geoutil.LonMax) && (ll.Y >= geoutil.LatMin) && (ll.Y <= geoutil.LatMax) { r["l"] = ll } else { return nil } if i == 0 { lastIndex = index } else { index = index + 1 + lastIndex } r["_id"] = index if len(n) == 0 { n = na } if (len(n) == 0) && (len(an) > 0) { n = an[0] } if (len(na) > 0) && (strings.ToLower(na) == strings.ToLower(n)) { na = "" } an = stringutil.Without(an, false, n, na) if len(n) > 0 { r["n"] = n } if len(na) > 0 { r["na"] = na } if len(an) > 0 { r["an"] = an } if pos = stringutil.InSliceAt(timezones, tz); pos >= 0 { r["t"] = pos } if pos = stringutil.InSliceAt(countries, rec[8]); pos >= 0 { c = pos r["c"] = pos } else if len(rec[9]) > 0 { for _, cn := range stringutil.Split(rec[9], ",") { if pos = stringutil.InSliceAt(countries, cn); pos >= 0 { c = pos r["c"] = pos break } } } if c >= 0 { pos, _ = findAdminIndex(admins, countries[c], rec[10], rec[11], rec[12], nil) if pos >= 0 { r["a"] = pos } } if pos = stringutil.InSliceAt(features, stringutil.Concat(rec[6], ".", rec[7])); pos >= 0 { r["f"] = pos } return r }) }, "allCountries.txt", "null.txt") dbNames, err = dbConn.DatabaseNames() if err == nil { dbutil.Panic = false for _, tmp = range dbNames { if (tmp != dbName) && strings.HasPrefix(tmp, "gn_") { dbutil.DropDatabase(dbConn, tmp) } } } }
func createDbCollection(dbConn *mgo.Session, dbName string, sourceFilePath, collName string, skipFirst bool, hasGeoIndex bool, makeRec recMaker) { var fr *os.File var br *bufio.Reader var err error var rec []string var line []byte var isPrefix, isDvec2 bool var geoIndexDone = false var mr bson.M var ll numutil.Dvec2 var i, ri, rc = 0, 0, 0 fr, err = os.Open(sourceFilePath) if fr != nil { defer fr.Close() } if err != nil { panic(err) } if fr != nil { fmt.Println("Reading", sourceFilePath) br = bufio.NewReaderSize(fr, coreutil.Ifi(strings.HasSuffix(sourceFilePath, "allCountries.txt"), 1024*1024*1024, 1024*1024*72)) for rec = nil; err == nil; line, isPrefix, err = br.ReadLine() { if isPrefix || err != nil { fmt.Println(err) panic("readline") } else { rec = stringutil.Split(string(line), "\t") } if (rec != nil) && (len(rec) > 0) && !strings.HasPrefix(rec[0], "#") { if (i != 0) || !skipFirst { if mr = makeRec(i, rec); mr != nil { mrs[ri] = mr ri++ rc++ if hasGeoIndex { if ll, isDvec2 = mr["l"].(numutil.Dvec2); isDvec2 { mr["l"] = []float64{ll.X, ll.Y} if !geoIndexDone { dbutil.EnsureIndex(dbConn, dbName, collName, &mgo.Index{Key: []string{"@l"}, Bits: 32, Min: -180, Max: 181}) geoIndexDone = true } } } if (i % 250000) == 0 { fmt.Println("Read", i) } } } i++ } } for i = 0; i < rc; i = i + 4096 { if ri = i + 4096; ri > rc { ri = rc } dbutil.Insert(dbConn, dbName, collName, mrs[i:ri]...) if (i % (4096 * 50)) == 0 { fmt.Println("Insert", i) } } fmt.Println("Recs total: ", collName, rc) } }