Пример #1
0
// createDb imports the GeoNames dump files found in sourceDir into a newly
// named "gn_<N>" database and, once every collection has been written, drops
// all other databases whose name starts with "gn_".
//
// Collections written (all via createDbCollection unless noted):
//
//	"t"  time zones        from timeZones.txt (first record skipped)
//	"f"  feature codes     from featureCodes_en.txt
//	"c"  countries         from countryInfo.txt
//	"z"  postal codes      from zip_allCountries.txt (geo-indexed)
//	"a"  admin divisions   collected while reading postal codes (inserted
//	                       directly), then admin1CodesASCII.txt and
//	                       admin2Codes.txt
//	"n"  place names       from allCountries.txt and null.txt (geo-indexed)
//
// The function panics if connecting or listing the database names fails.
func createDb(sourceDir string) {
	var err error
	var dbConn *mgo.Session
	var dbIndex, lastIndex = 0, 0
	var dbName, tmp string
	var dbNames []string
	var admins []*adminDiv
	var features = []string{}
	var timezones = []string{}
	var countries = []string{}
	fmt.Println("Connecting...")
	// NOTE(review): presumably makes the dbutil helpers panic on failure
	// during the import; it is switched off again before the cleanup below.
	dbutil.Panic = true
	dbConn, err = dbutil.ConnectToGlobal()
	if err != nil {
		// Previously this error was overwritten by the DatabaseNames call
		// below without ever being inspected.
		panic(err)
	}
	dbNames, err = dbConn.DatabaseNames()
	if err != nil {
		panic(err)
	}
	// Start numbering at the count of existing "gn_" databases ...
	for _, dbName = range dbNames {
		if strings.HasPrefix(dbName, "gn_") {
			dbIndex++
		}
	}
	// ... and advance until the resulting name is not taken yet.
	for {
		dbName = fmt.Sprintf("gn_%d", dbIndex)
		if stringutil.InSliceAt(dbNames, dbName) < 0 {
			break
		}
		dbIndex++
	}
	// Time zones: names are stored with underscores replaced by spaces and are
	// remembered in timezones so later passes can refer to them by position.
	createDbCollection(dbConn, dbName, path.Join(sourceDir, "timeZones.txt"), "t", true, false, func(index int, rec []string) bson.M {
		var n = strings.Replace(rec[0], "_", " ", -1)
		timezones = append(timezones, n)
		// index - 1 because the first record is skipped for this file.
		return bson.M{"_id": index - 1, "n": n, "g": stringutil.ToFloat32(rec[1]), "d": stringutil.ToFloat32(rec[2]), "r": stringutil.ToFloat32(rec[3])}
	})
	// Feature codes: remembered in features for positional lookup later.
	createDbCollection(dbConn, dbName, path.Join(sourceDir, "featureCodes_en.txt"), "f", false, false, func(index int, rec []string) bson.M {
		features = append(features, rec[0])
		return bson.M{"_id": index, "n": rec[0], "t": rec[1], "d": rec[2]}
	})
	// Countries: the ISO code (field 0) is remembered in countries.
	createDbCollection(dbConn, dbName, path.Join(sourceDir, "countryInfo.txt"), "c", false, false, func(index int, rec []string) bson.M {
		/*
			#ISO	ISO3	ISO-Numeric	fips	Country					Capital				Area(in sq km)	Population	Continent	tld	CurrencyCode	CurrencyName	Phone	Postal Code Format	Postal Code Regex	Languages			geonameid	neighbours	EquivalentFipsCode
			AD		AND		020			AN		Andorra					Andorra la Vella	468				84000		EU			.ad	EUR				Euro			376		AD###				^(?:AD)*(\d{3})$	ca					3041565		ES,FR
			AE		ARE		784			AE		United Arab Emirates	Abu Dhabi			82880			4975593		AS			.ae	AED				Dirham			971												ar-AE,fa,en,hi,ur	290557		SA,OM
		*/
		countries = append(countries, rec[0])
		return bson.M{"_id": index, "i": rec[0], "i3": rec[1], "f": rec[2], "t": rec[4], "ca": rec[5], "co": rec[8], "d": rec[9], "cc": rec[10], "cn": rec[11], "p": rec[12], "l": stringutil.Split(rec[15], ","), "g": stringutil.ToInt(rec[16]), "n": stringutil.Split(rec[17], ",")}
	})
	// Postal codes: records without a parseable, in-range coordinate are
	// dropped. Admin divisions referenced by the record are resolved through
	// findAdminIndex, which also hands back the (possibly updated) admins slice.
	createDbCollection(dbConn, dbName, path.Join(sourceDir, "zip_allCountries.txt"), "z", false, true, func(index int, rec []string) bson.M {
		var an = []string{stringutil.Title(rec[3]), stringutil.Title(rec[5]), stringutil.Title(rec[7])}
		var ac = []string{rec[4], rec[6], rec[8]}
		// Field 10 is passed first and field 9 second; the result's X is
		// checked against the longitude bounds and Y against the latitude bounds.
		var ll, err = numutil.NewDvec2(rec[10], rec[9])
		var n = rec[2]
		var r = bson.M{"_id": index, "c": stringutil.InSliceAt(countries, rec[0]), "z": rec[1], "n": n}
		var words []string
		if (err == nil) && (ll.X >= geoutil.LonMin) && (ll.X <= geoutil.LonMax) && (ll.Y >= geoutil.LatMin) && (ll.Y <= geoutil.LatMax) {
			r["l"] = ll
		} else {
			return nil
		}
		r["a"], admins = findAdminIndex(admins, rec[0], ac[0], ac[1], ac[2], an)
		if len(n) > 0 {
			// All-uppercase names are converted with stringutil.Title.
			if n == strings.ToUpper(n) {
				n = stringutil.Title(n)
			}
			if words = stringutil.Split(n, " "); len(words) > 1 {
				// Rebuild the name keeping only the first occurrence of each word.
				n = ""
				for i, w := range words {
					if stringutil.InSliceAt(words, w) == i {
						n = stringutil.Concat(n, w, " ")
					}
				}
				// Drop the last byte (presumably the trailing separator).
				r["n"] = n[0 : len(n)-1]
			} else {
				r["n"] = n
			}
		}
		return r
	})
	// Insert the admin divisions gathered above; their ids are their positions
	// in admins.
	for i, ad := range admins {
		dbutil.Insert(dbConn, dbName, "a", fixupAdminDiv(bson.M{"_id": i, "a": stringutil.NonEmpties(true, ad.ac1, ad.ac2, ad.ac3, ad.ac4), "n": ad.n}, countries))
	}
	// Add admin divisions from the admin code files that are not already
	// present in admins.
	stringutil.ForEach(func(i int, s string) {
		createDbCollection(dbConn, dbName, path.Join(sourceDir, s), "a", false, false, func(index int, rec []string) bson.M {
			var a = stringutil.Split(rec[0], ".")
			var n, na = rec[1], rec[2]
			var r bson.M
			for _, ad := range admins {
				if (ad.ac1 == a[0]) && (ad.ac2 == a[1]) && ((len(a) == 2) || (ad.ac3 == a[2])) {
					return nil
				}
			}
			// Shift ids past those used by the loop over admins above.
			index += len(admins)
			if i == 0 {
				// First file: remember the last id handed out ...
				lastIndex = index
			} else {
				// ... so ids of the following file continue after it.
				index = index + 1 + lastIndex
			}
			r = bson.M{"_id": index, "a": a, "g": stringutil.ToInt(rec[3]), "n": n, "na": na}
			return fixupAdminDiv(r, countries)
		})
	}, "admin1CodesASCII.txt", "admin2Codes.txt")
	lastIndex = 0
	// Place names.
	stringutil.ForEach(func(i int, s string) {
		createDbCollection(dbConn, dbName, path.Join(sourceDir, s), "n", false, true, func(index int, rec []string) bson.M {
			/*
				0	geonameid         : integer id of record in geonames database
				1	name              : name of geographical point (utf8) varchar(200)
				2	asciiname         : name of geographical point in plain ascii characters, varchar(200)
				3	alternatenames    : alternatenames, comma separated varchar(5000)
				4	latitude          : latitude in decimal degrees (wgs84)
				5	longitude         : longitude in decimal degrees (wgs84)
				6	feature class     : see http://www.geonames.org/export/codes.html, char(1)
				7	feature code      : see http://www.geonames.org/export/codes.html, varchar(10)
				8	country code      : ISO-3166 2-letter country code, 2 characters
				9	cc2               : alternate country codes, comma separated, ISO-3166 2-letter country code, 60 characters
				10	admin1 code       : fipscode (subject to change to iso code), see exceptions below, see file admin1Codes.txt for display names of this code; varchar(20)
				11	admin2 code       : code for the second administrative division, a county in the US, see file admin2Codes.txt; varchar(80)
				12	admin3 code       : code for third level administrative division, varchar(20)
				13	admin4 code       : code for fourth level administrative division, varchar(20)
				17	timezone          : the timezone id (see file timeZone.txt)
			*/
			var pos, c = -1, -1
			var ll, err = numutil.NewDvec2(rec[5], rec[4])
			var n, na = rec[1], rec[2]
			var an = stringutil.Without(stringutil.Split(rec[3], ","), false, n, na, "")
			var tz = strings.Replace(rec[17], "_", " ", -1)
			var r = bson.M{"g": stringutil.ToInt(rec[0])}
			// Records without a parseable, in-range coordinate are dropped.
			if (err == nil) && (ll.X >= geoutil.LonMin) && (ll.X <= geoutil.LonMax) && (ll.Y >= geoutil.LatMin) && (ll.Y <= geoutil.LatMax) {
				r["l"] = ll
			} else {
				return nil
			}
			// Same id continuation scheme as for the admin code files.
			if i == 0 {
				lastIndex = index
			} else {
				index = index + 1 + lastIndex
			}
			r["_id"] = index
			// Name fallback chain: name, then ASCII name, then the first
			// alternate name.
			if len(n) == 0 {
				n = na
			}
			if (len(n) == 0) && (len(an) > 0) {
				n = an[0]
			}
			// Do not store an ASCII name that only differs by case.
			if (len(na) > 0) && (strings.ToLower(na) == strings.ToLower(n)) {
				na = ""
			}
			an = stringutil.Without(an, false, n, na)
			if len(n) > 0 {
				r["n"] = n
			}
			if len(na) > 0 {
				r["na"] = na
			}
			if len(an) > 0 {
				r["an"] = an
			}
			if pos = stringutil.InSliceAt(timezones, tz); pos >= 0 {
				r["t"] = pos
			}
			// Country: primary code first, otherwise the first known alternate.
			if pos = stringutil.InSliceAt(countries, rec[8]); pos >= 0 {
				c = pos
				r["c"] = pos
			} else if len(rec[9]) > 0 {
				for _, cn := range stringutil.Split(rec[9], ",") {
					if pos = stringutil.InSliceAt(countries, cn); pos >= 0 {
						c = pos
						r["c"] = pos
						break
					}
				}
			}
			if c >= 0 {
				// The returned slice is discarded here; only the index is used.
				pos, _ = findAdminIndex(admins, countries[c], rec[10], rec[11], rec[12], nil)
				if pos >= 0 {
					r["a"] = pos
				}
			}
			if pos = stringutil.InSliceAt(features, stringutil.Concat(rec[6], ".", rec[7])); pos >= 0 {
				r["f"] = pos
			}
			return r
		})
	}, "allCountries.txt", "null.txt")
	// Cleanup: drop every other "gn_" database. A failure to list the
	// databases merely skips the cleanup.
	dbNames, err = dbConn.DatabaseNames()
	if err == nil {
		dbutil.Panic = false
		for _, tmp = range dbNames {
			if (tmp != dbName) && strings.HasPrefix(tmp, "gn_") {
				dbutil.DropDatabase(dbConn, tmp)
			}
		}
	}
}
Пример #2
0
// createDbCollection reads the tab-separated file at sourceFilePath line by
// line, turns each record into a document via makeRec and inserts the
// resulting documents into collection collName of database dbName in batches
// of 4096.
//
// Lines that yield no fields, or whose first field starts with "#", are
// ignored and do not advance the record index passed to makeRec. If skipFirst
// is set, the record with index 0 is not passed to makeRec. Records for which
// makeRec returns nil are not stored.
//
// If hasGeoIndex is set, a numutil.Dvec2 found under key "l" is replaced by a
// two-element []float64{X, Y}, and the index on "l" is ensured once, when the
// first such document is seen.
//
// Documents are staged in the package-level mrs slice before insertion.
//
// The function panics if the file cannot be opened, and panics with
// "readline" if reading fails or a line exceeds the maximum line length.
func createDbCollection(dbConn *mgo.Session, dbName string, sourceFilePath, collName string, skipFirst bool, hasGeoIndex bool, makeRec recMaker) {
	var fr *os.File
	var sc *bufio.Scanner
	var err error
	var rec []string
	var isDvec2 bool
	var geoIndexDone = false
	var mr bson.M
	var ll numutil.Dvec2
	var i, ri, rc = 0, 0, 0
	fr, err = os.Open(sourceFilePath)
	if err != nil {
		panic(err)
	}
	defer fr.Close()
	fmt.Println("Reading", sourceFilePath)
	sc = bufio.NewScanner(fr)
	// Keep the original maximum line lengths but let the buffer grow on
	// demand instead of allocating the full size up front.
	sc.Buffer(make([]byte, 0, 64*1024), coreutil.Ifi(strings.HasSuffix(sourceFilePath, "allCountries.txt"), 1024*1024*1024, 1024*1024*72))
	for sc.Scan() {
		rec = stringutil.Split(sc.Text(), "\t")
		if (len(rec) == 0) || strings.HasPrefix(rec[0], "#") {
			continue
		}
		if (i != 0) || !skipFirst {
			if mr = makeRec(i, rec); mr != nil {
				mrs[ri] = mr
				ri++
				rc++
				if hasGeoIndex {
					if ll, isDvec2 = mr["l"].(numutil.Dvec2); isDvec2 {
						mr["l"] = []float64{ll.X, ll.Y}
						if !geoIndexDone {
							dbutil.EnsureIndex(dbConn, dbName, collName, &mgo.Index{Key: []string{"@l"}, Bits: 32, Min: -180, Max: 181})
							geoIndexDone = true
						}
					}
				}
				if (i % 250000) == 0 {
					fmt.Println("Read", i)
				}
			}
		}
		i++
	}
	// Scanner.Err is nil at a clean end of file; anything else (I/O failure
	// or an over-long line) must not be mistaken for the end of the data.
	if err = sc.Err(); err != nil {
		fmt.Println(err)
		panic("readline")
	}
	for i = 0; i < rc; i += 4096 {
		if ri = i + 4096; ri > rc {
			ri = rc
		}
		dbutil.Insert(dbConn, dbName, collName, mrs[i:ri]...)
		if (i % (4096 * 50)) == 0 {
			fmt.Println("Insert", i)
		}
	}
	fmt.Println("Recs total: ", collName, rc)
}