func DropDatabase(dbConn *mgo.Session, dbName string) error { var err = dbConn.DB(dbName).DropDatabase() if Panic && (err != nil) { panic(err) } return err }
func Crawl(pool *mgo.Session, du *DigestwUser, tl *TwTimeLine, resolveURL bool, done chan<- int) { defer func() { done <- 0 }() sess := pool.New() defer sess.Close() sa := NewStatsAll(du.TwUser.Screen_Name, sess) var sid, first, last int64 for k, v := range *tl { tmpTime, _ := time.Parse(time.RubyDate, v.Created_at) tmpSec := tmpTime.Seconds() if du.UTC_Offset != nil { tmpSec += *du.UTC_Offset v.Created_at = time.SecondsToUTC(tmpSec).Format(time.RubyDate) } if k == 0 { sid = v.Id last = tmpSec } addStats(sa, &v, resolveURL) if k == (len(*tl) - 1) { first = tmpSec } //log.Printf("id:%d", v.Id) } sa.Foreach(update) // decide to the next execution time du.SinceId = strconv.Itoa64(sid) du.NextSeconds = time.Seconds() + (last - first) if _, err := du.Upsert(sess); err != nil { log.Print(err) } }
func EnsureIndex(dbConn *mgo.Session, dbName string, collName string, index *mgo.Index) error { var err = dbConn.DB(dbName).C(collName).EnsureIndex(*index) if Panic && (err != nil) { panic(err) } return err }
func Insert(dbConn *mgo.Session, dbName string, collName string, recs ...interface{}) error { var err = dbConn.DB(dbName).C(collName).Insert(recs...) if Panic && (err != nil) { panic(err) } return err }
func (su *StatsUnit) Find(sess *mgo.Session, col, uid, unitid string) os.Error { c := sess.DB(MGO_DB).C(col) if unitid == "" { return c.Find(bson.M{"userid": uid}).One(su) } return c.Find(bson.M{"userid": uid, "unitid": unitid}).One(su) }
func createGobs(targetDir string) { var makeGob = func(bmap interface{}, ptr interface{}) interface{} { dbutil.BsonMapToObject(bmap, ptr) return ptr } var makeAdminGob = func(bmap interface{}) interface{} { return makeGob(bmap, &geoutil.GeoNamesAdminRecord{}) } var makeNameGob = func(bmap interface{}) interface{} { return makeGob(bmap, &geoutil.GeoNamesNameRecord{}) } var makeZipGob = func(bmap interface{}) interface{} { return makeGob(bmap, &geoutil.GeoNamesZipRecord{}) } var lola numutil.Dvec2 var box [2][2]float64 var dbConn *mgo.Session var dbName, fbName, fp string var geoRecs []interface{} dbutil.Panic = true dbConn, _ = dbutil.ConnectToGlobal() defer dbConn.Close() dbName = dbutil.GeoNamesDbName(dbConn, true) dbutil.FindAll(dbConn, dbName, "a", nil, &geoRecs) gobutil.CreateGobsFile(path.Join(targetDir, "ga"), &geoRecs, makeAdminGob, true) for lola.Y = geoutil.LatMin; lola.Y < geoutil.LatMax; lola.Y++ { for lola.X = geoutil.LonMin; lola.X < geoutil.LonMax; lola.X++ { fbName = geoutil.LoLaFileName(lola.X, lola.Y) fmt.Println(fbName) geoRecs = nil box[0][0] = lola.X box[0][1] = lola.Y box[1][0] = lola.X + 1 box[1][1] = lola.Y + 1 dbutil.FindAll(dbConn, dbName, "n", bson.M{"l": bson.M{"$within": bson.M{"$box": box}}}, &geoRecs) if len(geoRecs) == 0 { geoRecs = nil dbutil.FindOne(dbConn, dbName, "n", bson.M{"l": bson.M{"$near": []float64{lola.X + 0.5, lola.Y + 0.5}}}, &geoRecs) } fp = path.Join(path.Join(targetDir, "gn"), fbName) fmt.Println(fp) gobutil.CreateGobsFile(fp, &geoRecs, makeNameGob, true) geoRecs = nil dbutil.FindAll(dbConn, dbName, "z", bson.M{"l": bson.M{"$within": bson.M{"$box": box}}}, &geoRecs) if len(geoRecs) > 0 { fp = path.Join(path.Join(targetDir, "gz"), fbName) fmt.Println(fp) gobutil.CreateGobsFile(fp, &geoRecs, makeZipGob, true) } } } }
func GeoNamesDbName(dbConn *mgo.Session, force bool) string { if force || (len(geoNamesDbName) == 0) { var last = "" dbNames, err := dbConn.DatabaseNames() if err != nil { panic(err) } for _, dbn := range dbNames { if strings.HasPrefix(dbn, "gn_") { last = dbn } } geoNamesDbName = last } return geoNamesDbName }
func getPage(session *mgo.Session, title string) (result *Page, err error) { result = new(Page) c := session.DB(dbname).C("pages") err = c.Find(bson.M{"title": title}).One(result) return }
func createDb(sourceDir string) { var err error var dbConn *mgo.Session var dbIndex, lastIndex = 0, 0 var dbName, tmp string var dbNames []string var admins []*adminDiv var features = []string{} var timezones = []string{} var countries = []string{} fmt.Println("Connecting...") dbutil.Panic = true dbConn, err = dbutil.ConnectToGlobal() dbNames, err = dbConn.DatabaseNames() if err != nil { panic(err) } for _, dbName = range dbNames { if strings.HasPrefix(dbName, "gn_") { dbIndex++ } } for { dbName = fmt.Sprintf("gn_%d", dbIndex) if stringutil.InSliceAt(dbNames, dbName) < 0 { break } else { dbIndex++ } } createDbCollection(dbConn, dbName, path.Join(sourceDir, "timeZones.txt"), "t", true, false, func(index int, rec []string) bson.M { var n = strings.Replace(rec[0], "_", " ", -1) timezones = append(timezones, n) return bson.M{"_id": index - 1, "n": n, "g": stringutil.ToFloat32(rec[1]), "d": stringutil.ToFloat32(rec[2]), "r": stringutil.ToFloat32(rec[3])} }) createDbCollection(dbConn, dbName, path.Join(sourceDir, "featureCodes_en.txt"), "f", false, false, func(index int, rec []string) bson.M { features = append(features, rec[0]) return bson.M{"_id": index, "n": rec[0], "t": rec[1], "d": rec[2]} }) createDbCollection(dbConn, dbName, path.Join(sourceDir, "countryInfo.txt"), "c", false, false, func(index int, rec []string) bson.M { /* #ISO ISO3 ISO-Numeric fips Country Capital Area(in sq km) Population Continent tld CurrencyCode CurrencyName Phone Postal Code Format Postal Code Regex Languages geonameid neighbours EquivalentFipsCode AD AND 020 AN Andorra Andorra la Vella 468 84000 EU .ad EUR Euro 376 AD### ^(?:AD)*(\d{3})$ ca 3041565 ES,FR AE ARE 784 AE United Arab Emirates Abu Dhabi 82880 4975593 AS .ae AED Dirham 971 ar-AE,fa,en,hi,ur 290557 SA,OM */ countries = append(countries, rec[0]) return bson.M{"_id": index, "i": rec[0], "i3": rec[1], "f": rec[2], "t": rec[4], "ca": rec[5], "co": rec[8], "d": rec[9], "cc": rec[10], "cn": rec[11], "p": rec[12], "l": 
stringutil.Split(rec[15], ","), "g": stringutil.ToInt(rec[16]), "n": stringutil.Split(rec[17], ",")} }) createDbCollection(dbConn, dbName, path.Join(sourceDir, "zip_allCountries.txt"), "z", false, true, func(index int, rec []string) bson.M { var an = []string{stringutil.Title(rec[3]), stringutil.Title(rec[5]), stringutil.Title(rec[7])} var ac = []string{rec[4], rec[6], rec[8]} var ll, err = numutil.NewDvec2(rec[10], rec[9]) var n = rec[2] var r = bson.M{"_id": index, "c": stringutil.InSliceAt(countries, rec[0]), "z": rec[1], "n": n} var words []string if (err == nil) && (ll.X >= geoutil.LonMin) && (ll.X <= geoutil.LonMax) && (ll.Y >= geoutil.LatMin) && (ll.Y <= geoutil.LatMax) { r["l"] = ll } else { return nil } r["a"], admins = findAdminIndex(admins, rec[0], ac[0], ac[1], ac[2], an) if len(n) > 0 { if n == strings.ToUpper(n) { n = stringutil.Title(n) } if words = stringutil.Split(n, " "); len(words) > 1 { n = "" for i, w := range words { if stringutil.InSliceAt(words, w) == i { n = stringutil.Concat(n, w, " ") } } r["n"] = n[0 : len(n)-1] } else { r["n"] = n } } return r }) for i, ad := range admins { dbutil.Insert(dbConn, dbName, "a", fixupAdminDiv(bson.M{"_id": i, "a": stringutil.NonEmpties(true, ad.ac1, ad.ac2, ad.ac3, ad.ac4), "n": ad.n}, countries)) } stringutil.ForEach(func(i int, s string) { createDbCollection(dbConn, dbName, path.Join(sourceDir, s), "a", false, false, func(index int, rec []string) bson.M { var a = stringutil.Split(rec[0], ".") var n, na = rec[1], rec[2] var r bson.M for _, ad := range admins { if (ad.ac1 == a[0]) && (ad.ac2 == a[1]) && ((len(a) == 2) || (ad.ac3 == a[2])) { return nil } } index += len(admins) if i == 0 { lastIndex = index } else { index = index + 1 + lastIndex } r = bson.M{"_id": index, "a": a, "g": stringutil.ToInt(rec[3]), "n": n, "na": na} return fixupAdminDiv(r, countries) }) }, "admin1CodesASCII.txt", "admin2Codes.txt") lastIndex = 0 stringutil.ForEach(func(i int, s string) { createDbCollection(dbConn, dbName, 
path.Join(sourceDir, s), "n", false, true, func(index int, rec []string) bson.M { /* 0 geonameid : integer id of record in geonames database 1 name : name of geographical point (utf8) varchar(200) 2 asciiname : name of geographical point in plain ascii characters, varchar(200) 3 alternatenames : alternatenames, comma separated varchar(5000) 4 latitude : latitude in decimal degrees (wgs84) 5 longitude : longitude in decimal degrees (wgs84) 6 feature class : see http://www.geonames.org/export/codes.html, char(1) 7 feature code : see http://www.geonames.org/export/codes.html, varchar(10) 8 country code : ISO-3166 2-letter country code, 2 characters 9 cc2 : alternate country codes, comma separated, ISO-3166 2-letter country code, 60 characters 10 admin1 code : fipscode (subject to change to iso code), see exceptions below, see file admin1Codes.txt for display names of this code; varchar(20) 11 admin2 code : code for the second administrative division, a county in the US, see file admin2Codes.txt; varchar(80) 12 admin3 code : code for third level administrative division, varchar(20) 13 admin4 code : code for fourth level administrative division, varchar(20) 17 timezone : the timezone id (see file timeZone.txt) */ var pos, c = -1, -1 var ll, err = numutil.NewDvec2(rec[5], rec[4]) var n, na = rec[1], rec[2] var an = stringutil.Without(stringutil.Split(rec[3], ","), false, n, na, "") var tz = strings.Replace(rec[17], "_", " ", -1) var r = bson.M{"g": stringutil.ToInt(rec[0])} if (err == nil) && (ll.X >= geoutil.LonMin) && (ll.X <= geoutil.LonMax) && (ll.Y >= geoutil.LatMin) && (ll.Y <= geoutil.LatMax) { r["l"] = ll } else { return nil } if i == 0 { lastIndex = index } else { index = index + 1 + lastIndex } r["_id"] = index if len(n) == 0 { n = na } if (len(n) == 0) && (len(an) > 0) { n = an[0] } if (len(na) > 0) && (strings.ToLower(na) == strings.ToLower(n)) { na = "" } an = stringutil.Without(an, false, n, na) if len(n) > 0 { r["n"] = n } if len(na) > 0 { r["na"] = na } 
if len(an) > 0 { r["an"] = an } if pos = stringutil.InSliceAt(timezones, tz); pos >= 0 { r["t"] = pos } if pos = stringutil.InSliceAt(countries, rec[8]); pos >= 0 { c = pos r["c"] = pos } else if len(rec[9]) > 0 { for _, cn := range stringutil.Split(rec[9], ",") { if pos = stringutil.InSliceAt(countries, cn); pos >= 0 { c = pos r["c"] = pos break } } } if c >= 0 { pos, _ = findAdminIndex(admins, countries[c], rec[10], rec[11], rec[12], nil) if pos >= 0 { r["a"] = pos } } if pos = stringutil.InSliceAt(features, stringutil.Concat(rec[6], ".", rec[7])); pos >= 0 { r["f"] = pos } return r }) }, "allCountries.txt", "null.txt") dbNames, err = dbConn.DatabaseNames() if err == nil { dbutil.Panic = false for _, tmp = range dbNames { if (tmp != dbName) && strings.HasPrefix(tmp, "gn_") { dbutil.DropDatabase(dbConn, tmp) } } } }
// Find prepares a query on collection collName of database dbName using the
// given query document and returns it without executing it.
func Find(dbConn *mgo.Session, dbName string, collName string, query interface{}) *mgo.Query {
	coll := dbConn.DB(dbName).C(collName)
	return coll.Find(query)
}
func (su *DigestwUser) Find(sess *mgo.Session, ns int64) *mgo.Iter { c := sess.DB(MGO_DB).C(MGO_COL_USER) return c.Find(bson.M{"nextseconds": bson.M{"$lt": ns}}).Limit(CRAWL_UNIT).Iter() }
func (su *DigestwUser) FindOne(sess *mgo.Session, sn string) os.Error { c := sess.DB(MGO_DB).C(MGO_COL_USER) return c.Find(bson.M{"screen_name": sn}).One(su) }
func (su *DigestwUser) Upsert(sess *mgo.Session) (interface{}, os.Error) { c := sess.DB(MGO_DB).C(MGO_COL_USER) return c.Upsert(bson.M{"screen_name": su.TwUser.Screen_Name}, su) }
func (su *StatsUnit) Upsert(sess *mgo.Session, col, uid, unitid string) (interface{}, os.Error) { c := sess.DB(MGO_DB).C(col) return c.Upsert(bson.M{"userid": uid, "unitid": unitid}, su) }