func saveStatus(db database.MongoDB, lineName, status string, source models.Source) { uri := models.MakeUri(lineName) result := re.FindStringSubmatch(lineName) lineNumber, _ := strconv.Atoi(result[0]) q := bson.M{"id": uri} _, err := db.Upsert(q, bson.M{ "$setOnInsert": bson.M{ "createdat": time.Now(), }, "$currentDate": bson.M{ "updatedat": true, }, "$set": bson.M{ "name": lineName, "linenumber": lineNumber, }, "$addToSet": bson.M{ "sources": source, }, }, models.Line{}) log.Println(uri) parser.CheckError(err) var statusOld models.Status err = db.FindOne(bson.M{"line_id": uri}, &statusOld) statusQ := bson.M{"line_id": uri, "_id": bson.NewObjectId()} if err == nil && statusOld.Status == status { statusQ = bson.M{"_id": statusOld.Id, "line_id": uri} } _, err = db.Upsert(statusQ, bson.M{ "$setOnInsert": bson.M{ "createdat": time.Now(), }, "$currentDate": bson.M{ "updatedat": true, }, "$set": bson.M{ "status": status, "line_id": uri, }, "$addToSet": bson.M{ "sources": source, }, }, models.Status{}) parser.CheckError(err) parser.Log.Debug(lineName + " - " + status) parser.Log.Info("-- Created Status to " + lineName) parser.Log.Info("Status: " + status) parser.Log.Info("------") if uri == "linha11coral" { saveStatus(db, "Linha 11-Coral-Expresso", status, source) } }
func getData(g string, db database.MongoDB) { url := "http://www.apolo11.com/reservatorios.php?step=" + g doc, err := goquery.NewDocument(url) parser.CheckError(err) doc.Find("body > center:nth-child(1) > table > tbody > tr > td:nth-child(1) > b > table").Each(func(_ int, s *goquery.Selection) { trs := s.Find("tr") title := "Sistema " + parser.Titlelize(strings.Replace(trs.Eq(0).Text(), "SISTEMA", "", -1)) uri := models.MakeUri(title) getInfo := func(i int, ss *goquery.Selection) string { return ss.Text() } percent := trs.Eq(1).Find("font").Map(getInfo) date := trs.Eq(2).Find("font").Map(getInfo) data := make([]bson.M, 0) for i, _ := range percent { if strings.TrimSpace(date[i]) != "" && strings.TrimSpace(date[i]) != "/" { data = append(data, bson.M{"percent": percent[i], "date": date[i]}) } } query := bson.M{"uri": uri, "granularity_letter": g} source := models.Source{ Url: "http://www.apolo11.com", Note: "Apolo11", } _, err := db.Upsert(query, bson.M{ "$setOnInsert": bson.M{ "createdat": time.Now(), }, "$currentDate": bson.M{ "updatedat": true, }, "$set": bson.M{ "uri": uri, "name": title, "granularity_letter": g, "granularity": getGranularity(g), "data": data, "source": []models.Source{source}, }, }, models.Reservoir{}) parser.CheckError(err) log.Println(uri, title, data) }) }
func (p SaveDeputiesFromTransparenciaBrasil) Run(DB database.MongoDB) { source := models.Source{ Url: "http://dev.transparencia.org.br/", Note: "Transparencia Brasil", } if parser.IsCached("http://dev.transparencia.org.br/") { parser.Log.Info("SaveDeputiesFromTransparenciaBrasil Cached") return } defer parser.DeferedCache("http://dev.transparencia.org.br/") parser.Log.Info("Starting SaveDeputiesFromTransparenciaBrasil") c := transparencia.New("kqOfbdNKSlpf") query := map[string]string{ "casa": "1", } parliamenrians, err := c.Excelencias(query) parser.CheckError(err) for _, parliamenrian := range parliamenrians { uri := models.MakeUri(parliamenrian.Apelido) parser.Log.Info("Saving %s", parliamenrian.Nome) _, err := DB.Upsert(bson.M{"id": uri}, bson.M{ "$currentDate": bson.M{ "updatedat": true, }, "$set": bson.M{ "summary": parliamenrian.MiniBio, "nationalidentify": parliamenrian.CPF, }, "$addToSet": bson.M{ "sources": source, "identifiers": bson.M{ "$each": []bson.M{ { "identifier": parliamenrian.Id, "scheme": "TransparenciaBrasilID", }, { "identifier": parliamenrian.CPF, "scheme": "CPF", }, }, }, }, }, models.Parliamentarian{}) parser.CheckError(err) } }
func (self BasicStateBot) ParseState(db database.MongoDB, stateUrl string) { doc, err := goquery.NewDocument(stateUrl) parser.CheckError(err) source := models.Source{ Url: stateUrl, Note: "ibge", } data := doc.Find("#sintese tr") pUrl, _ := url.Parse(stateUrl) id := pUrl.Query().Get("sigla") capital := parser.ToUtf8(data.Eq(0).Find(".total").Text()) population2014 := data.Eq(1).Find(".total").Text() population2010 := data.Eq(2).Find(".total").Text() area := data.Eq(3).Find(".total").Text() populationDensity := data.Eq(4).Find(".total").Text() numberOfMunicipalities, _ := strconv.Atoi(data.Eq(5).Find(".total").Text()) log.Println(id, capital, population2014, population2010, area, populationDensity, numberOfMunicipalities) if STATES_NAME[id] == "" { panic(id) } q := bson.M{"id": id} _, err = db.Upsert(q, bson.M{ "$setOnInsert": bson.M{ "createdat": time.Now(), }, "$currentDate": bson.M{ "updatedat": true, }, "$set": bson.M{ "name": STATES_NAME[id], "capitalid": models.MakeUri(capital), "population": toFloat(population2014), "area": toFloat(area), "populationdensity": toFloat(populationDensity), "numberofmunicipalities": numberOfMunicipalities, }, "$addToSet": bson.M{ "sources": source, }, }, models.State{}) doc = nil parser.CheckError(err) }
func LineColor(uri, hex string, db database.MongoDB) { q := bson.M{"id": uri} c, _ := colorful.Hex(hex) r, g, b := c.RGB255() color := bson.M{ "hex": hex, "rgb": []int{int(r), int(g), int(b)}, } parser.Log.Debug("Save", uri, "with color", color) _, err := db.Upsert(q, bson.M{ "$setOnInsert": bson.M{ "createdat": time.Now(), }, "$currentDate": bson.M{ "updatedat": true, }, "$set": bson.M{ "color": color, }, }, models.Line{}) parser.CheckError(err) }
func (_ StatusBot) Run(db database.MongoDB) { // Metro SP doc, err := goquery.NewDocument("http://www.metro.sp.gov.br/Sistemas/direto-do-metro-via4/diretodoMetroHome.aspx") parser.CheckError(err) metroSource := models.Source{ Url: "http://www.metro.sp.gov.br/Sistemas/direto-do-metro-via4/diretodoMetroHome.aspx", } doc.Find("ul li").Each(func(_ int, s *goquery.Selection) { lineName := strings.TrimSpace(s.Find(".nomeDaLinha").Text()) status := strings.TrimSpace(s.Find(".statusDaLinha").Text()) saveStatus(db, lineName, status, metroSource) }) // CPTM doc, err = goquery.NewDocument("http://www.cptm.sp.gov.br/Pages/atendimento.aspx") parser.CheckError(err) cptmSource := models.Source{ Url: "http://www.cptm.sp.gov.br/Central-Relacionamento/situacao-linhas.asp", } lines := map[string]string{ "rubi": "7", "diamante": "8", "esmeralda": "9", "turquesa": "10", "coral": "11", "safira": "12", } doc.Find("#atendimento_consumidor .situacao_linhas .col-md-10 div.col-md-2").Each(func(_ int, s *goquery.Selection) { class, _ := s.Attr("class") uri := strings.TrimSpace(strings.Replace(class, "col-md-2", "", -1)) name := s.Find(".nome_linha") lineNumber := lines[uri] status := s.Find("[data-toggle='tooltip']").Text() lineName := "Linha " + lineNumber + " - " + parser.Titlelize(strings.TrimSpace(name.Text())) saveStatus(db, lineName, status, cptmSource) }) }
func (self BasicCityBot) getCitiesData(db database.MongoDB, url string, stateID string) { doc, err := goquery.NewDocument(url) parser.CheckError(err) source := models.Source{ Url: url, Note: "ibge", } doc.Find("#municipios tbody tr").Each(func(_ int, s *goquery.Selection) { data := s.Find("td") name := data.Eq(0).Text() parser.Log.Debug("Salving: " + name + " (" + stateID + ")") id := models.MakeUri(name) ibgecode, _ := strconv.Atoi(data.Eq(1).Text()) q := bson.M{"id": id, "stateid": stateID} _, err = db.Upsert(q, bson.M{ "$setOnInsert": bson.M{ "createdat": time.Now(), }, "$currentDate": bson.M{ "updatedat": true, }, "$set": bson.M{ "name": name, "ibgecode": ibgecode, "gentile": data.Eq(2).Text(), "population": toFloat(data.Eq(3).Text()), "area": toFloat(data.Eq(4).Text()), "density": toFloat(data.Eq(5).Text()), "pib": toFloat(data.Eq(6).Text()), }, "$addToSet": bson.M{ "sources": source, }, }, models.City{}) parser.CheckError(err) }) doc = nil }
func (p SaveDeputiesFromSearch) Run(DB database.MongoDB) { searchURL := "http://www2.camara.leg.br/deputados/pesquisa" if parser.IsCached(searchURL) { parser.Log.Info("SaveDeputiesFromSearch Cached") return } defer parser.DeferedCache(searchURL) var doc *goquery.Document var e error if doc, e = goquery.NewDocument(searchURL); e != nil { parser.Log.Critical(e.Error()) } source := models.Source{ Url: searchURL, Note: "Pesquisa Câmara", } doc.Find("#deputado option").Each(func(i int, s *goquery.Selection) { value, _ := s.Attr("value") if value != "" { info := regexp.MustCompile("=|%23|!|\\||\\?").Split(value, -1) name := parser.Titlelize(info[0]) q := bson.M{ "id": models.MakeUri(name), } _, err := DB.Upsert(q, bson.M{ "$setOnInsert": bson.M{ "createdat": time.Now(), }, "$currentDate": bson.M{ "updatedat": true, }, "$addToSet": bson.M{ "sources": source, "identifiers": models.Identifier{ Identifier: info[2], Scheme: "nMatricula", }, }, }, models.Parliamentarian{}) parser.CheckError(err) } }) }
func (self BasicStateBot) Run(db database.MongoDB) { var wg sync.WaitGroup doc, err := goquery.NewDocument(STATE_BASE_URL) parser.CheckError(err) doc.Find("#menu a").Each(func(_ int, s *goquery.Selection) { wg.Add(1) partialUrl, _ := s.Attr("href") url := STATE_BASE_URL + partialUrl go func() { self.ParseState(db, url) wg.Done() }() }) wg.Wait() }
func saveDeputies(id string, d models.Parliamentarian, DB database.MongoDB) { bioURL := "http://www2.camara.leg.br/deputados/pesquisa/layouts_deputados_biografia?pk=" + id if parser.IsCached(bioURL) { parser.Log.Info("SaveDeputiesAbout(%s) Cached", id) return } source := models.Source{ Url: bioURL, Note: "Pesquisa Câmara", } var doc *goquery.Document var e error if doc, e = goquery.NewDocument(bioURL); e != nil { parser.Log.Critical(e.Error()) } bio := doc.Find("#bioDeputado .bioOutros") biographyItems := make([]string, 0) bio.Each(func(i int, s *goquery.Selection) { title := s.Find(".bioOutrosTitulo").Text() if title != "" { title = strings.TrimSpace(title) title = strings.Replace(title, ":", "", -1) body := s.Find(".bioOutrosTexto").Text() biographyItems = append(biographyItems, title) biographyItems = append(biographyItems, body) biographyItems = append(biographyItems, "") } }) bioDetails := doc.Find("#bioDeputado .bioDetalhes strong") birthdateA := strings.Split(bioDetails.Eq(1).Text(), "/") var year int switch id { case "123756", "160635": year = 1970 case "74230", "129618": year = 1952 case "74665", "141387": year = 1953 case "73933": year = 1959 case "73786": year = 1939 case "74124": year = 1964 case "74447": year = 1936 case "74474": year = 1940 default: parser.Log.Debug("(%s) %s", id, birthdateA) if len(birthdateA) != 3 { parser.Log.Debug("Error, deputies without year %s", bioURL) year = 0 } else { year, _ = strconv.Atoi(birthdateA[2]) } } birthDate := popolo.Date{} if len(birthdateA) > 1 { month, _ := strconv.Atoi(birthdateA[1]) day, _ := strconv.Atoi(birthdateA[0]) loc, _ := time.LoadLocation("America/Sao_Paulo") birthDate = popolo.Date{time.Date(year, time.Month(month), day, 0, 0, 0, 0, loc)} } _, err := DB.Upsert(bson.M{"id": d.Id}, bson.M{ "$setOnInsert": bson.M{ "createdat": time.Now(), }, "$currentDate": bson.M{ "updatedat": true, }, "$set": bson.M{ "summary": bio.Eq(0).Find(".bioOutrosTexto").Text(), "biography": strings.Join(biographyItems, "\n"), "link": "http://www.camara.gov.br/internet/Deputado/dep_Detalhe.asp?id=" + id, "birthdate": birthDate, }, "$addToSet": bson.M{ "sources": source, }, }, models.Parliamentarian{}) parser.CheckError(err) parser.CacheURL(bioURL) }
func (self SaveSenatorsFromIndex) Run(DB database.MongoDB) { indexURL := "http://www.senado.gov.br" if parser.IsCached(indexURL) { parser.Log.Info("SaveSenatorsFromIndex Cached") return } defer parser.DeferedCache(indexURL) source := models.Source{ Url: indexURL, Note: "senado.gov.br website", } var doc *goquery.Document var e error if doc, e = goquery.NewDocument(indexURL + "/senadores/"); e != nil { parser.Log.Critical(e.Error()) } doc.Find("#senadores tbody tr").Each(func(i int, s *goquery.Selection) { data := s.Find("td") name := data.Eq(0).Text() link, okLink := data.Eq(0).Find("a").Attr("href") if !okLink { parser.CheckError(errors.New("link not found")) } else { link = indexURL + link } email, okEmail := data.Eq(6).Find("a").Attr("href") if !okEmail { email = "" } else { email = strings.Replace(email, "mailto:", "", -1) } partyId := models.MakeUri(data.Eq(1).Text()) DB.Upsert(bson.M{"id": partyId}, bson.M{ "$setOnInsert": bson.M{ "createdat": time.Now(), }, "$currentDate": bson.M{ "updatedat": true, }, "$set": bson.M{ "id": partyId, "classification": "party", }, }, &models.Party{}) parliamenrianId := models.MakeUri(name) q := bson.M{ "id": parliamenrianId, } re := regexp.MustCompile("paginst/senador(.+)a.asp") senatorId := re.FindStringSubmatch(link)[1] _, err := DB.Upsert(q, bson.M{ "$setOnInsert": bson.M{ "createdat": time.Now(), }, "$currentDate": bson.M{ "updatedat": true, }, "$addToSet": bson.M{ "sources": source, "contactdetails": bson.M{ "$each": []models.ContactDetail{ { Label: "Telefone", Type: "phone", Value: data.Eq(4).Text(), Sources: []models.Source{source}, }, { Label: "Fax", Type: "fax", Value: data.Eq(5).Text(), Sources: []models.Source{source}, }, }, }, "identifiers": bson.M{ "$each": []models.Identifier{ {Identifier: senatorId, Scheme: "CodSenador"}, }, }, }, "$set": bson.M{ "name": name, "email": email, "link": link, "shortname": models.MakeUri(name), }, }, models.Parliamentarian{}) parser.CheckError(err) docDetails, e := goquery.NewDocument(link) if e != nil { parser.Log.Critical(e.Error()) } info := docDetails.Find(".dadosSenador b") birthdateA := strings.Split(info.Eq(1).Text(), "/") year, _ := strconv.Atoi(birthdateA[2]) month, _ := strconv.Atoi(birthdateA[1]) day, _ := strconv.Atoi(birthdateA[0]) loc, _ := time.LoadLocation("America/Sao_Paulo") birthDate := popolo.Date{time.Date(year, time.Month(month), day, 0, 0, 0, 0, loc)} _, err = DB.Upsert(q, bson.M{ "$setOnInsert": bson.M{ "createdat": time.Now(), }, "$currentDate": bson.M{ "updatedat": true, }, "$set": bson.M{ "birthdate": birthDate, }, "$addToSet": bson.M{ "sources": source, "othernames": models.OtherNames{ Name: info.Eq(0).Text(), Note: "Nome de nascimento", }, "contactdetails": models.ContactDetail{ Label: "Gabinete", Type: "address", Value: info.Eq(4).Text(), Sources: []models.Source{source}, }, }, }, models.Parliamentarian{}) parser.CreateMembermeship(DB, models.Rel{ Id: parliamenrianId, Link: parser.LinkTo("parliamenrians", parliamenrianId), }, models.Rel{ Id: partyId, Link: parser.LinkTo("parties", partyId), }, source, "Filiado", "Partido") parser.CheckError(err) }) }
func (_ StationBot) Run(db database.MongoDB) { doc, err := goquery.NewDocument("http://www.metro.sp.gov.br/app/trajeto/xt/estacoesTipoXML.asp") parser.CheckError(err) doc.Find("estacao").Each(func(_ int, s *goquery.Selection) { id, _ := s.Attr("estacaoid") name, _ := s.Attr("nome") lineId, _ := s.Attr("linhaid") _lineName, _ := s.Attr("linha") typeId, _ := s.Attr("tipoid") typeName, _ := s.Attr("tipo") if typeId == "3" { return } lineName := "Linha " + strings.Split(_lineName, " ")[0] uri := models.MakeUri(lineName) cannonicaluri := uri names := strings.Split(lineName, "-") if len(names) == 3 { cannonicaluri = models.MakeUri(strings.Replace(lineName, names[2], "", -1)) } lineQ := bson.M{"id": uri} _, err := db.Upsert(lineQ, bson.M{ "$setOnInsert": bson.M{ "createdat": time.Now(), }, "$currentDate": bson.M{ "updatedat": true, }, "$set": bson.M{ "name": lineName, "cannonicaluri": cannonicaluri, "metroid": lineId, "type": bson.M{ "id": typeId, "name": typeName, }, }, }, models.Line{}) parser.CheckError(err) q := bson.M{"id": models.MakeUri(name)} _, err = db.Upsert(q, bson.M{ "$setOnInsert": bson.M{ "createdat": time.Now(), }, "$currentDate": bson.M{ "updatedat": true, }, "$set": bson.M{ "metroid": id, "name": name, }, }, models.Station{}) parser.CheckError(err) log.Println(id, name, lineId, lineName, typeName) }) }
func getQuotaPage(id, url string, DB database.MongoDB) { if parser.IsCached(url) { return } defer parser.DeferedCache(url) <-time.After(2 * time.Second) doc, err := goquery.NewDocument(url) if err != nil { parser.Log.Error(err.Error(), url) return } var p models.Parliamentarian DB.FindOne(bson.M{ "id": id, }, &p) doc.Find(".espacoPadraoInferior2 tr:not(.celulasCentralizadas)").Each(func(i int, s *goquery.Selection) { data := s.Find("td") cnpj := data.Eq(0).Text() if cnpj == "TOTAL" { return } suplier := data.Eq(1).Text() orderN := strings.TrimSpace(data.Eq(2).Text()) companyUri := models.MakeUri(suplier) if cnpj == "" { cnpj = companyUri } _, err := DB.Upsert(bson.M{"id": cnpj}, bson.M{ "$set": bson.M{ "name": suplier, "uri": companyUri, }, }, models.Company{}) parser.CheckError(err) switch len(data.Nodes) { case 4: // value := data.Eq(3).Text() // log.Println("normal:", cnpj, "|", suplier, "|", orderN, value) // log.Println("skip") case 7: sendedAt, _ := time.Parse("2006-01-02", strings.Split(data.Eq(3).Text(), " ")[0]) value := strings.Replace(data.Eq(6).Text(), "R$", "", -1) value = strings.Replace(value, ".", "", -1) value = strings.Replace(value, "-", "", -1) value = strings.TrimSpace(strings.Replace(value, ",", ".", -1)) valueF, _ := strconv.ParseFloat(value, 64) parser.Log.Debug(orderN) orderNS := strings.Split(orderN, ":") var ticket string if len(orderNS) == 1 { ticket = strings.TrimSpace(orderNS[0]) } else { ticket = strings.TrimSpace(orderNS[1]) } _, err = DB.Upsert(bson.M{"order": orderN, "parliamentarian": p.Id}, bson.M{ "$set": bson.M{ "company": cnpj, "date": sendedAt, "passenger_name": data.Eq(4).Text(), "route": data.Eq(5).Text(), "value": valueF, "ticket": ticket, }, }, models.Quota{}) parser.CheckError(err) default: panic(data.Text()) } }) }
func (p SaveDeputiesFromXML) Run(DB database.MongoDB) { xmlURL := "http://www.camara.gov.br/SitCamaraWS/Deputados.asmx/ObterDeputados" if parser.IsCached(xmlURL) { parser.Log.Info("SaveDeputiesFromXML Cached") return } defer parser.DeferedCache(xmlURL) source := models.Source{ Url: xmlURL, Note: "Câmara API", } var doc *goquery.Document var e error if doc, e = goquery.NewDocument(xmlURL); e != nil { parser.Log.Critical(e.Error()) } doc.Find("deputado").Each(func(i int, s *goquery.Selection) { name := parser.Titlelize(s.Find("nomeparlamentar").First().Text()) parser.Log.Info("Saving " + name) partyId := models.MakeUri(s.Find("partido").First().Text()) DB.Upsert(bson.M{"id": partyId}, bson.M{ "$setOnInsert": bson.M{ "createdat": time.Now(), }, "$currentDate": bson.M{ "updatedat": true, }, "$set": bson.M{ "id": partyId, "classification": "party", }, }, &models.Party{}) parliamenrianId := models.MakeUri(name) q := bson.M{ "id": parliamenrianId, } fullName := strings.Split(parser.Titlelize(s.Find("nome").First().Text()), " ") _, err := DB.Upsert(q, bson.M{ "$setOnInsert": bson.M{ "createdat": time.Now(), }, "$currentDate": bson.M{ "updatedat": true, }, "$set": bson.M{ "name": &name, "sortname": &name, "id": models.MakeUri(name), "gender": s.Find("sexo").First().Text(), "image": s.Find("urlFoto").First().Text(), "email": s.Find("email").First().Text(), }, "$addToSet": bson.M{ "sources": source, "identifiers": bson.M{ "$each": []models.Identifier{ {Identifier: s.Find("idParlamentar").First().Text(), Scheme: "idParlamentar"}, {Identifier: s.Find("ideCadastro").First().Text(), Scheme: "ideCadastro"}, }, }, "othernames": models.OtherNames{ Name: parser.Titlelize(s.Find("nome").First().Text()), FamilyName: fullName[len(fullName)-1:][0], GivenName: fullName[0], Note: "Nome de nascimento", }, "contactdetails": bson.M{ "$each": []models.ContactDetail{ { Label: "Telefone", Type: "phone", Value: s.Find("fone").First().Text(), Sources: []models.Source{source}, }, { Label: "Gabinete", Type: "address", Value: s.Find("gabinete").First().Text() + ", Anexo " + s.Find("anexo").First().Text(), Sources: []models.Source{source}, }, }, }, }, }, &models.Parliamentarian{}) parser.CreateMembermeship(DB, models.Rel{ Id: parliamenrianId, Link: parser.LinkTo("parliamenrians", parliamenrianId), }, models.Rel{ Id: partyId, Link: parser.LinkTo("parties", partyId), }, source, "Filiado", "Partido") parser.CheckError(err) }) }