func getData(g string, db database.MongoDB) { url := "http://www.apolo11.com/reservatorios.php?step=" + g doc, err := goquery.NewDocument(url) parser.CheckError(err) doc.Find("body > center:nth-child(1) > table > tbody > tr > td:nth-child(1) > b > table").Each(func(_ int, s *goquery.Selection) { trs := s.Find("tr") title := "Sistema " + parser.Titlelize(strings.Replace(trs.Eq(0).Text(), "SISTEMA", "", -1)) uri := models.MakeUri(title) getInfo := func(i int, ss *goquery.Selection) string { return ss.Text() } percent := trs.Eq(1).Find("font").Map(getInfo) date := trs.Eq(2).Find("font").Map(getInfo) data := make([]bson.M, 0) for i, _ := range percent { if strings.TrimSpace(date[i]) != "" && strings.TrimSpace(date[i]) != "/" { data = append(data, bson.M{"percent": percent[i], "date": date[i]}) } } query := bson.M{"uri": uri, "granularity_letter": g} source := models.Source{ Url: "http://www.apolo11.com", Note: "Apolo11", } _, err := db.Upsert(query, bson.M{ "$setOnInsert": bson.M{ "createdat": time.Now(), }, "$currentDate": bson.M{ "updatedat": true, }, "$set": bson.M{ "uri": uri, "name": title, "granularity_letter": g, "granularity": getGranularity(g), "data": data, "source": []models.Source{source}, }, }, models.Reservoir{}) parser.CheckError(err) log.Println(uri, title, data) }) }
func (p SaveDeputiesFromSearch) Run(DB database.MongoDB) { searchURL := "http://www2.camara.leg.br/deputados/pesquisa" if parser.IsCached(searchURL) { parser.Log.Info("SaveDeputiesFromSearch Cached") return } defer parser.DeferedCache(searchURL) var doc *goquery.Document var e error if doc, e = goquery.NewDocument(searchURL); e != nil { parser.Log.Critical(e.Error()) } source := models.Source{ Url: searchURL, Note: "Pesquisa Câmara", } doc.Find("#deputado option").Each(func(i int, s *goquery.Selection) { value, _ := s.Attr("value") if value != "" { info := regexp.MustCompile("=|%23|!|\\||\\?").Split(value, -1) name := parser.Titlelize(info[0]) q := bson.M{ "id": models.MakeUri(name), } _, err := DB.Upsert(q, bson.M{ "$setOnInsert": bson.M{ "createdat": time.Now(), }, "$currentDate": bson.M{ "updatedat": true, }, "$addToSet": bson.M{ "sources": source, "identifiers": models.Identifier{ Identifier: info[2], Scheme: "nMatricula", }, }, }, models.Parliamentarian{}) parser.CheckError(err) } }) }
func (_ StatusBot) Run(db database.MongoDB) { // Metro SP doc, err := goquery.NewDocument("http://www.metro.sp.gov.br/Sistemas/direto-do-metro-via4/diretodoMetroHome.aspx") parser.CheckError(err) metroSource := models.Source{ Url: "http://www.metro.sp.gov.br/Sistemas/direto-do-metro-via4/diretodoMetroHome.aspx", } doc.Find("ul li").Each(func(_ int, s *goquery.Selection) { lineName := strings.TrimSpace(s.Find(".nomeDaLinha").Text()) status := strings.TrimSpace(s.Find(".statusDaLinha").Text()) saveStatus(db, lineName, status, metroSource) }) // CPTM doc, err = goquery.NewDocument("http://www.cptm.sp.gov.br/Pages/atendimento.aspx") parser.CheckError(err) cptmSource := models.Source{ Url: "http://www.cptm.sp.gov.br/Central-Relacionamento/situacao-linhas.asp", } lines := map[string]string{ "rubi": "7", "diamante": "8", "esmeralda": "9", "turquesa": "10", "coral": "11", "safira": "12", } doc.Find("#atendimento_consumidor .situacao_linhas .col-md-10 div.col-md-2").Each(func(_ int, s *goquery.Selection) { class, _ := s.Attr("class") uri := strings.TrimSpace(strings.Replace(class, "col-md-2", "", -1)) name := s.Find(".nome_linha") lineNumber := lines[uri] status := s.Find("[data-toggle='tooltip']").Text() lineName := "Linha " + lineNumber + " - " + parser.Titlelize(strings.TrimSpace(name.Text())) saveStatus(db, lineName, status, cptmSource) }) }
func (_ SavePartiesFromTSE) Run(DB database.MongoDB) { url := "http://www.tse.jus.br/partidos/partidos-politicos/registrados-no-tse" if parser.IsCached(url) { parser.Log.Info("SavePartiesFromTSE Cached") return } defer parser.DeferedCache(url) source := models.Source{ Url: url, Note: "Tribunal Superior Eleitoral", } var doc *goquery.Document var e error if doc, e = goquery.NewDocument(url); e != nil { parser.Log.Critical(e.Error()) } const ( IDX = iota SIGLA_IDX NAME_IDX DEFERIMENTO_IDX PRESIDENT_IDX N_IDX ) doc.Find("#textoConteudo table tr").Each(func(i int, s *goquery.Selection) { if s.Find(".titulo_tabela").Length() < 6 && s.Find("td").Length() > 1 { info := s.Find("td") parser.Log.Info("%s - %s - %s - %s - %s - %s", info.Eq(IDX).Text(), info.Eq(SIGLA_IDX).Text(), info.Eq(NAME_IDX).Text(), info.Eq(DEFERIMENTO_IDX).Text(), info.Eq(PRESIDENT_IDX).Text(), info.Eq(N_IDX).Text(), ) partyId := models.MakeUri(info.Eq(SIGLA_IDX).Text()) DB.Upsert(bson.M{"id": partyId}, bson.M{ "$setOnInsert": bson.M{ "createdat": time.Now(), }, "$currentDate": bson.M{ "updatedat": true, }, "$set": bson.M{ "id": partyId, "name": parser.Titlelize(info.Eq(NAME_IDX).Text()), "othernames": []bson.M{{ "name": info.Eq(SIGLA_IDX).Text(), }}, "classification": "party", }, "$addToSet": bson.M{ "sources": []models.Source{source}, }, }, &models.Party{}) urlDetails, b := info.Eq(SIGLA_IDX).Find("a").Attr("href") if b { docDetails, err := goquery.NewDocument(urlDetails) if err != nil { parser.Log.Critical(err.Error()) } sourceDetails := models.Source{ Url: urlDetails, Note: "Tribunal Superior Eleitoral", } contactdetails := make([]bson.M, 0) details := docDetails.Find("#ancora-text-um p") address := strings.Split(details.Eq(3).Text(), ":")[1] contactdetails = append(contactdetails, bson.M{ "label": "Endereço", "type": "address", "value": address, "sources": []models.Source{sourceDetails}, }) contactdetails = append(contactdetails, bson.M{ "label": "CEP", "type": "zipcode", "value": findZipcode(0, details), "sources": []models.Source{sourceDetails}, }) phoneString := strings.Split(details.Eq(5).Text(), ":")[1] phone := strings.Split(phoneString, "/")[0] contactdetails = append(contactdetails, bson.M{ "label": "Telefone", "type": "phone", "value": phone, "sources": []models.Source{sourceDetails}, }) faxString := strings.Split(details.Eq(6).Text(), ":")[1] fax := strings.Split(faxString, "/")[0] contactdetails = append(contactdetails, bson.M{ "label": "Fax", "type": "fax", "value": fax, "sources": []models.Source{sourceDetails}, }) website, ok := details.Eq(7).Find("a").Attr("href") if ok { contactdetails = append(contactdetails, bson.M{ "label": "Site", "type": "website", "value": website, "sources": []models.Source{sourceDetails}, }) } details.Eq(8).Find("a").Each(func(i int, ss *goquery.Selection) { email, ok := ss.Attr("href") if !ok { return } contactdetails = append(contactdetails, bson.M{ "label": "Email", "type": "email", "value": email, "sources": []models.Source{sourceDetails}, }) }) data := bson.M{ "$setOnInsert": bson.M{ "createdat": time.Now(), }, "$currentDate": bson.M{ "updatedat": true, }, "$set": bson.M{ "contactdetails": contactdetails, }, } DB.Upsert(bson.M{"id": partyId}, data, models.Party{}) } } }) }
func (p SaveDeputiesFromXML) Run(DB database.MongoDB) { xmlURL := "http://www.camara.gov.br/SitCamaraWS/Deputados.asmx/ObterDeputados" if parser.IsCached(xmlURL) { parser.Log.Info("SaveDeputiesFromXML Cached") return } defer parser.DeferedCache(xmlURL) source := models.Source{ Url: xmlURL, Note: "Câmara API", } var doc *goquery.Document var e error if doc, e = goquery.NewDocument(xmlURL); e != nil { parser.Log.Critical(e.Error()) } doc.Find("deputado").Each(func(i int, s *goquery.Selection) { name := parser.Titlelize(s.Find("nomeparlamentar").First().Text()) parser.Log.Info("Saving " + name) partyId := models.MakeUri(s.Find("partido").First().Text()) DB.Upsert(bson.M{"id": partyId}, bson.M{ "$setOnInsert": bson.M{ "createdat": time.Now(), }, "$currentDate": bson.M{ "updatedat": true, }, "$set": bson.M{ "id": partyId, "classification": "party", }, }, &models.Party{}) parliamenrianId := models.MakeUri(name) q := bson.M{ "id": parliamenrianId, } fullName := strings.Split(parser.Titlelize(s.Find("nome").First().Text()), " ") _, err := DB.Upsert(q, bson.M{ "$setOnInsert": bson.M{ "createdat": time.Now(), }, "$currentDate": bson.M{ "updatedat": true, }, "$set": bson.M{ "name": &name, "sortname": &name, "id": models.MakeUri(name), "gender": s.Find("sexo").First().Text(), "image": s.Find("urlFoto").First().Text(), "email": s.Find("email").First().Text(), }, "$addToSet": bson.M{ "sources": source, "identifiers": bson.M{ "$each": []models.Identifier{ {Identifier: s.Find("idParlamentar").First().Text(), Scheme: "idParlamentar"}, {Identifier: s.Find("ideCadastro").First().Text(), Scheme: "ideCadastro"}, }, }, "othernames": models.OtherNames{ Name: parser.Titlelize(s.Find("nome").First().Text()), FamilyName: fullName[len(fullName)-1:][0], GivenName: fullName[0], Note: "Nome de nascimento", }, "contactdetails": bson.M{ "$each": []models.ContactDetail{ { Label: "Telefone", Type: "phone", Value: s.Find("fone").First().Text(), Sources: []models.Source{source}, }, { Label: "Gabinete", Type: "address", Value: s.Find("gabinete").First().Text() + ", Anexo " + s.Find("anexo").First().Text(), Sources: []models.Source{source}, }, }, }, }, }, &models.Parliamentarian{}) parser.CreateMembermeship(DB, models.Rel{ Id: parliamenrianId, Link: parser.LinkTo("parliamenrians", parliamenrianId), }, models.Rel{ Id: partyId, Link: parser.LinkTo("parties", partyId), }, source, "Filiado", "Partido") parser.CheckError(err) }) }