func (self SaveSenatorsFromIndex) Run(DB database.MongoDB) { indexURL := "http://www.senado.gov.br" if parser.IsCached(indexURL) { parser.Log.Info("SaveSenatorsFromIndex Cached") return } defer parser.DeferedCache(indexURL) source := models.Source{ Url: indexURL, Note: "senado.gov.br website", } var doc *goquery.Document var e error if doc, e = goquery.NewDocument(indexURL + "/senadores/"); e != nil { parser.Log.Critical(e.Error()) } doc.Find("#senadores tbody tr").Each(func(i int, s *goquery.Selection) { data := s.Find("td") name := data.Eq(0).Text() link, okLink := data.Eq(0).Find("a").Attr("href") if !okLink { parser.CheckError(errors.New("link not found")) } else { link = indexURL + link } email, okEmail := data.Eq(6).Find("a").Attr("href") if !okEmail { email = "" } else { email = strings.Replace(email, "mailto:", "", -1) } partyId := models.MakeUri(data.Eq(1).Text()) DB.Upsert(bson.M{"id": partyId}, bson.M{ "$setOnInsert": bson.M{ "createdat": time.Now(), }, "$currentDate": bson.M{ "updatedat": true, }, "$set": bson.M{ "id": partyId, "classification": "party", }, }, &models.Party{}) parliamenrianId := models.MakeUri(name) q := bson.M{ "id": parliamenrianId, } re := regexp.MustCompile("paginst/senador(.+)a.asp") senatorId := re.FindStringSubmatch(link)[1] _, err := DB.Upsert(q, bson.M{ "$setOnInsert": bson.M{ "createdat": time.Now(), }, "$currentDate": bson.M{ "updatedat": true, }, "$addToSet": bson.M{ "sources": source, "contactdetails": bson.M{ "$each": []models.ContactDetail{ { Label: "Telefone", Type: "phone", Value: data.Eq(4).Text(), Sources: []models.Source{source}, }, { Label: "Fax", Type: "fax", Value: data.Eq(5).Text(), Sources: []models.Source{source}, }, }, }, "identifiers": bson.M{ "$each": []models.Identifier{ {Identifier: senatorId, Scheme: "CodSenador"}, }, }, }, "$set": bson.M{ "name": name, "email": email, "link": link, "shortname": models.MakeUri(name), }, }, models.Parliamentarian{}) parser.CheckError(err) docDetails, e := goquery.NewDocument(link) if e != nil { parser.Log.Critical(e.Error()) } info := docDetails.Find(".dadosSenador b") birthdateA := strings.Split(info.Eq(1).Text(), "/") year, _ := strconv.Atoi(birthdateA[2]) month, _ := strconv.Atoi(birthdateA[1]) day, _ := strconv.Atoi(birthdateA[0]) loc, _ := time.LoadLocation("America/Sao_Paulo") birthDate := popolo.Date{time.Date(year, time.Month(month), day, 0, 0, 0, 0, loc)} _, err = DB.Upsert(q, bson.M{ "$setOnInsert": bson.M{ "createdat": time.Now(), }, "$currentDate": bson.M{ "updatedat": true, }, "$set": bson.M{ "birthdate": birthDate, }, "$addToSet": bson.M{ "sources": source, "othernames": models.OtherNames{ Name: info.Eq(0).Text(), Note: "Nome de nascimento", }, "contactdetails": models.ContactDetail{ Label: "Gabinete", Type: "address", Value: info.Eq(4).Text(), Sources: []models.Source{source}, }, }, }, models.Parliamentarian{}) parser.CreateMembermeship(DB, models.Rel{ Id: parliamenrianId, Link: parser.LinkTo("parliamenrians", parliamenrianId), }, models.Rel{ Id: partyId, Link: parser.LinkTo("parties", partyId), }, source, "Filiado", "Partido") parser.CheckError(err) }) }
func (p SaveDeputiesFromXML) Run(DB database.MongoDB) { xmlURL := "http://www.camara.gov.br/SitCamaraWS/Deputados.asmx/ObterDeputados" if parser.IsCached(xmlURL) { parser.Log.Info("SaveDeputiesFromXML Cached") return } defer parser.DeferedCache(xmlURL) source := models.Source{ Url: xmlURL, Note: "Câmara API", } var doc *goquery.Document var e error if doc, e = goquery.NewDocument(xmlURL); e != nil { parser.Log.Critical(e.Error()) } doc.Find("deputado").Each(func(i int, s *goquery.Selection) { name := parser.Titlelize(s.Find("nomeparlamentar").First().Text()) parser.Log.Info("Saving " + name) partyId := models.MakeUri(s.Find("partido").First().Text()) DB.Upsert(bson.M{"id": partyId}, bson.M{ "$setOnInsert": bson.M{ "createdat": time.Now(), }, "$currentDate": bson.M{ "updatedat": true, }, "$set": bson.M{ "id": partyId, "classification": "party", }, }, &models.Party{}) parliamenrianId := models.MakeUri(name) q := bson.M{ "id": parliamenrianId, } fullName := strings.Split(parser.Titlelize(s.Find("nome").First().Text()), " ") _, err := DB.Upsert(q, bson.M{ "$setOnInsert": bson.M{ "createdat": time.Now(), }, "$currentDate": bson.M{ "updatedat": true, }, "$set": bson.M{ "name": &name, "sortname": &name, "id": models.MakeUri(name), "gender": s.Find("sexo").First().Text(), "image": s.Find("urlFoto").First().Text(), "email": s.Find("email").First().Text(), }, "$addToSet": bson.M{ "sources": source, "identifiers": bson.M{ "$each": []models.Identifier{ {Identifier: s.Find("idParlamentar").First().Text(), Scheme: "idParlamentar"}, {Identifier: s.Find("ideCadastro").First().Text(), Scheme: "ideCadastro"}, }, }, "othernames": models.OtherNames{ Name: parser.Titlelize(s.Find("nome").First().Text()), FamilyName: fullName[len(fullName)-1:][0], GivenName: fullName[0], Note: "Nome de nascimento", }, "contactdetails": bson.M{ "$each": []models.ContactDetail{ { Label: "Telefone", Type: "phone", Value: s.Find("fone").First().Text(), Sources: []models.Source{source}, }, { Label: "Gabinete", Type: "address", Value: s.Find("gabinete").First().Text() + ", Anexo " + s.Find("anexo").First().Text(), Sources: []models.Source{source}, }, }, }, }, }, &models.Parliamentarian{}) parser.CreateMembermeship(DB, models.Rel{ Id: parliamenrianId, Link: parser.LinkTo("parliamenrians", parliamenrianId), }, models.Rel{ Id: partyId, Link: parser.LinkTo("parties", partyId), }, source, "Filiado", "Partido") parser.CheckError(err) }) }