Ejemplo n.º 1
0
func getPages(url, id string, DB database.MongoDB) {
	if parser.IsCached(url) {
		return
	}
	defer parser.DeferedCache(url)

	doc, err := goquery.NewDocument(url)
	if err != nil {
		parser.Log.Critical("Problems %s", url)
		return
	}

	_id := strings.Split(url, "nuDeputadoId=")

	doc.Find("#mesAno option").Each(func(_ int, s *goquery.Selection) {
		monthYear, ok := s.Attr("value")
		if ok {
			dateData := strings.Split(monthYear, "-")
			if dateData[1] == "2014" {
				fullQuotasUrl := QUOATAANALITICOURL
				fullQuotasUrl = strings.Replace(fullQuotasUrl, "MONTH", dateData[0], -1)
				fullQuotasUrl = strings.Replace(fullQuotasUrl, "YEAR", dateData[1], -1)
				fullQuotasUrl = strings.Replace(fullQuotasUrl, "ID", _id[1], -1)
				getQuotaPage(id, fullQuotasUrl, DB)
			}
		}
	})
}
func (p SaveDeputiesFromTransparenciaBrasil) Run(DB database.MongoDB) {
	source := models.Source{
		Url:  "http://dev.transparencia.org.br/",
		Note: "Transparencia Brasil",
	}

	if parser.IsCached("http://dev.transparencia.org.br/") {
		parser.Log.Info("SaveDeputiesFromTransparenciaBrasil Cached")
		return
	}
	defer parser.DeferedCache("http://dev.transparencia.org.br/")

	parser.Log.Info("Starting SaveDeputiesFromTransparenciaBrasil")

	c := transparencia.New("kqOfbdNKSlpf")
	query := map[string]string{
		"casa": "1",
	}
	parliamenrians, err := c.Excelencias(query)
	parser.CheckError(err)

	for _, parliamenrian := range parliamenrians {
		uri := models.MakeUri(parliamenrian.Apelido)
		parser.Log.Info("Saving %s", parliamenrian.Nome)

		_, err := DB.Upsert(bson.M{"id": uri}, bson.M{
			"$currentDate": bson.M{
				"updatedat": true,
			},
			"$set": bson.M{
				"summary":          parliamenrian.MiniBio,
				"nationalidentify": parliamenrian.CPF,
			},
			"$addToSet": bson.M{
				"sources": source,
				"identifiers": bson.M{
					"$each": []bson.M{
						{
							"identifier": parliamenrian.Id,
							"scheme":     "TransparenciaBrasilID",
						},
						{
							"identifier": parliamenrian.CPF,
							"scheme":     "CPF",
						},
					},
				},
			},
		}, models.Parliamentarian{})
		parser.CheckError(err)
	}
}
func (p SaveDeputiesFromSearch) Run(DB database.MongoDB) {
	searchURL := "http://www2.camara.leg.br/deputados/pesquisa"

	if parser.IsCached(searchURL) {
		parser.Log.Info("SaveDeputiesFromSearch Cached")
		return
	}
	defer parser.DeferedCache(searchURL)

	var doc *goquery.Document
	var e error

	if doc, e = goquery.NewDocument(searchURL); e != nil {
		parser.Log.Critical(e.Error())
	}

	source := models.Source{
		Url:  searchURL,
		Note: "Pesquisa Câmara",
	}

	doc.Find("#deputado option").Each(func(i int, s *goquery.Selection) {
		value, _ := s.Attr("value")
		if value != "" {
			info := regexp.MustCompile("=|%23|!|\\||\\?").Split(value, -1)

			name := parser.Titlelize(info[0])
			q := bson.M{
				"id": models.MakeUri(name),
			}

			_, err := DB.Upsert(q, bson.M{
				"$setOnInsert": bson.M{
					"createdat": time.Now(),
				},
				"$currentDate": bson.M{
					"updatedat": true,
				},
				"$addToSet": bson.M{
					"sources": source,
					"identifiers": models.Identifier{
						Identifier: info[2], Scheme: "nMatricula",
					},
				},
			}, models.Parliamentarian{})
			parser.CheckError(err)
		}
	})
}
Ejemplo n.º 4
0
func (p SaveDeputiesQuotas) Run(DB database.MongoDB) {
	url := "http://www.camara.gov.br/cota-parlamentar/pg-cota-lista-deputados.jsp"

	if parser.IsCached(url) {
		return
	}
	defer parser.DeferedCache(url)

	doc, err := goquery.NewDocument(url)
	if err != nil {
		panic(err)
		return
	}

	doc.Find("#content ul li a").Each(func(_ int, s *goquery.Selection) {
		url, _ := s.Attr("href")
		name_party := strings.Split(s.Text(), "-")
		name := strings.TrimSpace(name_party[0])
		id := models.MakeUri(name)
		if !strings.Contains(id, "lideranca") {
			getPages(CAMARABASEURL+url, id, DB)
		}
	})
}
func (self SaveSenatorsFromIndex) Run(DB database.MongoDB) {
	indexURL := "http://www.senado.gov.br"

	if parser.IsCached(indexURL) {
		parser.Log.Info("SaveSenatorsFromIndex Cached")
		return
	}
	defer parser.DeferedCache(indexURL)

	source := models.Source{
		Url:  indexURL,
		Note: "senado.gov.br website",
	}

	var doc *goquery.Document
	var e error

	if doc, e = goquery.NewDocument(indexURL + "/senadores/"); e != nil {
		parser.Log.Critical(e.Error())
	}

	doc.Find("#senadores tbody tr").Each(func(i int, s *goquery.Selection) {
		data := s.Find("td")
		name := data.Eq(0).Text()
		link, okLink := data.Eq(0).Find("a").Attr("href")
		if !okLink {
			parser.CheckError(errors.New("link not found"))
		} else {
			link = indexURL + link
		}

		email, okEmail := data.Eq(6).Find("a").Attr("href")
		if !okEmail {
			email = ""
		} else {
			email = strings.Replace(email, "mailto:", "", -1)
		}

		partyId := models.MakeUri(data.Eq(1).Text())
		DB.Upsert(bson.M{"id": partyId}, bson.M{
			"$setOnInsert": bson.M{
				"createdat": time.Now(),
			},
			"$currentDate": bson.M{
				"updatedat": true,
			},
			"$set": bson.M{
				"id":             partyId,
				"classification": "party",
			},
		}, &models.Party{})

		parliamenrianId := models.MakeUri(name)
		q := bson.M{
			"id": parliamenrianId,
		}

		re := regexp.MustCompile("paginst/senador(.+)a.asp")
		senatorId := re.FindStringSubmatch(link)[1]

		_, err := DB.Upsert(q, bson.M{
			"$setOnInsert": bson.M{
				"createdat": time.Now(),
			},
			"$currentDate": bson.M{
				"updatedat": true,
			},
			"$addToSet": bson.M{
				"sources": source,
				"contactdetails": bson.M{
					"$each": []models.ContactDetail{
						{
							Label:   "Telefone",
							Type:    "phone",
							Value:   data.Eq(4).Text(),
							Sources: []models.Source{source},
						},
						{
							Label:   "Fax",
							Type:    "fax",
							Value:   data.Eq(5).Text(),
							Sources: []models.Source{source},
						},
					},
				},
				"identifiers": bson.M{
					"$each": []models.Identifier{
						{Identifier: senatorId, Scheme: "CodSenador"},
					},
				},
			},
			"$set": bson.M{
				"name":      name,
				"email":     email,
				"link":      link,
				"shortname": models.MakeUri(name),
			},
		}, models.Parliamentarian{})
		parser.CheckError(err)

		docDetails, e := goquery.NewDocument(link)
		if e != nil {
			parser.Log.Critical(e.Error())
		}
		info := docDetails.Find(".dadosSenador b")
		birthdateA := strings.Split(info.Eq(1).Text(), "/")
		year, _ := strconv.Atoi(birthdateA[2])
		month, _ := strconv.Atoi(birthdateA[1])
		day, _ := strconv.Atoi(birthdateA[0])
		loc, _ := time.LoadLocation("America/Sao_Paulo")
		birthDate := popolo.Date{time.Date(year, time.Month(month), day, 0, 0, 0, 0, loc)}

		_, err = DB.Upsert(q, bson.M{
			"$setOnInsert": bson.M{
				"createdat": time.Now(),
			},
			"$currentDate": bson.M{
				"updatedat": true,
			},
			"$set": bson.M{
				"birthdate": birthDate,
			},
			"$addToSet": bson.M{
				"sources": source,
				"othernames": models.OtherNames{
					Name: info.Eq(0).Text(),
					Note: "Nome de nascimento",
				},
				"contactdetails": models.ContactDetail{
					Label:   "Gabinete",
					Type:    "address",
					Value:   info.Eq(4).Text(),
					Sources: []models.Source{source},
				},
			},
		}, models.Parliamentarian{})

		parser.CreateMembermeship(DB, models.Rel{
			Id:   parliamenrianId,
			Link: parser.LinkTo("parliamenrians", parliamenrianId),
		}, models.Rel{
			Id:   partyId,
			Link: parser.LinkTo("parties", partyId),
		}, source, "Filiado", "Partido")

		parser.CheckError(err)
	})
}
Ejemplo n.º 6
0
func (_ SavePartiesFromTSE) Run(DB database.MongoDB) {
	url := "http://www.tse.jus.br/partidos/partidos-politicos/registrados-no-tse"

	if parser.IsCached(url) {
		parser.Log.Info("SavePartiesFromTSE Cached")
		return
	}
	defer parser.DeferedCache(url)

	source := models.Source{
		Url:  url,
		Note: "Tribunal Superior Eleitoral",
	}

	var doc *goquery.Document
	var e error

	if doc, e = goquery.NewDocument(url); e != nil {
		parser.Log.Critical(e.Error())
	}

	const (
		IDX = iota
		SIGLA_IDX
		NAME_IDX
		DEFERIMENTO_IDX
		PRESIDENT_IDX
		N_IDX
	)

	doc.Find("#textoConteudo table tr").Each(func(i int, s *goquery.Selection) {
		if s.Find(".titulo_tabela").Length() < 6 && s.Find("td").Length() > 1 {
			info := s.Find("td")
			parser.Log.Info("%s - %s - %s - %s - %s - %s",
				info.Eq(IDX).Text(),
				info.Eq(SIGLA_IDX).Text(),
				info.Eq(NAME_IDX).Text(),
				info.Eq(DEFERIMENTO_IDX).Text(),
				info.Eq(PRESIDENT_IDX).Text(),
				info.Eq(N_IDX).Text(),
			)

			partyId := models.MakeUri(info.Eq(SIGLA_IDX).Text())
			DB.Upsert(bson.M{"id": partyId}, bson.M{
				"$setOnInsert": bson.M{
					"createdat": time.Now(),
				},
				"$currentDate": bson.M{
					"updatedat": true,
				},
				"$set": bson.M{
					"id":   partyId,
					"name": parser.Titlelize(info.Eq(NAME_IDX).Text()),
					"othernames": []bson.M{{
						"name": info.Eq(SIGLA_IDX).Text(),
					}},
					"classification": "party",
				},
				"$addToSet": bson.M{
					"sources": []models.Source{source},
				},
			}, &models.Party{})

			urlDetails, b := info.Eq(SIGLA_IDX).Find("a").Attr("href")
			if b {
				docDetails, err := goquery.NewDocument(urlDetails)
				if err != nil {
					parser.Log.Critical(err.Error())
				}

				sourceDetails := models.Source{
					Url:  urlDetails,
					Note: "Tribunal Superior Eleitoral",
				}

				contactdetails := make([]bson.M, 0)

				details := docDetails.Find("#ancora-text-um p")

				address := strings.Split(details.Eq(3).Text(), ":")[1]
				contactdetails = append(contactdetails, bson.M{
					"label":   "Endereço",
					"type":    "address",
					"value":   address,
					"sources": []models.Source{sourceDetails},
				})

				contactdetails = append(contactdetails, bson.M{
					"label":   "CEP",
					"type":    "zipcode",
					"value":   findZipcode(0, details),
					"sources": []models.Source{sourceDetails},
				})

				phoneString := strings.Split(details.Eq(5).Text(), ":")[1]
				phone := strings.Split(phoneString, "/")[0]
				contactdetails = append(contactdetails, bson.M{
					"label":   "Telefone",
					"type":    "phone",
					"value":   phone,
					"sources": []models.Source{sourceDetails},
				})

				faxString := strings.Split(details.Eq(6).Text(), ":")[1]
				fax := strings.Split(faxString, "/")[0]
				contactdetails = append(contactdetails, bson.M{
					"label":   "Fax",
					"type":    "fax",
					"value":   fax,
					"sources": []models.Source{sourceDetails},
				})

				website, ok := details.Eq(7).Find("a").Attr("href")
				if ok {
					contactdetails = append(contactdetails, bson.M{
						"label":   "Site",
						"type":    "website",
						"value":   website,
						"sources": []models.Source{sourceDetails},
					})
				}

				details.Eq(8).Find("a").Each(func(i int, ss *goquery.Selection) {
					email, ok := ss.Attr("href")
					if !ok {
						return
					}
					contactdetails = append(contactdetails, bson.M{
						"label":   "Email",
						"type":    "email",
						"value":   email,
						"sources": []models.Source{sourceDetails},
					})
				})

				data := bson.M{
					"$setOnInsert": bson.M{
						"createdat": time.Now(),
					},
					"$currentDate": bson.M{
						"updatedat": true,
					},
					"$set": bson.M{
						"contactdetails": contactdetails,
					},
				}

				DB.Upsert(bson.M{"id": partyId}, data, models.Party{})
			}
		}
	})
}
Ejemplo n.º 7
0
func getQuotaPage(id, url string, DB database.MongoDB) {
	if parser.IsCached(url) {
		return
	}
	defer parser.DeferedCache(url)

	<-time.After(2 * time.Second)
	doc, err := goquery.NewDocument(url)
	if err != nil {
		parser.Log.Error(err.Error(), url)
		return
	}

	var p models.Parliamentarian
	DB.FindOne(bson.M{
		"id": id,
	}, &p)

	doc.Find(".espacoPadraoInferior2 tr:not(.celulasCentralizadas)").Each(func(i int, s *goquery.Selection) {
		data := s.Find("td")
		cnpj := data.Eq(0).Text()

		if cnpj == "TOTAL" {
			return
		}
		suplier := data.Eq(1).Text()
		orderN := strings.TrimSpace(data.Eq(2).Text())
		companyUri := models.MakeUri(suplier)

		if cnpj == "" {
			cnpj = companyUri
		}

		_, err := DB.Upsert(bson.M{"id": cnpj}, bson.M{
			"$set": bson.M{
				"name": suplier,
				"uri":  companyUri,
			},
		}, models.Company{})
		parser.CheckError(err)

		switch len(data.Nodes) {
		case 4:
			// value :=  data.Eq(3).Text()
			// log.Println("normal:", cnpj, "|", suplier, "|", orderN, value)
			// log.Println("skip")
		case 7:
			sendedAt, _ := time.Parse("2006-01-02", strings.Split(data.Eq(3).Text(), " ")[0])
			value := strings.Replace(data.Eq(6).Text(), "R$", "", -1)
			value = strings.Replace(value, ".", "", -1)
			value = strings.Replace(value, "-", "", -1)
			value = strings.TrimSpace(strings.Replace(value, ",", ".", -1))
			valueF, _ := strconv.ParseFloat(value, 64)

			parser.Log.Debug(orderN)

			orderNS := strings.Split(orderN, ":")
			var ticket string
			if len(orderNS) == 1 {
				ticket = strings.TrimSpace(orderNS[0])
			} else {
				ticket = strings.TrimSpace(orderNS[1])
			}

			_, err = DB.Upsert(bson.M{"order": orderN, "parliamentarian": p.Id}, bson.M{
				"$set": bson.M{
					"company":        cnpj,
					"date":           sendedAt,
					"passenger_name": data.Eq(4).Text(),
					"route":          data.Eq(5).Text(),
					"value":          valueF,
					"ticket":         ticket,
				},
			}, models.Quota{})
			parser.CheckError(err)

		default:
			panic(data.Text())
		}
	})
}
func (p SaveDeputiesFromXML) Run(DB database.MongoDB) {
	xmlURL := "http://www.camara.gov.br/SitCamaraWS/Deputados.asmx/ObterDeputados"

	if parser.IsCached(xmlURL) {
		parser.Log.Info("SaveDeputiesFromXML Cached")
		return
	}
	defer parser.DeferedCache(xmlURL)

	source := models.Source{
		Url:  xmlURL,
		Note: "Câmara API",
	}

	var doc *goquery.Document
	var e error

	if doc, e = goquery.NewDocument(xmlURL); e != nil {
		parser.Log.Critical(e.Error())
	}

	doc.Find("deputado").Each(func(i int, s *goquery.Selection) {
		name := parser.Titlelize(s.Find("nomeparlamentar").First().Text())
		parser.Log.Info("Saving " + name)
		partyId := models.MakeUri(s.Find("partido").First().Text())
		DB.Upsert(bson.M{"id": partyId}, bson.M{
			"$setOnInsert": bson.M{
				"createdat": time.Now(),
			},
			"$currentDate": bson.M{
				"updatedat": true,
			},
			"$set": bson.M{
				"id":             partyId,
				"classification": "party",
			},
		}, &models.Party{})

		parliamenrianId := models.MakeUri(name)
		q := bson.M{
			"id": parliamenrianId,
		}
		fullName := strings.Split(parser.Titlelize(s.Find("nome").First().Text()), " ")

		_, err := DB.Upsert(q, bson.M{
			"$setOnInsert": bson.M{
				"createdat": time.Now(),
			},
			"$currentDate": bson.M{
				"updatedat": true,
			},
			"$set": bson.M{
				"name":     &name,
				"sortname": &name,
				"id":       models.MakeUri(name),
				"gender":   s.Find("sexo").First().Text(),
				"image":    s.Find("urlFoto").First().Text(),
				"email":    s.Find("email").First().Text(),
			},
			"$addToSet": bson.M{
				"sources": source,
				"identifiers": bson.M{
					"$each": []models.Identifier{
						{Identifier: s.Find("idParlamentar").First().Text(), Scheme: "idParlamentar"},
						{Identifier: s.Find("ideCadastro").First().Text(), Scheme: "ideCadastro"},
					},
				},
				"othernames": models.OtherNames{
					Name:       parser.Titlelize(s.Find("nome").First().Text()),
					FamilyName: fullName[len(fullName)-1:][0],
					GivenName:  fullName[0],
					Note:       "Nome de nascimento",
				},
				"contactdetails": bson.M{
					"$each": []models.ContactDetail{
						{
							Label:   "Telefone",
							Type:    "phone",
							Value:   s.Find("fone").First().Text(),
							Sources: []models.Source{source},
						},
						{
							Label:   "Gabinete",
							Type:    "address",
							Value:   s.Find("gabinete").First().Text() + ", Anexo " + s.Find("anexo").First().Text(),
							Sources: []models.Source{source},
						},
					},
				},
			},
		}, &models.Parliamentarian{})

		parser.CreateMembermeship(DB, models.Rel{
			Id:   parliamenrianId,
			Link: parser.LinkTo("parliamenrians", parliamenrianId),
		}, models.Rel{
			Id:   partyId,
			Link: parser.LinkTo("parties", partyId),
		}, source, "Filiado", "Partido")
		parser.CheckError(err)
	})
}