Пример #1
0
// charset_reader returns a reader that yields input converted to UTF-8,
// treating input as being encoded in charset.
//
// The charset name is matched case-sensitively:
//   - "utf-8" returns input itself, with no conversion;
//   - "gbk" and "gb2312" are both converted using iconv's "gbk" decoder;
//   - any other name is passed to iconv unchanged.
//
// When iconv cannot set up the conversion the error is returned together
// with a literal nil reader.
func charset_reader(charset string, input io.Reader) (io.Reader, error) {
	log.Println("charset-reader", charset)

	from := charset
	switch charset {
	case "utf-8":
		return input, nil
	case "gbk", "gb2312":
		from = "gbk"
	}

	rdr, err := iconv.NewReader(input, from, "UTF-8")
	if err != nil {
		// Return an untyped nil rather than rdr: if iconv hands back a nil
		// pointer of a concrete reader type, storing it in io.Reader would
		// produce a non-nil interface value next to the error.
		return nil, err
	}
	return rdr, nil
}
Пример #2
0
// main reads a windows-1252 encoded HTML page from piped standard input and
// prints one line per ".item_row" element in the form
//
//	id;desc;price;cat;link;img
//
// The mode of stdin is always printed first. When stdin is not a pipe the
// program prints "no pipe: " and does nothing else. Any failure to stat
// stdin, set up the charset conversion or parse the document panics.
func main() {
	fi, err := os.Stdin.Stat()
	if err != nil {
		panic(err)
	}

	fmt.Printf("%v\n", fi.Mode())

	if fi.Mode()&os.ModeNamedPipe == 0 {
		fmt.Println("no pipe: ")
		return
	}

	// A failed converter setup must stop here; continuing with a nil reader
	// would only surface later as an unrelated panic inside the parser.
	reader, err := iconv.NewReader(os.Stdin, "windows-1252", "utf-8")
	if err != nil {
		panic(err)
	}

	doc, err := goquery.NewDocumentFromReader(reader)
	if err != nil {
		panic(err)
	}

	// Strips everything except digits from the row's id attribute.
	nonDigits := regexp.MustCompile("[^\\d]")

	doc.Find(".item_row").Each(func(i int, s *goquery.Selection) {
		// Attr's second result only reports whether the attribute exists;
		// a missing attribute is deliberately printed as an empty field.
		strid, _ := s.Attr("id")
		id := nonDigits.ReplaceAllString(strid, "")
		img, _ := s.Find("img").Attr("src")
		desc := s.Find("a.item_link").Text()
		price := s.Find(".list_price").Text()
		link, _ := s.Find(".item_link").Attr("href")
		cat := s.Find(".cat_geo a").Text()

		fmt.Printf("%s;%s;%s;%s;%s;%s\n", id, desc, price, cat, link, img)
	})
}
Пример #3
0
// parseMenu downloads the menu page at url, converts it from iso-8859-1 to
// UTF-8 and returns seven mealDay values, one per consecutive day.
//
// The last cell of the first table row is parsed (via parseMenuEndingDate) as
// the date the menu ends on; the first returned day is that date minus 144
// hours, and each following entry is one calendar day later. Table rows 2-5
// (zero-based) supply breakfast, lunch, dinner and supper, and cells 1-7 of
// each row supply the seven days.
//
// Any failure (network error, non-200 response, charset setup or HTML
// parsing) terminates the program through the log package.
func parseMenu(url string) []mealDay {
	res, err := http.Get(url)
	if err != nil {
		log.Fatal(err)
	}
	defer res.Body.Close()

	// An error page would otherwise be parsed as if it were the menu.
	if res.StatusCode != http.StatusOK {
		log.Fatalf("fetching %s: unexpected status %s", url, res.Status)
	}

	// convert to UTF-8
	utfBody, err := iconv.NewReader(res.Body, "iso-8859-1", "utf-8")
	if err != nil {
		log.Fatal(err)
	}

	doc, err := goquery.NewDocumentFromReader(utfBody)
	if err != nil {
		log.Fatal(err)
	}

	// Every lookup below works on the same set of table rows.
	rows := doc.Find("table tr")

	// parse date
	endDateStr := strings.TrimSpace(rows.First().Find("td").Last().Text())
	date := parseMenuEndingDate(endDateStr)

	// Step back 144h from the end date to reach the first day. A constant
	// duration cannot fail to parse, so there is no error to overlook.
	date = date.Add(-6 * 24 * time.Hour)

	// cells of the row belonging to each meal
	bfCells := rows.Eq(2).Find("td")
	lunchCells := rows.Eq(3).Find("td")
	dinnerCells := rows.Eq(4).Find("td")
	supperCells := rows.Eq(5).Find("td")

	// cellText returns the cleaned-up text of the cell at index col.
	cellText := func(cells *goquery.Selection, col int) string {
		return removeEmptyLines(strings.TrimSpace(cells.Eq(col).Text()))
	}

	mealdays := make([]mealDay, 7)
	for i := range mealdays {
		// Cell 0 is skipped; days occupy cells 1 through 7.
		col := i + 1
		mealdays[i] = mealDay{date.Unix(),
			cellText(bfCells, col),
			cellText(lunchCells, col),
			cellText(dinnerCells, col),
			cellText(supperCells, col)}
		date = date.AddDate(0, 0, 1)
	}

	return mealdays
}
Пример #4
0
// fruitInitScrape downloads the HTML document at uri, converts it from
// windows-1252 to UTF-8 and scrapes every "ul.productLister li" entry.
//
// For each entry it reads the product title and unit-price text, builds a
// partially filled item with createFruitItem and sends it on fruitInQueue.
// When the entry also carries a cross-selling product (non-empty title under
// ".crossSellInner"), a second item is built and sent the same way.
//
// fruitInQueue is closed when the function returns. Errors are handed to
// checkErr.
func fruitInitScrape(client *http.Client, uri string, fruitInQueue chan *FruitItem) {
	// Counter threaded through createFruitItem, which returns its new value.
	var iter int

	// Closing the In channel as it is not needed afterwards
	defer close(fruitInQueue)

	// Load the URI
	res, err := client.Get(uri)
	checkErr(err)

	defer res.Body.Close()

	// Convert the "windows-1252" charset of the downloaded HTML to
	// utf-8 encoded HTML.
	utfBody, err := iconv.NewReader(res.Body, "windows-1252", "utf-8")
	checkErr(err)

	// use utfBody using goquery
	doc, err := goquery.NewDocumentFromReader(utfBody)
	checkErr(err)

	// fmt.Print with explicit newlines emits the same bytes as the previous
	// Println call without tripping vet's redundant-newline check.
	fmt.Print("about to find stuff\n\n")

	// Find required info within the document
	doc.Find("ul.productLister li").Each(func(i int, s *goquery.Selection) {
		var fruitItem *FruitItem

		product := s.Find(".productInner h3 a")
		title := strings.TrimSpace(product.Text())

		priceStr := strings.TrimSpace(s.Find(".productInner p.pricePerUnit").Text())

		addProduct := s.Find(".crossSellInner h4.crossSellName a")
		addTitle := strings.TrimSpace(addProduct.Text())
		addPriceStr := strings.TrimSpace(s.Find(".crossSellInner p.pricePerUnit").Text())

		// Creating fruit item with partial values
		fruitItem, iter = createFruitItem(title, priceStr, product, iter)

		// Putting partially formed fruitItem on to fruitInQueue channel
		fruitInQueue <- fruitItem

		// These additional fruit items are the cross selling product items
		if len(addTitle) > 0 {
			// Creating fruit item with partial values
			fruitItem, iter = createFruitItem(addTitle, addPriceStr, addProduct, iter)

			// Putting partially formed fruitItem on to fruitInQueue channel
			fruitInQueue <- fruitItem
		}
	})

	fmt.Print("\nfinished finding stuff ... closing channel\n\n")
}
Пример #5
0
// main reads a windows-1252 encoded HTML page from piped standard input and
// prints a single JSON object (no trailing newline) of the form
//
//	{"header":"...","desc":"..."}
//
// header is the trimmed text of ".subject_large"; desc is the text of
// ".view_container .body" with newlines replaced by spaces and trimmed.
// When stdin is not a pipe the program prints "no pipe: " and does nothing
// else. Any failure to stat stdin, set up the charset conversion or parse
// the document panics.
func main() {
	fi, err := os.Stdin.Stat()
	if err != nil {
		panic(err)
	}

	if fi.Mode()&os.ModeNamedPipe == 0 {
		fmt.Println("no pipe: ")
		return
	}

	// A failed converter setup must stop here; continuing with a nil reader
	// would only surface later as an unrelated panic inside the parser.
	reader, err := iconv.NewReader(os.Stdin, "windows-1252", "utf-8")
	if err != nil {
		panic(err)
	}

	doc, err := goquery.NewDocumentFromReader(reader)
	if err != nil {
		panic(err)
	}

	re := regexp.MustCompile("[^\\d]")

	header := strings.TrimSpace(doc.Find(".subject_large").Text())

	// NOTE(review): the digits-only price is computed but never written to
	// the output; it is kept as-is so the emitted fields stay unchanged.
	// Confirm whether a "price" field was intended.
	price := doc.Find("#vi_price").Text()
	price = re.ReplaceAllString(price, "")

	description := doc.Find(".view_container .body").Text()
	description = strings.TrimSpace(description)
	description = strings.Replace(description, "\n", " ", -1)
	description = strings.TrimSpace(description)

	out := bytes.NewBufferString("")
	out.WriteString("{")
	out.WriteString("\"header\":\"" + jsonEscape(header) + "\"")
	out.WriteString(",\"desc\":\"" + jsonEscape(description) + "\"")
	out.WriteString("}")

	// Print, not Printf: the payload is scraped data and must never be
	// interpreted as a format string.
	fmt.Print(out.String())
}

// jsonEscape returns s with the characters that may not appear verbatim in a
// JSON string literal (double quote, backslash and control characters below
// U+0020) replaced by their escape sequences. All other text is unchanged.
func jsonEscape(s string) string {
	var buf bytes.Buffer
	for _, r := range s {
		switch {
		case r == '"' || r == '\\':
			buf.WriteByte('\\')
			buf.WriteRune(r)
		case r == '\n':
			buf.WriteString(`\n`)
		case r == '\r':
			buf.WriteString(`\r`)
		case r == '\t':
			buf.WriteString(`\t`)
		case r < 0x20:
			fmt.Fprintf(&buf, `\u%04x`, r)
		default:
			buf.WriteRune(r)
		}
	}
	return buf.String()
}
Пример #6
0
// big5ToUTF8 converts the Big5-encoded file at path to UTF-8 and writes the
// result to outpath, creating or truncating that file.
//
// A progress line is printed before the conversion starts. Every failure
// (opening the input, setting up the converter, creating the output,
// copying the data, or closing the output) panics.
func big5ToUTF8(path, outpath string) {
	fmt.Println("Converting " + path + " from Big5 to UTF-8 ...")

	f, err := os.Open(path)
	if err != nil {
		panic(err)
	}
	defer f.Close()

	reader, err := iconv.NewReader(f, "big5", "utf-8")
	if err != nil {
		panic(err)
	}

	fo, err := os.Create(outpath)
	if err != nil {
		panic(err)
	}

	// A conversion or write failure must not be mistaken for success.
	if _, err := io.Copy(fo, reader); err != nil {
		fo.Close()
		panic(err)
	}

	// Close explicitly instead of deferring: for a file that was written to,
	// Close can be the call that reports a failed write.
	if err := fo.Close(); err != nil {
		panic(err)
	}
}
Пример #7
0
// ImportCSV reads the tab-separated, iso-8859-1 encoded statement file at
// filePath and returns one core.Movimento per data row.
//
// The first row is treated as a header and skipped. Each data row must have
// at least seven columns, read as:
//
//	0  movement date       e.g. 2014-09-04   (header "DATA MOVIMENTO")
//	1  operation date      e.g. 2014-09-04   (header "DATA OPERAÇÃO")
//	2  description         e.g. SP-80054855619 Lisboagas
//	3  amount              e.g. -52,91       (header "IMPORTÂNCIA")
//	4  amount currency     e.g. EUR
//	5  accounting balance  e.g. 3.554,38     (header "SALDO CONTABILÍSTICO")
//	6  balance currency    e.g. EUR
//
// The returned slice is never nil. On error it holds the rows converted
// before the failure; a failure while reading the file itself (including a
// malformed CSV row) is reported before any row is converted.
func ImportCSV(filePath string) ([]core.Movimento, error) {
	const (
		colDataMovim = iota
		colDataOper
		colDescricao
		colImportancia
		colMoedaI
		colSaldoContab
		colMoedaSC
		numCols
	)

	movimentos := make([]core.Movimento, 0)

	// open file
	f, err := os.Open(filePath)
	if err != nil {
		return movimentos, err
	}
	defer f.Close()

	// convert encoding from iso-8859-1 to utf-8
	r0, err := iconv.NewReader(f, "iso-8859-1", "utf-8")
	if err != nil {
		return movimentos, err
	}

	// configure CSV reader
	r := csv.NewReader(r0)
	r.Comma = '\t'

	// reads first header row
	if _, err = r.Read(); err != nil {
		return movimentos, err
	}

	// ReadAll reports a clean end of input as a nil error, so any error seen
	// here is a real read or parse failure rather than end of file.
	records, err := r.ReadAll()
	if err != nil {
		return movimentos, err
	}

	for _, record := range records {
		// Guard the fixed column indexes used below against short rows.
		if len(record) < numCols {
			return movimentos, csv.ErrFieldCount
		}

		mov := core.Movimento{
			Descricao: trim(record[colDescricao]),
			MoedaI:    trim(record[colMoedaI]),
			MoedaSC:   trim(record[colMoedaSC]),
		}

		mov.DataMovim, err = core.YMD2Time(record[colDataMovim])
		if err != nil {
			return movimentos, err
		}

		mov.DataOper, err = core.YMD2Time(record[colDataOper])
		if err != nil {
			return movimentos, err
		}

		mov.Importancia, err = core.Money2Int64(record[colImportancia])
		if err != nil {
			return movimentos, err
		}

		mov.SaldoContab, err = core.Money2Int64(record[colSaldoContab])
		if err != nil {
			return movimentos, err
		}

		movimentos = append(movimentos, mov)
	}

	return movimentos, nil
}
Пример #8
0
// fruitInitScrape downloads the HTML document at uri, converts it from
// windows-1252 to UTF-8 and scrapes every "ul.productLister li" entry.
//
// For each entry it builds a partially filled FruitItem (title, unit price
// and details URI; Size "0kb", empty Description) and sends it on
// fruitInQueue. When the entry also carries a cross-selling product
// (non-empty title under ".crossSellInner"), a second item is built and sent
// the same way. A progress line is printed before each send.
//
// fruitInQueue is closed when the function returns. Errors are handed to
// checkErr.
func fruitInitScrape(client *http.Client, uri string, fruitInQueue chan *FruitItem) {
	// Number of items sent so far; also the length of the progress bar.
	var iter int

	// Close the channel on every exit path, not only after a successful
	// scrape, so that receivers are not left waiting on it.
	defer close(fruitInQueue)

	// Load the URI
	res, err := client.Get(uri)
	checkErr(err)

	defer res.Body.Close()

	// Convert the "windows-1252" charset of the downloaded HTML to
	// utf-8 encoded HTML.
	utfBody, err := iconv.NewReader(res.Body, "windows-1252", "utf-8")
	checkErr(err)

	// use utfBody using goquery
	doc, err := goquery.NewDocumentFromReader(utfBody)
	checkErr(err)

	// enqueue prints the progress line for the next item and then puts the
	// partially formed item on the channel.
	enqueue := func(item *FruitItem) {
		iter++
		fmt.Printf("Found stuff%s>\n\n", strings.Repeat("=", iter))
		fruitInQueue <- item
	}

	// fmt.Print with explicit newlines emits the same bytes as the previous
	// Println call without tripping vet's redundant-newline check.
	fmt.Print("about to find stuff\n\n")

	// Find required info within the document
	doc.Find("ul.productLister li").Each(func(i int, s *goquery.Selection) {
		product := s.Find(".productInner h3 a")
		title := strings.TrimSpace(product.Text())
		prodUri := getUri(product)

		priceStr := strings.TrimSpace(s.Find(".productInner p.pricePerUnit").Text())

		addProduct := s.Find(".crossSellInner h4.crossSellName a")
		addTitle := strings.TrimSpace(addProduct.Text())
		addPriceStr := strings.TrimSpace(s.Find(".crossSellInner p.pricePerUnit").Text())

		// Parse the unit price text.
		var price Number = extractFloat32(priceStr)

		enqueue(&FruitItem{Title: title, UnitPrice: price, Size: "0kb", Description: "", DetailsUri: prodUri})

		// These additional fruit items are the cross selling product items
		if len(addTitle) > 0 {
			var addPrice Number = extractFloat32(addPriceStr)
			addProdUri := getUri(addProduct)

			enqueue(&FruitItem{Title: addTitle, UnitPrice: addPrice, Size: "0kb", Description: "", DetailsUri: addProdUri})
		}
	})

	fmt.Print("\nfinished finding stuff ... closing channel\n\n")
}
Пример #9
0
// main is a self-check for the iconv bindings. It loads "sample.utf8" and
// "sample.ebcdic-us" from the working directory and verifies that converting
// either one yields the other, using in turn iconv.ConvertString,
// iconv.Convert and iconv.NewReader. One line is printed per check saying
// whether it passed; some failures also dump hex for comparison.
//
// A sample that cannot be read is reported and treated as empty, so the
// checks that depend on it are then expected to report failure.
func main() {
	// read bytes from sample.utf8
	utf8Bytes, err := ioutil.ReadFile("sample.utf8")

	if err != nil {
		fmt.Println("Could not open 'sample.utf8': ", err)
	}

	// read bytes from sample.ebcdic-us
	ebcdicBytes, err := ioutil.ReadFile("sample.ebcdic-us")

	if err != nil {
		fmt.Println("Could not open 'sample.ebcdic-us': ", err)
	}

	// use iconv to check conversions both ways
	utf8String := string(utf8Bytes)
	ebcdicString := string(ebcdicBytes)

	// convert from utf-8 to ebcdic
	utf8ConvertedString, err := iconv.ConvertString(utf8String, "utf-8", "ebcdic-us")

	if err != nil || ebcdicString != utf8ConvertedString {
		// generate hex string
		ebcdicHexString := hex.EncodeToString(ebcdicBytes)
		utf8ConvertedHexString := hex.EncodeToString([]byte(utf8ConvertedString))

		fmt.Println("utf-8 was not properly converted to ebcdic-us by iconv.ConvertString, error: ", err)
		fmt.Println(ebcdicHexString, " - ", len(ebcdicString))
		fmt.Println(utf8ConvertedHexString, " - ", len(utf8ConvertedString))
	} else {
		fmt.Println("utf-8 was properly converted to ebcdic-us by iconv.ConvertString")
	}

	// convert from ebcdic to utf-8
	ebcdicConvertedString, err := iconv.ConvertString(ebcdicString, "ebcdic-us", "utf-8")

	if err != nil || utf8String != ebcdicConvertedString {
		// generate hex string
		utf8HexString := hex.EncodeToString(utf8Bytes)
		ebcdicConvertedHexString := hex.EncodeToString([]byte(ebcdicConvertedString))

		fmt.Println("ebcdic-us was not properly converted to utf-8 by iconv.ConvertString, error: ", err)
		fmt.Println(utf8HexString, " - ", len(utf8String))
		fmt.Println(ebcdicConvertedHexString, " - ", len(ebcdicConvertedString))
	} else {
		fmt.Println("ebcdic-us was properly converted to utf-8 by iconv.ConvertString")
	}

	// Shared output buffer for the byte-level and reader checks below.
	testBuffer := make([]byte, len(ebcdicBytes)*2)

	// convert from ebcdic bytes to utf-8 bytes
	bytesRead, bytesWritten, err := iconv.Convert(ebcdicBytes, testBuffer, "ebcdic-us", "utf-8")

	if err != nil || bytesRead != len(ebcdicBytes) || bytesWritten != len(utf8Bytes) {
		fmt.Println("ebcdic-us was not properly converted to utf-8 by iconv.Convert, error: ", err)
	} else {
		fmt.Println("ebcdic-us was properly converted to utf-8 by iconv.Convert")
	}

	// convert from utf-8 bytes to ebcdic bytes
	bytesRead, bytesWritten, err = iconv.Convert(utf8Bytes, testBuffer, "utf-8", "ebcdic-us")

	if err != nil || bytesRead != len(utf8Bytes) || bytesWritten != len(ebcdicBytes) {
		fmt.Println("utf-8 was not properly converted to ebcdic-us by iconv.Convert, error: ", err)
	} else {
		fmt.Println("utf-8 was properly converted to ebcdic-us by iconv.Convert")
	}

	// test iconv.Reader: utf-8 file read back as ebcdic-us
	bytesRead, err = readConverted("sample.utf8", "utf-8", "ebcdic-us", testBuffer)

	if err != nil || bytesRead != len(ebcdicBytes) {
		fmt.Println("utf8 was not properly converted to ebcdic-us by iconv.Reader", err)
	} else {
		fmt.Println("utf8 was properly converted to ebcdic-us by iconv.Reader")
	}

	// test iconv.Reader: ebcdic-us file read back as utf-8
	bytesRead, err = readConverted("sample.ebcdic-us", "ebcdic-us", "utf-8", testBuffer)

	if err != nil || bytesRead != len(utf8Bytes) {
		fmt.Println("ebcdic-us was not properly converted to utf-8 by iconv.Reader: ", err)

		if bytesRead > 0 {
			fmt.Println(string(testBuffer[:bytesRead]))
			fmt.Println(hex.EncodeToString(testBuffer[:bytesRead]))
			fmt.Println(hex.EncodeToString(utf8Bytes))
		}
	} else {
		fmt.Println("ebcdic-us was properly converted to utf-8 by iconv.Reader")
	}
}

// readConverted opens the file at path, wraps it in an iconv reader that
// converts from the from encoding to the to encoding, and performs a single
// Read into buf. It returns the result of that Read, or 0 and the error if
// the file cannot be opened or the converter cannot be created. The file is
// closed before returning.
func readConverted(path, from, to string, buf []byte) (int, error) {
	f, err := os.Open(path)
	if err != nil {
		return 0, err
	}
	defer f.Close()

	r, err := iconv.NewReader(f, from, to)
	if err != nil {
		return 0, err
	}

	return r.Read(buf)
}