func charset_reader(charset string, input io.Reader) (io.Reader, error) { log.Println("charset-reader", charset) switch charset { default: // any other encoding should be ignored rdr, err := iconv.NewReader(input, charset, "UTF-8") return rdr, err case "gbk", "gb2312": rdr, err := iconv.NewReader(input, "gbk", "UTF-8") return rdr, err case "utf-8": return input, nil } }
func main() { fi, err := os.Stdin.Stat() if err != nil { panic(err) } fmt.Printf("%v\n", fi.Mode()) if fi.Mode()&os.ModeNamedPipe == 0 { fmt.Println("no pipe: ") } else { reader, _ := iconv.NewReader(os.Stdin, "windows-1252", "utf-8") doc, err := goquery.NewDocumentFromReader(reader) if err != nil { panic(err) } re := regexp.MustCompile("[^\\d]") doc.Find(".item_row").Each(func(i int, s *goquery.Selection) { strid, _ := s.Attr("id") id := re.ReplaceAllString(strid, "") img, _ := s.Find("img").Attr("src") desc := s.Find("a.item_link").Text() price := s.Find(".list_price").Text() link, _ := s.Find(".item_link").Attr("href") cat := s.Find(".cat_geo a").Text() fmt.Printf("%s;%s;%s;%s;%s;%s\n", id, desc, price, cat, link, img) }) } }
func parseMenu(url string) []mealDay { res, err := http.Get(url) if err != nil { log.Fatal(err) } defer res.Body.Close() // convert to UTF-8 utfBody, err := iconv.NewReader(res.Body, "iso-8859-1", "utf-8") if err != nil { log.Fatal(err) } // parse date doc, err := goquery.NewDocumentFromReader(utfBody) if err != nil { log.Fatal(err) } sel := doc.Find("table tr").First().Find("td").Last() endDateStr := strings.TrimSpace(sel.Text()) date := parseMenuEndingDate(endDateStr) weekDur, err := time.ParseDuration("-144h") date = date.Add(weekDur) // make slices for breakfast, lunch, dinner, and supper var bf, lunch, dinner, supper []string bf = make([]string, 7) lunch = make([]string, 7) dinner = make([]string, 7) supper = make([]string, 7) // parse rows for each meal bfSel := doc.Find("table tr").Eq(2).Find("td") lunchSel := doc.Find("table tr").Eq(3).Find("td") dinnerSel := doc.Find("table tr").Eq(4).Find("td") supperSel := doc.Find("table tr").Eq(5).Find("td") for i := 0; i < 7; i++ { bf[i] = removeEmptyLines(strings.TrimSpace(bfSel.Eq(i + 1).Text())) lunch[i] = removeEmptyLines(strings.TrimSpace(lunchSel.Eq(i + 1).Text())) dinner[i] = removeEmptyLines(strings.TrimSpace(dinnerSel.Eq(i + 1).Text())) supper[i] = removeEmptyLines(strings.TrimSpace(supperSel.Eq(i + 1).Text())) } mealdays := make([]mealDay, 7) for i := 0; i < 7; i++ { mealdays[i] = mealDay{date.Unix(), bf[i], lunch[i], dinner[i], supper[i]} date = date.AddDate(0, 0, 1) } return mealdays }
/** * This function scrapes the fruit item title and unit price from the downloaded * HTML document. First, it downloads the HTML doc from the given URI parameter. * Then, it scrapes the products' title, unit price and details URI info from the * downloaded HTML doc. Later on, it creates fruit items with these partial values, * and then it puts these fruit item objects into fruitInQueue channel. */ func fruitInitScrape(client *http.Client, uri string, fruitInQueue chan *FruitItem) { var iter int var fruitItem *FruitItem // Closing the In channel as it is not needed afterwards defer close(fruitInQueue) // Load the URI res, err := client.Get(uri) checkErr(err) defer res.Body.Close() // Convert the "windows-1252" charset of the downloaded HTML to // utf-8 encoded HTML. utfBody, err := iconv.NewReader(res.Body, "windows-1252", "utf-8") checkErr(err) // use utfBody using goquery doc, err := goquery.NewDocumentFromReader(utfBody) checkErr(err) fmt.Println("about to find stuff\n") // Find required info within the document doc.Find("ul.productLister li").Each(func(i int, s *goquery.Selection) { product := s.Find(".productInner h3 a") title := strings.TrimSpace(product.Text()) priceStr := strings.TrimSpace(s.Find(".productInner p.pricePerUnit").Text()) addProduct := s.Find(".crossSellInner h4.crossSellName a") addTitle := strings.TrimSpace(addProduct.Text()) addPriceStr := strings.TrimSpace(s.Find(".crossSellInner p.pricePerUnit").Text()) // Creating fruit item with partial values fruitItem, iter = createFruitItem(title, priceStr, product, iter) // Putting partially formed fruitItem on to fruitInQueue channel fruitInQueue <- fruitItem // These additional fruit items are the cross selling product items if len(addTitle) > 0 { // Creating fruit item with partial values fruitItem, iter = createFruitItem(addTitle, addPriceStr, addProduct, iter) // Putting partially formed fruitItem on to fruitInQueue channel fruitInQueue <- fruitItem } }) fmt.Println("\nfinished finding stuff ... closing channel\n") }
func main() { fi, err := os.Stdin.Stat() if err != nil { panic(err) } if fi.Mode()&os.ModeNamedPipe == 0 { fmt.Println("no pipe: ") } else { reader, _ := iconv.NewReader(os.Stdin, "windows-1252", "utf-8") doc, err := goquery.NewDocumentFromReader(reader) if err != nil { panic(err) } re := regexp.MustCompile("[^\\d]") json := bytes.NewBufferString("") json.WriteString("{") header := doc.Find(".subject_large").Text() json.WriteString("\"header\":\"" + strings.TrimSpace(header) + "\"") price := doc.Find("#vi_price").Text() price = re.ReplaceAllString(price, "") description := doc.Find(".view_container .body").Text() description = strings.TrimSpace(description) description = strings.Replace(description, "\n", " ", -1) json.WriteString(",\"desc\":\"" + strings.TrimSpace(description) + "\"") json.WriteString("}") fmt.Printf(json.String()) } }
func big5ToUTF8(path, outpath string) { fmt.Println("Converting " + path + " from Big5 to UTF-8 ...") f, err := os.Open(path) if err != nil { panic(err) } defer f.Close() reader, err := iconv.NewReader(f, "big5", "utf-8") if err != nil { panic(err) } fo, err := os.Create(outpath) if err != nil { panic(err) } defer fo.Close() io.Copy(fo, reader) }
func ImportCSV(filePath string) ([]core.Movimento, error) { movimentos := make([]core.Movimento, 0) // open file f, err := os.Open(filePath) if err != nil { return movimentos, err } // convert encoding from iso-8859-1 to utf-8 r0, err := iconv.NewReader(f, "iso-8859-1", "utf-8") if err != nil { return movimentos, err } // configure CSV reader r := csv.NewReader(r0) r.Comma = '\t' // reads first header row _, err = r.Read() if err != nil { return movimentos, err } /* DATA MOVIMENTO DATA OPERAÇÃO DESCRIÇÃO IMPORTÂNCIA MOEDA SALDO CONTABILÍSTICO MOEDA */ for { record, err := r.Read() if record == nil { break } if err != nil { return movimentos, err } /* 2014-09-04 2014-09-04 SP-80054855619 Lisboagas -52,91 EUR 3.554,38 EUR */ mov := core.Movimento{ Descricao: trim(record[2]), MoedaI: trim(record[4]), MoedaSC: trim(record[6]), } mov.DataMovim, err = core.YMD2Time(record[0]) if err != nil { return movimentos, err } mov.DataOper, err = core.YMD2Time(record[1]) if err != nil { return movimentos, err } mov.Importancia, err = core.Money2Int64(record[3]) if err != nil { return movimentos, err } mov.SaldoContab, err = core.Money2Int64(record[5]) if err != nil { return movimentos, err } movimentos = append(movimentos, mov) } return movimentos, nil }
/** * This function scrapes the fruit item title and unit price from the downloaded * HTML document. First, it downloads the HTML doc from the given URI parameter. * Then, it scrapes the products' title, unit price and details URI info from the * downloaded HTML doc. Later on, it creates fruit items with these partial values, * and then it puts these fruit item objects into fruitInQueue channel. */ func fruitInitScrape(client *http.Client, uri string, fruitInQueue chan *FruitItem) { var price Number var addPrice Number var iter int // Load the URI res, err := client.Get(uri) checkErr(err) defer res.Body.Close() // Convert the "windows-1252" charset of the downloaded HTML to // utf-8 encoded HTML. utfBody, err := iconv.NewReader(res.Body, "windows-1252", "utf-8") checkErr(err) // use utfBody using goquery doc, err := goquery.NewDocumentFromReader(utfBody) checkErr(err) fmt.Println("about to find stuff\n") // Find required info within the document doc.Find("ul.productLister li").Each(func(i int, s *goquery.Selection) { product := s.Find(".productInner h3 a") title := strings.TrimSpace(product.Text()) prodUri := getUri(product) priceStr := strings.TrimSpace(s.Find(".productInner p.pricePerUnit").Text()) addProduct := s.Find(".crossSellInner h4.crossSellName a") addTitle := strings.TrimSpace(addProduct.Text()) addPriceStr := strings.TrimSpace(s.Find(".crossSellInner p.pricePerUnit").Text()) // Parsing float32 value price = extractFloat32(priceStr) iter++ // Creating fruit item with partial values fruitItem := &FruitItem{Title: title, UnitPrice: price, Size: "0kb", Description: "", DetailsUri: prodUri} // Pretty-printing the progress of in channel processing fmt.Print("Found stuff") for i := 0; i < iter; i++ { fmt.Print("=") } fmt.Println(">\n") // Putting partially formed fruitItem on to fruitInQueue channel fruitInQueue <- fruitItem // These additional fruit items are the cross selling product items if len(addTitle) > 0 { addPrice = extractFloat32(addPriceStr) addProdUri := getUri(addProduct) iter++ // Creating fruit item with partial values fruitItem := &FruitItem{Title: addTitle, UnitPrice: addPrice, Size: "0kb", Description: "", DetailsUri: addProdUri} // Pretty-printing the progress of in channel processing fmt.Print("Found stuff") for i := 0; i < iter; i++ { fmt.Print("=") } fmt.Println(">\n") // Putting partially formed fruitItem on to fruitInQueue channel fruitInQueue <- fruitItem } }) fmt.Println("\nfinished finding stuff ... closing channel\n") // Closing the In channel as it is not needed close(fruitInQueue) }
func main() { // read bytes from sample.utf8 utf8Bytes, err := ioutil.ReadFile("sample.utf8") if err != nil { fmt.Println("Could not open 'sample.utf8': ", err) } // read bytes from sample.ebcdic-us ebcdicBytes, err := ioutil.ReadFile("sample.ebcdic-us") if err != nil { fmt.Println("Could not open 'sample.ebcdic-us': ", err) } // use iconv to check conversions both ways utf8String := string(utf8Bytes) ebcdicString := string(ebcdicBytes) // convert from utf-8 to ebcdic utf8ConvertedString, err := iconv.ConvertString(utf8String, "utf-8", "ebcdic-us") if err != nil || ebcdicString != utf8ConvertedString { // generate hex string ebcdicHexString := hex.EncodeToString(ebcdicBytes) utf8ConvertedHexString := hex.EncodeToString([]byte(utf8ConvertedString)) fmt.Println("utf-8 was not properly converted to ebcdic-us by iconv.ConvertString, error: ", err) fmt.Println(ebcdicHexString, " - ", len(ebcdicString)) fmt.Println(utf8ConvertedHexString, " - ", len(utf8ConvertedString)) } else { fmt.Println("utf-8 was properly converted to ebcdic-us by iconv.ConvertString") } // convert from ebcdic to utf-8 ebcdicConvertedString, err := iconv.ConvertString(ebcdicString, "ebcdic-us", "utf-8") if err != nil || utf8String != ebcdicConvertedString { // generate hex string utf8HexString := hex.EncodeToString(utf8Bytes) ebcdicConvertedHexString := hex.EncodeToString([]byte(ebcdicConvertedString)) fmt.Println("ebcdic-us was not properly converted to utf-8 by iconv.ConvertString, error: ", err) fmt.Println(utf8HexString, " - ", len(utf8String)) fmt.Println(ebcdicConvertedHexString, " - ", len(ebcdicConvertedString)) } else { fmt.Println("ebcdic-us was properly converted to utf-8 by iconv.ConvertString") } testBuffer := make([]byte, len(ebcdicBytes)*2) // convert from ebdic bytes to utf-8 bytes bytesRead, bytesWritten, err := iconv.Convert(ebcdicBytes, testBuffer, "ebcdic-us", "utf-8") if err != nil || bytesRead != len(ebcdicBytes) || bytesWritten != len(utf8Bytes) { fmt.Println("ebcdic-us was not properly converted to utf-8 by iconv.Convert, error: ", err) } else { fmt.Println("ebcdic-us was properly converted to utf-8 by iconv.Convert") } // convert from utf-8 bytes to ebcdic bytes bytesRead, bytesWritten, err = iconv.Convert(utf8Bytes, testBuffer, "utf-8", "ebcdic-us") if err != nil || bytesRead != len(utf8Bytes) || bytesWritten != len(ebcdicBytes) { fmt.Println("utf-8 was not properly converted to ebcdic-us by iconv.Convert, error: ", err) } else { fmt.Println("utf-8 was properly converted to ebcdic-us by iconv.Convert") } // test iconv.Reader utf8File, _ := os.Open("sample.utf8") utf8Reader, _ := iconv.NewReader(utf8File, "utf-8", "ebcdic-us") bytesRead, err = utf8Reader.Read(testBuffer) if err != nil || bytesRead != len(ebcdicBytes) { fmt.Println("utf8 was not properly converted to ebcdic-us by iconv.Reader", err) } else { fmt.Println("utf8 was property converted to ebcdic-us by iconv.Reader") } ebcdicFile, _ := os.Open("sample.ebcdic-us") ebcdicReader, _ := iconv.NewReader(ebcdicFile, "ebcdic-us", "utf-8") bytesRead, err = ebcdicReader.Read(testBuffer) if err != nil || bytesRead != len(utf8Bytes) { fmt.Println("ebcdic-us was not properly converted to utf-8 by iconv.Reader: ", err) if bytesRead > 0 { fmt.Println(string(testBuffer[:bytesRead])) fmt.Println(hex.EncodeToString(testBuffer[:bytesRead])) fmt.Println(hex.EncodeToString(utf8Bytes)) } } else { fmt.Println("ebcdic-us was properly converted to utf-8 by iconv.Reader") } }