// Convert unicode text to ASCII text // using specific codepage mapping. func convertUnicodeToAscii(text string, codepage encoding.Encoding) []byte { b := []byte(text) // fmt.Printf("Text length: %d\n", len(b)) var buf bytes.Buffer if codepage == nil { codepage = charmap.Windows1252 } w := transform.NewWriter(&buf, codepage.NewEncoder()) defer w.Close() w.Write(b) // fmt.Printf("Buffer length: %d\n", len(buf.Bytes())) return buf.Bytes() }
//「Golangで文字コード判定」qiita.com/nobuhito/items/ff782f64e32f7ed95e43 func transEnc(text string, encode string) (string, error) { body := []byte(text) var f []byte encodings := []string{"sjis", "utf-8"} if encode != "" { encodings = append([]string{encode}, encodings...) } for _, enc := range encodings { if enc != "" { ee, _ := charset.Lookup(enc) if ee == nil { continue } var buf bytes.Buffer ic := transform.NewWriter(&buf, ee.NewDecoder()) _, err := ic.Write(body) if err != nil { continue } err = ic.Close() if err != nil { continue } f = buf.Bytes() break } } return string(f), nil }
// TestReaderEncodings round-trips each fixture through its codec:
// every line is encoded into a buffer (recording the cumulative byte
// offset after each), then read back with newLineReader, verifying both
// the decoded text and the per-line byte offsets.
func TestReaderEncodings(t *testing.T) {
	for _, test := range tests {
		t.Logf("test codec: %v", test.encoding)
		codec, ok := findEncoding(test.encoding)
		if !ok {
			t.Errorf("can not find encoding '%v'", test.encoding)
			continue
		}
		buffer := bytes.NewBuffer(nil)
		// write lines with encoding to buffer
		writer := transform.NewWriter(buffer, codec.NewEncoder())
		var expectedCount []int
		for _, line := range test.strings {
			writer.Write([]byte(line))
			writer.Write([]byte{'\n'})
			// expected offset after this line = total bytes written so far
			expectedCount = append(expectedCount, buffer.Len())
		}
		// create line reader
		reader, err := newLineReader(buffer, codec, 1024)
		if err != nil {
			t.Errorf("failed to initialize reader: %v", err)
			continue
		}
		// read decoded lines back from the buffer
		var readLines []string
		var byteCounts []int
		current := 0
		for {
			bytes, sz, err := reader.next()
			if sz > 0 {
				// strip the trailing '\n' that was appended above
				readLines = append(readLines, string(bytes[:len(bytes)-1]))
			}
			if err != nil {
				break
			}
			current += sz
			byteCounts = append(byteCounts, current)
		}
		// validate lines and byte offsets
		if len(test.strings) != len(readLines) {
			t.Errorf("number of lines mismatch (expected=%v actual=%v)", len(test.strings), len(readLines))
			continue
		}
		for i := range test.strings {
			expected := test.strings[i]
			actual := readLines[i]
			assert.Equal(t, expected, actual)
			assert.Equal(t, expectedCount[i], byteCounts[i])
		}
	}
}
func getWriter(writer io.Writer) io.Writer { encoding := getEncoding() if encoding == nil { return writer } return transform.NewWriter(writer, encoding.NewEncoder()) }
// Shift-JIS -> UTF-8
//
// to_utf8 converts str to UTF-8 via an encode/decode round trip.
// NOTE(review): the first stage uses japanese.ShiftJIS.NewEncoder(),
// i.e. it ENCODES str (which must then already be UTF-8) into Shift-JIS
// bytes; the loop below decodes those bytes back to UTF-8. If str is
// expected to arrive as Shift-JIS, NewDecoder() was likely intended
// here — confirm against the callers before changing.
func to_utf8(str string) (string, error) {
	body, err := ioutil.ReadAll(transform.NewReader(strings.NewReader(str), japanese.ShiftJIS.NewEncoder()))
	if err != nil {
		return "", err
	}
	var f []byte
	// candidate source encodings, tried in order; first success wins
	encodings := []string{"sjis", "utf-8"}
	for _, enc := range encodings {
		if enc != "" {
			ee, _ := charset.Lookup(enc)
			if ee == nil {
				continue
			}
			var buf bytes.Buffer
			ic := transform.NewWriter(&buf, ee.NewDecoder())
			_, err := ic.Write(body)
			if err != nil {
				continue
			}
			// Close flushes the decoder and validates trailing bytes
			err = ic.Close()
			if err != nil {
				continue
			}
			f = buf.Bytes()
			break
		}
	}
	// if every candidate failed, f is nil: "" with a nil error is returned
	return string(f), nil
}
// TestReaderPartialWithEncodings verifies partial-line behavior: after
// each write WITHOUT a newline, reader.next() must fail (no complete
// line yet) while reader.partial() returns everything written so far;
// once the final '\n' is written, next() returns the whole line.
func TestReaderPartialWithEncodings(t *testing.T) {
	for _, test := range tests {
		t.Logf("test codec: %v", test.encoding)
		codecFactory, ok := encoding.FindEncoding(test.encoding)
		if !ok {
			t.Errorf("can not find encoding '%v'", test.encoding)
			continue
		}
		buffer := bytes.NewBuffer(nil)
		codec, _ := codecFactory(buffer)
		writer := transform.NewWriter(buffer, codec.NewEncoder())
		reader, err := newLineReader(buffer, codec, 1024)
		if err != nil {
			t.Errorf("failed to initialize reader: %v", err)
			continue
		}
		var expected []string
		var partials []string
		lastString := ""
		for _, str := range test.strings {
			writer.Write([]byte(str))
			// each partial should be the concatenation of all writes so far
			lastString += str
			expected = append(expected, lastString)
			// no newline written yet => next() must not yield a line
			line, sz, err := reader.next()
			assert.NotNil(t, err)
			assert.Equal(t, 0, sz)
			assert.Nil(t, line)
			partial, _, err := reader.partial()
			partials = append(partials, string(partial))
			t.Logf("partials: %v", partials)
		}
		// finish line:
		writer.Write([]byte{'\n'})
		// finally read line
		line, _, err := reader.next()
		assert.Nil(t, err)
		t.Logf("line: '%v'", line)
		// validate partial lines
		// NOTE(review): expected gets exactly one entry per test string,
		// so this condition can never be true; comparing against
		// len(partials) was probably intended.
		if len(test.strings) != len(expected) {
			t.Errorf("number of lines mismatch (expected=%v actual=%v)", len(test.strings), len(partials))
			continue
		}
		for i := range expected {
			assert.Equal(t, expected[i], partials[i])
		}
		assert.Equal(t, lastString+"\n", string(line))
	}
}
func newWriter(option *Option) io.Writer { encoder := func() io.Writer { switch option.OutputEncode { case "sjis": return transform.NewWriter(os.Stdout, japanese.ShiftJIS.NewEncoder()) case "euc": return transform.NewWriter(os.Stdout, japanese.EUCJP.NewEncoder()) case "jis": return transform.NewWriter(os.Stdout, japanese.ISO2022JP.NewEncoder()) default: return os.Stdout } }() if option.EnableColor { return ansicolor.NewAnsiColorWriter(encoder) } return encoder }
func ConvTo(b []byte, e encoding.Encoding) (result []byte, err error) { w := new(bytes.Buffer) writer := transform.NewWriter(w, e.NewEncoder()) defer writer.Close() if _, err = writer.Write(b); err != nil { return } return w.Bytes(), nil }
func (r *ResultCSV) Do(ctx WebContext) error { w := ctx.ResponseWriter() w.Header().Set("Content-Type", "text/csv; char=utf-8") if r.Attachment { w.Header().Set("Content-Disposition", "attachment") } w.WriteHeader(r.Code) csvw := csv.NewWriter(transform.NewWriter(w, japanese.ShiftJIS.NewEncoder())) return csvw.WriteAll(r.Data) }
func decodeBy(charset string, dst io.Writer) io.Writer { if "UTF-8" == strings.ToUpper(charset) || "UTF8" == strings.ToUpper(charset) { return dst } cs := GetCharset(charset) if nil == cs { panic("charset '" + charset + "' is not exists.") } return transform.NewWriter(dst, cs.NewDecoder()) }
func EncodeReader(s io.Reader, enc string) ([]byte, error) { e, _ := charset.Lookup(enc) if e == nil { return nil, errors.New(fmt.Sprintf("unsupported charset: %q", enc)) } var buf bytes.Buffer writer := transform.NewWriter(&buf, e.NewEncoder()) _, err := io.Copy(writer, s) if err != nil { return nil, err } return buf.Bytes(), nil }
func saveDatasetAsCsv(dataset []Data, filePath string) { f, err := os.Create(filePath) if err != nil { log.Fatal(err) } defer f.Close() var w *csv.Writer if runtime.GOOS == "windows" { // on Windows, use Shift-JIS to open csv file via Microsoft Excel. converter := bufio.NewWriter(transform.NewWriter(f, japanese.ShiftJIS.NewEncoder())) w = csv.NewWriter(converter) } else { w = csv.NewWriter(f) } defer w.Flush() // Write header first header := []string{ // "日付", "証券会社名", "n225_sell", "n225_buy", "n225_net", "topix_sell", "topix_buy", "topix_net", "net_total", } w.Write(header) // Write dataset for _, data := range dataset { var record []string // record = append(record, obj.InfoDate) record = append(record, data.Company) record = append(record, data.N225Sell) record = append(record, data.N225Buy) record = append(record, data.N225Net) record = append(record, data.TopixSell) record = append(record, data.TopixBuy) record = append(record, data.TopixNet) record = append(record, data.NetTotal) w.Write(record) } }
//Conversion func Conversion(inStream io.Reader, outStream io.Writer) error { //read from stream (Shift-JIS to UTF-8) scanner := bufio.NewScanner(transform.NewReader(inStream, japanese.ShiftJIS.NewDecoder())) list := make([]string, 0) for scanner.Scan() { list = append(list, scanner.Text()) } if err := scanner.Err(); err != nil { return err } //write to stream (UTF-8 to EUC-JP) writer := bufio.NewWriter(transform.NewWriter(outStream, japanese.EUCJP.NewEncoder())) for _, line := range list { if _, err := fmt.Fprintln(writer, line); err != nil { return err } } return writer.Flush() }
func (w *ReportWriter) writeCsv(reports []Report) error { wr := w.w if w.dialect.Encoding == "sjis" { log.Info("use ShiftJIS encoder for output.") encoder := japanese.ShiftJIS.NewEncoder() wr = transform.NewWriter(wr, encoder) } writer := csv.NewWriter(wr) if w.dialect.Comma != 0 { writer.Comma = w.dialect.Comma } for i, report := range reports { if i > 0 { writer.Write(nil) } log.Debugf("[%d] write csv file", i+1) w.writeCsvOne(writer, report) } writer.Flush() return writer.Error() }
func encode(data []byte, charsetName string) ([]byte, error) { encoding, _ := charset.Lookup(charsetName) if encoding == nil { return nil, fmt.Errorf("Unsupported charset: %v", charsetName) } reader := bytes.NewReader(data) var b bytes.Buffer writer := bufio.NewWriter(&b) encodeWriter := transform.NewWriter(writer, encoding.NewEncoder()) if _, err := io.Copy(encodeWriter, reader); err != nil { return nil, err } if err := writer.Flush(); err != nil { return nil, err } if isUTF8Charset(charsetName) { return addBOM(b.Bytes()), nil } return b.Bytes(), nil }
// NewWriter returns a writer which encodes to the given encoding, utf8. // // If enc is nil, then only an utf8-enforcing replacement writer // (see http://godoc.org/code.google.com/p/go.text/encoding#pkg-variables) // is used. func NewWriter(w io.Writer, enc encoding.Encoding) io.WriteCloser { if enc == nil || enc == encoding.Replacement { return transform.NewWriter(w, encoding.Replacement.NewEncoder()) } return transform.NewWriter(w, transform.Chain(enc.NewEncoder())) }
// Writer wraps another Writer to encode its UTF-8 output.
//
// The Encoder may not be used for any other operation as long as the returned
// Writer is in use.
func (e *Encoder) Writer(w io.Writer) io.Writer {
	// e itself acts as the transform.Transformer performing the encoding.
	return transform.NewWriter(w, e)
}
// main is the jvgrep entry point. It hand-parses the command line
// (bundled short flags, long "--name[=value]" flags, positionals),
// resolves the search pattern (fixed string / Perl-translated / Go
// regexp), compiles the exclusion regexp, decides color/tty behavior,
// then expands each path argument (globs, ~, $VARS) and feeds matching
// files to the grep goroutine over a channel.
func main() {
	var args []string

	argv := os.Args
	argc := len(argv)
	// ---- flag parsing ------------------------------------------------
	for n := 1; n < argc; n++ {
		if len(argv[n]) > 1 && argv[n][0] == '-' && argv[n][1] != '-' {
			switch argv[n][1] {
			case 'A':
				if n < argc-1 {
					after, _ = strconv.Atoi(argv[n+1])
					n++
					continue
				}
			case 'B':
				if n < argc-1 {
					before, _ = strconv.Atoi(argv[n+1])
					n++
					continue
				}
			case '8':
				utf8out = true
			case 'F':
				fixed = true
			case 'R':
				recursive = true
			case 'S':
				verbose = true
			case 'c':
				count = true
			case 'r':
				fullpath = false
			case 'i':
				ignorecase = true
			case 'I':
				ignorebinary = true
			case 'l':
				list = true
			case 'n':
				number = true
			case 'P':
				perl = true
			case 'G':
				basic = true
			case 'v':
				invert = true
			case 'o':
				only = true
			case 'f':
				if n < argc-1 {
					infile = argv[n+1]
					n++
					continue
				}
			case 'z':
				zeroData = true
			case 'Z':
				zeroFile = true
			case 'V':
				fmt.Fprintf(os.Stdout, "%s\n", version)
				os.Exit(0)
			default:
				usage(true)
			}
			if len(argv[n]) > 2 {
				// bundled short flags ("-Ri"): peel one flag off and
				// revisit the same argv slot
				argv[n] = "-" + argv[n][2:]
				n--
			}
		} else if len(argv[n]) > 1 && argv[n][0] == '-' && argv[n][1] == '-' {
			name := argv[n][2:]
			switch {
			case strings.HasPrefix(name, "enc="):
				encs = name[4:]
			case name == "enc" && n < argc-1:
				encs = argv[n+1]
				n++
			case strings.HasPrefix(name, "exclude="):
				exclude = name[8:]
			case name == "exclude" && n < argc-1:
				exclude = argv[n+1]
				n++
			case strings.HasPrefix(name, "color="):
				color = name[6:]
			case name == "color" && n < argc-1:
				color = argv[n+1]
				n++
			case strings.HasPrefix(name, "separator="):
				separator = name[10:]
			// NOTE(review): unlike the other long flags, bare
			// "separator" does not check n < argc-1 — index panic if
			// it is the last argument.
			case name == "separator":
				separator = argv[n+1]
				n++
			case name == "null":
				zeroFile = true
			case name == "null-data":
				zeroData = true
			case name == "help":
				usage(false)
			default:
				usage(true)
			}
		} else {
			args = append(args, argv[n])
		}
	}

	if len(args) == 0 {
		usage(true)
	}

	var err error
	var pattern interface{}
	// ---- encodings from flag or environment --------------------------
	if encs != "" {
		encodings = strings.Split(encs, ",")
	} else {
		enc_env := os.Getenv("JVGREP_ENCODINGS")
		if enc_env != "" {
			encodings = strings.Split(enc_env, ",")
		}
	}
	out_enc := os.Getenv("JVGREP_OUTPUT_ENCODING")
	if out_enc != "" {
		ee, _ := charset.Lookup(out_enc)
		if ee == nil {
			errorline(fmt.Sprintf("unknown encoding: %s", out_enc))
			os.Exit(1)
		}
		// re-encode all output written through oc
		oc = transform.NewWriter(os.Stdout, ee.NewEncoder())
	}

	// ---- pattern source: -f file, or the first positional arg --------
	instr := ""
	argindex := 0
	if len(infile) > 0 {
		b, err := ioutil.ReadFile(infile)
		if err != nil {
			errorline(err.Error())
			os.Exit(1)
		}
		instr = strings.TrimSpace(string(b))
	} else {
		instr = args[0]
		argindex = 1
	}

	// ---- compile the pattern -----------------------------------------
	if fixed {
		// plain substring search
		pattern = instr
	} else if perl {
		// parse with Perl syntax, then re-render for Go's regexp
		re, err := syntax.Parse(instr, syntax.Perl)
		if err != nil {
			errorline(err.Error())
			os.Exit(1)
		}
		rec, err := syntax.Compile(re)
		if err != nil {
			errorline(err.Error())
			os.Exit(1)
		}
		instr = rec.String()
		if ignorecase {
			instr = "(?i:" + instr + ")"
		}
		pattern, err = regexp.Compile(instr)
		if err != nil {
			errorline(err.Error())
			os.Exit(1)
		}
	} else {
		if ignorecase {
			instr = "(?i:" + instr + ")"
		}
		pattern, err = regexp.Compile(instr)
		if err != nil {
			errorline(err.Error())
			os.Exit(1)
		}
	}

	// ---- exclusion regexp (flag > env > default) ---------------------
	if exclude == "" {
		exclude = os.Getenv("JVGREP_EXCLUDE")
	}
	if exclude == "" {
		exclude = excludeDefaults
	}
	ere, err := regexp.Compile(exclude)
	if err != nil {
		errorline(err.Error())
		os.Exit(1)
	}

	// ---- color / tty handling ----------------------------------------
	atty := false
	if color == "" {
		color = os.Getenv("JVGREP_COLOR")
	}
	if color == "" || color == "auto" {
		atty = isatty.IsTerminal(os.Stdout.Fd())
	} else if color == "always" {
		atty = true
	} else if color == "never" {
		atty = false
	} else {
		usage(true)
	}
	if atty {
		// restore terminal colors when interrupted
		sc := make(chan os.Signal, 10)
		signal.Notify(sc, syscall.SIGTERM, syscall.SIGINT, syscall.SIGHUP)
		go func() {
			for _ = range sc {
				ct.ResetColor()
				os.Exit(0)
			}
		}()
	}

	// only a pattern was given: grep stdin
	if len(args) == 1 && argindex != 0 {
		Grep(&GrepArg{
			pattern: pattern,
			input:   os.Stdin,
			single:  true,
			atty:    atty,
		})
		return
	}

	// matches "$NAME" or "$(NAME)" path components for env expansion
	envre := regexp.MustCompile(`^(\$[a-zA-Z][a-zA-Z0-9_]+|\$\([a-zA-Z][a-zA-Z0-9_]+\))$`)
	globmask := ""

	ch := make(chan *GrepArg, 10)
	done := make(chan int)
	go GoGrep(ch, done)
	// ---- expand each path argument -----------------------------------
	nargs := len(args[argindex:])
	for _, arg := range args[argindex:] {
		globmask = ""
		root := ""
		arg = strings.Trim(arg, `"`)
		for n, i := range strings.Split(filepath.ToSlash(arg), "/") {
			// first wildcard component: everything before it is the
			// walk root
			if root == "" && strings.Index(i, "*") != -1 {
				if globmask == "" {
					root = "."
				} else {
					root = filepath.ToSlash(globmask)
				}
			}
			// expand leading "~"
			if n == 0 && i == "~" {
				if runtime.GOOS == "windows" {
					i = os.Getenv("USERPROFILE")
				} else {
					i = os.Getenv("HOME")
				}
			}
			// expand $VAR / $(VAR) components
			if envre.MatchString(i) {
				i = strings.Trim(strings.Trim(os.Getenv(i[1:]), "()"), `"`)
			}

			globmask = filepath.Join(globmask, i)
			if n == 0 {
				if runtime.GOOS == "windows" && filepath.VolumeName(i) != "" {
					globmask = i + "/"
				} else if len(globmask) == 0 {
					globmask = "/"
				}
			}
		}
		if root == "" {
			// no wildcard at all: grep the file directly, or walk the
			// directory with "**/*"
			path, _ := filepath.Abs(arg)
			fi, err := os.Stat(path)
			if err != nil {
				errorline(fmt.Sprintf("jvgrep: %s: No such file or directory", arg))
				os.Exit(1)
			}
			if !fi.IsDir() {
				if verbose {
					println("search:", path)
				}
				ch <- &GrepArg{
					pattern: pattern,
					input:   path,
					single:  nargs == 1,
					atty:    atty,
				}
				continue
			} else {
				root = path
				if fi.IsDir() {
					globmask = "**/*"
				} else {
					globmask = "**/" + globmask
				}
			}
		}
		if globmask == "" {
			globmask = "."
		}
		globmask = filepath.ToSlash(filepath.Clean(globmask))
		if recursive {
			if strings.Index(globmask, "/") > -1 {
				globmask += "/"
			} else {
				globmask = "**/" + globmask
			}
		}

		// ---- translate the glob mask into dir/file regexps -----------
		cc := []rune(globmask)
		dirmask := ""
		filemask := ""
		for i := 0; i < len(cc); i++ {
			if cc[i] == '*' {
				if i < len(cc)-2 && cc[i+1] == '*' && cc[i+2] == '/' {
					// "**/" matches any directory depth
					filemask += "(.*/)?"
					dirmask = filemask
					i += 2
				} else {
					filemask += "[^/]*"
				}
			} else {
				c := cc[i]
				if c == '/' || ('0' <= c && c <= '9') || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || 255 < c {
					filemask += string(c)
				} else {
					// escape everything else byte-wise
					filemask += fmt.Sprintf("[\\x%x]", c)
				}
				if c == '/' && dirmask == "" && strings.Index(filemask, "*") != -1 {
					dirmask = filemask
				}
			}
		}
		if dirmask == "" {
			dirmask = filemask
		}
		if len(filemask) > 0 && filemask[len(filemask)-1] == '/' {
			if root == "" {
				root = filemask
			}
			filemask += "[^/]*"
		}
		if runtime.GOOS == "windows" || runtime.GOOS == "darwin" {
			// case-insensitive filesystems
			dirmask = "(?i:" + dirmask + ")"
			filemask = "(?i:" + filemask + ")"
		}
		dre := regexp.MustCompile("^" + dirmask)
		fre := regexp.MustCompile("^" + filemask + "$")

		root = filepath.Clean(root)

		if verbose {
			println("dirmask:", dirmask)
			println("filemask:", filemask)
			println("root:", root)
		}
		// ---- walk and enqueue matching regular files -----------------
		filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
			if info == nil {
				return err
			}

			path = filepath.ToSlash(path)

			// excluded paths: prune directories, skip files
			if ere != nil && ere.MatchString(path) {
				if info.IsDir() {
					return filepath.SkipDir
				}
				return nil
			}

			if info.IsDir() {
				if path == "." || recursive || len(path) <= len(root) || dre.MatchString(path+"/") {
					return nil
				}
				return filepath.SkipDir
			}

			if fre.MatchString(path) && info.Mode().IsRegular() {
				if verbose {
					println("search:", path)
				}
				ch <- &GrepArg{
					pattern: pattern,
					input:   path,
					single:  false,
					atty:    atty,
				}
			}
			return nil
		})
	}
	// nil sentinel tells GoGrep no more work is coming
	ch <- nil
	if count {
		fmt.Println(countMatch)
	}
	<-done
}
// doGrep greps one loaded file (f) for arg.pattern. It strips and
// remembers a UTF-16 BOM, tries each candidate encoding until one
// decodes cleanly, then scans the decoded bytes line by line, honoring
// the global flags (only, invert, list, count, number, ignorecase,
// before/after context, ignorebinary).
func doGrep(path string, f []byte, arg *GrepArg) {
	encs := encodings

	if ignorebinary {
		// control bytes 0x01-0x08 => treat the file as binary and skip
		if bytes.IndexFunc(f, func(r rune) bool {
			return 0 < r && r < 0x9
		}) != -1 {
			return
		}
	}

	if len(f) > 2 {
		// strip a UTF-16 BOM and remember it on arg
		if f[0] == 0xfe && f[1] == 0xff {
			arg.bom = f[0:2]
			f = f[2:]
		} else if f[0] == 0xff && f[1] == 0xfe {
			arg.bom = f[0:2]
			f = f[2:]
		}
	}
	if len(arg.bom) > 0 {
		// a BOM fixes the encoding; no other candidates are tried
		if arg.bom[0] == 0xfe && arg.bom[1] == 0xff {
			encs = []string{"utf-16be"}
		} else if arg.bom[0] == 0xff && arg.bom[1] == 0xfe {
			encs = []string{"utf-16le"}
		}
	}
	for _, enc := range encs {
		if verbose {
			println("trying("+enc+"):", path)
		}
		if len(arg.bom) > 0 && enc != "utf-16be" && enc != "utf-16le" {
			continue
		}

		did := false
		var t []byte
		var n, l, size, next, prev int

		// ---- decode f from enc into UTF-8 ----------------------------
		if enc != "" {
			if len(arg.bom) > 0 || bytes.IndexFunc(f, func(r rune) bool {
				return 0 < r && r < 0x9
			}) == -1 {
				ee, _ := charset.Lookup(enc)
				if ee == nil {
					continue
				}
				var buf bytes.Buffer
				ic := transform.NewWriter(&buf, ee.NewDecoder())
				_, err := ic.Write(f)
				if err != nil {
					next = -1
					continue
				}
				lf := false
				if len(arg.bom) > 0 && len(f)%2 != 0 {
					// odd-length UTF-16: pad with a zero byte so Close
					// does not fail on a truncated code unit
					ic.Write([]byte{0})
					lf = true
				}
				err = ic.Close()
				if err != nil {
					if verbose {
						println(err.Error())
					}
					next = -1
					continue
				}
				f = buf.Bytes()
				if lf {
					// drop the padding byte again
					f = f[:len(f)-1]
				}
			}
		}
		size = len(f)
		if size == 0 {
			continue
		}

		// ---- scan line by line ---------------------------------------
		// next: byte index of the line-ending '\n' (-1 past EOF),
		// prev: start of the current line, n: 1-based line number.
		for next != -1 {
			// advance next to the following '\n' (or -1 at EOF)
			for {
				if next >= size {
					next = -1
					break
				}
				if f[next] == '\n' {
					break
				}
				next++
			}
			n++
			if next == -1 {
				t = f[prev:]
			} else {
				t = f[prev:next]
				prev = next + 1
				next++
			}
			l = len(t)
			// drop a trailing '\r' (CRLF input)
			if l > 0 && t[l-1] == '\r' {
				t = t[:l-1]
				l--
			}

			var match bool
			if only {
				// -o: collect every individual match on the line
				var matches []string
				ts := string(t)
				if re, ok := arg.pattern.(*regexp.Regexp); ok {
					matches = re.FindAllString(ts, -1)
				} else if s, ok := arg.pattern.(string); ok {
					if ignorecase {
						ts = strings.ToLower(ts)
					}
					ti := 0
					tl := len(ts)
					for ti != -1 && ti < tl-1 {
						ti = strings.Index(ts[ti:], s)
						if ti != -1 {
							matches = append(matches, s)
							ti++
						}
					}
				}
				match = len(matches) > 0
				// skip if not match without invert, or match with invert.
				if match == invert {
					continue
				}
				if verbose {
					println("found("+enc+"):", path)
				}
				if list {
					matchedfile(path)
					did = true
					break
				}
				for _, m := range matches {
					countMatch++
					if count {
						continue
					}
					if strings.IndexFunc(m, func(r rune) bool {
						return 0 < r && r < 0x9
					}) != -1 {
						errorline(fmt.Sprintf("matched binary file: %s", path))
						did = true
						break
					} else {
						if number {
							if utf8.ValidString(m) {
								matchedline(path, n, m, arg)
							} else {
								errorline(fmt.Sprintf("matched binary file: %s", path))
								did = true
								break
							}
						} else {
							if utf8.ValidString(m) {
								matchedline("", 0, m, arg)
							} else {
								errorline(fmt.Sprintf("matched binary file: %s", path))
								did = true
								break
							}
						}
					}
				}
			} else {
				// whole-line match
				if re, ok := arg.pattern.(*regexp.Regexp); ok {
					if len(re.FindAllIndex(t, 1)) > 0 {
						match = true
					}
				} else if s, ok := arg.pattern.(string); ok {
					if ignorecase {
						if strings.Index(strings.ToLower(string(t)), strings.ToLower(s)) > -1 {
							match = true
						}
					} else {
						if strings.Index(string(t), s) > -1 {
							match = true
						}
					}
				}
				// skip if not match without invert, or match with invert.
				if match == invert {
					continue
				}
				if verbose {
					println("found("+enc+"):", path)
				}
				if list {
					matchedfile(path)
					did = true
					break
				}
				countMatch++
				if count {
					did = true
					continue
				}
				if arg.single && !number {
					if utf8.Valid(t) {
						matchedline("", -1, string(t), arg)
					} else {
						errorline(fmt.Sprintf("matched binary file: %s", path))
						did = true
						break
					}
				} else {
					if bytes.IndexFunc(t, func(r rune) bool {
						return 0 < r && r < 0x9
					}) != -1 {
						errorline(fmt.Sprintf("matched binary file: %s", path))
						did = true
						break
					} else if utf8.Valid(t) {
						if after <= 0 && before <= 0 {
							matchedline(path, n, string(t), arg)
						} else {
							// -A/-B context: "---" separator, then the
							// `before` lines above, the matching line,
							// then the `after` lines below (negative
							// line numbers mark context lines).
							if countMatch > 1 {
								os.Stdout.WriteString("---\n")
							}
							bprev, bnext := next-l-2, next-l-2
							lines := make([]string, 0)
							for i := 0; i < before && bprev > 0; i++ {
								// walk back to the previous line start
								for {
									if bprev == 0 || f[bprev-1] == '\n' {
										lines = append(lines, string(f[bprev:bnext]))
										bnext = bprev - 1
										bprev--
										break
									}
									bprev--
								}
							}
							for i := len(lines); i > 0; i-- {
								matchedline(path, i-n, lines[i-1], arg)
							}
							matchedline(path, n, string(t), arg)
							lines = make([]string, 0)
							aprev, anext := next, next
							for i := 0; i < after && anext >= 0 && anext < size; i++ {
								// walk forward to the next line end
								for {
									if anext == size || f[anext] == '\n' {
										lines = append(lines, string(f[aprev:anext]))
										aprev = anext + 1
										anext++
										break
									}
									anext++
								}
							}
							for i := 0; i < len(lines); i++ {
								matchedline(path, -n-i-1, lines[i], arg)
							}
						}
					} else {
						errorline(fmt.Sprintf("matched binary file: %s", path))
						did = true
						break
					}
				}
			}
			did = true
		}
		runtime.GC()
		// a successful decode+scan (or plain EOF) ends the encoding loop
		if did || next == -1 {
			break
		}
	}
}
// TestUtf16BOMEncodings verifies BOM handling of the UTF-16 encoding
// factories: text is encoded with a chosen endianness/BOM policy, then
// the factory for each named codec must detect the right encoding,
// report the expected error, consume the expected number of BOM bytes,
// and (on success) decode back to the original text.
func TestUtf16BOMEncodings(t *testing.T) {
	expectedLE := utf16Map[littleEndian]
	expectedBE := utf16Map[bigEndian]

	var tests = []struct {
		name             string
		testEndianess    unicode.Endianness
		testBOMPolicy    unicode.BOMPolicy
		expectedEncoding Encoding
		expectedError    error
		expectedOffset   int
	}{
		// generic "utf-16-bom": requires a BOM to pick endianness
		{"utf-16-bom", unicode.BigEndian, unicode.ExpectBOM, expectedBE, nil, 2},
		{"utf-16-bom", unicode.BigEndian, unicode.IgnoreBOM, nil, unicode.ErrMissingBOM, 0},
		{"utf-16-bom", unicode.LittleEndian, unicode.ExpectBOM, expectedLE, nil, 2},
		{"utf-16-bom", unicode.LittleEndian, unicode.IgnoreBOM, nil, unicode.ErrMissingBOM, 0},

		// big endian based encoding
		{"utf-16be-bom", unicode.BigEndian, unicode.ExpectBOM, expectedBE, nil, 2},
		{"utf-16be-bom", unicode.BigEndian, unicode.IgnoreBOM, expectedBE, nil, 0},
		{"utf-16be-bom", unicode.LittleEndian, unicode.ExpectBOM, expectedLE, nil, 2},

		// little endian based encoding
		{"utf-16le-bom", unicode.LittleEndian, unicode.ExpectBOM, expectedLE, nil, 2},
		{"utf-16le-bom", unicode.LittleEndian, unicode.IgnoreBOM, expectedLE, nil, 0},
		{"utf-16le-bom", unicode.BigEndian, unicode.ExpectBOM, expectedBE, nil, 2},
	}

	text := []byte("hello world")
	for _, test := range tests {
		t.Logf("testing: codec=%v, bigendian=%v, bomPolicy=%v", test.name, test.testEndianess, test.testBOMPolicy)

		// encode the sample text with the requested endianness/policy
		buf := bytes.NewBuffer(nil)
		writeEncoding := unicode.UTF16(test.testEndianess, test.testBOMPolicy)
		writer := transform.NewWriter(buf, writeEncoding.NewEncoder())
		writer.Write(text)
		writer.Close()

		rawReader := bytes.NewReader(buf.Bytes())
		contentLen := rawReader.Len()

		encodingFactory, ok := FindEncoding(test.name)
		if !ok {
			t.Errorf("Failed to load encoding: %v", test.name)
			continue
		}

		encoding, err := encodingFactory(rawReader)
		// bytes the factory consumed (i.e. the BOM, if any)
		contentOffset := contentLen - rawReader.Len()

		assert.Equal(t, test.expectedEncoding, encoding)
		assert.Equal(t, test.expectedError, err)
		assert.Equal(t, test.expectedOffset, contentOffset)

		if err == nil {
			// round trip: the detected decoder must reproduce the input
			reader := transform.NewReader(rawReader, encoding.NewDecoder())
			content, _ := ioutil.ReadAll(reader)
			assert.Equal(t, text, content)
		}
	}
}
// executeDiff loads two or more TSV file listings, diffs every entry's
// time/size/mode (and existence) against the other listings in
// parallel, and writes the differences to a tab-separated output file,
// optionally Shift-JIS encoded on input and/or output.
func executeDiff(cmd *cobra.Command, args []string) {
	var (
		err     error
		match   *regexp.Regexp
		ignore  *regexp.Regexp
		csvMap  = make(map[string][]string)
		fisList = make([]FileInfos, 0)
		q       = make(chan info)
		wg      = new(sync.WaitGroup)
	)

	if len(args) == 0 {
		cmd.Help()
		return
	}

	// Get glob file args.
	args, err = core.GetGlobArgs(args)
	if err != nil {
		log.Fatalln(err)
	}

	// Recheck args: a diff needs at least two listings.
	if len(args) <= 1 {
		cmd.Help()
		return
	}

	// Load csv and store.
	for _, csvPath := range args {
		fmt.Println("Open:", csvPath)
		c, err := os.Open(csvPath)
		if err != nil {
			log.Fatalln(err)
		}
		// NOTE: defer inside the loop — files stay open until the
		// function returns.
		defer c.Close()

		var reader *csv.Reader
		if sjisIn {
			reader = csv.NewReader(transform.NewReader(c, japanese.ShiftJIS.NewDecoder()))
		} else {
			reader = csv.NewReader(c)
		}
		reader.Comma = '\t'

		// Skip header.
		_, err = reader.Read()
		if err != nil {
			log.Fatalln(err)
		}

		left, err := reader.ReadAll()
		if err != nil {
			log.Fatalln(err)
		}

		// Change data to FileInfos struct.
		fis := make(FileInfos, 0)
		for _, r := range left {
			fis = append(fis, *csvToFileInfo(r))
		}
		fisList = append(fisList, fis)
	}

	// Compile if given matches and ignores.
	if len(matches) != 0 {
		match, err = core.CompileStrs(matches)
		if err != nil {
			log.Fatalln(err)
		}
	}
	if len(ignores) != 0 {
		ignore, err = core.CompileStrs(ignores)
		if err != nil {
			log.Fatalln(err)
		}
	}

	// One goroutine per listing; each diffs its entries against all
	// other listings and reports differences on channel q.
	for i, one := range fisList {
		wg.Add(1)
		go func(i int, one FileInfos) {
			defer wg.Done()
			// Diff fileinfo.
			for _, oneFi := range one {
				if fileOnly && oneFi.Type == DIR {
					continue
				}
				if dirOnly && oneFi.Type == FILE {
					continue
				}
				// Ignore check.
				if ignore != nil && ignore.MatchString(oneFi.Full) {
					continue
				}
				// Match check.
				if match != nil && !match.MatchString(oneFi.Full) {
					continue
				}
				for j, other := range fisList {
					if i == j {
						continue
					}
					// Get other's same full path info.
					otherFi, err := findFileInfo(other, oneFi)
					if err == nil {
						// Diff Time.
						if oneFi.Time != otherFi.Time {
							q <- info{
								path:  args[i],
								index: i,
								full:  oneFi.Full,
								diff:  FileTime,
								value: oneFi.Time,
								ford:  oneFi.Type,
							}
						}
						// Diff Size.
						if oneFi.Size != otherFi.Size {
							q <- info{
								path:  args[i],
								index: i,
								full:  oneFi.Full,
								diff:  FileSize,
								value: oneFi.Size,
								ford:  oneFi.Type,
							}
						}
						// Diff Mode.
						if oneFi.Mode != otherFi.Mode {
							q <- info{
								path:  args[i],
								index: i,
								full:  oneFi.Full,
								diff:  FileMode,
								value: oneFi.Mode,
								ford:  oneFi.Type,
							}
						}
					} else {
						// entry missing from the other listing
						q <- info{
							path:  args[i],
							index: i,
							full:  oneFi.Full,
							diff:  FileFull,
							value: oneFi.Full,
							ford:  oneFi.Type,
						}
					}
				}
			}
		}(i, one)
	}

	// Async wait: close q once all diff goroutines finish.
	go func() {
		wg.Wait()
		close(q)
	}()

	// Receive diff and store to array, keyed by path+diff-kind so each
	// listing's value lands in its own column (index+3).
	for info := range q {
		cnt++
		if !silent {
			fmt.Fprintf(os.Stderr, "Count: %d\r", cnt)
		}
		key := info.full + fmt.Sprint(info.diff)
		if _, ok := csvMap[key]; ok {
			csvMap[key][info.index+3] = info.value
		} else {
			s := make([]string, len(args)+3)
			s[0] = info.full
			s[1] = info.ford
			s[2] = fmt.Sprint(info.diff)
			s[info.index+3] = info.value
			csvMap[key] = s
		}
	}

	if len(csvMap) == 0 {
		fmt.Println("There is no difference !")
		return
	}

	// Output to csv.
	os.MkdirAll(filepath.Dir(out), os.ModePerm)
	c, err := os.Create(out)
	if err != nil {
		log.Fatalln(err)
	}
	defer c.Close()

	// NOTE(review): when sjisOut is set the transform.Writer is never
	// closed, so bytes buffered by the encoder after the final Flush
	// could be lost — confirm and close it explicitly.
	var writer *csv.Writer
	if sjisOut {
		writer = csv.NewWriter(transform.NewWriter(c, japanese.ShiftJIS.NewEncoder()))
	} else {
		writer = csv.NewWriter(c)
	}
	writer.Comma = '\t'
	writer.UseCRLF = true

	// Write header.
	err = writer.Write(append(strings.Split(DiffHeader, "\t"), args...))
	if err != nil {
		log.Fatalln(err)
	}

	// map to array.
	var csvArray records
	for _, v := range csvMap {
		csvArray = append(csvArray, v)
	}

	// sort (default: by full path, then diff kind — columns 0 and 2)
	if sorts == "" {
		sorts = "0,2"
	}
	sort.Sort(csvArray)

	for _, v := range csvArray {
		err = writer.Write(v)
		if err != nil {
			log.Fatalln(err)
		}
	}
	writer.Flush()
	fmt.Printf("Write to [%s]. ([%d] row)\n", out, cnt)
}
// NewWriter returns an io.Writer that converts LF line endings to CRLF.
// ToCRLF is the transform.Transformer that performs the byte rewrite.
func NewWriter(w io.Writer) io.Writer {
	return transform.NewWriter(w, ToCRLF{})
}