Example #1
// Convert Unicode (UTF-8) text to single-byte text
// using a specific codepage mapping (Windows-1252 by default).
func convertUnicodeToAscii(text string,
	codepage encoding.Encoding) []byte {
	b := []byte(text)
	// fmt.Printf("Text length: %d\n", len(b))
	var buf bytes.Buffer
	if codepage == nil {
		codepage = charmap.Windows1252
	}
	w := transform.NewWriter(&buf, codepage.NewEncoder())
	w.Write(b)
	// Close (not defer) so the encoder flushes before buf.Bytes() is read.
	w.Close()
	// fmt.Printf("Buffer length: %d\n", len(buf.Bytes()))
	return buf.Bytes()
}
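A minimal usage sketch for convertUnicodeToAscii, assuming it sits in the same package; charmap.ISO8859_1 is only an illustrative codepage, not part of the original example.
package main

import (
	"fmt"

	"golang.org/x/text/encoding/charmap"
)

func main() {
	// 'é' (U+00E9) becomes the single byte 0xE9 in ISO 8859-1.
	out := convertUnicodeToAscii("héllo", charmap.ISO8859_1)
	fmt.Printf("% x\n", out) // 68 e9 6c 6c 6f
}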
Example #2
// "Detecting character encodings in Go": qiita.com/nobuhito/items/ff782f64e32f7ed95e43
func transEnc(text string, encode string) (string, error) {
	body := []byte(text)
	var f []byte

	// Try the explicitly requested encoding first, then fall back to Shift-JIS and UTF-8.
	encodings := []string{"sjis", "utf-8"}
	if encode != "" {
		encodings = append([]string{encode}, encodings...)
	}
	for _, enc := range encodings {
		ee, _ := charset.Lookup(enc)
		if ee == nil {
			continue
		}
		var buf bytes.Buffer
		ic := transform.NewWriter(&buf, ee.NewDecoder())
		if _, err := ic.Write(body); err != nil {
			continue
		}
		if err := ic.Close(); err != nil {
			continue
		}
		f = buf.Bytes()
		break
	}
	if f == nil {
		return "", fmt.Errorf("could not decode text with any of %v", encodings)
	}
	return string(f), nil
}
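A usage sketch for transEnc (same-package assumption); the byte literal is "日本語" encoded as Shift-JIS.
package main

import "fmt"

func main() {
	sjis := string([]byte{0x93, 0xfa, 0x96, 0x7b, 0x8c, 0xea}) // "日本語" in Shift-JIS
	utf8Text, err := transEnc(sjis, "sjis")
	if err != nil {
		panic(err)
	}
	fmt.Println(utf8Text) // 日本語
}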
Example #3
func TestReaderEncodings(t *testing.T) {
	for _, test := range tests {
		t.Logf("test codec: %v", test.encoding)

		codec, ok := findEncoding(test.encoding)
		if !ok {
			t.Errorf("can not find encoding '%v'", test.encoding)
			continue
		}

		buffer := bytes.NewBuffer(nil)

		// write with encoding to buffer
		writer := transform.NewWriter(buffer, codec.NewEncoder())
		var expectedCount []int
		for _, line := range test.strings {
			writer.Write([]byte(line))
			writer.Write([]byte{'\n'})
			expectedCount = append(expectedCount, buffer.Len())
		}

		// create line reader
		reader, err := newLineReader(buffer, codec, 1024)
		if err != nil {
			t.Errorf("failed to initialize reader: %v", err)
			continue
		}

		// read decoded lines from buffer
		var readLines []string
		var byteCounts []int
		current := 0
		for {
			bytes, sz, err := reader.next()
			if sz > 0 {
				readLines = append(readLines, string(bytes[:len(bytes)-1]))
			}

			if err != nil {
				break
			}

			current += sz
			byteCounts = append(byteCounts, current)
		}

		// validate lines and byte offsets
		if len(test.strings) != len(readLines) {
			t.Errorf("number of lines mismatch (expected=%v actual=%v)",
				len(test.strings), len(readLines))
			continue
		}
		for i := range test.strings {
			expected := test.strings[i]
			actual := readLines[i]
			assert.Equal(t, expected, actual)
			assert.Equal(t, expectedCount[i], byteCounts[i])
		}
	}
}
Example #4
func getWriter(writer io.Writer) io.Writer {
	encoding := getEncoding()
	if encoding == nil {
		return writer
	}
	return transform.NewWriter(writer, encoding.NewEncoder())
}
// Shift-JIS -> UTF-8
func to_utf8(str string) (string, error) {
	// Decode the Shift-JIS input into UTF-8 (NewDecoder, not NewEncoder).
	body, err := ioutil.ReadAll(transform.NewReader(strings.NewReader(str), japanese.ShiftJIS.NewDecoder()))
	if err == nil {
		return string(body), nil
	}

	// Fallback: try candidate encodings via charset.Lookup.
	var f []byte
	encodings := []string{"sjis", "utf-8"}
	for _, enc := range encodings {
		ee, _ := charset.Lookup(enc)
		if ee == nil {
			continue
		}
		var buf bytes.Buffer
		ic := transform.NewWriter(&buf, ee.NewDecoder())
		if _, err := ic.Write([]byte(str)); err != nil {
			continue
		}
		if err := ic.Close(); err != nil {
			continue
		}
		f = buf.Bytes()
		break
	}
	return string(f), nil
}
Example #6
func TestReaderPartialWithEncodings(t *testing.T) {
	for _, test := range tests {
		t.Logf("test codec: %v", test.encoding)

		codecFactory, ok := encoding.FindEncoding(test.encoding)
		if !ok {
			t.Errorf("can not find encoding '%v'", test.encoding)
			continue
		}

		buffer := bytes.NewBuffer(nil)
		codec, _ := codecFactory(buffer)

		writer := transform.NewWriter(buffer, codec.NewEncoder())
		reader, err := newLineReader(buffer, codec, 1024)
		if err != nil {
			t.Errorf("failed to initialize reader: %v", err)
			continue
		}

		var expected []string
		var partials []string
		lastString := ""
		for _, str := range test.strings {
			writer.Write([]byte(str))
			lastString += str
			expected = append(expected, lastString)

			line, sz, err := reader.next()
			assert.NotNil(t, err)
			assert.Equal(t, 0, sz)
			assert.Nil(t, line)

			partial, _, err := reader.partial()
			partials = append(partials, string(partial))
			t.Logf("partials: %v", partials)
		}

		// finish line:
		writer.Write([]byte{'\n'})

		// finally read line
		line, _, err := reader.next()
		assert.Nil(t, err)
		t.Logf("line: '%v'", line)

		// validate partial lines
		if len(test.strings) != len(partials) {
			t.Errorf("number of lines mismatch (expected=%v actual=%v)",
				len(test.strings), len(partials))
			continue
		}
		for i := range expected {
			assert.Equal(t, expected[i], partials[i])
		}

		assert.Equal(t, lastString+"\n", string(line))
	}
}
Example #7
func newWriter(option *Option) io.Writer {
	encoder := func() io.Writer {
		switch option.OutputEncode {
		case "sjis":
			return transform.NewWriter(os.Stdout, japanese.ShiftJIS.NewEncoder())
		case "euc":
			return transform.NewWriter(os.Stdout, japanese.EUCJP.NewEncoder())
		case "jis":
			return transform.NewWriter(os.Stdout, japanese.ISO2022JP.NewEncoder())
		default:
			return os.Stdout
		}
	}()
	if option.EnableColor {
		return ansicolor.NewAnsiColorWriter(encoder)
	}
	return encoder
}
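A usage sketch for newWriter; the Option fields OutputEncode and EnableColor are assumed from the switch above, not taken from the project's actual definition.
package main

import "fmt"

func main() {
	w := newWriter(&Option{OutputEncode: "sjis", EnableColor: false})
	// Stdout now receives Shift-JIS bytes instead of UTF-8.
	fmt.Fprintln(w, "日本語")
}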
Example #8
func ConvTo(b []byte, e encoding.Encoding) (result []byte, err error) {
	w := new(bytes.Buffer)
	writer := transform.NewWriter(w, e.NewEncoder())

	if _, err = writer.Write(b); err != nil {
		return
	}
	// Close flushes the encoder; it must happen before w.Bytes() is read.
	if err = writer.Close(); err != nil {
		return
	}
	return w.Bytes(), nil
}
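A usage sketch for ConvTo (same-package assumption); japanese.ShiftJIS is just one possible target encoding.
package main

import (
	"fmt"

	"golang.org/x/text/encoding/japanese"
)

func main() {
	out, err := ConvTo([]byte("日本語"), japanese.ShiftJIS)
	if err != nil {
		panic(err)
	}
	fmt.Printf("% x\n", out) // 93 fa 96 7b 8c ea
}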
Example #9
func (r *ResultCSV) Do(ctx WebContext) error {
	w := ctx.ResponseWriter()
	// The body is written through a Shift-JIS encoder, so declare that charset.
	w.Header().Set("Content-Type", "text/csv; charset=Shift_JIS")
	if r.Attachment {
		w.Header().Set("Content-Disposition", "attachment")
	}
	w.WriteHeader(r.Code)
	sw := transform.NewWriter(w, japanese.ShiftJIS.NewEncoder())
	csvw := csv.NewWriter(sw)
	if err := csvw.WriteAll(r.Data); err != nil {
		return err
	}
	return sw.Close()
}
Example #10
func decodeBy(charset string, dst io.Writer) io.Writer {
	if "UTF-8" == strings.ToUpper(charset) || "UTF8" == strings.ToUpper(charset) {
		return dst
	}
	cs := GetCharset(charset)
	if nil == cs {
		panic("charset '" + charset + "' is not exists.")
	}

	return transform.NewWriter(dst, cs.NewDecoder())
}
Example #11
func EncodeReader(s io.Reader, enc string) ([]byte, error) {
	e, _ := charset.Lookup(enc)
	if e == nil {
		return nil, fmt.Errorf("unsupported charset: %q", enc)
	}
	var buf bytes.Buffer
	writer := transform.NewWriter(&buf, e.NewEncoder())
	if _, err := io.Copy(writer, s); err != nil {
		return nil, err
	}
	// Close flushes any bytes still buffered by the encoder into buf.
	if err := writer.Close(); err != nil {
		return nil, err
	}
	return buf.Bytes(), nil
}
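A usage sketch for EncodeReader; "shift_jis" is one of the labels charset.Lookup (golang.org/x/net/html/charset) understands.
package main

import (
	"fmt"
	"strings"
)

func main() {
	out, err := EncodeReader(strings.NewReader("日本語"), "shift_jis")
	if err != nil {
		panic(err)
	}
	fmt.Printf("% x\n", out) // 93 fa 96 7b 8c ea
}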
Example #12
func saveDatasetAsCsv(dataset []Data, filePath string) {
	f, err := os.Create(filePath)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	var w *csv.Writer
	if runtime.GOOS == "windows" {
		// On Windows, write Shift-JIS so the CSV opens cleanly in Microsoft Excel.
		sjisWriter := transform.NewWriter(f, japanese.ShiftJIS.NewEncoder())
		defer sjisWriter.Close()
		converter := bufio.NewWriter(sjisWriter)
		defer converter.Flush()
		w = csv.NewWriter(converter)
	} else {
		w = csv.NewWriter(f)
	}
	// Deferred calls run LIFO: the csv writer is flushed first, then the
	// bufio writer, then the Shift-JIS transform writer is closed.
	defer w.Flush()

	// Write header first
	header := []string{
		// "日付",
		"証券会社名",
		"n225_sell",
		"n225_buy",
		"n225_net",
		"topix_sell",
		"topix_buy",
		"topix_net",
		"net_total",
	}
	w.Write(header)

	// Write dataset
	for _, data := range dataset {
		var record []string
		// record = append(record, obj.InfoDate)
		record = append(record, data.Company)
		record = append(record, data.N225Sell)
		record = append(record, data.N225Buy)
		record = append(record, data.N225Net)
		record = append(record, data.TopixSell)
		record = append(record, data.TopixBuy)
		record = append(record, data.TopixNet)
		record = append(record, data.NetTotal)
		w.Write(record)
	}
}
// Conversion reads Shift-JIS text from inStream and writes it to outStream as EUC-JP.
func Conversion(inStream io.Reader, outStream io.Writer) error {
	// read from stream (Shift-JIS to UTF-8)
	scanner := bufio.NewScanner(transform.NewReader(inStream, japanese.ShiftJIS.NewDecoder()))
	list := make([]string, 0)
	for scanner.Scan() {
		list = append(list, scanner.Text())
	}
	if err := scanner.Err(); err != nil {
		return err
	}
	// write to stream (UTF-8 to EUC-JP)
	encWriter := transform.NewWriter(outStream, japanese.EUCJP.NewEncoder())
	writer := bufio.NewWriter(encWriter)
	for _, line := range list {
		if _, err := fmt.Fprintln(writer, line); err != nil {
			return err
		}
	}
	if err := writer.Flush(); err != nil {
		return err
	}
	// Close the transform writer so the EUC-JP encoder can flush any buffered bytes.
	return encWriter.Close()
}
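A usage sketch that pipes a Shift-JIS file through Conversion and writes EUC-JP; the file names are placeholders only.
package main

import (
	"log"
	"os"
)

func main() {
	in, err := os.Open("input_sjis.txt")
	if err != nil {
		log.Fatal(err)
	}
	defer in.Close()

	out, err := os.Create("output_eucjp.txt")
	if err != nil {
		log.Fatal(err)
	}
	defer out.Close()

	if err := Conversion(in, out); err != nil {
		log.Fatal(err)
	}
}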
Example #14
func (w *ReportWriter) writeCsv(reports []Report) error {
	wr := w.w
	if w.dialect.Encoding == "sjis" {
		log.Info("use ShiftJIS encoder for output.")
		encoder := japanese.ShiftJIS.NewEncoder()
		wr = transform.NewWriter(wr, encoder)
	}
	writer := csv.NewWriter(wr)
	if w.dialect.Comma != 0 {
		writer.Comma = w.dialect.Comma
	}
	for i, report := range reports {
		if i > 0 {
			writer.Write(nil)
		}
		log.Debugf("[%d] write csv file", i+1)
		w.writeCsvOne(writer, report)
	}
	writer.Flush()
	return writer.Error()
}
Example #15
func encode(data []byte, charsetName string) ([]byte, error) {
	encoding, _ := charset.Lookup(charsetName)
	if encoding == nil {
		return nil, fmt.Errorf("unsupported charset: %v", charsetName)
	}

	reader := bytes.NewReader(data)
	var b bytes.Buffer
	writer := bufio.NewWriter(&b)

	encodeWriter := transform.NewWriter(writer, encoding.NewEncoder())
	if _, err := io.Copy(encodeWriter, reader); err != nil {
		return nil, err
	}
	// Close the transform writer first so the encoder flushes into the
	// bufio writer, then flush the bufio writer into the buffer.
	if err := encodeWriter.Close(); err != nil {
		return nil, err
	}
	if err := writer.Flush(); err != nil {
		return nil, err
	}

	if isUTF8Charset(charsetName) {
		return addBOM(b.Bytes()), nil
	}
	return b.Bytes(), nil
}
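A usage sketch for encode (same-package assumption, so isUTF8Charset and addBOM come from the surrounding project); "iso-8859-1" is resolved by charset.Lookup, which maps it to windows-1252.
package main

import "fmt"

func main() {
	out, err := encode([]byte("héllo"), "iso-8859-1")
	if err != nil {
		panic(err)
	}
	fmt.Printf("% x\n", out) // 68 e9 6c 6c 6f
}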
Example #16
// NewWriter returns a writer that encodes UTF-8 input to the given encoding.
//
// If enc is nil, only a UTF-8-enforcing replacement writer
// (see http://godoc.org/code.google.com/p/go.text/encoding#pkg-variables)
// is used.
func NewWriter(w io.Writer, enc encoding.Encoding) io.WriteCloser {
	if enc == nil || enc == encoding.Replacement {
		return transform.NewWriter(w, encoding.Replacement.NewEncoder())
	}
	return transform.NewWriter(w, transform.Chain(enc.NewEncoder()))
}
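A usage sketch of this wrapper (same-package assumption); with enc == nil it would only enforce valid UTF-8 via the replacement encoder.
package main

import (
	"bytes"
	"fmt"
	"io"

	"golang.org/x/text/encoding/charmap"
)

func main() {
	var buf bytes.Buffer
	w := NewWriter(&buf, charmap.ISO8859_1)
	io.WriteString(w, "héllo")
	w.Close() // flush the encoder before reading the buffer
	fmt.Printf("% x\n", buf.Bytes()) // 68 e9 6c 6c 6f
}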
Example #17
// Writer wraps another Writer to encode its UTF-8 output.
//
// The Encoder may not be used for any other operation as long as the returned
// Writer is in use.
func (e *Encoder) Writer(w io.Writer) io.Writer {
	return transform.NewWriter(w, e)
}
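This method is part of golang.org/x/text/encoding itself; a short usage sketch:
package main

import (
	"fmt"
	"os"

	"golang.org/x/text/encoding/japanese"
)

func main() {
	// Wrap os.Stdout so UTF-8 text written here comes out as EUC-JP bytes.
	w := japanese.EUCJP.NewEncoder().Writer(os.Stdout)
	fmt.Fprintln(w, "日本語")
}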
Example #18
func main() {
	var args []string

	argv := os.Args
	argc := len(argv)
	for n := 1; n < argc; n++ {
		if len(argv[n]) > 1 && argv[n][0] == '-' && argv[n][1] != '-' {
			switch argv[n][1] {
			case 'A':
				if n < argc-1 {
					after, _ = strconv.Atoi(argv[n+1])
					n++
					continue
				}
			case 'B':
				if n < argc-1 {
					before, _ = strconv.Atoi(argv[n+1])
					n++
					continue
				}
			case '8':
				utf8out = true
			case 'F':
				fixed = true
			case 'R':
				recursive = true
			case 'S':
				verbose = true
			case 'c':
				count = true
			case 'r':
				fullpath = false
			case 'i':
				ignorecase = true
			case 'I':
				ignorebinary = true
			case 'l':
				list = true
			case 'n':
				number = true
			case 'P':
				perl = true
			case 'G':
				basic = true
			case 'v':
				invert = true
			case 'o':
				only = true
			case 'f':
				if n < argc-1 {
					infile = argv[n+1]
					n++
					continue
				}
			case 'z':
				zeroData = true
			case 'Z':
				zeroFile = true
			case 'V':
				fmt.Fprintf(os.Stdout, "%s\n", version)
				os.Exit(0)
			default:
				usage(true)
			}
			if len(argv[n]) > 2 {
				argv[n] = "-" + argv[n][2:]
				n--
			}
		} else if len(argv[n]) > 1 && argv[n][0] == '-' && argv[n][1] == '-' {
			name := argv[n][2:]
			switch {
			case strings.HasPrefix(name, "enc="):
				encs = name[4:]
			case name == "enc" && n < argc-1:
				encs = argv[n+1]
				n++
			case strings.HasPrefix(name, "exclude="):
				exclude = name[8:]
			case name == "exclude" && n < argc-1:
				exclude = argv[n+1]
				n++
			case strings.HasPrefix(name, "color="):
				color = name[6:]
			case name == "color" && n < argc-1:
				color = argv[n+1]
				n++
			case strings.HasPrefix(name, "separator="):
				separator = name[10:]
			case name == "separator":
				separator = argv[n+1]
				n++
			case name == "null":
				zeroFile = true
			case name == "null-data":
				zeroData = true
			case name == "help":
				usage(false)
			default:
				usage(true)
			}
		} else {
			args = append(args, argv[n])
		}
	}

	if len(args) == 0 {
		usage(true)
	}

	var err error
	var pattern interface{}
	if encs != "" {
		encodings = strings.Split(encs, ",")
	} else {
		enc_env := os.Getenv("JVGREP_ENCODINGS")
		if enc_env != "" {
			encodings = strings.Split(enc_env, ",")
		}
	}
	out_enc := os.Getenv("JVGREP_OUTPUT_ENCODING")
	if out_enc != "" {
		ee, _ := charset.Lookup(out_enc)
		if ee == nil {
			errorline(fmt.Sprintf("unknown encoding: %s", out_enc))
			os.Exit(1)
		}
		oc = transform.NewWriter(os.Stdout, ee.NewEncoder())
	}

	instr := ""
	argindex := 0
	if len(infile) > 0 {
		b, err := ioutil.ReadFile(infile)
		if err != nil {
			errorline(err.Error())
			os.Exit(1)
		}
		instr = strings.TrimSpace(string(b))
	} else {
		instr = args[0]
		argindex = 1
	}
	if fixed {
		pattern = instr
	} else if perl {
		re, err := syntax.Parse(instr, syntax.Perl)
		if err != nil {
			errorline(err.Error())
			os.Exit(1)
		}
		rec, err := syntax.Compile(re)
		if err != nil {
			errorline(err.Error())
			os.Exit(1)
		}
		instr = rec.String()
		if ignorecase {
			instr = "(?i:" + instr + ")"
		}
		pattern, err = regexp.Compile(instr)
		if err != nil {
			errorline(err.Error())
			os.Exit(1)
		}
	} else {
		if ignorecase {
			instr = "(?i:" + instr + ")"
		}
		pattern, err = regexp.Compile(instr)
		if err != nil {
			errorline(err.Error())
			os.Exit(1)
		}
	}

	if exclude == "" {
		exclude = os.Getenv("JVGREP_EXCLUDE")
	}
	if exclude == "" {
		exclude = excludeDefaults
	}
	ere, err := regexp.Compile(exclude)
	if err != nil {
		errorline(err.Error())
		os.Exit(1)
	}

	atty := false
	if color == "" {
		color = os.Getenv("JVGREP_COLOR")
	}
	if color == "" || color == "auto" {
		atty = isatty.IsTerminal(os.Stdout.Fd())
	} else if color == "always" {
		atty = true
	} else if color == "never" {
		atty = false
	} else {
		usage(true)
	}

	if atty {
		sc := make(chan os.Signal, 10)
		signal.Notify(sc, syscall.SIGTERM, syscall.SIGINT, syscall.SIGHUP)
		go func() {
			for range sc {
				ct.ResetColor()
				os.Exit(0)
			}
		}()
	}

	if len(args) == 1 && argindex != 0 {
		Grep(&GrepArg{
			pattern: pattern,
			input:   os.Stdin,
			single:  true,
			atty:    atty,
		})
		return
	}

	envre := regexp.MustCompile(`^(\$[a-zA-Z][a-zA-Z0-9_]+|\$\([a-zA-Z][a-zA-Z0-9_]+\))$`)
	globmask := ""

	ch := make(chan *GrepArg, 10)
	done := make(chan int)
	go GoGrep(ch, done)
	nargs := len(args[argindex:])
	for _, arg := range args[argindex:] {
		globmask = ""
		root := ""
		arg = strings.Trim(arg, `"`)
		for n, i := range strings.Split(filepath.ToSlash(arg), "/") {
			if root == "" && strings.Index(i, "*") != -1 {
				if globmask == "" {
					root = "."
				} else {
					root = filepath.ToSlash(globmask)
				}
			}
			if n == 0 && i == "~" {
				if runtime.GOOS == "windows" {
					i = os.Getenv("USERPROFILE")
				} else {
					i = os.Getenv("HOME")
				}
			}
			if envre.MatchString(i) {
				i = strings.Trim(strings.Trim(os.Getenv(i[1:]), "()"), `"`)
			}

			globmask = filepath.Join(globmask, i)
			if n == 0 {
				if runtime.GOOS == "windows" && filepath.VolumeName(i) != "" {
					globmask = i + "/"
				} else if len(globmask) == 0 {
					globmask = "/"
				}
			}
		}
		if root == "" {
			path, _ := filepath.Abs(arg)
			fi, err := os.Stat(path)
			if err != nil {
				errorline(fmt.Sprintf("jvgrep: %s: No such file or directory", arg))
				os.Exit(1)
			}
			if !fi.IsDir() {
				if verbose {
					println("search:", path)
				}
				ch <- &GrepArg{
					pattern: pattern,
					input:   path,
					single:  nargs == 1,
					atty:    atty,
				}
				continue
			} else {
				root = path
				if fi.IsDir() {
					globmask = "**/*"
				} else {
					globmask = "**/" + globmask
				}
			}
		}
		if globmask == "" {
			globmask = "."
		}
		globmask = filepath.ToSlash(filepath.Clean(globmask))
		if recursive {
			if strings.Index(globmask, "/") > -1 {
				globmask += "/"
			} else {
				globmask = "**/" + globmask
			}
		}

		cc := []rune(globmask)
		dirmask := ""
		filemask := ""
		for i := 0; i < len(cc); i++ {
			if cc[i] == '*' {
				if i < len(cc)-2 && cc[i+1] == '*' && cc[i+2] == '/' {
					filemask += "(.*/)?"
					dirmask = filemask
					i += 2
				} else {
					filemask += "[^/]*"
				}
			} else {
				c := cc[i]
				if c == '/' || ('0' <= c && c <= '9') || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || 255 < c {
					filemask += string(c)
				} else {
					filemask += fmt.Sprintf("[\\x%x]", c)
				}
				if c == '/' && dirmask == "" && strings.Index(filemask, "*") != -1 {
					dirmask = filemask
				}
			}
		}
		if dirmask == "" {
			dirmask = filemask
		}
		if len(filemask) > 0 && filemask[len(filemask)-1] == '/' {
			if root == "" {
				root = filemask
			}
			filemask += "[^/]*"
		}
		if runtime.GOOS == "windows" || runtime.GOOS == "darwin" {
			dirmask = "(?i:" + dirmask + ")"
			filemask = "(?i:" + filemask + ")"
		}
		dre := regexp.MustCompile("^" + dirmask)
		fre := regexp.MustCompile("^" + filemask + "$")

		root = filepath.Clean(root)

		if verbose {
			println("dirmask:", dirmask)
			println("filemask:", filemask)
			println("root:", root)
		}
		filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
			if info == nil {
				return err
			}

			path = filepath.ToSlash(path)

			if ere != nil && ere.MatchString(path) {
				if info.IsDir() {
					return filepath.SkipDir
				}
				return nil
			}

			if info.IsDir() {
				if path == "." || recursive || len(path) <= len(root) || dre.MatchString(path+"/") {
					return nil
				}
				return filepath.SkipDir
			}

			if fre.MatchString(path) && info.Mode().IsRegular() {
				if verbose {
					println("search:", path)
				}
				ch <- &GrepArg{
					pattern: pattern,
					input:   path,
					single:  false,
					atty:    atty,
				}
			}
			return nil
		})
	}
	ch <- nil
	if count {
		fmt.Println(countMatch)
	}
	<-done
}
Example #19
func doGrep(path string, f []byte, arg *GrepArg) {
	encs := encodings

	if ignorebinary {
		if bytes.IndexFunc(f, func(r rune) bool { return 0 < r && r < 0x9 }) != -1 {
			return
		}
	}

	if len(f) > 2 {
		if f[0] == 0xfe && f[1] == 0xff {
			arg.bom = f[0:2]
			f = f[2:]
		} else if f[0] == 0xff && f[1] == 0xfe {
			arg.bom = f[0:2]
			f = f[2:]
		}
	}
	if len(arg.bom) > 0 {
		if arg.bom[0] == 0xfe && arg.bom[1] == 0xff {
			encs = []string{"utf-16be"}
		} else if arg.bom[0] == 0xff && arg.bom[1] == 0xfe {
			encs = []string{"utf-16le"}
		}
	}

	for _, enc := range encs {
		if verbose {
			println("trying("+enc+"):", path)
		}
		if len(arg.bom) > 0 && enc != "utf-16be" && enc != "utf-16le" {
			continue
		}

		did := false
		var t []byte
		var n, l, size, next, prev int

		if enc != "" {
			if len(arg.bom) > 0 || bytes.IndexFunc(f, func(r rune) bool { return 0 < r && r < 0x9 }) == -1 {
				ee, _ := charset.Lookup(enc)
				if ee == nil {
					continue
				}
				var buf bytes.Buffer
				ic := transform.NewWriter(&buf, ee.NewDecoder())
				_, err := ic.Write(f)
				if err != nil {
					next = -1
					continue
				}
				lf := false
				if len(arg.bom) > 0 && len(f)%2 != 0 {
					ic.Write([]byte{0})
					lf = true
				}
				err = ic.Close()
				if err != nil {
					if verbose {
						println(err.Error())
					}
					next = -1
					continue
				}
				f = buf.Bytes()
				if lf {
					f = f[:len(f)-1]
				}
			}
		}
		size = len(f)
		if size == 0 {
			continue
		}

		for next != -1 {
			for {
				if next >= size {
					next = -1
					break
				}
				if f[next] == '\n' {
					break
				}
				next++
			}
			n++
			if next == -1 {
				t = f[prev:]
			} else {
				t = f[prev:next]
				prev = next + 1
				next++
			}

			l = len(t)
			if l > 0 && t[l-1] == '\r' {
				t = t[:l-1]
				l--
			}

			var match bool
			if only {
				var matches []string
				ts := string(t)
				if re, ok := arg.pattern.(*regexp.Regexp); ok {
					matches = re.FindAllString(ts, -1)
				} else if s, ok := arg.pattern.(string); ok {
					if ignorecase {
						ts = strings.ToLower(ts)
					}
					ti := 0
					tl := len(ts)
					for ti != -1 && ti < tl-1 {
						ti = strings.Index(ts[ti:], s)
						if ti != -1 {
							matches = append(matches, s)
							ti++
						}
					}
				}
				match = len(matches) > 0
				// Skip when the match result equals the invert flag: no match in normal mode, or a match with -v.
				if match == invert {
					continue
				}
				if verbose {
					println("found("+enc+"):", path)
				}
				if list {
					matchedfile(path)
					did = true
					break
				}
				for _, m := range matches {
					countMatch++
					if count {
						continue
					}
					if strings.IndexFunc(
						m, func(r rune) bool {
							return 0 < r && r < 0x9
						}) != -1 {
						errorline(fmt.Sprintf("matched binary file: %s", path))
						did = true
						break
					} else {
						if number {
							if utf8.ValidString(m) {
								matchedline(path, n, m, arg)
							} else {
								errorline(fmt.Sprintf("matched binary file: %s", path))
								did = true
								break
							}
						} else {
							if utf8.ValidString(m) {
								matchedline("", 0, m, arg)
							} else {
								errorline(fmt.Sprintf("matched binary file: %s", path))
								did = true
								break
							}
						}
					}
				}
			} else {
				if re, ok := arg.pattern.(*regexp.Regexp); ok {
					if len(re.FindAllIndex(t, 1)) > 0 {
						match = true
					}
				} else if s, ok := arg.pattern.(string); ok {
					if ignorecase {
						if strings.Index(strings.ToLower(string(t)),
							strings.ToLower(s)) > -1 {
							match = true
						}
					} else {
						if strings.Index(string(t), s) > -1 {
							match = true
						}
					}
				}
				// Skip when the match result equals the invert flag: no match in normal mode, or a match with -v.
				if match == invert {
					continue
				}
				if verbose {
					println("found("+enc+"):", path)
				}
				if list {
					matchedfile(path)
					did = true
					break
				}
				countMatch++
				if count {
					did = true
					continue
				}
				if arg.single && !number {
					if utf8.Valid(t) {
						matchedline("", -1, string(t), arg)
					} else {
						errorline(fmt.Sprintf("matched binary file: %s", path))
						did = true
						break
					}
				} else {
					if bytes.IndexFunc(
						t, func(r rune) bool {
							return 0 < r && r < 0x9
						}) != -1 {
						errorline(fmt.Sprintf("matched binary file: %s", path))
						did = true
						break
					} else if utf8.Valid(t) {
						if after <= 0 && before <= 0 {
							matchedline(path, n, string(t), arg)
						} else {
							if countMatch > 1 {
								os.Stdout.WriteString("---\n")
							}
							bprev, bnext := next-l-2, next-l-2
							lines := make([]string, 0)
							for i := 0; i < before && bprev > 0; i++ {
								for {
									if bprev == 0 || f[bprev-1] == '\n' {
										lines = append(lines, string(f[bprev:bnext]))
										bnext = bprev - 1
										bprev--
										break
									}
									bprev--
								}
							}
							for i := len(lines); i > 0; i-- {
								matchedline(path, i-n, lines[i-1], arg)
							}
							matchedline(path, n, string(t), arg)
							lines = make([]string, 0)
							aprev, anext := next, next
							for i := 0; i < after && anext >= 0 && anext < size; i++ {
								for {
									if anext == size || f[anext] == '\n' {
										lines = append(lines, string(f[aprev:anext]))
										aprev = anext + 1
										anext++
										break
									}
									anext++
								}
							}
							for i := 0; i < len(lines); i++ {
								matchedline(path, -n-i-1, lines[i], arg)
							}
						}
					} else {
						errorline(fmt.Sprintf("matched binary file: %s", path))
						did = true
						break
					}
				}
			}
			did = true
		}
		runtime.GC()
		if did || next == -1 {
			break
		}
	}
}
Example #20
func TestUtf16BOMEncodings(t *testing.T) {
	expectedLE := utf16Map[littleEndian]
	expectedBE := utf16Map[bigEndian]

	var tests = []struct {
		name             string
		testEndianess    unicode.Endianness
		testBOMPolicy    unicode.BOMPolicy
		expectedEncoding Encoding
		expectedError    error
		expectedOffset   int
	}{
		{"utf-16-bom",
			unicode.BigEndian, unicode.ExpectBOM, expectedBE, nil, 2},
		{"utf-16-bom",
			unicode.BigEndian, unicode.IgnoreBOM, nil, unicode.ErrMissingBOM, 0},
		{"utf-16-bom",
			unicode.LittleEndian, unicode.ExpectBOM, expectedLE, nil, 2},
		{"utf-16-bom",
			unicode.LittleEndian, unicode.IgnoreBOM, nil, unicode.ErrMissingBOM, 0},

		// big endian based encoding
		{"utf-16be-bom",
			unicode.BigEndian, unicode.ExpectBOM, expectedBE, nil, 2},
		{"utf-16be-bom",
			unicode.BigEndian, unicode.IgnoreBOM, expectedBE, nil, 0},
		{"utf-16be-bom",
			unicode.LittleEndian, unicode.ExpectBOM, expectedLE, nil, 2},

		// little endian based encoding
		{"utf-16le-bom",
			unicode.LittleEndian, unicode.ExpectBOM, expectedLE, nil, 2},
		{"utf-16le-bom",
			unicode.LittleEndian, unicode.IgnoreBOM, expectedLE, nil, 0},
		{"utf-16le-bom",
			unicode.BigEndian, unicode.ExpectBOM, expectedBE, nil, 2},
	}

	text := []byte("hello world")

	for _, test := range tests {
		t.Logf("testing: codec=%v, bigendian=%v, bomPolicy=%v",
			test.name, test.testEndianess, test.testBOMPolicy)

		buf := bytes.NewBuffer(nil)
		writeEncoding := unicode.UTF16(test.testEndianess, test.testBOMPolicy)
		writer := transform.NewWriter(buf, writeEncoding.NewEncoder())
		writer.Write(text)
		writer.Close()

		rawReader := bytes.NewReader(buf.Bytes())
		contentLen := rawReader.Len()
		encodingFactory, ok := FindEncoding(test.name)
		if !ok {
			t.Errorf("Failed to load encoding: %v", test.name)
			continue
		}

		encoding, err := encodingFactory(rawReader)
		contentOffset := contentLen - rawReader.Len()

		assert.Equal(t, test.expectedEncoding, encoding)
		assert.Equal(t, test.expectedError, err)
		assert.Equal(t, test.expectedOffset, contentOffset)
		if err == nil {
			reader := transform.NewReader(rawReader, encoding.NewDecoder())
			content, _ := ioutil.ReadAll(reader)
			assert.Equal(t, text, content)
		}
	}
}
Example #21
func executeDiff(cmd *cobra.Command, args []string) {

	var (
		err error

		match  *regexp.Regexp
		ignore *regexp.Regexp

		csvMap  = make(map[string][]string)
		fisList = make([]FileInfos, 0)
		q       = make(chan info)
		wg      = new(sync.WaitGroup)
	)

	if len(args) == 0 {
		cmd.Help()
		return
	}

	// Get glob file args.
	args, err = core.GetGlobArgs(args)
	if err != nil {
		log.Fatalln(err)
	}

	// Recheck args.
	if len(args) <= 1 {
		cmd.Help()
		return
	}

	// Load csv and store.
	for _, csvPath := range args {
		fmt.Println("Open:", csvPath)
		c, err := os.Open(csvPath)
		if err != nil {
			log.Fatalln(err)
		}
		// Note: this defer only runs when executeDiff returns, so every input
		// file stays open until then; acceptable for a small argument list.
		defer c.Close()
		var reader *csv.Reader
		if sjisIn {
			reader = csv.NewReader(transform.NewReader(c, japanese.ShiftJIS.NewDecoder()))
		} else {
			reader = csv.NewReader(c)
		}
		reader.Comma = '\t'
		// Skip header.
		_, err = reader.Read()
		if err != nil {
			log.Fatalln(err)
		}
		left, err := reader.ReadAll()
		if err != nil {
			log.Fatalln(err)
		}

		// Change data to FileInfos struct.
		fis := make(FileInfos, 0)
		for _, r := range left {
			fis = append(fis, *csvToFileInfo(r))
		}
		fisList = append(fisList, fis)
	}

	// Compile if given matches and ignores.
	if len(matches) != 0 {
		match, err = core.CompileStrs(matches)
		if err != nil {
			log.Fatalln(err)
		}
	}
	if len(ignores) != 0 {
		ignore, err = core.CompileStrs(ignores)
		if err != nil {
			log.Fatalln(err)
		}
	}

	for i, one := range fisList {
		wg.Add(1)
		go func(i int, one FileInfos) {
			defer wg.Done()

			// Diff fileinfo.
			for _, oneFi := range one {
				if fileOnly && oneFi.Type == DIR {
					continue
				}
				if dirOnly && oneFi.Type == FILE {
					continue
				}

				// Ignore check.
				if ignore != nil && ignore.MatchString(oneFi.Full) {
					continue
				}

				// Match check.
				if match != nil && !match.MatchString(oneFi.Full) {
					continue
				}

				for j, other := range fisList {
					if i == j {
						continue
					}

					// Get other's same full path info.
					otherFi, err := findFileInfo(other, oneFi)
					if err == nil {
						// Diff Time.
						if oneFi.Time != otherFi.Time {
							q <- info{
								path:  args[i],
								index: i,
								full:  oneFi.Full,
								diff:  FileTime,
								value: oneFi.Time,
								ford:  oneFi.Type,
							}
						}
						// Diff Size.
						if oneFi.Size != otherFi.Size {
							q <- info{
								path:  args[i],
								index: i,
								full:  oneFi.Full,
								diff:  FileSize,
								value: oneFi.Size,
								ford:  oneFi.Type,
							}
						}
						// Diff Mode.
						if oneFi.Mode != otherFi.Mode {
							q <- info{
								path:  args[i],
								index: i,
								full:  oneFi.Full,
								diff:  FileMode,
								value: oneFi.Mode,
								ford:  oneFi.Type,
							}
						}
					} else {
						q <- info{
							path:  args[i],
							index: i,
							full:  oneFi.Full,
							diff:  FileFull,
							value: oneFi.Full,
							ford:  oneFi.Type,
						}
					}
				}
			}
		}(i, one)
	}

	// Async wait.
	go func() {
		wg.Wait()
		close(q)
	}()

	// Receive diff and store to array.
	for info := range q {
		cnt++
		if !silent {
			fmt.Fprintf(os.Stderr, "Count: %d\r", cnt)
		}
		key := info.full + fmt.Sprint(info.diff)
		if _, ok := csvMap[key]; ok {
			csvMap[key][info.index+3] = info.value
		} else {
			s := make([]string, len(args)+3)
			s[0] = info.full
			s[1] = info.ford
			s[2] = fmt.Sprint(info.diff)
			s[info.index+3] = info.value
			csvMap[key] = s
		}
	}

	if len(csvMap) == 0 {
		fmt.Println("There is no difference !")
		return
	}

	// Output to csv.
	os.MkdirAll(filepath.Dir(out), os.ModePerm)
	c, err := os.Create(out)
	if err != nil {
		log.Fatalln(err)
	}
	defer c.Close()
	var writer *csv.Writer
	if sjisOut {
		writer = csv.NewWriter(transform.NewWriter(c, japanese.ShiftJIS.NewEncoder()))
	} else {
		writer = csv.NewWriter(c)
	}
	writer.Comma = '\t'
	writer.UseCRLF = true

	// Write header.
	err = writer.Write(append(strings.Split(DiffHeader, "\t"), args...))
	if err != nil {
		log.Fatalln(err)
	}

	// map to array.
	var csvArray records
	for _, v := range csvMap {
		csvArray = append(csvArray, v)
	}

	// sort
	if sorts == "" {
		sorts = "0,2"
	}
	sort.Sort(csvArray)

	for _, v := range csvArray {
		err = writer.Write(v)
		if err != nil {
			log.Fatalln(err)
		}
	}
	writer.Flush()
	fmt.Printf("Write to [%s]. ([%d] row)\n", out, cnt)
}
Example #22
// NewWriter returns an io.Writer that converts LF line endings to CRLF.
func NewWriter(w io.Writer) io.Writer {
	return transform.NewWriter(w, ToCRLF{})
}
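A usage sketch (assuming the ToCRLF transformer and this NewWriter live in the same package):
package main

import (
	"fmt"
	"os"
)

func main() {
	w := NewWriter(os.Stdout)
	// Every LF written here is emitted as CRLF.
	fmt.Fprint(w, "first line\nsecond line\n")
}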