//「Golangで文字コード判定」qiita.com/nobuhito/items/ff782f64e32f7ed95e43 func transEnc(text string, encode string) (string, error) { body := []byte(text) var f []byte encodings := []string{"sjis", "utf-8"} if encode != "" { encodings = append([]string{encode}, encodings...) } for _, enc := range encodings { if enc != "" { ee, _ := charset.Lookup(enc) if ee == nil { continue } var buf bytes.Buffer ic := transform.NewWriter(&buf, ee.NewDecoder()) _, err := ic.Write(body) if err != nil { continue } err = ic.Close() if err != nil { continue } f = buf.Bytes() break } } return string(f), nil }
func defaultCharsetReader(cs string, input io.Reader) (io.Reader, error) { e, _ := charset.Lookup(cs) if e == nil { return nil, fmt.Errorf("cannot decode charset %v", cs) } return transform.NewReader(input, e.NewDecoder()), nil }
// Encoding returns an Encoding for the response body. func (res *Response) Encoding() (encoding.Encoding, error) { enc, _ := charset.Lookup(res.Charset) if enc == nil { return nil, fmt.Errorf("no encoding found for %s", res.Charset) } return enc, nil }
func ToUtf8WithErr(content []byte) (error, string) { charsetLabel, err := DetectEncoding(content) if err != nil { return err, "" } if charsetLabel == "utf8" { return nil, string(content) } encoding, _ := charset.Lookup(charsetLabel) if encoding == nil { return fmt.Errorf("unknow char decoder %s", charsetLabel), string(content) } result, n, err := transform.String(encoding.NewDecoder(), string(content)) // If there is an error, we concatenate the nicely decoded part and the // original left over. This way we won't loose data. if err != nil { result = result + string(content[n:]) } return err, result }
// Shift-JIS -> UTF-8 func to_utf8(str string) (string, error) { body, err := ioutil.ReadAll(transform.NewReader(strings.NewReader(str), japanese.ShiftJIS.NewEncoder())) if err != nil { return "", err } var f []byte encodings := []string{"sjis", "utf-8"} for _, enc := range encodings { if enc != "" { ee, _ := charset.Lookup(enc) if ee == nil { continue } var buf bytes.Buffer ic := transform.NewWriter(&buf, ee.NewDecoder()) _, err := ic.Write(body) if err != nil { continue } err = ic.Close() if err != nil { continue } f = buf.Bytes() break } } return string(f), nil }
func NewUTF8Reader(label string, r io.Reader) (io.Reader, error) { e, _ := charset.Lookup(label) if e == nil { return nil, fmt.Errorf("unsupported charset: %q", label) } return transform.NewReader(r, unicode.BOMOverride(e.NewDecoder())), nil }
func encodeString(input_str, encode string) (output string, err_out error) { enc, _ := charset.Lookup(encode) r := transform.NewReader(strings.NewReader(input_str), enc.NewEncoder()) b, err := ioutil.ReadAll(r) if err != nil { return } return string(b), nil }
func parseHTML(content []byte, cs string) (*html.Node, error) { var r io.Reader = bytes.NewReader(content) if cs != "utf-8" { e, _ := charset.Lookup(cs) r = transform.NewReader(r, e.NewDecoder()) } return html.Parse(r) }
// DecodeCharset detects charset of str decodes it. func decodeCharset(str, label string) (nstr string, err error) { enc, _ := charset.Lookup(label) if enc == nil { enc, _, _ = charset.DetermineEncoding([]byte(str), "text/plain") } nstr, _, err = transform.String(enc.NewDecoder(), str) if err != nil { return nstr, err } return stripNonUTF8(nstr), nil }
func EncodeReader(s io.Reader, enc string) ([]byte, error) { e, _ := charset.Lookup(enc) if e == nil { return nil, errors.New(fmt.Sprintf("unsupported charset: %q", enc)) } var buf bytes.Buffer writer := transform.NewWriter(&buf, e.NewEncoder()) _, err := io.Copy(writer, s) if err != nil { return nil, err } return buf.Bytes(), nil }
// scanContent scans the content of a document for phrases, // and updates tally. func (conf *config) scanContent(content []byte, contentType, cs string, tally map[rule]int) { if strings.Contains(contentType, "javascript") { conf.scanJSContent(content, tally) return } transformers := make([]transform.Transformer, 0, 3) if cs != "utf-8" { e, _ := charset.Lookup(cs) transformers = append(transformers, e.NewDecoder()) } if strings.Contains(contentType, "html") { transformers = append(transformers, entityDecoder{}) } transformers = append(transformers, new(wordTransformer)) ps := newPhraseScanner(conf.ContentPhraseList, func(s string) { tally[rule{t: contentPhrase, content: s}]++ }) ps.scanByte(' ') var t transform.Transformer if len(transformers) == 1 { t = transformers[0] } else { t = transform.Chain(transformers...) } r := transform.NewReader(bytes.NewReader(content), t) buf := make([]byte, 4096) for { n, err := r.Read(buf) for _, c := range buf[:n] { ps.scanByte(c) } if err != nil { if err != io.EOF { log.Println("Error decoding page content:", err) } break } } ps.scanByte(' ') }
func convertToUtf8(s string) string { b := []byte(s) d := chardet.NewTextDetector() r, err := d.DetectBest(b) if err != nil { return fmt.Sprintf("<Can't detect string charset: %s>", err.Error()) } encoding, _ := charset.Lookup(r.Charset) if encoding == nil { return fmt.Sprintf("<Can't find encoding: %s>", r.Charset) } str, _, err := transform.String(encoding.NewDecoder(), s) if err != nil { return fmt.Sprintf("<Can't convert string from encoding %s to UTF8: %s>", r.Charset, err.Error()) } return str }
func (r *Response) convToUTF8(preview []byte, query func(*url.URL) string) { // Convert to UTF-8 if media.IsHTML(r.ContentType) { e, name, certain := charset.DetermineEncoding( preview, r.ContentType, ) // according to charset package source, default unknown charset is windows-1252. if !certain && name == "windows-1252" { if e, name = charset.Lookup(query(r.URL)); e != nil { certain = true } } r.Charset, r.CertainCharset, r.Encoding = name, certain, e if name != "" && e != nil { r.Body, _ = util.NewUTF8Reader(name, r.Body) } } }
func decode(data []byte, charsetName string) ([]byte, error) { encoding, _ := charset.Lookup(charsetName) if encoding == nil { return nil, fmt.Errorf("Unsupported charset: %v", charsetName) } reader := bytes.NewReader(data) var b bytes.Buffer writer := bufio.NewWriter(&b) decodeReader := transform.NewReader(reader, encoding.NewDecoder()) if _, err := io.Copy(writer, decodeReader); err != nil { return nil, err } if err := writer.Flush(); err != nil { return nil, err } if isUTF8Charset(charsetName) { return stripBOM(b.Bytes()), nil } return b.Bytes(), nil }
func main() { flag.Parse() var in io.Reader in = os.Stdin if *cs != "utf-8" { e, _ := charset.Lookup(*cs) in = transform.NewReader(in, e.NewDecoder()) } s := bufio.NewScanner(in) for s.Scan() { line := s.Text() if strings.Contains(line, ">,<") { continue } endPhrase := strings.Index(line, "><") if endPhrase != -1 { phrase := line[:endPhrase+1] rest := line[endPhrase+2:] endScore := strings.Index(rest, ">") if endScore != -1 { score := rest[:endScore] rest = strings.TrimSpace(rest[endScore+1:]) fmt.Println(phrase, score, rest) continue } } fmt.Println(line) } if err := s.Err(); err != nil { log.Println(err) } }
// UTF8encode converts a string from the source character set to UTF-8, skipping invalid byte sequences // @see http://stackoverflow.com/questions/32512500/ignore-illegal-bytes-when-decoding-text-with-go func UTF8encode(raw string, sourceCharset string) string { enc, name := charset.Lookup(sourceCharset) if nil == enc { fmt.Println("Cannot convert from", sourceCharset, ":", name) return raw } dst := make([]byte, len(raw)) d := enc.NewDecoder() var ( in int out int ) for in < len(raw) { // Do the transformation ndst, nsrc, err := d.Transform(dst[out:], []byte(raw[in:]), true) in += nsrc out += ndst if err == nil { // Completed transformation break } if err == transform.ErrShortDst { // Our output buffer is too small, so we need to grow it t := make([]byte, (cap(dst)+1)*2) copy(t, dst) dst = t continue } // We're here because of at least one illegal character. Skip over the current rune // and try again. _, width := utf8.DecodeRuneInString(raw[in:]) in += width } return string(dst) }
// ParsePatch reads unified-diff text from reader line by line and builds a
// Diff made of files, sections (one per "@" hunk header) and lines.
//
// maxlines is the line count at which the diff is flagged as too long. From
// then on no new sections are started, and parsing stops at the next file
// header. The pid and cmd parameters are not referenced in this function.
//
// After parsing, the content of each file is run through
// base.DetectEncoding. When the detected label is not "UTF-8" and a decoder
// is available, each line is replaced by its decoded form (lines that fail to
// decode are left untouched).
//
// The returned error is always nil.
//
// NOTE(review): scanner.Err() is never checked, so a read error or an
// over-long line ends parsing silently with a partial result. Confirm whether
// that is intended.
func ParsePatch(pid int64, maxlines int, cmd *exec.Cmd, reader io.Reader) (*Diff, error) {
	scanner := bufio.NewScanner(reader)
	var (
		curFile *DiffFile
		// Initial section that receives lines seen before any "@" header. It
		// is never attached to a file.
		curSection = &DiffSection{
			Lines: make([]*DiffLine, 0, 10),
		}

		leftLine, rightLine int
		isTooLong           bool
		// FIXME: Should use cache in the future.
		buf bytes.Buffer
	)

	diff := &Diff{Files: make([]*DiffFile, 0)}
	// i counts the non-empty lines processed, excluding "+++ "/"--- " lines.
	var i int
	for scanner.Scan() {
		line := scanner.Text()
		// fmt.Println(i, line)
		if strings.HasPrefix(line, "+++ ") || strings.HasPrefix(line, "--- ") {
			continue
		}

		if line == "" {
			continue
		}

		i = i + 1

		// Diff data too large, we only show the first about maxlines lines
		if i == maxlines {
			isTooLong = true
			log.Warn("Diff data too large")
		}

		// NOTE(review): the '@', '+', '-' and "Binary" cases dereference
		// curFile, which is nil until the first file header has been seen.
		switch {
		case line[0] == ' ':
			// Context line: present on both sides.
			diffLine := &DiffLine{Type: DIFF_LINE_PLAIN, Content: line, LeftIdx: leftLine, RightIdx: rightLine}
			leftLine++
			rightLine++
			curSection.Lines = append(curSection.Lines, diffLine)
			continue
		case line[0] == '@':
			// NOTE(review): this break leaves only the switch, not the scan
			// loop. Later lines keep being appended to the previous section.
			// Confirm whether stopping the loop was intended.
			if isTooLong {
				break
			}

			curSection = &DiffSection{}
			curFile.Sections = append(curFile.Sections, curSection)
			ss := strings.Split(line, "@@")
			diffLine := &DiffLine{Type: DIFF_LINE_SECTION, Content: line}
			curSection.Lines = append(curSection.Lines, diffLine)

			// Parse line number.
			// ss[1] is the text between the two "@@" markers. Its first
			// character is dropped, as is the first character of the left
			// range.
			ranges := strings.Split(ss[1][1:], " ")
			leftLine, _ = com.StrTo(strings.Split(ranges[0], ",")[0][1:]).Int()
			if len(ranges) > 1 {
				rightLine, _ = com.StrTo(strings.Split(ranges[1], ",")[0]).Int()
			} else {
				log.Warn("Parse line number failed: %v", line)
				rightLine = leftLine
			}
			continue
		case line[0] == '+':
			curFile.Addition++
			diff.TotalAddition++
			diffLine := &DiffLine{Type: DIFF_LINE_ADD, Content: line, RightIdx: rightLine}
			rightLine++
			curSection.Lines = append(curSection.Lines, diffLine)
			continue
		case line[0] == '-':
			curFile.Deletion++
			diff.TotalDeletion++
			diffLine := &DiffLine{Type: DIFF_LINE_DEL, Content: line, LeftIdx: leftLine}
			if leftLine > 0 {
				leftLine++
			}
			curSection.Lines = append(curSection.Lines, diffLine)
			// No continue here: control falls through to the file-header
			// check below.
		case strings.HasPrefix(line, "Binary"):
			curFile.IsBin = true
			continue
		}

		// Get new file.
		if strings.HasPrefix(line, DIFF_HEAD) {
			// This break is directly inside the for loop, so it ends parsing.
			if isTooLong {
				break
			}

			// Take the first half of the text that follows DIFF_HEAD as the
			// "a" path.
			beg := len(DIFF_HEAD)
			a := line[beg : (len(line)-beg)/2+beg]

			// In case file name is surrounded by double quotes(it happens only in git-shell).
			if a[0] == '"' {
				a = a[1 : len(a)-1]
				a = strings.Replace(a, `\"`, `"`, -1)
			}

			curFile = &DiffFile{
				// Drop everything up to and including the first "/".
				Name:     a[strings.Index(a, "/")+1:],
				Index:    len(diff.Files) + 1,
				Type:     DIFF_FILE_CHANGE,
				Sections: make([]*DiffSection, 0, 10),
			}
			diff.Files = append(diff.Files, curFile)

			// Check file diff type.
			// Consumes following lines until curFile.Type is positive.
			for scanner.Scan() {
				switch {
				case strings.HasPrefix(scanner.Text(), "new file"):
					curFile.Type = DIFF_FILE_ADD
					curFile.IsDeleted = false
					curFile.IsCreated = true
				case strings.HasPrefix(scanner.Text(), "deleted"):
					curFile.Type = DIFF_FILE_DEL
					curFile.IsCreated = false
					curFile.IsDeleted = true
				case strings.HasPrefix(scanner.Text(), "index"):
					curFile.Type = DIFF_FILE_CHANGE
					curFile.IsCreated = false
					curFile.IsDeleted = false
				}
				if curFile.Type > 0 {
					break
				}
			}
		}
	}

	// Re-encode each file's lines when the detected charset is not UTF-8.
	for _, f := range diff.Files {
		buf.Reset()
		for _, sec := range f.Sections {
			for _, l := range sec.Lines {
				buf.WriteString(l.Content)
				buf.WriteString("\n")
			}
		}
		charsetLabel, err := base.DetectEncoding(buf.Bytes())
		if charsetLabel != "UTF-8" && err == nil {
			encoding, _ := charset.Lookup(charsetLabel)
			if encoding != nil {
				d := encoding.NewDecoder()
				for _, sec := range f.Sections {
					for _, l := range sec.Lines {
						if c, _, err := transform.String(d, l.Content); err == nil {
							l.Content = c
						}
					}
				}
			}
		}
	}

	return diff, nil
}
func ParsePatch(maxLines, maxLineCharacteres, maxFiles int, reader io.Reader) (*Diff, error) { var ( diff = &Diff{Files: make([]*DiffFile, 0)} curFile *DiffFile curSection = &DiffSection{ Lines: make([]*DiffLine, 0, 10), } leftLine, rightLine int lineCount int curFileLinesCount int ) input := bufio.NewReader(reader) isEOF := false for !isEOF { line, err := input.ReadString('\n') if err != nil { if err == io.EOF { isEOF = true } else { return nil, fmt.Errorf("ReadString: %v", err) } } if len(line) > 0 && line[len(line)-1] == '\n' { // Remove line break. line = line[:len(line)-1] } if strings.HasPrefix(line, "+++ ") || strings.HasPrefix(line, "--- ") || len(line) == 0 { continue } curFileLinesCount++ lineCount++ // Diff data too large, we only show the first about maxlines lines if curFileLinesCount >= maxLines || len(line) >= maxLineCharacteres { curFile.IsIncomplete = true } switch { case line[0] == ' ': diffLine := &DiffLine{Type: DIFF_LINE_PLAIN, Content: line, LeftIdx: leftLine, RightIdx: rightLine} leftLine++ rightLine++ curSection.Lines = append(curSection.Lines, diffLine) continue case line[0] == '@': curSection = &DiffSection{} curFile.Sections = append(curFile.Sections, curSection) ss := strings.Split(line, "@@") diffLine := &DiffLine{Type: DIFF_LINE_SECTION, Content: line} curSection.Lines = append(curSection.Lines, diffLine) // Parse line number. 
ranges := strings.Split(ss[1][1:], " ") leftLine, _ = com.StrTo(strings.Split(ranges[0], ",")[0][1:]).Int() if len(ranges) > 1 { rightLine, _ = com.StrTo(strings.Split(ranges[1], ",")[0]).Int() } else { log.Warn("Parse line number failed: %v", line) rightLine = leftLine } continue case line[0] == '+': curFile.Addition++ diff.TotalAddition++ diffLine := &DiffLine{Type: DIFF_LINE_ADD, Content: line, RightIdx: rightLine} rightLine++ curSection.Lines = append(curSection.Lines, diffLine) continue case line[0] == '-': curFile.Deletion++ diff.TotalDeletion++ diffLine := &DiffLine{Type: DIFF_LINE_DEL, Content: line, LeftIdx: leftLine} if leftLine > 0 { leftLine++ } curSection.Lines = append(curSection.Lines, diffLine) case strings.HasPrefix(line, "Binary"): curFile.IsBin = true continue } // Get new file. if strings.HasPrefix(line, DIFF_HEAD) { middle := -1 // Note: In case file name is surrounded by double quotes (it happens only in git-shell). // e.g. diff --git "a/xxx" "b/xxx" hasQuote := line[len(DIFF_HEAD)] == '"' if hasQuote { middle = strings.Index(line, ` "b/`) } else { middle = strings.Index(line, " b/") } beg := len(DIFF_HEAD) a := line[beg+2 : middle] b := line[middle+3:] if hasQuote { a = string(git.UnescapeChars([]byte(a[1 : len(a)-1]))) b = string(git.UnescapeChars([]byte(b[1 : len(b)-1]))) } curFile = &DiffFile{ Name: a, Index: len(diff.Files) + 1, Type: DIFF_FILE_CHANGE, Sections: make([]*DiffSection, 0, 10), } diff.Files = append(diff.Files, curFile) if len(diff.Files) >= maxFiles { diff.IsIncomplete = true io.Copy(ioutil.Discard, reader) break } curFileLinesCount = 0 // Check file diff type and is submodule. 
for { line, err := input.ReadString('\n') if err != nil { if err == io.EOF { isEOF = true } else { return nil, fmt.Errorf("ReadString: %v", err) } } switch { case strings.HasPrefix(line, "new file"): curFile.Type = DIFF_FILE_ADD curFile.IsCreated = true case strings.HasPrefix(line, "deleted"): curFile.Type = DIFF_FILE_DEL curFile.IsDeleted = true case strings.HasPrefix(line, "index"): curFile.Type = DIFF_FILE_CHANGE case strings.HasPrefix(line, "similarity index 100%"): curFile.Type = DIFF_FILE_RENAME curFile.IsRenamed = true curFile.OldName = curFile.Name curFile.Name = b } if curFile.Type > 0 { if strings.HasSuffix(line, " 160000\n") { curFile.IsSubmodule = true } break } } } } // FIXME: detect encoding while parsing. var buf bytes.Buffer for _, f := range diff.Files { buf.Reset() for _, sec := range f.Sections { for _, l := range sec.Lines { buf.WriteString(l.Content) buf.WriteString("\n") } } charsetLabel, err := base.DetectEncoding(buf.Bytes()) if charsetLabel != "UTF-8" && err == nil { encoding, _ := charset.Lookup(charsetLabel) if encoding != nil { d := encoding.NewDecoder() for _, sec := range f.Sections { for _, l := range sec.Lines { if c, _, err := transform.String(d, l.Content); err == nil { l.Content = c } } } } } } return diff, nil }
func main() { var args []string argv := os.Args argc := len(argv) for n := 1; n < argc; n++ { if len(argv[n]) > 1 && argv[n][0] == '-' && argv[n][1] != '-' { switch argv[n][1] { case 'A': if n < argc-1 { after, _ = strconv.Atoi(argv[n+1]) n++ continue } case 'B': if n < argc-1 { before, _ = strconv.Atoi(argv[n+1]) n++ continue } case '8': utf8out = true case 'F': fixed = true case 'R': recursive = true case 'S': verbose = true case 'c': count = true case 'r': fullpath = false case 'i': ignorecase = true case 'I': ignorebinary = true case 'l': list = true case 'n': number = true case 'P': perl = true case 'G': basic = true case 'v': invert = true case 'o': only = true case 'f': if n < argc-1 { infile = argv[n+1] n++ continue } case 'z': zeroData = true case 'Z': zeroFile = true case 'V': fmt.Fprintf(os.Stdout, "%s\n", version) os.Exit(0) default: usage(true) } if len(argv[n]) > 2 { argv[n] = "-" + argv[n][2:] n-- } } else if len(argv[n]) > 1 && argv[n][0] == '-' && argv[n][1] == '-' { name := argv[n][2:] switch { case strings.HasPrefix(name, "enc="): encs = name[4:] case name == "enc" && n < argc-1: encs = argv[n+1] n++ case strings.HasPrefix(name, "exclude="): exclude = name[8:] case name == "exclude" && n < argc-1: exclude = argv[n+1] n++ case strings.HasPrefix(name, "color="): color = name[6:] case name == "color" && n < argc-1: color = argv[n+1] n++ case strings.HasPrefix(name, "separator="): separator = name[10:] case name == "separator": separator = argv[n+1] n++ case name == "null": zeroFile = true case name == "null-data": zeroData = true case name == "help": usage(false) default: usage(true) } } else { args = append(args, argv[n]) } } if len(args) == 0 { usage(true) } var err error var pattern interface{} if encs != "" { encodings = strings.Split(encs, ",") } else { enc_env := os.Getenv("JVGREP_ENCODINGS") if enc_env != "" { encodings = strings.Split(enc_env, ",") } } out_enc := os.Getenv("JVGREP_OUTPUT_ENCODING") if out_enc != "" { ee, _ := 
charset.Lookup(out_enc) if ee == nil { errorline(fmt.Sprintf("unknown encoding: %s", out_enc)) os.Exit(1) } oc = transform.NewWriter(os.Stdout, ee.NewEncoder()) } instr := "" argindex := 0 if len(infile) > 0 { b, err := ioutil.ReadFile(infile) if err != nil { errorline(err.Error()) os.Exit(1) } instr = strings.TrimSpace(string(b)) } else { instr = args[0] argindex = 1 } if fixed { pattern = instr } else if perl { re, err := syntax.Parse(instr, syntax.Perl) if err != nil { errorline(err.Error()) os.Exit(1) } rec, err := syntax.Compile(re) if err != nil { errorline(err.Error()) os.Exit(1) } instr = rec.String() if ignorecase { instr = "(?i:" + instr + ")" } pattern, err = regexp.Compile(instr) if err != nil { errorline(err.Error()) os.Exit(1) } } else { if ignorecase { instr = "(?i:" + instr + ")" } pattern, err = regexp.Compile(instr) if err != nil { errorline(err.Error()) os.Exit(1) } } if exclude == "" { exclude = os.Getenv("JVGREP_EXCLUDE") } if exclude == "" { exclude = excludeDefaults } ere, err := regexp.Compile(exclude) if err != nil { errorline(err.Error()) os.Exit(1) } atty := false if color == "" { color = os.Getenv("JVGREP_COLOR") } if color == "" || color == "auto" { atty = isatty.IsTerminal(os.Stdout.Fd()) } else if color == "always" { atty = true } else if color == "never" { atty = false } else { usage(true) } if atty { sc := make(chan os.Signal, 10) signal.Notify(sc, syscall.SIGTERM, syscall.SIGINT, syscall.SIGHUP) go func() { for _ = range sc { ct.ResetColor() os.Exit(0) } }() } if len(args) == 1 && argindex != 0 { Grep(&GrepArg{ pattern: pattern, input: os.Stdin, single: true, atty: atty, }) return } envre := regexp.MustCompile(`^(\$[a-zA-Z][a-zA-Z0-9_]+|\$\([a-zA-Z][a-zA-Z0-9_]+\))$`) globmask := "" ch := make(chan *GrepArg, 10) done := make(chan int) go GoGrep(ch, done) nargs := len(args[argindex:]) for _, arg := range args[argindex:] { globmask = "" root := "" arg = strings.Trim(arg, `"`) for n, i := range strings.Split(filepath.ToSlash(arg), 
"/") { if root == "" && strings.Index(i, "*") != -1 { if globmask == "" { root = "." } else { root = filepath.ToSlash(globmask) } } if n == 0 && i == "~" { if runtime.GOOS == "windows" { i = os.Getenv("USERPROFILE") } else { i = os.Getenv("HOME") } } if envre.MatchString(i) { i = strings.Trim(strings.Trim(os.Getenv(i[1:]), "()"), `"`) } globmask = filepath.Join(globmask, i) if n == 0 { if runtime.GOOS == "windows" && filepath.VolumeName(i) != "" { globmask = i + "/" } else if len(globmask) == 0 { globmask = "/" } } } if root == "" { path, _ := filepath.Abs(arg) fi, err := os.Stat(path) if err != nil { errorline(fmt.Sprintf("jvgrep: %s: No such file or directory", arg)) os.Exit(1) } if !fi.IsDir() { if verbose { println("search:", path) } ch <- &GrepArg{ pattern: pattern, input: path, single: nargs == 1, atty: atty, } continue } else { root = path if fi.IsDir() { globmask = "**/*" } else { globmask = "**/" + globmask } } } if globmask == "" { globmask = "." } globmask = filepath.ToSlash(filepath.Clean(globmask)) if recursive { if strings.Index(globmask, "/") > -1 { globmask += "/" } else { globmask = "**/" + globmask } } cc := []rune(globmask) dirmask := "" filemask := "" for i := 0; i < len(cc); i++ { if cc[i] == '*' { if i < len(cc)-2 && cc[i+1] == '*' && cc[i+2] == '/' { filemask += "(.*/)?" 
dirmask = filemask i += 2 } else { filemask += "[^/]*" } } else { c := cc[i] if c == '/' || ('0' <= c && c <= '9') || ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || 255 < c { filemask += string(c) } else { filemask += fmt.Sprintf("[\\x%x]", c) } if c == '/' && dirmask == "" && strings.Index(filemask, "*") != -1 { dirmask = filemask } } } if dirmask == "" { dirmask = filemask } if len(filemask) > 0 && filemask[len(filemask)-1] == '/' { if root == "" { root = filemask } filemask += "[^/]*" } if runtime.GOOS == "windows" || runtime.GOOS == "darwin" { dirmask = "(?i:" + dirmask + ")" filemask = "(?i:" + filemask + ")" } dre := regexp.MustCompile("^" + dirmask) fre := regexp.MustCompile("^" + filemask + "$") root = filepath.Clean(root) if verbose { println("dirmask:", dirmask) println("filemask:", filemask) println("root:", root) } filepath.Walk(root, func(path string, info os.FileInfo, err error) error { if info == nil { return err } path = filepath.ToSlash(path) if ere != nil && ere.MatchString(path) { if info.IsDir() { return filepath.SkipDir } return nil } if info.IsDir() { if path == "." || recursive || len(path) <= len(root) || dre.MatchString(path+"/") { return nil } return filepath.SkipDir } if fre.MatchString(path) && info.Mode().IsRegular() { if verbose { println("search:", path) } ch <- &GrepArg{ pattern: pattern, input: path, single: false, atty: atty, } } return nil }) } ch <- nil if count { fmt.Println(countMatch) } <-done }
func doGrep(path string, f []byte, arg *GrepArg) { encs := encodings if ignorebinary { if bytes.IndexFunc(f, func(r rune) bool { return 0 < r && r < 0x9 }) != -1 { return } } if len(f) > 2 { if f[0] == 0xfe && f[1] == 0xff { arg.bom = f[0:2] f = f[2:] } else if f[0] == 0xff && f[1] == 0xfe { arg.bom = f[0:2] f = f[2:] } } if len(arg.bom) > 0 { if arg.bom[0] == 0xfe && arg.bom[1] == 0xff { encs = []string{"utf-16be"} } else if arg.bom[0] == 0xff && arg.bom[1] == 0xfe { encs = []string{"utf-16le"} } } for _, enc := range encs { if verbose { println("trying("+enc+"):", path) } if len(arg.bom) > 0 && enc != "utf-16be" && enc != "utf-16le" { continue } did := false var t []byte var n, l, size, next, prev int if enc != "" { if len(arg.bom) > 0 || bytes.IndexFunc(f, func(r rune) bool { return 0 < r && r < 0x9 }) == -1 { ee, _ := charset.Lookup(enc) if ee == nil { continue } var buf bytes.Buffer ic := transform.NewWriter(&buf, ee.NewDecoder()) _, err := ic.Write(f) if err != nil { next = -1 continue } lf := false if len(arg.bom) > 0 && len(f)%2 != 0 { ic.Write([]byte{0}) lf = true } err = ic.Close() if err != nil { if verbose { println(err.Error()) } next = -1 continue } f = buf.Bytes() if lf { f = f[:len(f)-1] } } } size = len(f) if size == 0 { continue } for next != -1 { for { if next >= size { next = -1 break } if f[next] == '\n' { break } next++ } n++ if next == -1 { t = f[prev:] } else { t = f[prev:next] prev = next + 1 next++ } l = len(t) if l > 0 && t[l-1] == '\r' { t = t[:l-1] l-- } var match bool if only { var matches []string ts := string(t) if re, ok := arg.pattern.(*regexp.Regexp); ok { matches = re.FindAllString(ts, -1) } else if s, ok := arg.pattern.(string); ok { if ignorecase { ts = strings.ToLower(ts) } ti := 0 tl := len(ts) for ti != -1 && ti < tl-1 { ti = strings.Index(ts[ti:], s) if ti != -1 { matches = append(matches, s) ti++ } } } match = len(matches) > 0 // skip if not match without invert, or match with invert. 
if match == invert { continue } if verbose { println("found("+enc+"):", path) } if list { matchedfile(path) did = true break } for _, m := range matches { countMatch++ if count { continue } if strings.IndexFunc( m, func(r rune) bool { return 0 < r && r < 0x9 }) != -1 { errorline(fmt.Sprintf("matched binary file: %s", path)) did = true break } else { if number { if utf8.ValidString(m) { matchedline(path, n, m, arg) } else { errorline(fmt.Sprintf("matched binary file: %s", path)) did = true break } } else { if utf8.ValidString(m) { matchedline("", 0, m, arg) } else { errorline(fmt.Sprintf("matched binary file: %s", path)) did = true break } } } } } else { if re, ok := arg.pattern.(*regexp.Regexp); ok { if len(re.FindAllIndex(t, 1)) > 0 { match = true } } else if s, ok := arg.pattern.(string); ok { if ignorecase { if strings.Index(strings.ToLower(string(t)), strings.ToLower(s)) > -1 { match = true } } else { if strings.Index(string(t), s) > -1 { match = true } } } // skip if not match without invert, or match with invert. 
if match == invert { continue } if verbose { println("found("+enc+"):", path) } if list { matchedfile(path) did = true break } countMatch++ if count { did = true continue } if arg.single && !number { if utf8.Valid(t) { matchedline("", -1, string(t), arg) } else { errorline(fmt.Sprintf("matched binary file: %s", path)) did = true break } } else { if bytes.IndexFunc( t, func(r rune) bool { return 0 < r && r < 0x9 }) != -1 { errorline(fmt.Sprintf("matched binary file: %s", path)) did = true break } else if utf8.Valid(t) { if after <= 0 && before <= 0 { matchedline(path, n, string(t), arg) } else { if countMatch > 1 { os.Stdout.WriteString("---\n") } bprev, bnext := next-l-2, next-l-2 lines := make([]string, 0) for i := 0; i < before && bprev > 0; i++ { for { if bprev == 0 || f[bprev-1] == '\n' { lines = append(lines, string(f[bprev:bnext])) bnext = bprev - 1 bprev-- break } bprev-- } } for i := len(lines); i > 0; i-- { matchedline(path, i-n, lines[i-1], arg) } matchedline(path, n, string(t), arg) lines = make([]string, 0) aprev, anext := next, next for i := 0; i < after && anext >= 0 && anext < size; i++ { for { if anext == size || f[anext] == '\n' { lines = append(lines, string(f[aprev:anext])) aprev = anext + 1 anext++ break } anext++ } } for i := 0; i < len(lines); i++ { matchedline(path, -n-i-1, lines[i], arg) } } } else { errorline(fmt.Sprintf("matched binary file: %s", path)) did = true break } } } did = true } runtime.GC() if did || next == -1 { break } } }