func ToUTF8WithErr(content []byte) (error, string) { charsetLabel, err := base.DetectEncoding(content) if err != nil { return err, "" } else if charsetLabel == "UTF-8" { return nil, string(content) } encoding, _ := charset.Lookup(charsetLabel) if encoding == nil { return fmt.Errorf("Unknown encoding: %s", charsetLabel), string(content) } // If there is an error, we concatenate the nicely decoded part and the // original left over. This way we won't loose data. result, n, err := transform.String(encoding.NewDecoder(), string(content)) if err != nil { result = result + string(content[n:]) } return err, result }
func ParsePatch(pid int64, maxlines int, cmd *exec.Cmd, reader io.Reader) (*Diff, error) { scanner := bufio.NewScanner(reader) var ( curFile *DiffFile curSection = &DiffSection{ Lines: make([]*DiffLine, 0, 10), } leftLine, rightLine int isTooLong bool // FIXME: use first 30 lines to detect file encoding. Should use cache in the future. buf bytes.Buffer ) diff := &Diff{Files: make([]*DiffFile, 0)} var i int for scanner.Scan() { line := scanner.Text() // fmt.Println(i, line) if strings.HasPrefix(line, "+++ ") || strings.HasPrefix(line, "--- ") { continue } if line == "" { continue } i = i + 1 // FIXME: use first 30 lines to detect file encoding. if i <= 30 { buf.WriteString(line) } // Diff data too large, we only show the first about maxlines lines if i == maxlines { isTooLong = true log.Warn("Diff data too large") //return &Diff{}, nil } switch { case line[0] == ' ': diffLine := &DiffLine{Type: DIFF_LINE_PLAIN, Content: line, LeftIdx: leftLine, RightIdx: rightLine} leftLine++ rightLine++ curSection.Lines = append(curSection.Lines, diffLine) continue case line[0] == '@': if isTooLong { return diff, nil } curSection = &DiffSection{} curFile.Sections = append(curFile.Sections, curSection) ss := strings.Split(line, "@@") diffLine := &DiffLine{Type: DIFF_LINE_SECTION, Content: line} curSection.Lines = append(curSection.Lines, diffLine) // Parse line number. ranges := strings.Split(ss[len(ss)-2][1:], " ") leftLine, _ = com.StrTo(strings.Split(ranges[0], ",")[0][1:]).Int() rightLine, _ = com.StrTo(strings.Split(ranges[1], ",")[0]).Int() continue case line[0] == '+': curFile.Addition++ diff.TotalAddition++ diffLine := &DiffLine{Type: DIFF_LINE_ADD, Content: line, RightIdx: rightLine} rightLine++ curSection.Lines = append(curSection.Lines, diffLine) continue case line[0] == '-': curFile.Deletion++ diff.TotalDeletion++ diffLine := &DiffLine{Type: DIFF_LINE_DEL, Content: line, LeftIdx: leftLine} if leftLine > 0 { leftLine++ } curSection.Lines = append(curSection.Lines, diffLine) case strings.HasPrefix(line, "Binary"): curFile.IsBin = true continue } // Get new file. if strings.HasPrefix(line, DIFF_HEAD) { if isTooLong { return diff, nil } fs := strings.Split(line[len(DIFF_HEAD):], " ") a := fs[0] curFile = &DiffFile{ Name: a[strings.Index(a, "/")+1:], Index: len(diff.Files) + 1, Type: DIFF_FILE_CHANGE, Sections: make([]*DiffSection, 0, 10), } diff.Files = append(diff.Files, curFile) // Check file diff type. for scanner.Scan() { switch { case strings.HasPrefix(scanner.Text(), "new file"): curFile.Type = DIFF_FILE_ADD curFile.IsDeleted = false curFile.IsCreated = true case strings.HasPrefix(scanner.Text(), "deleted"): curFile.Type = DIFF_FILE_DEL curFile.IsCreated = false curFile.IsDeleted = true case strings.HasPrefix(scanner.Text(), "index"): curFile.Type = DIFF_FILE_CHANGE curFile.IsCreated = false curFile.IsDeleted = false } if curFile.Type > 0 { break } } } } // FIXME: use first 30 lines to detect file encoding. charsetLabel, err := base.DetectEncoding(buf.Bytes()) if charsetLabel != "utf8" && err == nil { encoding, _ := charset.Lookup(charsetLabel) if encoding != nil { d := encoding.NewDecoder() for _, f := range diff.Files { for _, sec := range f.Sections { for _, l := range sec.Lines { if c, _, err := transform.String(d, l.Content); err == nil { l.Content = c } } } } } } return diff, nil }
// TODO: move this function to gogits/git-module func ParsePatch(maxLines, maxLineCharacteres, maxFiles int, reader io.Reader) (*Diff, error) { var ( diff = &Diff{Files: make([]*DiffFile, 0)} curFile *DiffFile curSection = &DiffSection{ Lines: make([]*DiffLine, 0, 10), } leftLine, rightLine int lineCount int curFileLinesCount int ) input := bufio.NewReader(reader) isEOF := false for !isEOF { line, err := input.ReadString('\n') if err != nil { if err == io.EOF { isEOF = true } else { return nil, fmt.Errorf("ReadString: %v", err) } } if len(line) > 0 && line[len(line)-1] == '\n' { // Remove line break. line = line[:len(line)-1] } if strings.HasPrefix(line, "+++ ") || strings.HasPrefix(line, "--- ") || len(line) == 0 { continue } curFileLinesCount++ lineCount++ // Diff data too large, we only show the first about maxlines lines if curFileLinesCount >= maxLines || len(line) >= maxLineCharacteres { curFile.IsIncomplete = true } switch { case line[0] == ' ': diffLine := &DiffLine{Type: DIFF_LINE_PLAIN, Content: line, LeftIdx: leftLine, RightIdx: rightLine} leftLine++ rightLine++ curSection.Lines = append(curSection.Lines, diffLine) continue case line[0] == '@': curSection = &DiffSection{} curFile.Sections = append(curFile.Sections, curSection) ss := strings.Split(line, "@@") diffLine := &DiffLine{Type: DIFF_LINE_SECTION, Content: line} curSection.Lines = append(curSection.Lines, diffLine) // Parse line number. ranges := strings.Split(ss[1][1:], " ") leftLine, _ = com.StrTo(strings.Split(ranges[0], ",")[0][1:]).Int() if len(ranges) > 1 { rightLine, _ = com.StrTo(strings.Split(ranges[1], ",")[0]).Int() } else { log.Warn("Parse line number failed: %v", line) rightLine = leftLine } continue case line[0] == '+': curFile.Addition++ diff.TotalAddition++ diffLine := &DiffLine{Type: DIFF_LINE_ADD, Content: line, RightIdx: rightLine} rightLine++ curSection.Lines = append(curSection.Lines, diffLine) continue case line[0] == '-': curFile.Deletion++ diff.TotalDeletion++ diffLine := &DiffLine{Type: DIFF_LINE_DEL, Content: line, LeftIdx: leftLine} if leftLine > 0 { leftLine++ } curSection.Lines = append(curSection.Lines, diffLine) case strings.HasPrefix(line, "Binary"): curFile.IsBin = true continue } // Get new file. if strings.HasPrefix(line, DIFF_HEAD) { middle := -1 // Note: In case file name is surrounded by double quotes (it happens only in git-shell). // e.g. diff --git "a/xxx" "b/xxx" hasQuote := line[len(DIFF_HEAD)] == '"' if hasQuote { middle = strings.Index(line, ` "b/`) } else { middle = strings.Index(line, " b/") } beg := len(DIFF_HEAD) a := line[beg+2 : middle] b := line[middle+3:] if hasQuote { a = string(git.UnescapeChars([]byte(a[1 : len(a)-1]))) b = string(git.UnescapeChars([]byte(b[1 : len(b)-1]))) } curFile = &DiffFile{ Name: a, Index: len(diff.Files) + 1, Type: DIFF_FILE_CHANGE, Sections: make([]*DiffSection, 0, 10), } diff.Files = append(diff.Files, curFile) if len(diff.Files) >= maxFiles { diff.IsIncomplete = true io.Copy(ioutil.Discard, reader) break } curFileLinesCount = 0 // Check file diff type and is submodule. for { line, err := input.ReadString('\n') if err != nil { if err == io.EOF { isEOF = true } else { return nil, fmt.Errorf("ReadString: %v", err) } } switch { case strings.HasPrefix(line, "new file"): curFile.Type = DIFF_FILE_ADD curFile.IsCreated = true case strings.HasPrefix(line, "deleted"): curFile.Type = DIFF_FILE_DEL curFile.IsDeleted = true case strings.HasPrefix(line, "index"): curFile.Type = DIFF_FILE_CHANGE case strings.HasPrefix(line, "similarity index 100%"): curFile.Type = DIFF_FILE_RENAME curFile.IsRenamed = true curFile.OldName = curFile.Name curFile.Name = b } if curFile.Type > 0 { if strings.HasSuffix(line, " 160000\n") { curFile.IsSubmodule = true } break } } } } // FIXME: detect encoding while parsing. var buf bytes.Buffer for _, f := range diff.Files { buf.Reset() for _, sec := range f.Sections { for _, l := range sec.Lines { buf.WriteString(l.Content) buf.WriteString("\n") } } charsetLabel, err := base.DetectEncoding(buf.Bytes()) if charsetLabel != "UTF-8" && err == nil { encoding, _ := charset.Lookup(charsetLabel) if encoding != nil { d := encoding.NewDecoder() for _, sec := range f.Sections { for _, l := range sec.Lines { if c, _, err := transform.String(d, l.Content); err == nil { l.Content = c } } } } } } return diff, nil }