func proceed(i *TypeOneI, rdr fancy.Reader) { for !i.Done { t, _ := ps.Token(rdr) // fmt.Printf("Stack: %v\n", util.StringArray(i.St.Dump())); // fmt.Printf("--- %s\n", t); if len(t) < 1 { break } b, _ := rdr.ReadByte() if b > 32 { rdr.UnreadByte() } if len(t) == 0 { break } if d, ok := find(i, "/"+string(t)); ok { if d[0] == '{' { proceed(i, fancy.SliceReader(d[1:len(d)-1])) } else { i.St.Push(d) } } else if f, ok := Ops[string(t)]; ok { f(i) } else { i.St.Push(t) } } return }
func Page(pd *pdfread.PdfReaderT, page int, xmlDecl bool) []byte { pg := pd.Pages() if page >= len(pg) { complain("Page does not exist!\n") } mbox := util.StringArray(pd.Arr(pd.Att("/MediaBox", pg[page]))) drw := svgdraw.NewTestSvg() svgtext.New(pd, drw).Page = page w := strm.Mul(strm.Sub(mbox[2], mbox[0]), "1.25") h := strm.Mul(strm.Sub(mbox[3], mbox[1]), "1.25") decl := "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n" if !xmlDecl { decl = "" } drw.Write.Out("%s"+ "<svg\n"+ " xmlns:svg=\"http://www.w3.org/2000/svg\"\n"+ " xmlns=\"http://www.w3.org/2000/svg\"\n"+ " version=\"1.0\"\n"+ " width=\"%s\"\n"+ " height=\"%s\">\n"+ "<g transform=\"matrix(1.25,0,0,-1.25,%s,%s)\">\n", decl, w, h, strm.Mul(mbox[0], "-1.25"), strm.Mul(mbox[3], "1.25")) cont := pd.ForcedArray(pd.Dic(pg[page])["/Contents"]) _, ps := pd.DecodedStream(cont[0]) drw.Interpret(fancy.SliceReader(ps)) drw.Draw.CloseDrawing() drw.Write.Out("</g>\n</svg>\n") return drw.Write.Content }
func main() { a, _ := ioutil.ReadFile(os.Args[1]) if a[0] == 128 { a = pfb.Decode(a) } g := type1.Read(fancy.SliceReader(a)) fmt.Printf("%v\n", util.StringArray(g.St.Dump())) dumpT1(g) }
func (t *TypeOneI) op_ifelse(a [][]byte) { p := a[2] if string(a[0]) == "true" { p = a[1] } if len(p) > 2 && p[0] == '{' { proceed(t, fancy.SliceReader(p[1:len(p)-1])) } }
// Array() extracts an array from PDF data. func Array(s []byte) [][]byte { if len(s) < 2 || s[0] != '[' || s[len(s)-1] != ']' { return nil } rdr := fancy.SliceReader(s[1 : len(s)-1]) r := make([][]byte, MAX_PDF_ARRAYSIZE) b := 0 for { r[b], _ = refToken(rdr) if len(r[b]) == 0 { break } b++ } if b == 0 { return nil } return r[0:b] }
func (t *SvgTextT) cmap(font string) (r *cmapi.CharMapperT) { var ok bool if r, ok = t.cmaps[font]; ok { return } r = cm_identity // setup default if t.fonts == nil { t.fonts = t.Pdf.PageFonts(t.Pdf.Pages()[t.Page]) if t.fonts == nil { return } } if dr, ok := t.fonts[font]; ok { d := t.Pdf.Dic(dr) if tu, ok := d["/ToUnicode"]; ok { _, cm := t.Pdf.DecodedStream(tu) r = cmapi.Read(fancy.SliceReader(cm)) t.cmaps[font] = r } } return }
// Dictionary() makes a map/hash from PDF dictionary data. func Dictionary(s []byte) DictionaryT { if len(s) < 4 { return nil } e := len(s) - 1 if s[0] != s[1] || s[0] != '<' || s[e] != s[e-1] || s[e] != '>' { return nil } r := make(DictionaryT) rdr := fancy.SliceReader(s[2 : e-1]) for { t, _ := ps.Token(rdr) if len(t) == 0 { break } if t[0] != '/' { return nil } k := string(t) t, _ = refToken(rdr) r[k] = t } return r }
// init registers the operators implemented in this file in the package-level
// Ops table, which the interpreter loop consults for every token.
//
// Operands live on t.St as byte slices. Dictionaries and arrays are referred
// to by handles: a leading 'D' or 'A' followed by a number that indexes
// t.Dicts or t.Arrays. Several operators are stubs that only keep the stack
// balanced; they are marked as such below.
func init() {
	var ops = map[string]func(t *TypeOneI){
		// array: pops a size and pushes the value returned by t.NewArray.
		"array": func(t *TypeOneI) {
			a := t.St.Pop()
			t.St.Push(t.NewArray(strm.Int(string(a), 1)))
		},
		// begin: pops a dictionary handle and puts that dictionary on top of
		// the dictionary stack. Panics when the operand is not a 'D' handle.
		"begin": func(t *TypeOneI) {
			a := t.St.Pop()
			if a[0] != 'D' {
				panic("Wrong dictionary!\n")
			}
			t.DicSp++
			t.DicSt[t.DicSp] = t.Dicts[strm.Int(string(a[1:]), 1)]
		},
		// bind: no-op.
		"bind": func(t *TypeOneI) {
		},
		// cleartomark: pops operands until one equal to "mark" has been popped.
		"cleartomark": func(t *TypeOneI) {
			a := t.St.Pop()
			for string(a) != "mark" {
				a = t.St.Pop()
			}
		},
		// closefile: discards one operand and sets t.Done, which stops the
		// interpreter loop.
		"closefile": func(t *TypeOneI) {
			a := t.St.Pop()
			t.Done = true
			_ = a
		},
		// currentdict: pushes the Name of the dictionary on top of the
		// dictionary stack.
		"currentdict": func(t *TypeOneI) {
			t.St.Push(t.DicSt[t.DicSp].Name)
		},
		// currentfile: stub; pushes the placeholder "?".
		"currentfile": func(t *TypeOneI) {
			t.St.Push([]byte{'?'})
		},
		// def: takes key and value and stores the pair in the dictionary on
		// top of the dictionary stack.
		"def": func(t *TypeOneI) {
			a := t.St.Drop(2)
			t.DicSt[t.DicSp].Defs[string(a[0])] = a[1]
		},
		// definefont: records the two operands in t.Fonts (first as key,
		// second as value) and pushes the placeholder "<FONT>".
		"definefont": func(t *TypeOneI) {
			a := t.St.Drop(2)
			t.Fonts[string(a[0])] = string(a[1])
			t.St.Push(util.Bytes("<FONT>")) // FIXME, we need this.
			_ = a
		},
		// defineresource: stub; drops three operands and pushes "?".
		"defineresource": func(t *TypeOneI) {
			a := t.St.Drop(3)
			t.St.Push([]byte{'?'})
			_ = a
		},
		// dict: pops an operand that is not used and pushes the value returned
		// by t.NewDic.
		"dict": func(t *TypeOneI) {
			a := t.St.Pop()
			t.St.Push(t.NewDic())
			_ = a
		},
		// dup: duplicates the top operand.
		"dup": func(t *TypeOneI) {
			a := t.St.Pop()
			t.St.Push(a)
			t.St.Push(a)
		},
		// eexec: discards one operand and lets the eexec helper consume from
		// t.Rdr. The returned bytes are wrapped in a reader but not
		// interpreted, because the proceed call is commented out; t.Rdr is
		// restored and t.Done is cleared afterwards.
		"eexec": func(t *TypeOneI) {
			a := t.St.Pop()
			b := eexec(t.Rdr)
			old := t.Rdr
			t.Rdr = fancy.SliceReader(b)
			//proceed(t, t.Rdr)
			t.Rdr = old
			t.Done = false
			_ = a
		},
		// end: removes the top entry of the dictionary stack by decrementing
		// its pointer.
		"end": func(t *TypeOneI) {
			t.DicSp--
		},
		// exch: swaps the two topmost operands.
		// NOTE(review): a[0] is copied before the first Push, presumably
		// because Drop returns a view of the stack storage that Push may
		// overwrite — confirm before reordering.
		"exch": func(t *TypeOneI) {
			a := t.St.Drop(2)
			a0 := a[0]
			t.St.Push(a[1])
			t.St.Push(a0)
		},
		// executeonly: no-op.
		"executeonly": func(t *TypeOneI) {
		},
		// findresource: stub; drops two operands and pushes "?".
		"findresource": func(t *TypeOneI) {
			a := t.St.Drop(2)
			t.St.Push([]byte{'?'})
			_ = a
		},
		// for: stub; drops four operands and runs nothing.
		"for": func(t *TypeOneI) {
			a := t.St.Drop(4) // FIXME: the loop body is never executed
			_ = a
		},
		// get: takes a container handle and a key/index and pushes the entry
		// stored there. Panics when the handle is neither 'D' nor 'A'.
		"get": func(t *TypeOneI) {
			a := t.St.Drop(2)
			i := strm.Int(string(a[0][1:]), 1)
			if a[0][0] == 'D' {
				t.St.Push(t.Dicts[i].Defs[string(a[1])])
			} else if a[0][0] == 'A' {
				t.St.Push(t.Arrays[i][strm.Int(string(a[1]), 1)])
			} else {
				panic("Can not 'get' from!\n")
			}
		},
		// if: takes condition and procedure; delegates to op_ifelse with an
		// empty else-branch.
		"if": func(t *TypeOneI) {
			a := t.St.Drop(2)
			t.op_ifelse([][]byte{a[0], a[1], []byte{}})
		},
		// ifelse: takes condition, true-procedure and false-procedure and
		// delegates to op_ifelse.
		"ifelse": func(t *TypeOneI) {
			a := t.St.Drop(3)
			t.op_ifelse(a)
		},
		// index: pops n and pushes the value of t.St.Index(n + 1).
		"index": func(t *TypeOneI) {
			a := t.St.Pop()
			t.St.Push(t.St.Index(strm.Int(string(a), 1) + 1))
		},
		// known: stub; drops two operands and always pushes "false".
		"known": func(t *TypeOneI) {
			a := t.St.Drop(2)
			t.St.Push(util.Bytes("false")) // FIXME: no lookup is performed
			_ = a
		},
		// noaccess: no-op.
		"noaccess": func(t *TypeOneI) {
		},
		// pop: discards the top operand.
		"pop": func(t *TypeOneI) {
			a := t.St.Pop()
			_ = a
		},
		// put: takes a container handle, a key/index and a value and stores
		// the value in the dictionary or array. Panics when the handle is
		// neither 'D' nor 'A'.
		"put": func(t *TypeOneI) {
			a := t.St.Drop(3)
			if a[0][0] == 'D' {
				t.Dicts[strm.Int(string(a[0][1:]), 1)].Defs[string(a[1])] = a[2]
			} else if a[0][0] == 'A' {
				t.Arrays[strm.Int(string(a[0][1:]), 1)][strm.Int(string(a[1]), 1)] = a[2]
			} else {
				panic("Wrong dictionary or array!\n")
			}
		},
		// readonly: no-op.
		"readonly": func(t *TypeOneI) {
		},
		// readstring: takes two operands, fills the second one from t.Rdr,
		// and pushes the filled part followed by "true". The first operand and
		// the read error are ignored.
		"readstring": func(t *TypeOneI) {
			a := t.St.Drop(2)
			c, _ := t.Rdr.Read(a[1])
			t.St.Push(a[1][0:c])
			t.St.Push(util.Bytes("true"))
		},
		// string: pops a length and pushes a zero-filled byte slice of that
		// length.
		"string": func(t *TypeOneI) {
			a := t.St.Pop()
			t.St.Push(make([]byte, strm.Int(string(a), 1)))
		},
		// userdict: pushes the dictionary handle "D0".
		"userdict": func(t *TypeOneI) {
			t.St.Push(util.Bytes("D0"))
		},
		// where: stub; pops one operand and always pushes "false".
		"where": func(t *TypeOneI) {
			a := t.St.Pop()
			t.St.Push(util.Bytes("false"))
			_ = a
		},
	}
	// Copy the local table into the shared one; existing entries with the
	// same name are overwritten.
	for k, v := range ops {
		Ops[k] = v
	}
}
// Load() loads a PDF file of a given name. func Load(fn string) *PdfReaderT { var rr [][2]int // list of entries to resolve r := new(PdfReaderT) r.File = fn r.rdr = fancy.FileReader(fn) if r.rdr == nil { util.Log(fn, "FileReader error") return nil } v := make([]byte, 16) r.rdr.ReadAt(v, 0) if v[0] != '%' || v[1] != 'P' || v[2] != 'D' || v[3] != 'F' { util.Log(string(v), "not a PDF") r.rdr.Close() return nil } x := bytes.IndexByte(v, '\r') if x > 0 { v = v[:x] } r.Version = string(v) if r.Startxref = xrefStart(r.rdr); r.Startxref == -1 { util.Log(fn, "xrefStart error") r.rdr.Close() return nil } if r.Xref, r.Trailer = xrefReadTable(r.rdr, r.Startxref); r.Xref == nil { r.Xref, rr, r.Trailer = xrefReadStream(r.rdr, r.Startxref) } if r.Xref == nil { util.Log(fn, "xrefRead error") r.rdr.Close() return nil } if r.Trailer == nil { r.rdr.Seek(int64(xrefSkip(r.rdr, r.Startxref)), 0) s, _ := ps.Token(r.rdr) if string(s) != "trailer" { util.Log(fn, "no trailer") r.rdr.Close() return nil } s, _ = ps.Token(r.rdr) if r.Trailer = Dictionary(s); r.Trailer == nil { util.Log(fn, "no trailer dictionary") r.rdr.Close() return nil } } r.rcache = make(map[string][]byte) r.rncache = make(map[string]int) r.dicache = make(map[string]DictionaryT) if rr != nil { curr := -1 var dic DictionaryT var s []byte for _, v := range rr { o, i := v[0], v[1] if o != curr { curr = o dic, s = r.DecodedStream(util.MakeRef(curr)) first := num(dic["/First"]) n := num(dic["/N"]) rdr := fancy.SliceReader(s) p := tuple(rdr, n*2) util.Log("Object-Stream", curr) for i := 0; i < len(p); i += 2 { oo := num(p[i+0]) offs := num(p[i+1]) util.Log(oo, first+offs) rdr.Seek(int64(first+offs), 0) s, _ := ps.Token(rdr) util.Log(string(s)) ref := string(util.MakeRef(oo)) r.rcache[ref] = s r.rncache[ref] = -1 } } util.Log(o, i) } } r.PageMode = string(r.Dic(r.Trailer["/Root"])["/PageMode"]) return r }
func decodeStream(dic DictionaryT, data []byte) []byte { if f, ok := dic["/Filter"]; ok { filter := ForcedArray(f) var decos [][]byte if d, ok := dic["/DecodeParms"]; ok { decos = ForcedArray(d) } else { decos = make([][]byte, len(filter)) } for ff := range filter { // XXX: if there are multiple filters but only one DecodeParams, // it should be used for all filters deco := Dictionary(decos[ff]) switch string(filter[ff]) { case "/FlateDecode": data = fancy.ReadAndClose(zlib.NewReader(fancy.SliceReader(data))) case "/LZWDecode": early := true if deco != nil { if s, ok := deco["/EarlyChange"]; ok { early = num(s) == 1 } } data = lzw.Decode(data, early) case "/ASCII85Decode": ds := data for len(ds) > 1 && ds[len(ds)-1] < 33 { ds = ds[0 : len(ds)-1] } if len(ds) >= 2 && ds[len(ds)-1] == '>' && ds[len(ds)-2] == '~' { ds = ds[0 : len(ds)-2] } data = fancy.ReadAll(ascii85.NewDecoder(fancy.SliceReader(ds))) case "/ASCIIHexDecode": data, _ = hex.DecodeString(string(data)) default: util.Log("Unsupported filter", string(filter[ff])) data = []byte{} } if s, ok := deco["/Predictor"]; ok { pred := num(s) switch { case pred == 1: // no predictor case pred > 10: colors := numdef(deco["/Colors"], 1) columns := numdef(deco["/Columns"], 1) bitspercomponent := numdef(deco["/BitsPerComponent"], 8) util.Log("applying predictor", pred, colors, columns, bitspercomponent) data = util.ApplyPNGPredictor(pred, colors, columns, bitspercomponent, data) default: util.Log("Unsupported predictor", pred) return nil } } } } return data }
func extract(pd *pdfread.PdfReaderT, page int, t *TiffBuilder, next bool) { pg := pd.Pages()[page-1] mbox := util.StringArray(pd.Arr(pd.Att("/MediaBox", pg))) fmt.Println("Page", page) fmt.Println(" MediaBox", mbox) resources := pd.Dic(pd.Att("/Resources", pg)) if xo := pd.Dic(resources["/XObject"]); xo != nil { for name, ref := range xo { dic, data := pd.Stream(ref) printdic(dic, name, " ") if string(dic["/Subtype"]) != "/Image" { continue } switch string(dic["/Filter"]) { case "/CCITTFaxDecode": // TIFF if string(dic["/ColorSpace"]) != "/DeviceGray" { log.Fatal("cannot convert /CCITTFaxDecode ", string(pd.Obj(dic["/ColorSpace"]))) } dparms := pd.Dic(dic["/DecodeParms"]) width := pd.Num(dparms["/Columns"]) height := pd.Num(dparms["/Rows"]) k := pd.Num(dparms["/K"]) bpc := pd.Num(dic["/BitsPerComponent"]) if k >= 0 { // can't do this right now log.Fatal("can't do encoding with K=", k) } t.AddLong(TAG_IMAGE_WIDTH, uint32(width)) t.AddLong(TAG_IMAGE_LENGTH, uint32(height)) t.AddShort(TAG_BITS_PER_SAMPLE, uint16(bpc)) t.AddShort(TAG_COMPRESSION, 4) // CCITT Group 4 t.AddShort(TAG_PHOTOMETRIC_INTERPRETATION, 0) // white is zero t.AddLong(TAG_STRIP_OFFSETS, 0) //t.AddShort(TAG_ORIENTATION, 1) //t.AddShort(TAG_SAMPLES_PER_PIXEL, 1) t.AddLong(TAG_ROWS_PER_STRIP, uint32(height)) t.AddLong(TAG_STRIP_BYTE_COUNTS, uint32(len(data))) //t.AddRational(TAG_X_RESOLUTION, 300, 1) // 300 dpi (300/1) //t.AddRational(TAG_Y_RESOLUTION, 300, 1) // 300 dpi (300/1) //t.AddShort(TAG_RESOLUTION_UNIT, 2) // pixels/inch t.WriteIFD(data, next) case "/DCTDecode": // JPEG /* width := pd.Num(dic["/Width"]) height := pd.Num(dic["/Height"]) bpc := pd.Num(dic["/BitsPerComponent"]) */ f, err := os.Create("test.jpg") if err != nil { log.Fatal(err) } f.Write(data) f.Close() case "/FlateDecode": // compressed bitmap data = fancy.ReadAndClose(zlib.NewReader(fancy.SliceReader(data))) width := pd.Num(dic["/Width"]) height := pd.Num(dic["/Height"]) bpc := pd.Num(dic["/BitsPerComponent"]) if bpc != 8 { 
log.Fatal("cannot convert /FlateDecode bpc:", bpc) } if string(dic["/ColorSpace"]) != "/DeviceRGB" { log.Fatal("cannot convert /FlateDecode ", string(pd.Obj(dic["/ColorSpace"]))) } ima := image.NewRGBA(image.Rect(0, 0, width, height)) for y := 0; y < height; y++ { for x := 0; x < width; x++ { ima.Set(x, y, color.RGBA{R: data[0], G: data[1], B: data[2], A: 255}) data = data[3:] } } f, err := os.Create("test.png") if err != nil { log.Fatal(err) } png.Encode(f, ima) f.Close() default: log.Fatal("cannot decode ", string(dic["/Filter"])) } } } }