func refToken(f fancy.Reader) ([]byte, int64) { tok, p := ps.Token(f) if len(tok) > 0 && tok[0] >= '0' && tok[0] <= '9' { ps.Token(f) r, q := ps.Token(f) if string(r) == "R" { f.Seek(p, 0) tok = f.Slice(int(1 + q - p)) } else { f.Seek(p+int64(len(tok)), 0) } } return tok, p }
func proceed(i *TypeOneI, rdr fancy.Reader) { for !i.Done { t, _ := ps.Token(rdr) // fmt.Printf("Stack: %v\n", util.StringArray(i.St.Dump())); // fmt.Printf("--- %s\n", t); if len(t) < 1 { break } b, _ := rdr.ReadByte() if b > 32 { rdr.UnreadByte() } if len(t) == 0 { break } if d, ok := find(i, "/"+string(t)); ok { if d[0] == '{' { proceed(i, fancy.SliceReader(d[1:len(d)-1])) } else { i.St.Push(d) } } else if f, ok := Ops[string(t)]; ok { f(i) } else { i.St.Push(t) } } return }
func tuple(f fancy.Reader, count int) [][]byte { r := make([][]byte, count) for i := 0; i < count; i++ { r[i], _ = ps.Token(f) } return r }
// xrefReadTable() reads the xref table(s) of a PDF file. This is not recursive // in favour of not to have to keep track of already used starting points // for xrefs. func xrefReadTable(f fancy.Reader, p int) (map[int]int, DictionaryT) { var trailer DictionaryT var back [MAX_PDF_UPDATES]int b := 0 s := _Bytes for ok := true; ok; { back[b] = p b++ p = xrefSkip(f, p) f.Seek(int64(p), 0) s, _ = ps.Token(f) if string(s) != "trailer" { util.Log("no trailer / xref table") return nil, nil } s, _ = ps.Token(f) if trailer == nil { trailer = Dictionary(s) } s, ok = Dictionary(s)["/Prev"] p = num(s) } r := make(map[int]int) for b != 0 { b-- f.Seek(int64(back[b]), 0) ps.Token(f) // skip "xref" for { m := tuple(f, 2) if string(m[0]) == "trailer" { break } ps.SkipLE(f) o := num(m[0]) dat := f.Slice(num(m[1]) * 20) for i := 0; i < len(dat); i += 20 { if dat[i+17] != 'n' { delete(r, o) } else { r[o] = num(dat[i : i+10]) } o++ } } } return r, trailer }
// xrefSkip() queries the start of the trailer for a (partial) xref-table. func xrefSkip(f fancy.Reader, xref int) int { f.Seek(int64(xref), 0) t, p := ps.Token(f) if string(t) != "xref" { return -1 } for { t, p = ps.Token(f) if t[0] < '0' || t[0] > '9' { f.Seek(p, 0) break } t, _ = ps.Token(f) ps.SkipLE(f) f.Seek(int64(num(t)*20), 1) } r, _ := f.Seek(0, 1) return int(r) }
// pd.Stream() returns contents of a stream. func (pd *PdfReaderT) Stream(reference []byte) (DictionaryT, []byte) { q, d := pd.Resolve(reference) dic := pd.Dic(d) l := pd.Num(dic["/Length"]) pd.rdr.Seek(int64(q), 0) t, _ := ps.Token(pd.rdr) if string(t) != "stream" { util.Log("Stream", string(reference), "not a stream", string(t)) return nil, []byte{} } ps.SkipLE(pd.rdr) return dic, pd.rdr.Slice(l) }
func (pd *PdfDrawerT) Interpret(rdr fancy.Reader) { for { t, _ := ps.Token(rdr) if len(t) == 0 { break } if f, ok := pd.Ops[string(t)]; ok { util.Logf("%v %#v %s", string(t), *pd.ConfigD, pd.Stack.Dump()) f(pd) } else { pd.Stack.Push(t) } } }
func Read(rdr fancy.Reader) (r *CharMapperT) { r = New() if rdr == nil { // make identity setup r.Uni.AddRange(0, 256, 0) r.Ranges.AddDef(0, 256, 1) return } cm := NewInterpreter(r) for { t, _ := ps.Token(rdr) if len(t) == 0 { break } if f, ok := Ops[string(t)]; ok { util.Log(string(t), cm) f(cm) } else { cm.St.Push(t) } } return }
// Dictionary() makes a map/hash from PDF dictionary data. func Dictionary(s []byte) DictionaryT { if len(s) < 4 { return nil } e := len(s) - 1 if s[0] != s[1] || s[0] != '<' || s[e] != s[e-1] || s[e] != '>' { return nil } r := make(DictionaryT) rdr := fancy.SliceReader(s[2 : e-1]) for { t, _ := ps.Token(rdr) if len(t) == 0 { break } if t[0] != '/' { return nil } k := string(t) t, _ = refToken(rdr) r[k] = t } return r }
// Load() loads a PDF file of a given name. func Load(fn string) *PdfReaderT { var rr [][2]int // list of entries to resolve r := new(PdfReaderT) r.File = fn r.rdr = fancy.FileReader(fn) if r.rdr == nil { util.Log(fn, "FileReader error") return nil } v := make([]byte, 16) r.rdr.ReadAt(v, 0) if v[0] != '%' || v[1] != 'P' || v[2] != 'D' || v[3] != 'F' { util.Log(string(v), "not a PDF") r.rdr.Close() return nil } x := bytes.IndexByte(v, '\r') if x > 0 { v = v[:x] } r.Version = string(v) if r.Startxref = xrefStart(r.rdr); r.Startxref == -1 { util.Log(fn, "xrefStart error") r.rdr.Close() return nil } if r.Xref, r.Trailer = xrefReadTable(r.rdr, r.Startxref); r.Xref == nil { r.Xref, rr, r.Trailer = xrefReadStream(r.rdr, r.Startxref) } if r.Xref == nil { util.Log(fn, "xrefRead error") r.rdr.Close() return nil } if r.Trailer == nil { r.rdr.Seek(int64(xrefSkip(r.rdr, r.Startxref)), 0) s, _ := ps.Token(r.rdr) if string(s) != "trailer" { util.Log(fn, "no trailer") r.rdr.Close() return nil } s, _ = ps.Token(r.rdr) if r.Trailer = Dictionary(s); r.Trailer == nil { util.Log(fn, "no trailer dictionary") r.rdr.Close() return nil } } r.rcache = make(map[string][]byte) r.rncache = make(map[string]int) r.dicache = make(map[string]DictionaryT) if rr != nil { curr := -1 var dic DictionaryT var s []byte for _, v := range rr { o, i := v[0], v[1] if o != curr { curr = o dic, s = r.DecodedStream(util.MakeRef(curr)) first := num(dic["/First"]) n := num(dic["/N"]) rdr := fancy.SliceReader(s) p := tuple(rdr, n*2) util.Log("Object-Stream", curr) for i := 0; i < len(p); i += 2 { oo := num(p[i+0]) offs := num(p[i+1]) util.Log(oo, first+offs) rdr.Seek(int64(first+offs), 0) s, _ := ps.Token(rdr) util.Log(string(s)) ref := string(util.MakeRef(oo)) r.rcache[ref] = s r.rncache[ref] = -1 } } util.Log(o, i) } } r.PageMode = string(r.Dic(r.Trailer["/Root"])["/PageMode"]) return r }
// xrefReadStream() reads the xref stream(s) of a PDF file. This is not recursive // in favour of not to have to keep track of already used starting points // for xrefs. func xrefReadStream(f fancy.Reader, p int) (xr map[int]int, r [][2]int, trailer DictionaryT) { s := _Bytes xr = map[int]int{} r = [][2]int{} for ok := true; ok; { f.Seek(int64(p), 0) ps.Token(f) // skip "xref" //for { m := tuple(f, 2) if string(m[1]) != "obj" { util.Logf("unexpected %q\n", m) return nil, nil, nil } s, _ = ps.Token(f) dic := Dictionary(s) if trailer == nil { trailer = dic } s, ok = dic["/Prev"] p = num(s) s, _ = ps.Token(f) if string(s) != "stream" { util.Log("not a stream", s) return nil, nil, nil } ps.SkipLE(f) for k, v := range dic { util.Logf("%s %s", k, v) } size := num(dic["/Size"]) index := []int{0, size} if _, ok := dic["/Index"]; ok { a := Array(dic["/Index"]) index[0] = num(a[0]) index[1] = num(a[1]) // can len(index) be != 2 ? } l := num(dic["/Length"]) xref := f.Slice(l) w := Array(dic["/W"]) if len(w) != 3 { util.Log("unexpected /W", w) } fl1 := num(w[0]) fl2 := num(w[1]) fl3 := num(w[2]) width := fl1 + fl2 + fl3 xref = decodeStream(dic, xref) s, _ = ps.Token(f) // endstream s, _ = ps.Token(f) // endobj pos := index[0] for i := 0; i < len(xref); i += width { ent := xref[i : i+width] f1 := bnum(ent[0:fl1]) f2 := bnum(ent[fl1 : fl1+fl2]) f3 := bnum(ent[fl1+fl2:]) switch f1 { case 0: // free object util.Log("free", f2, f3) // delete(r, f2) case 1: // regular object util.Log("ref", pos, f3, f2) xr[pos] = f2 case 2: // compressed object util.Log("cref", pos, f2, f3) r = append(r, [2]int{f2, f3}) } pos += 1 } //} } return xr, r, trailer }