Exemple #1
0
// proceed runs the interpreter loop: it reads tokens from rdr until the
// input yields an empty token or i.Done becomes true.
//
// Each token is resolved in this order:
//  1. a definition found by find under the name "/"+token: if the value is
//     wrapped in braces its inner bytes are interpreted recursively,
//     otherwise the value is pushed on the operand stack;
//  2. an operator registered in Ops, which is called with i;
//  3. anything else is pushed on the operand stack unchanged.
func proceed(i *TypeOneI, rdr fancy.Reader) {
	for !i.Done {
		// The token error is not inspected; an empty token ends the loop.
		t, _ := ps.Token(rdr)
		if len(t) == 0 {
			break
		}

		// Consume the single byte following the token, but put it back when
		// it is above 32 (i.e. not a space or control byte).
		b, _ := rdr.ReadByte()
		if b > 32 {
			rdr.UnreadByte()
		}

		name := string(t)
		if d, ok := find(i, "/"+name); ok {
			// Require at least two bytes before stripping the first and
			// last one; empty or one-byte definitions are pushed as-is
			// instead of panicking on the index/slice expression.
			if len(d) >= 2 && d[0] == '{' {
				proceed(i, fancy.SliceReader(d[1:len(d)-1]))
			} else {
				i.St.Push(d)
			}
		} else if f, ok := Ops[name]; ok {
			f(i)
		} else {
			i.St.Push(t)
		}
	}
}
Exemple #2
0
// Page renders one page of pd as an SVG document and returns the bytes
// collected in the drawing's writer.
//
// page is a zero-based index into pd.Pages(); complain is called when it is
// out of range. When xmlDecl is true the output starts with an XML
// declaration. The page's /MediaBox determines the SVG width and height
// (scaled by 1.25) and the offsets of the enclosing
// matrix(1.25,0,0,-1.25,...) group transform.
//
// All entries of the page's /Contents are decoded and interpreted as one
// stream, in order; a page without contents produces an empty drawing.
func Page(pd *pdfread.PdfReaderT, page int, xmlDecl bool) []byte {
	pg := pd.Pages()
	if page < 0 || page >= len(pg) {
		complain("Page does not exist!\n")
	}
	mbox := util.StringArray(pd.Arr(pd.Att("/MediaBox", pg[page])))
	drw := svgdraw.NewTestSvg()
	// NOTE(review): the value returned by svgtext.New is not kept;
	// presumably New attaches the text handler to drw - confirm.
	svgtext.New(pd, drw).Page = page
	w := strm.Mul(strm.Sub(mbox[2], mbox[0]), "1.25")
	h := strm.Mul(strm.Sub(mbox[3], mbox[1]), "1.25")
	decl := ""
	if xmlDecl {
		decl = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n"
	}

	drw.Write.Out("%s"+
		"<svg\n"+
		"   xmlns:svg=\"http://www.w3.org/2000/svg\"\n"+
		"   xmlns=\"http://www.w3.org/2000/svg\"\n"+
		"   version=\"1.0\"\n"+
		"   width=\"%s\"\n"+
		"   height=\"%s\">\n"+
		"<g transform=\"matrix(1.25,0,0,-1.25,%s,%s)\">\n",
		decl,
		w, h,
		strm.Mul(mbox[0], "-1.25"),
		strm.Mul(mbox[3], "1.25"))

	// /Contents may hold several streams that together form the page
	// description. Join them with a newline so that a token ending one
	// stream cannot merge with the token starting the next.
	cont := pd.ForcedArray(pd.Dic(pg[page])["/Contents"])
	var stream []byte
	for _, ref := range cont {
		_, body := pd.DecodedStream(ref)
		stream = append(stream, body...)
		stream = append(stream, '\n')
	}
	drw.Interpret(fancy.SliceReader(stream))
	drw.Draw.CloseDrawing()
	drw.Write.Out("</g>\n</svg>\n")
	return drw.Write.Content
}
Exemple #3
0
// main reads the font file named by the first command-line argument, runs
// it through type1.Read, prints the interpreter's remaining stack and then
// dumps the result with dumpT1.
//
// A file whose first byte is 128 is passed through pfb.Decode first. The
// program exits with status 1 when the argument is missing or the file
// cannot be read.
func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "usage: pass the font file to dump as the first argument")
		os.Exit(1)
	}
	a, err := ioutil.ReadFile(os.Args[1])
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	// Guard the index so that an empty file does not panic.
	if len(a) > 0 && a[0] == 128 {
		a = pfb.Decode(a)
	}
	g := type1.Read(fancy.SliceReader(a))
	fmt.Printf("%v\n", util.StringArray(g.St.Dump()))
	dumpT1(g)
}
Exemple #4
0
// op_ifelse runs one of two procedures depending on a condition.
//
// a must hold three entries: the condition, the procedure taken when the
// condition is the literal "true", and the procedure taken otherwise. The
// selected procedure is interpreted only when it is longer than two bytes
// and starts with '{'; its first and last bytes are stripped first.
func (t *TypeOneI) op_ifelse(a [][]byte) {
	// Read the else branch first: this is the access that requires all
	// three entries to be present.
	otherwise := a[2]
	cond, then := a[0], a[1]

	chosen := otherwise
	if string(cond) == "true" {
		chosen = then
	}

	if len(chosen) <= 2 || chosen[0] != '{' {
		return
	}
	proceed(t, fancy.SliceReader(chosen[1:len(chosen)-1]))
}
Exemple #5
0
// Array extracts the elements of a PDF array.
//
// s must start with '[' and end with ']'; otherwise nil is returned. The
// bytes between the brackets are split with refToken until it yields an
// empty token. The result is nil when the array has no elements.
//
// The result grows on demand, so arrays of any length are accepted and
// small arrays do not reserve a large buffer.
func Array(s []byte) [][]byte {
	if len(s) < 2 || s[0] != '[' || s[len(s)-1] != ']' {
		return nil
	}
	rdr := fancy.SliceReader(s[1 : len(s)-1])
	var r [][]byte
	for {
		// The token error is not inspected; an empty token ends the scan.
		t, _ := refToken(rdr)
		if len(t) == 0 {
			break
		}
		r = append(r, t)
	}
	return r
}
Exemple #6
0
// cmap returns the character mapper to use for the font resource named font.
//
// A mapper already stored in t.cmaps is returned directly. Otherwise the
// fonts of page t.Page are loaded on first use, and if the font's dictionary
// has a /ToUnicode entry its decoded stream is parsed with cmapi.Read and
// the result is cached in t.cmaps. In every other case cm_identity is
// returned and nothing is cached.
func (t *SvgTextT) cmap(font string) (r *cmapi.CharMapperT) {
	if cached, found := t.cmaps[font]; found {
		return cached
	}

	// Load the page's font table lazily.
	if t.fonts == nil {
		t.fonts = t.Pdf.PageFonts(t.Pdf.Pages()[t.Page])
	}
	if t.fonts == nil {
		return cm_identity
	}

	fontRef, found := t.fonts[font]
	if !found {
		return cm_identity
	}
	toUnicode, found := t.Pdf.Dic(fontRef)["/ToUnicode"]
	if !found {
		return cm_identity
	}

	_, data := t.Pdf.DecodedStream(toUnicode)
	r = cmapi.Read(fancy.SliceReader(data))
	t.cmaps[font] = r
	return r
}
Exemple #7
0
// Dictionary builds a map from the bytes of a PDF dictionary.
//
// s must be at least four bytes long, start with "<<" and end with ">>".
// Between the delimiters, keys are read with ps.Token and values with
// refToken until an empty key token is met. nil is returned when the
// delimiters are missing or a key does not start with '/'.
func Dictionary(s []byte) DictionaryT {
	n := len(s)
	if n < 4 {
		return nil
	}
	opened := s[0] == '<' && s[1] == '<'
	closed := s[n-2] == '>' && s[n-1] == '>'
	if !opened || !closed {
		return nil
	}

	dict := make(DictionaryT)
	rdr := fancy.SliceReader(s[2 : n-2])
	// Token errors are not inspected; an empty key ends the scan.
	for key, _ := ps.Token(rdr); len(key) != 0; key, _ = ps.Token(rdr) {
		if key[0] != '/' {
			return nil
		}
		name := string(key)
		val, _ := refToken(rdr)
		dict[name] = val
	}
	return dict
}
Exemple #8
0
// init registers the built-in interpreter operators in Ops.
//
// Each operator receives the interpreter and works on its operand stack
// t.St. Several operators are placeholders: they only remove their operands
// and push a fixed value ("?" or "false"), or do nothing at all.
func init() {
	// noop is shared by the operators that have no effect here.
	noop := func(*TypeOneI) {}

	builtins := map[string]func(t *TypeOneI){
		// Pops a size and pushes the result of t.NewArray for it.
		"array": func(t *TypeOneI) {
			size := t.St.Pop()
			t.St.Push(t.NewArray(strm.Int(string(size), 1)))
		},
		// Pops a "D<n>" reference and makes t.Dicts[n] the top of the
		// dictionary stack.
		"begin": func(t *TypeOneI) {
			ref := t.St.Pop()
			if ref[0] != 'D' {
				panic("Wrong dictionary!\n")
			}
			t.DicSp++
			t.DicSt[t.DicSp] = t.Dicts[strm.Int(string(ref[1:]), 1)]
		},
		"bind": noop,
		// Pops entries until a "mark" entry has been removed.
		"cleartomark": func(t *TypeOneI) {
			for {
				if string(t.St.Pop()) == "mark" {
					break
				}
			}
		},
		// Pops one operand and stops the interpreter loop.
		"closefile": func(t *TypeOneI) {
			t.St.Pop()
			t.Done = true
		},
		"currentdict": func(t *TypeOneI) {
			t.St.Push(t.DicSt[t.DicSp].Name)
		},
		"currentfile": func(t *TypeOneI) {
			t.St.Push([]byte("?"))
		},
		// Stores key/value in the dictionary on top of the dictionary stack.
		"def": func(t *TypeOneI) {
			kv := t.St.Drop(2)
			t.DicSt[t.DicSp].Defs[string(kv[0])] = kv[1]
		},
		"definefont": func(t *TypeOneI) {
			kv := t.St.Drop(2)
			t.Fonts[string(kv[0])] = string(kv[1])
			t.St.Push(util.Bytes("<FONT>")) // FIXME, we need this.
		},
		"defineresource": func(t *TypeOneI) {
			t.St.Drop(3)
			t.St.Push([]byte("?"))
		},
		// Pops the requested capacity (unused) and pushes t.NewDic().
		"dict": func(t *TypeOneI) {
			t.St.Pop()
			t.St.Push(t.NewDic())
		},
		"dup": func(t *TypeOneI) {
			top := t.St.Pop()
			t.St.Push(top)
			t.St.Push(top)
		},
		// Pops one operand and calls eexec on the current reader. The
		// result is not interpreted (the proceed call is disabled), so the
		// reader ends up unchanged and Done is cleared.
		"eexec": func(t *TypeOneI) {
			t.St.Pop()
			out := eexec(t.Rdr)
			saved := t.Rdr
			t.Rdr = fancy.SliceReader(out)
			//proceed(t, t.Rdr)
			t.Rdr = saved
			t.Done = false
		},
		"end": func(t *TypeOneI) {
			t.DicSp--
		},
		// Swaps the two topmost operands. Both are copied out before the
		// first Push.
		"exch": func(t *TypeOneI) {
			pair := t.St.Drop(2)
			lower, upper := pair[0], pair[1]
			t.St.Push(upper)
			t.St.Push(lower)
		},
		"executeonly": noop,
		"findresource": func(t *TypeOneI) {
			t.St.Drop(2)
			t.St.Push([]byte("?"))
		},
		"for": func(t *TypeOneI) {
			// FIXME
			t.St.Drop(4)
		},
		// Looks up an element of a "D<n>" dictionary or "A<n>" array.
		"get": func(t *TypeOneI) {
			args := t.St.Drop(2)
			idx := strm.Int(string(args[0][1:]), 1)
			switch args[0][0] {
			case 'D':
				t.St.Push(t.Dicts[idx].Defs[string(args[1])])
			case 'A':
				t.St.Push(t.Arrays[idx][strm.Int(string(args[1]), 1)])
			default:
				panic("Can not 'get' from!\n")
			}
		},
		// Same as ifelse with an empty else branch.
		"if": func(t *TypeOneI) {
			args := t.St.Drop(2)
			t.op_ifelse([][]byte{args[0], args[1], {}})
		},
		"ifelse": func(t *TypeOneI) {
			t.op_ifelse(t.St.Drop(3))
		},
		"index": func(t *TypeOneI) {
			depth := t.St.Pop()
			t.St.Push(t.St.Index(strm.Int(string(depth), 1) + 1))
		},
		"known": func(t *TypeOneI) {
			t.St.Drop(2)
			t.St.Push(util.Bytes("false")) // FIX ME knows nothing ;)
		},
		"noaccess": noop,
		"pop": func(t *TypeOneI) {
			t.St.Pop()
		},
		// Stores a value into a "D<n>" dictionary or "A<n>" array.
		"put": func(t *TypeOneI) {
			args := t.St.Drop(3)
			switch args[0][0] {
			case 'D':
				t.Dicts[strm.Int(string(args[0][1:]), 1)].Defs[string(args[1])] = args[2]
			case 'A':
				t.Arrays[strm.Int(string(args[0][1:]), 1)][strm.Int(string(args[1]), 1)] = args[2]
			default:
				panic("Wrong dictionary or array!\n")
			}
		},
		"readonly": noop,
		// Fills the buffer operand from the current reader and pushes the
		// filled part followed by "true". The read error is not inspected.
		"readstring": func(t *TypeOneI) {
			args := t.St.Drop(2)
			buf := args[1]
			n, _ := t.Rdr.Read(buf)
			t.St.Push(buf[0:n])
			t.St.Push(util.Bytes("true"))
		},
		"string": func(t *TypeOneI) {
			size := t.St.Pop()
			t.St.Push(make([]byte, strm.Int(string(size), 1)))
		},
		"userdict": func(t *TypeOneI) {
			t.St.Push(util.Bytes("D0"))
		},
		"where": func(t *TypeOneI) {
			t.St.Pop()
			t.St.Push(util.Bytes("false"))
		},
	}

	for name, op := range builtins {
		Ops[name] = op
	}
}
Exemple #9
0
// Load opens the PDF file named fn and returns a reader for it.
//
// It checks the "%PDF" signature, records the header line as Version,
// locates the cross-reference data (trying a table first, then a stream),
// reads the trailer dictionary when the cross-reference step did not supply
// one, and pre-fills the object cache from the object streams reported by
// xrefReadStream. Finally it records the catalog's /PageMode.
//
// Load returns nil when any step fails. The failure is reported through
// util.Log, and the file is closed if it had been opened.
func Load(fn string) *PdfReaderT {
	var rr [][2]int // list of entries to resolve

	r := new(PdfReaderT)
	r.File = fn
	r.rdr = fancy.FileReader(fn)
	if r.rdr == nil {
		util.Log(fn, "FileReader error")
		return nil
	}

	// A short read leaves v zero-filled, which fails the signature check.
	v := make([]byte, 16)
	r.rdr.ReadAt(v, 0)

	if v[0] != '%' || v[1] != 'P' || v[2] != 'D' || v[3] != 'F' {
		util.Log(string(v), "not a PDF")
		r.rdr.Close()
		return nil
	}

	// The header line may end with CR, LF or CR LF; cut at whichever comes
	// first so that Version holds only the header itself.
	if x := bytes.IndexAny(v, "\r\n"); x > 0 {
		v = v[:x]
	}
	r.Version = string(v)

	if r.Startxref = xrefStart(r.rdr); r.Startxref == -1 {
		util.Log(fn, "xrefStart error")
		r.rdr.Close()
		return nil
	}

	if r.Xref, r.Trailer = xrefReadTable(r.rdr, r.Startxref); r.Xref == nil {
		r.Xref, rr, r.Trailer = xrefReadStream(r.rdr, r.Startxref)
	}

	if r.Xref == nil {
		util.Log(fn, "xrefRead error")
		r.rdr.Close()
		return nil
	}

	if r.Trailer == nil {
		r.rdr.Seek(int64(xrefSkip(r.rdr, r.Startxref)), 0)

		s, _ := ps.Token(r.rdr)
		if string(s) != "trailer" {
			util.Log(fn, "no trailer")
			r.rdr.Close()
			return nil
		}
		s, _ = ps.Token(r.rdr)
		if r.Trailer = Dictionary(s); r.Trailer == nil {
			util.Log(fn, "no trailer dictionary")
			r.rdr.Close()
			return nil
		}
	}

	r.rcache = make(map[string][]byte)
	r.rncache = make(map[string]int)
	r.dicache = make(map[string]DictionaryT)

	if rr != nil {
		curr := -1

		for _, v := range rr {
			o, i := v[0], v[1]

			// Each object stream is unpacked once, when it is first seen.
			if o != curr {
				curr = o

				dic, s := r.DecodedStream(util.MakeRef(curr))

				first := num(dic["/First"])
				n := num(dic["/N"])

				rdr := fancy.SliceReader(s)

				// p is read as (object number, offset) pairs.
				p := tuple(rdr, n*2)

				util.Log("Object-Stream", curr)
				// Stop at the last complete pair so that a truncated
				// index cannot cause an out-of-range access.
				for j := 0; j+1 < len(p); j += 2 {
					oo := num(p[j+0])
					offs := num(p[j+1])

					util.Log(oo, first+offs)

					rdr.Seek(int64(first+offs), 0)
					obj, _ := ps.Token(rdr)
					util.Log(string(obj))

					ref := string(util.MakeRef(oo))
					r.rcache[ref] = obj
					r.rncache[ref] = -1
				}

			}

			util.Log(o, i)
		}
	}

	r.PageMode = string(r.Dic(r.Trailer["/Root"])["/PageMode"])

	return r
}
Exemple #10
0
// decodeStream applies the filters named in dic["/Filter"] to data, in
// order, and returns the decoded bytes. data is returned unchanged when
// dic has no /Filter entry.
//
// Supported filters are /FlateDecode, /LZWDecode, /ASCII85Decode and
// /ASCIIHexDecode; any other filter is logged and yields empty data. After
// each filter the matching /DecodeParms entry is checked for a /Predictor:
// 1 means none, 10 and above are handed to util.ApplyPNGPredictor, and any
// other value is logged and makes the function return nil.
func decodeStream(dic DictionaryT, data []byte) []byte {
	f, ok := dic["/Filter"]
	if !ok {
		return data
	}
	filter := ForcedArray(f)

	var decos [][]byte
	if d, ok := dic["/DecodeParms"]; ok {
		decos = ForcedArray(d)
	}

	for ff := range filter {
		// XXX: if there are multiple filters but only one DecodeParams,
		//      it should be used for all filters

		// /DecodeParms may have fewer entries than /Filter; filters
		// without an entry run with no parameters instead of indexing out
		// of range.
		var deco DictionaryT
		if ff < len(decos) {
			deco = Dictionary(decos[ff])
		}

		switch string(filter[ff]) {
		case "/FlateDecode":
			data = fancy.ReadAndClose(zlib.NewReader(fancy.SliceReader(data)))
		case "/LZWDecode":
			early := true
			if deco != nil {
				if s, ok := deco["/EarlyChange"]; ok {
					early = num(s) == 1
				}
			}
			data = lzw.Decode(data, early)
		case "/ASCII85Decode":
			// Strip trailing whitespace and the "~>" end marker before
			// handing the data to the decoder.
			ds := data
			for len(ds) > 1 && ds[len(ds)-1] < 33 {
				ds = ds[0 : len(ds)-1]
			}
			if len(ds) >= 2 && ds[len(ds)-1] == '>' && ds[len(ds)-2] == '~' {
				ds = ds[0 : len(ds)-2]
			}
			data = fancy.ReadAll(ascii85.NewDecoder(fancy.SliceReader(ds)))
		case "/ASCIIHexDecode":
			data = decodeASCIIHex(data)
		default:
			util.Log("Unsupported filter", string(filter[ff]))
			data = []byte{}
		}

		if s, ok := deco["/Predictor"]; ok {
			pred := num(s)

			switch {
			case pred == 1:
				// no predictor

			case pred >= 10:
				// 10 through 15 are the PNG predictors; 10 is the PNG
				// "None" predictor and must be processed as well.
				colors := numdef(deco["/Colors"], 1)
				columns := numdef(deco["/Columns"], 1)
				bitspercomponent := numdef(deco["/BitsPerComponent"], 8)

				util.Log("applying predictor", pred, colors, columns, bitspercomponent)
				data = util.ApplyPNGPredictor(pred, colors, columns, bitspercomponent, data)

			default:
				util.Log("Unsupported predictor", pred)
				return nil
			}
		}
	}
	return data
}

// decodeASCIIHex decodes ASCIIHexDecode data: bytes below 33 are skipped,
// decoding stops at the '>' end marker, and an odd number of digits is
// completed with a trailing '0'. Invalid digits are logged and the bytes
// decoded before them are returned.
func decodeASCIIHex(data []byte) []byte {
	digits := make([]byte, 0, len(data)+1)
	for _, c := range data {
		if c == '>' {
			break
		}
		if c < 33 {
			continue
		}
		digits = append(digits, c)
	}
	if len(digits)%2 == 1 {
		digits = append(digits, '0')
	}
	out, err := hex.DecodeString(string(digits))
	if err != nil {
		util.Log("ASCIIHexDecode", err)
	}
	return out
}
Exemple #11
0
// extract dumps the image XObjects found in the resources of one page.
//
// page is a 1-based page number. For every XObject whose /Subtype is
// /Image the handling depends on its /Filter:
//   - /CCITTFaxDecode: the raw data is added to t as a TIFF image file
//     directory; next is passed through to t.WriteIFD.
//   - /DCTDecode: the raw data is written to "test.jpg".
//   - /FlateDecode: the data is inflated, read as 8-bit /DeviceRGB pixels
//     and written to "test.png".
//
// Any unsupported image, out-of-range page or I/O failure ends the program
// through log.Fatal.
func extract(pd *pdfread.PdfReaderT, page int, t *TiffBuilder, next bool) {
	pages := pd.Pages()
	if page < 1 || page > len(pages) {
		log.Fatal("page out of range: ", page)
	}
	pg := pages[page-1]
	mbox := util.StringArray(pd.Arr(pd.Att("/MediaBox", pg)))
	fmt.Println("Page", page)
	fmt.Println("  MediaBox", mbox)

	resources := pd.Dic(pd.Att("/Resources", pg))
	if xo := pd.Dic(resources["/XObject"]); xo != nil {
		for name, ref := range xo {
			dic, data := pd.Stream(ref)
			printdic(dic, name, "  ")

			if string(dic["/Subtype"]) != "/Image" {
				continue
			}

			switch string(dic["/Filter"]) {
			case "/CCITTFaxDecode": // TIFF
				if string(dic["/ColorSpace"]) != "/DeviceGray" {
					log.Fatal("cannot convert /CCITTFaxDecode ", string(pd.Obj(dic["/ColorSpace"])))
				}

				dparms := pd.Dic(dic["/DecodeParms"])
				width := pd.Num(dparms["/Columns"])
				height := pd.Num(dparms["/Rows"])
				k := pd.Num(dparms["/K"])
				bpc := pd.Num(dic["/BitsPerComponent"])

				if k >= 0 {
					// can't do this right now
					log.Fatal("can't do encoding with K=", k)
				}

				t.AddLong(TAG_IMAGE_WIDTH, uint32(width))
				t.AddLong(TAG_IMAGE_LENGTH, uint32(height))
				t.AddShort(TAG_BITS_PER_SAMPLE, uint16(bpc))
				t.AddShort(TAG_COMPRESSION, 4)                // CCITT Group 4
				t.AddShort(TAG_PHOTOMETRIC_INTERPRETATION, 0) // white is zero
				t.AddLong(TAG_STRIP_OFFSETS, 0)
				//t.AddShort(TAG_ORIENTATION, 1)
				//t.AddShort(TAG_SAMPLES_PER_PIXEL, 1)
				t.AddLong(TAG_ROWS_PER_STRIP, uint32(height))
				t.AddLong(TAG_STRIP_BYTE_COUNTS, uint32(len(data)))
				//t.AddRational(TAG_X_RESOLUTION, 300, 1) // 300 dpi (300/1)
				//t.AddRational(TAG_Y_RESOLUTION, 300, 1) // 300 dpi (300/1)
				//t.AddShort(TAG_RESOLUTION_UNIT, 2)      // pixels/inch

				t.WriteIFD(data, next)

			case "/DCTDecode": // JPEG
				/*
					width := pd.Num(dic["/Width"])
					height := pd.Num(dic["/Height"])
					bpc := pd.Num(dic["/BitsPerComponent"])
				*/

				// The stream data is written out as-is.
				f, err := os.Create("test.jpg")
				if err != nil {
					log.Fatal(err)
				}
				if _, err := f.Write(data); err != nil {
					f.Close()
					log.Fatal(err)
				}
				if err := f.Close(); err != nil {
					log.Fatal(err)
				}

			case "/FlateDecode": // compressed bitmap
				data = fancy.ReadAndClose(zlib.NewReader(fancy.SliceReader(data)))
				width := pd.Num(dic["/Width"])
				height := pd.Num(dic["/Height"])
				bpc := pd.Num(dic["/BitsPerComponent"])

				if bpc != 8 {
					log.Fatal("cannot convert /FlateDecode bpc:", bpc)
				}

				if string(dic["/ColorSpace"]) != "/DeviceRGB" {
					log.Fatal("cannot convert /FlateDecode ", string(pd.Obj(dic["/ColorSpace"])))
				}

				// Three bytes are consumed per pixel; refuse truncated data
				// up front instead of panicking inside the pixel loop.
				if need := width * height * 3; len(data) < need {
					log.Fatal("cannot convert /FlateDecode: short image data, want ", need, " bytes, got ", len(data))
				}

				ima := image.NewRGBA(image.Rect(0, 0, width, height))

				off := 0
				for y := 0; y < height; y++ {
					for x := 0; x < width; x++ {
						ima.SetRGBA(x, y, color.RGBA{R: data[off], G: data[off+1], B: data[off+2], A: 255})
						off += 3
					}
				}

				f, err := os.Create("test.png")
				if err != nil {
					log.Fatal(err)
				}
				if err := png.Encode(f, ima); err != nil {
					f.Close()
					log.Fatal(err)
				}
				if err := f.Close(); err != nil {
					log.Fatal(err)
				}

			default:
				log.Fatal("cannot decode ", string(dic["/Filter"]))
			}
		}
	}
}