func mergePageTrees(file *pdf.File, catalogs []pdf.Dictionary) pdf.ObjectReference { // reserve a reference for the new page tree root // needed to set the parent for the old page tree roots pageTreeRef, err := file.Add(pdf.Null{}) if err != nil { log.Fatalln(err) } // use the old page tree roots as our page tree kids kids := pdf.Array{} pageCount := pdf.Integer(0) for _, catalog := range catalogs { // add the old page tree root to our list of kids pagesRef := catalog["Pages"].(pdf.ObjectReference) kids = append(kids, pagesRef) // now that the old page tree root is a kid, it needs a parent pages := file.Get(pagesRef).(pdf.Dictionary) pages["Parent"] = pageTreeRef _, err = file.Add(pdf.IndirectObject{ ObjectReference: pagesRef, Object: pages, }) if err != nil { log.Fatalln(err) } pageCount += pages["Count"].(pdf.Integer) } // create the merged page tree _, err = file.Add(pdf.IndirectObject{ ObjectReference: pageTreeRef, Object: pdf.Dictionary{ "Type": pdf.Name("Pages"), "Kids": kids, "Count": pageCount, }, }) if err != nil { log.Fatalln(err) } return pageTreeRef }
func main() { log.SetFlags(log.Lshortfile) // Process arguments if len(os.Args) != 2 { log.Fatalln("Usage: single [file.pdf]") } filename := os.Args[1] // open pdf document single, err := pdf.Open(filename) if err != nil { log.Fatalln(err) } defer single.Close() // create references to input pages catalog := single.Get(single.Root).(pdf.Dictionary) pages := getPages(single, catalog["Pages"].(pdf.ObjectReference)) // output to A4 paper_width := 595.224 paper_height := 841.824 // assume that all pages are the same size media_box_obj := pages[0].Object.(pdf.Dictionary)["MediaBox"] var media_box pdf.Array if media_box_obj == nil { // the first page inherits its MediaBox, therefore get it from the root pages_ref := catalog["Pages"].(pdf.ObjectReference) pages := single.Get(pages_ref) media_box = pages.(pdf.Dictionary)["MediaBox"].(pdf.Array) } else { media_box = media_box_obj.(pdf.Array) } var page_width float64 switch typed := media_box[2].(type) { case pdf.Real: page_width = float64(typed) case pdf.Integer: page_width = float64(typed) default: panic(reflect.TypeOf(typed).Name()) } var page_height float64 switch typed := media_box[3].(type) { case pdf.Real: page_height = float64(typed) case pdf.Integer: page_height = float64(typed) default: panic(reflect.TypeOf(typed).Name()) } num_pages := len(pages) // assuming that all the pages are the same size // the sum of the page areas must fit in the paper area // paper_area >= scale_factor² * num_pages * page_area paper_area := paper_width * paper_height page_area := page_width * page_height scale_factor := math.Sqrt(paper_area / float64(num_pages) / page_area) scaled_page_width := scale_factor * page_width nx := int(math.Ceil(paper_width / scaled_page_width)) ny := num_pages / nx for (nx * ny) < num_pages { ny++ } // adjust scale_factor to fit the new page count scale_factor_width := paper_width / float64(nx) / page_width scale_factor_height := paper_height / float64(ny) / page_height if scale_factor_width > scale_factor_height { scale_factor = scale_factor_height } else { scale_factor = scale_factor_width } xobjects := pdf.Dictionary{} stream := &bytes.Buffer{} // content stream for the single page // move to upper left fmt.Fprintf(stream, "1 0 0 1 %v %v cm ", 0, paper_height-(page_height*scale_factor)) // if the pages won't fill up the paper, center them on the paper top_margin := (paper_height - (scale_factor * page_height * float64(ny))) / 2.0 left_margin := (paper_width - (scale_factor * page_width * float64(nx))) / 2.0 fmt.Fprintf(stream, "1 0 0 1 %v %v cm ", left_margin, -top_margin) // scale the pages fmt.Fprintf(stream, "%v 0 0 %v 0 0 cm ", scale_factor, scale_factor) for page_num, page := range pages { page := page.Object.(pdf.Dictionary) page["Type"] = pdf.Name("XObject") page["Subtype"] = pdf.Name("Form") page["BBox"] = media_box // consolidate the contents contents := []byte{} switch typed := page["Contents"].(type) { case pdf.ObjectReference: page_contents_obj := single.Get(typed) page_contents := page_contents_obj.(pdf.Stream) contents = page_contents.Stream page["Filter"] = page_contents.Dictionary["Filter"] case pdf.Array: if len(typed) == 1 { page_contents_obj := single.Get(typed[0].(pdf.ObjectReference)) page_contents := page_contents_obj.(pdf.Stream) contents = page_contents.Stream page["Filter"] = page_contents.Dictionary["Filter"] } else { for _, page_contents_ref := range typed { page_contents_obj := single.Get(page_contents_ref.(pdf.ObjectReference)) decoded, err := page_contents_obj.(pdf.Stream).Decode() if err != nil { log.Fatalln(err) } contents = append(contents, decoded...) } } default: panic(reflect.TypeOf(typed).Name()) } // add the xobject to the pdf xobj_ref, err := single.Add(pdf.Stream{ Dictionary: page, Stream: contents, }) if err != nil { log.Fatalln(err) } // draw the page page_name := fmt.Sprintf("Page%d", page_num) xobjects[pdf.Name(page_name)] = xobj_ref stream.WriteString("/" + page_name + " Do ") // draw rectangle around the page fmt.Fprintf(stream, "0 0 %v %v re S ", page_width, page_height) // move to where the next page goes if (page_num+1)%nx == 0 { // move to first page of next line of pages fmt.Fprintf(stream, "1 0 0 1 %v %v cm ", -page_width*float64(nx-1), -page_height) } else { // next page in same line fmt.Fprintf(stream, "1 0 0 1 %v %v cm ", page_width, 0) } } // Pages for single single_pages := pdf.Dictionary{ pdf.Name("Type"): pdf.Name("Pages"), } single_pages_ref, err := single.Add(single_pages) if err != nil { log.Fatalln(err) } // content for single page contents := pdf.Stream{ Stream: stream.Bytes(), } contents_ref, err := single.Add(contents) if err != nil { log.Fatalln(err) } // add page single_page := pdf.Dictionary{ pdf.Name("Type"): pdf.Name("Page"), pdf.Name("Parent"): single_pages_ref, pdf.Name("Resources"): pdf.Dictionary{ pdf.Name("XObject"): xobjects, }, pdf.Name("MediaBox"): pdf.Array{ pdf.Integer(0), pdf.Integer(0), pdf.Real(paper_width), // width pdf.Real(paper_height), // height }, pdf.Name("Contents"): contents_ref, } single_page_ref, err := single.Add(single_page) if err != nil { log.Fatalln(err) } // update pages list single_pages["Kids"] = pdf.Array{single_page_ref} single_pages["Count"] = pdf.Integer(1) _, err = single.Add(pdf.IndirectObject{ ObjectReference: single_pages_ref, Object: single_pages, }) if err != nil { log.Fatalln(err) } // catalog for single catalog["Pages"] = single_pages_ref _, err = single.Add(pdf.IndirectObject{ ObjectReference: single.Root, Object: catalog, }) if err != nil { log.Fatalln(err) } // close files err = single.Save() if err != nil { log.Fatalln(err) } }
func main() { log.SetFlags(log.Lshortfile) binding := flag.String("binding", "chapbook", "Type of binding to generate {perfect, chapbook, none}. Default is chapbook.") flag.Parse() switch *binding { case "chapbook", "perfect", "none": // no-op default: usage() } // Process arguments if flag.NArg() != 1 { usage() } filename := flag.Arg(0) // open pdf document book, err := pdf.Open(filename) if err != nil { log.Fatalln(err) } defer book.Close() // get the pdf page references pagesRef := book.Get(book.Root).(pdf.Dictionary)["Pages"].(pdf.ObjectReference) pages := getPages(book, pagesRef) // assuming that all pages are the same size, figure out the // media box that will be the bbox of the xobject mediaBoxObj := pages[0]["MediaBox"] var mediaBox pdf.Array if mediaBoxObj == nil { // the first page inherits its MediaBox, therefore get it from the root pages := book.Get(pagesRef) mediaBox = pages.(pdf.Dictionary)["MediaBox"].(pdf.Array) } else { mediaBox = mediaBoxObj.(pdf.Array) } // change the pages to xobjects pageXobjects := []pdf.ObjectReference{} for _, page := range pages { page["Type"] = pdf.Name("XObject") page["Subtype"] = pdf.Name("Form") page["BBox"] = mediaBox // consolidate the contents into the xobject stream contents := []byte{} switch typed := page["Contents"].(type) { case pdf.ObjectReference: pageContents := book.Get(typed).(pdf.Stream) contents = pageContents.Stream page["Filter"] = pageContents.Dictionary["Filter"] case pdf.Array: if len(typed) == 1 { pageContents := book.Get(typed[0].(pdf.ObjectReference)).(pdf.Stream) contents = pageContents.Stream page["Filter"] = pageContents.Dictionary["Filter"] } else { for _, pageContentsRef := range typed { pageContents := book.Get(pageContentsRef.(pdf.ObjectReference)).(pdf.Stream) decoded, err := pageContents.Decode() if err != nil { log.Fatalln(err) } contents = append(contents, decoded...) } } default: panic(reflect.TypeOf(typed).Name()) } delete(page, "Contents") // add the xobject to the pdf xobjRef, err := book.Add(pdf.Stream{ Dictionary: page, Stream: contents, }) if err != nil { log.Fatalln(err) } pageXobjects = append(pageXobjects, xobjRef) } // figure out how many pages to layout for numDocumentPages := len(pages) numPagesToLayout := numDocumentPages switch *binding { case "perfect", "chapbook": if (numPagesToLayout % 4) != 0 { numPagesToLayout = numDocumentPages + (4 - (numDocumentPages % 4)) } case "none": numDocumentPages++ if (numPagesToLayout % 2) != 0 { numPagesToLayout++ } } // layout on landscape version of page size paperHeight := toFloat64(mediaBox[3]) // same height as the original page paperWidth := toFloat64(mediaBox[2]) * 2.0 // twice the width of the original page // layout the pages layedOutPages := pdf.Array{} stream := &bytes.Buffer{} xobjects := pdf.Dictionary{} showPage := false flipNextPage := true for pageToLayout := 0; pageToLayout < numPagesToLayout; pageToLayout++ { var pageNum int switch *binding { case "perfect": // determine the real page number for perfect bound books pageNum = pageToLayout - 1 if pageToLayout%4 == 0 { pageNum += 4 } case "chapbook": // determine the real page number for chapbooks pageNum = pageToLayout / 2 if pageToLayout%2 == 1 { pageNum = numPagesToLayout - pageNum - 1 } case "none": pageNum = pageToLayout - 1 flipNextPage = false default: log.Println("unhandled binding:", *binding) usage() } // only render non-blank pages if pageNum < numDocumentPages && pageNum >= 0 { fmt.Fprintf(stream, "q ") // horizontal offset for recto (odd) pages // this correctly handles 0 based indexes for 1 based page numbers if pageNum%2 == 0 { fmt.Fprintf(stream, "1 0 0 1 %v %v cm ", paperWidth/2.0, 0) } // render the page pageName := fmt.Sprintf("Page%d", pageNum) xobjects[pdf.Name(pageName)] = pageXobjects[pageNum] fmt.Fprintf(stream, "/%v Do Q ", pageName) } // emit layouts after drawing both pages if showPage { // content for book page contents := pdf.Stream{ Stream: stream.Bytes(), } contentsRef, err := book.Add(contents) if err != nil { log.Fatalln(err) } // add page bookPage := pdf.Dictionary{ pdf.Name("Type"): pdf.Name("Page"), pdf.Name("Parent"): pagesRef, pdf.Name("Resources"): pdf.Dictionary{ pdf.Name("XObject"): xobjects, }, pdf.Name("Contents"): contentsRef, } bookPageRef, err := book.Add(bookPage) if err != nil { log.Fatalln(err) } layedOutPages = append(layedOutPages, bookPageRef) // reset the stream and xobjects stream = &bytes.Buffer{} xobjects = pdf.Dictionary{} // flip the next page over if flipNextPage { fmt.Fprintf(stream, "%f %f %f %f %v %v cm ", math.Cos(math.Pi), math.Sin(math.Pi), -math.Sin(math.Pi), math.Cos(math.Pi), paperWidth, paperHeight, ) } flipNextPage = !flipNextPage } showPage = !showPage } // Page tree for book bookPages := pdf.Dictionary{ "Type": pdf.Name("Pages"), "Kids": layedOutPages, "Count": pdf.Integer(len(layedOutPages)), "MediaBox": pdf.Array{ pdf.Integer(0), pdf.Integer(0), pdf.Real(paperWidth), // width pdf.Real(paperHeight), // height }, } _, err = book.Add(pdf.IndirectObject{ ObjectReference: pagesRef, Object: bookPages, }) if err != nil { log.Fatalln(err) } // save err = book.Save() if err != nil { log.Fatalln(err) } }
// Stage 4: Add Three Annotations func stage4() { log.Println("stage 4") minimal, err := pdf.Open("h7-minimal.pdf") if err != nil { log.Fatalln(errgo.Details(err)) } annotationsObj := minimal.Get(pdf.ObjectReference{ObjectNumber: 7}) annotations := annotationsObj.(pdf.Array) // annotation 8 1 ref, err := minimal.Add(pdf.IndirectObject{ ObjectReference: pdf.ObjectReference{ObjectNumber: 8, GenerationNumber: 1}, Object: pdf.Dictionary{ pdf.Name("Type"): pdf.Name("Annot"), pdf.Name("Subtype"): pdf.Name("Text"), pdf.Name("Rect"): pdf.Array{ pdf.Integer(58), pdf.Integer(657), pdf.Integer(172), pdf.Integer(742), }, pdf.Name("Contents"): pdf.String("New Text #1"), pdf.Name("Open"): pdf.Boolean(true), }, }) if err != nil { log.Fatal(errgo.Details(err)) } annotations = append(annotations, ref) // annotation 9 1 ref, err = minimal.Add(pdf.IndirectObject{ ObjectReference: pdf.ObjectReference{ObjectNumber: 9, GenerationNumber: 1}, Object: pdf.Dictionary{ pdf.Name("Type"): pdf.Name("Annot"), pdf.Name("Subtype"): pdf.Name("Text"), pdf.Name("Rect"): pdf.Array{ pdf.Integer(389), pdf.Integer(459), pdf.Integer(570), pdf.Integer(537), }, pdf.Name("Contents"): pdf.String("New Text #2"), pdf.Name("Open"): pdf.Boolean(false), }, }) if err != nil { log.Fatal(errgo.Details(err)) } annotations = append(annotations, ref) // annotation 12 0 ref, err = minimal.Add(pdf.IndirectObject{ ObjectReference: pdf.ObjectReference{ObjectNumber: 12}, Object: pdf.Dictionary{ pdf.Name("Type"): pdf.Name("Annot"), pdf.Name("Subtype"): pdf.Name("Text"), pdf.Name("Rect"): pdf.Array{ pdf.Integer(44), pdf.Integer(253), pdf.Integer(473), pdf.Integer(337), }, pdf.Name("Contents"): pdf.String("New Text #3\\203a longer text annotation which we will continue \\\nonto a second line"), pdf.Name("Open"): pdf.Boolean(true), }, }) if err != nil { log.Fatal(errgo.Details(err)) } annotations = append(annotations, ref) // update the annotations array minimal.Add(pdf.IndirectObject{ ObjectReference: pdf.ObjectReference{ObjectNumber: 7}, Object: annotations, }) err = minimal.Save() if err != nil { log.Fatalln(errgo.Details(err)) } }
// create the minimal file described in H.2 func createMinimalFile() { log.Printf("createMinimalFile") minimal, err := pdf.Create("h7-minimal.pdf") if err != nil { log.Fatalln(errgo.Details(err)) } defer minimal.Close() minimal.Root = pdf.ObjectReference{ObjectNumber: 1} // catalog minimal.Add(pdf.IndirectObject{ ObjectReference: pdf.ObjectReference{ObjectNumber: 1}, Object: pdf.Dictionary{ pdf.Name("Type"): pdf.Name("Catalog"), pdf.Name("Outlines"): pdf.ObjectReference{ ObjectNumber: 2, }, pdf.Name("Pages"): pdf.ObjectReference{ ObjectNumber: 3, }, }, }) // outlines minimal.Add(pdf.IndirectObject{ ObjectReference: pdf.ObjectReference{ObjectNumber: 2}, Object: pdf.Dictionary{ pdf.Name("Type"): pdf.Name("Outlines"), pdf.Name("Count"): pdf.Integer(0), }, }) // pages minimal.Add(pdf.IndirectObject{ ObjectReference: pdf.ObjectReference{ObjectNumber: 3}, Object: pdf.Dictionary{ pdf.Name("Type"): pdf.Name("Pages"), pdf.Name("Kids"): pdf.Array{ pdf.ObjectReference{ ObjectNumber: 4, }, }, pdf.Name("Count"): pdf.Integer(1), }, }) // page minimal.Add(pdf.IndirectObject{ ObjectReference: pdf.ObjectReference{ObjectNumber: 4}, Object: pdf.Dictionary{ pdf.Name("Type"): pdf.Name("Page"), pdf.Name("Parent"): pdf.ObjectReference{ ObjectNumber: 3, }, pdf.Name("MediaBox"): pdf.Array{ pdf.Integer(0), pdf.Integer(0), pdf.Integer(612), pdf.Integer(792), }, pdf.Name("Contents"): pdf.ObjectReference{ ObjectNumber: 5, }, pdf.Name("Resources"): pdf.Dictionary{ pdf.Name("ProcSet"): pdf.ObjectReference{ ObjectNumber: 6, }, }, }, }) // content stream minimal.Add(pdf.IndirectObject{ ObjectReference: pdf.ObjectReference{ObjectNumber: 5}, Object: pdf.Stream{ Dictionary: pdf.Dictionary{ pdf.Name("Length"): pdf.Integer(0), }, }, }) // procset minimal.Add(pdf.IndirectObject{ ObjectReference: pdf.ObjectReference{ObjectNumber: 6}, Object: pdf.Array{ pdf.Name("PDF"), }, }) minimal.Root = pdf.ObjectReference{ObjectNumber: 1} err = minimal.Save() if err != nil { log.Fatalln(errgo.Details(err)) } }
// Stage 1: Add Four Text Annotations func stage1() { log.Println("stage 1") minimal, err := pdf.Open("h7-minimal.pdf") if err != nil { log.Fatalln(errgo.Details(err)) } // page page := minimal.Get(pdf.ObjectReference{ObjectNumber: 4}).(pdf.Dictionary) page[pdf.Name("Annots")] = pdf.ObjectReference{ObjectNumber: 7} minimal.Add(pdf.IndirectObject{ ObjectReference: pdf.ObjectReference{ObjectNumber: 4}, Object: page, }) // annotation array minimal.Add(pdf.IndirectObject{ ObjectReference: pdf.ObjectReference{ObjectNumber: 7}, Object: pdf.Array{ pdf.ObjectReference{ObjectNumber: 8}, pdf.ObjectReference{ObjectNumber: 9}, pdf.ObjectReference{ObjectNumber: 10}, pdf.ObjectReference{ObjectNumber: 11}, }, }) // annotation minimal.Add(pdf.IndirectObject{ ObjectReference: pdf.ObjectReference{ObjectNumber: 8}, Object: pdf.Dictionary{ pdf.Name("Type"): pdf.Name("Annot"), pdf.Name("Subtype"): pdf.Name("Text"), pdf.Name("Rect"): pdf.Array{ pdf.Integer(44), pdf.Integer(616), pdf.Integer(162), pdf.Integer(735), }, pdf.Name("Contents"): pdf.String("Text #1"), pdf.Name("Open"): pdf.Boolean(true), }, }) // annotation minimal.Add(pdf.IndirectObject{ ObjectReference: pdf.ObjectReference{ObjectNumber: 9}, Object: pdf.Dictionary{ pdf.Name("Type"): pdf.Name("Annot"), pdf.Name("Subtype"): pdf.Name("Text"), pdf.Name("Rect"): pdf.Array{ pdf.Integer(224), pdf.Integer(668), pdf.Integer(457), pdf.Integer(735), }, pdf.Name("Contents"): pdf.String("Text #2"), pdf.Name("Open"): pdf.Boolean(false), }, }) // annotation minimal.Add(pdf.IndirectObject{ ObjectReference: pdf.ObjectReference{ObjectNumber: 10}, Object: pdf.Dictionary{ pdf.Name("Type"): pdf.Name("Annot"), pdf.Name("Subtype"): pdf.Name("Text"), pdf.Name("Rect"): pdf.Array{ pdf.Integer(239), pdf.Integer(393), pdf.Integer(328), pdf.Integer(622), }, pdf.Name("Contents"): pdf.String("Text #3"), pdf.Name("Open"): pdf.Boolean(true), }, }) // annotation minimal.Add(pdf.IndirectObject{ ObjectReference: pdf.ObjectReference{ObjectNumber: 11}, Object: pdf.Dictionary{ pdf.Name("Type"): pdf.Name("Annot"), pdf.Name("Subtype"): pdf.Name("Text"), pdf.Name("Rect"): pdf.Array{ pdf.Integer(34), pdf.Integer(398), pdf.Integer(225), pdf.Integer(575), }, pdf.Name("Contents"): pdf.String("Text #4"), pdf.Name("Open"): pdf.Boolean(false), }, }) err = minimal.Save() if err != nil { log.Fatalln(errgo.Details(err)) } }