1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144
|
From: YOKOTA Hiroshi <yokota.hgml@gmail.com>
Date: Fri, 30 Jun 2023 17:10:44 +0900
Subject: Revert "Implement our own pdf append function the one in PoDoFo is
awful"
Forwarded: not-needed
This reverts commit 33bc00beb2931b0b9a1606d8b1222d9184a33fe4.
---
src/calibre/utils/podofo/doc.cpp | 107 ++++-----------------------------------
1 file changed, 10 insertions(+), 97 deletions(-)
diff --git a/src/calibre/utils/podofo/doc.cpp b/src/calibre/utils/podofo/doc.cpp
index 6f1220c..2177cd9 100644
--- a/src/calibre/utils/podofo/doc.cpp
+++ b/src/calibre/utils/podofo/doc.cpp
@@ -10,7 +10,6 @@
#include <algorithm>
#include <new>
#include <string_view>
-#include <unordered_map>
using namespace pdf;
@@ -331,109 +330,23 @@ PDFDoc_copy_page(PDFDoc *self, PyObject *args) {
} // }}}
// append() {{{
-
-static void
-fix_references(PdfObject &parent, const std::unordered_map<PdfReference, PdfObject*> &ref_map) {
- switch(parent.GetDataType()) {
- case PdfDataType::Dictionary:
- for (auto& pair : parent.GetDictionary()) {
- fix_references(pair.second, ref_map);
- }
- break;
- case PdfDataType::Array:
- for (auto& child : parent.GetArray()) fix_references(child, ref_map);
- break;
- case PdfDataType::Reference:
- if (auto search = ref_map.find(parent.GetReference()); search != ref_map.end()) {
- parent.SetReference(search->second->GetIndirectReference());
- }
- break;
- default:
- break;
- }
-}
-
static PyObject *
PDFDoc_append(PDFDoc *self, PyObject *args) {
- static const PdfName inheritableAttributes[] = {
- PdfName("Resources"),
- PdfName("MediaBox"),
- PdfName("CropBox"),
- PdfName("Rotate"),
- PdfName::KeyNull
- };
- PdfMemDocument *dest = self->doc;
+ PyObject *doc;
+ int typ;
+
+ if (!PyArg_ParseTuple(args, "O", &doc)) return NULL;
+
+ typ = PyObject_IsInstance(doc, (PyObject*)&PDFDocType);
+ if (typ == -1) return NULL;
+ if (typ == 0) { PyErr_SetString(PyExc_TypeError, "You must pass a PDFDoc instance to this method"); return NULL; }
+ PDFDoc *pdfdoc = (PDFDoc*)doc;
try {
- for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(args); i++) {
- PyObject *doc = PyTuple_GET_ITEM(args, i);
- int typ = PyObject_IsInstance(doc, (PyObject*)&PDFDocType);
- if (typ == -1) return NULL;
- if (typ == 0) { PyErr_SetString(PyExc_TypeError, "You must pass a PDFDoc instance to this method"); return NULL; }
- const PdfMemDocument *src = ((PDFDoc*)doc)->doc;
- std::unordered_map<PdfReference, PdfObject*> ref_map;
- std::unordered_map<PdfReference, PdfReference> page_parent_map;
- const unsigned initial_page_count = dest->GetPages().GetCount();
- // append pages first
- for (unsigned i = 0; i < src->GetPages().GetCount(); i++) {
- const auto& src_page = src->GetPages().GetPageAt(i);
- auto& dest_page = dest->GetPages().CreatePage(src_page.GetRect());
- page_parent_map[dest_page.GetObject().GetIndirectReference()] = dest_page.GetDictionary().GetKeyAs<PdfReference>("Parent");
- dest_page.GetObject() = src_page.GetObject();
- dest_page.GetDictionary().RemoveKey("Resource");
- dest_page.GetDictionary().RemoveKey("Parent");
- ref_map[src_page.GetObject().GetIndirectReference()] = &dest_page.GetObject();
- }
- // append all remaining objects
- for (const auto& obj : src->GetObjects()) {
- if (obj->IsIndirect() && ref_map.find(obj->GetIndirectReference()) == ref_map.end()) {
- auto copied_obj = &dest->GetObjects().CreateObject(*obj);
- ref_map[obj->GetIndirectReference()] = copied_obj;
- }
- }
- // fix references in appended objects
- for (auto& elem : ref_map) fix_references(*elem.second, ref_map);
- // fixup all pages
- for (unsigned i = 0; i < src->GetPages().GetCount(); i++) {
- auto& src_page = src->GetPages().GetPageAt(i);
- auto& dest_page = dest->GetPages().GetPageAt(initial_page_count + i);
- // Reset the parent to the correct value from the stored mapping
- dest_page.GetDictionary().AddKey("Parent", page_parent_map[dest_page.GetObject().GetIndirectReference()]);
- // Set the page contents
- if (auto key = src_page.GetDictionary().GetKeyAs<PdfReference>(PdfName::KeyContents); key.IsIndirect()) {
- if (auto search = ref_map.find(key); search != ref_map.end()) {
- dest_page.GetOrCreateContents().Reset(search->second);
- }
- }
- // ensure the contents is not NULL to prevent segfaults in other code that assumes it
- dest_page.GetOrCreateContents();
-
- // Set the page resources
- if (src_page.GetResources() != nullptr) {
- const auto &src_resources = src_page.GetResources()->GetDictionary();
- dest_page.GetOrCreateResources().GetDictionary() = src_resources;
- fix_references(dest_page.GetResources()->GetObject(), ref_map);
- } else dest_page.GetOrCreateResources();
-
- // Copy inherited properties
- auto inherited = inheritableAttributes;
- while (!inherited->IsNull()) {
- auto attribute = src_page.GetDictionary().FindKeyParent(*inherited);
- if (attribute != nullptr) {
- PdfObject attributeCopy(*attribute);
- fix_references(attributeCopy, ref_map);
- dest_page.GetDictionary().AddKey(*inherited, attributeCopy);
- }
- inherited++;
- }
- }
- }
+ self->doc->GetPages().AppendDocumentPages(*pdfdoc->doc);
} catch (const PdfError & err) {
podofo_set_exception(err);
return NULL;
- } catch (std::exception & err) {
- PyErr_Format(PyExc_ValueError, "An error occurred while trying to append pages: %s", err.what());
- return NULL;
}
Py_RETURN_NONE;
} // }}}
|