File: 0046-Revert-Implement-our-own-pdf-append-function-the-one.patch

package info (click to toggle)
calibre 8.5.0%2Bds-1
  • links: PTS, VCS
  • area: main
  • in suites: trixie
  • size: 431,828 kB
  • sloc: python: 450,752; ansic: 87,218; javascript: 57,667; cpp: 18,719; xml: 1,244; sh: 935; sql: 735; objc: 330; makefile: 68; sed: 3
file content (144 lines) | stat: -rw-r--r-- 6,513 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
From: YOKOTA Hiroshi <yokota.hgml@gmail.com>
Date: Fri, 30 Jun 2023 17:10:44 +0900
Subject: Revert "Implement our own pdf append function the one in PoDoFo is
 awful"

Forwarded: not-needed

This reverts commit 33bc00beb2931b0b9a1606d8b1222d9184a33fe4.
---
 src/calibre/utils/podofo/doc.cpp | 107 ++++-----------------------------------
 1 file changed, 10 insertions(+), 97 deletions(-)

diff --git a/src/calibre/utils/podofo/doc.cpp b/src/calibre/utils/podofo/doc.cpp
index 6f1220c..2177cd9 100644
--- a/src/calibre/utils/podofo/doc.cpp
+++ b/src/calibre/utils/podofo/doc.cpp
@@ -10,7 +10,6 @@
 #include <algorithm>
 #include <new>
 #include <string_view>
-#include <unordered_map>
 
 using namespace pdf;
 
@@ -331,109 +330,23 @@ PDFDoc_copy_page(PDFDoc *self, PyObject *args) {
 } // }}}
 
 // append() {{{
-
-static void
-fix_references(PdfObject &parent, const std::unordered_map<PdfReference, PdfObject*> &ref_map) {
-    switch(parent.GetDataType()) {
-        case PdfDataType::Dictionary:
-            for (auto& pair : parent.GetDictionary()) {
-                fix_references(pair.second, ref_map);
-            }
-            break;
-        case PdfDataType::Array:
-            for (auto& child : parent.GetArray())  fix_references(child, ref_map);
-            break;
-        case PdfDataType::Reference:
-            if (auto search = ref_map.find(parent.GetReference()); search != ref_map.end()) {
-                parent.SetReference(search->second->GetIndirectReference());
-            }
-            break;
-        default:
-            break;
-    }
-}
-
 static PyObject *
 PDFDoc_append(PDFDoc *self, PyObject *args) {
-    static const PdfName inheritableAttributes[] = {
-        PdfName("Resources"),
-        PdfName("MediaBox"),
-        PdfName("CropBox"),
-        PdfName("Rotate"),
-        PdfName::KeyNull
-    };
-    PdfMemDocument *dest = self->doc;
+    PyObject *doc;
+    int typ;
+
+    if (!PyArg_ParseTuple(args, "O", &doc)) return NULL;
+
+    typ = PyObject_IsInstance(doc, (PyObject*)&PDFDocType);
+    if (typ == -1) return NULL;
+    if (typ == 0) { PyErr_SetString(PyExc_TypeError, "You must pass a PDFDoc instance to this method"); return NULL; }
+    PDFDoc *pdfdoc = (PDFDoc*)doc;
 
     try {
-        for (Py_ssize_t i = 0; i < PyTuple_GET_SIZE(args); i++) {
-            PyObject *doc = PyTuple_GET_ITEM(args, i);
-            int typ = PyObject_IsInstance(doc, (PyObject*)&PDFDocType);
-            if (typ == -1) return NULL;
-            if (typ == 0) { PyErr_SetString(PyExc_TypeError, "You must pass a PDFDoc instance to this method"); return NULL; }
-            const PdfMemDocument *src = ((PDFDoc*)doc)->doc;
-            std::unordered_map<PdfReference, PdfObject*> ref_map;
-            std::unordered_map<PdfReference, PdfReference> page_parent_map;
-            const unsigned initial_page_count = dest->GetPages().GetCount();
-            // append pages first
-            for (unsigned i = 0; i < src->GetPages().GetCount(); i++) {
-                const auto& src_page = src->GetPages().GetPageAt(i);
-                auto& dest_page = dest->GetPages().CreatePage(src_page.GetRect());
-                page_parent_map[dest_page.GetObject().GetIndirectReference()] = dest_page.GetDictionary().GetKeyAs<PdfReference>("Parent");
-                dest_page.GetObject() = src_page.GetObject();
-                dest_page.GetDictionary().RemoveKey("Resource");
-                dest_page.GetDictionary().RemoveKey("Parent");
-                ref_map[src_page.GetObject().GetIndirectReference()] = &dest_page.GetObject();
-            }
-            // append all remaining objects
-            for (const auto& obj : src->GetObjects()) {
-                if (obj->IsIndirect() && ref_map.find(obj->GetIndirectReference()) == ref_map.end()) {
-                    auto copied_obj = &dest->GetObjects().CreateObject(*obj);
-                    ref_map[obj->GetIndirectReference()] = copied_obj;
-                }
-            }
-            // fix references in appended objects
-            for (auto& elem : ref_map) fix_references(*elem.second, ref_map);
-            // fixup all pages
-            for (unsigned i = 0; i < src->GetPages().GetCount(); i++) {
-                auto& src_page = src->GetPages().GetPageAt(i);
-                auto& dest_page = dest->GetPages().GetPageAt(initial_page_count + i);
-                // Reset the parent to the correct value from the stored mapping
-                dest_page.GetDictionary().AddKey("Parent", page_parent_map[dest_page.GetObject().GetIndirectReference()]);
-                // Set the page contents
-                if (auto key = src_page.GetDictionary().GetKeyAs<PdfReference>(PdfName::KeyContents); key.IsIndirect()) {
-                    if (auto search = ref_map.find(key); search != ref_map.end()) {
-                        dest_page.GetOrCreateContents().Reset(search->second);
-                    }
-                }
-                // ensure the contents is not NULL to prevent segfaults in other code that assumes it
-                dest_page.GetOrCreateContents();
-
-                // Set the page resources
-                if (src_page.GetResources() != nullptr) {
-                    const auto &src_resources = src_page.GetResources()->GetDictionary();
-                    dest_page.GetOrCreateResources().GetDictionary() = src_resources;
-                    fix_references(dest_page.GetResources()->GetObject(), ref_map);
-                } else dest_page.GetOrCreateResources();
-
-                // Copy inherited properties
-                auto inherited = inheritableAttributes;
-                while (!inherited->IsNull()) {
-                    auto attribute = src_page.GetDictionary().FindKeyParent(*inherited);
-                    if (attribute != nullptr) {
-                        PdfObject attributeCopy(*attribute);
-                        fix_references(attributeCopy, ref_map);
-                        dest_page.GetDictionary().AddKey(*inherited, attributeCopy);
-                    }
-                    inherited++;
-                }
-            }
-        }
+        self->doc->GetPages().AppendDocumentPages(*pdfdoc->doc);
     } catch (const PdfError & err) {
         podofo_set_exception(err);
         return NULL;
-    } catch (std::exception & err) {
-        PyErr_Format(PyExc_ValueError, "An error occurred while trying to append pages: %s", err.what());
-        return NULL;
     }
     Py_RETURN_NONE;
 } // }}}