1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317
|
# encoding: utf-8
"""Custom element classes for core properties-related XML elements"""
from __future__ import (
absolute_import, division, print_function, unicode_literals
)
import re
from datetime import datetime, timedelta
from docx.compat import is_string
from docx.oxml import parse_xml
from docx.oxml.ns import nsdecls, qn
from docx.oxml.xmlchemy import BaseOxmlElement, ZeroOrOne
class CT_CoreProperties(BaseOxmlElement):
    """
    ``<cp:coreProperties>`` element, the root element of the Core Properties
    part stored as ``/docProps/core.xml``. Implements many of the Dublin Core
    document metadata elements. String elements resolve to an empty string
    ('') if the element is not present in the XML. String elements are
    limited in length to 255 unicode characters.
    """
    category = ZeroOrOne('cp:category', successors=())
    contentStatus = ZeroOrOne('cp:contentStatus', successors=())
    created = ZeroOrOne('dcterms:created', successors=())
    creator = ZeroOrOne('dc:creator', successors=())
    description = ZeroOrOne('dc:description', successors=())
    identifier = ZeroOrOne('dc:identifier', successors=())
    keywords = ZeroOrOne('cp:keywords', successors=())
    language = ZeroOrOne('dc:language', successors=())
    lastModifiedBy = ZeroOrOne('cp:lastModifiedBy', successors=())
    lastPrinted = ZeroOrOne('cp:lastPrinted', successors=())
    modified = ZeroOrOne('dcterms:modified', successors=())
    revision = ZeroOrOne('cp:revision', successors=())
    subject = ZeroOrOne('dc:subject', successors=())
    title = ZeroOrOne('dc:title', successors=())
    version = ZeroOrOne('cp:version', successors=())

    # XML for an empty root element carrying the cp, dc and dcterms
    # namespace declarations.
    _coreProperties_tmpl = (
        '<cp:coreProperties %s/>\n' % nsdecls('cp', 'dc', 'dcterms')
    )

    @classmethod
    def new(cls):
        """
        Return a new ``<cp:coreProperties>`` element
        """
        return parse_xml(cls._coreProperties_tmpl)

    @property
    def author_text(self):
        """
        The text in the `dc:creator` child element.
        """
        return self._text_of_element('creator')

    @author_text.setter
    def author_text(self, value):
        self._set_element_text('creator', value)

    @property
    def category_text(self):
        """
        The text in the `cp:category` child element.
        """
        return self._text_of_element('category')

    @category_text.setter
    def category_text(self, value):
        self._set_element_text('category', value)

    @property
    def comments_text(self):
        """
        The text in the `dc:description` child element.
        """
        return self._text_of_element('description')

    @comments_text.setter
    def comments_text(self, value):
        self._set_element_text('description', value)

    @property
    def contentStatus_text(self):
        """
        The text in the `cp:contentStatus` child element.
        """
        return self._text_of_element('contentStatus')

    @contentStatus_text.setter
    def contentStatus_text(self, value):
        self._set_element_text('contentStatus', value)

    @property
    def created_datetime(self):
        """
        Value of the `dcterms:created` child element as a |datetime|, or
        |None| if the element is absent, empty, or not parseable.
        """
        return self._datetime_of_element('created')

    @created_datetime.setter
    def created_datetime(self, value):
        self._set_element_datetime('created', value)

    @property
    def identifier_text(self):
        """
        The text in the `dc:identifier` child element.
        """
        return self._text_of_element('identifier')

    @identifier_text.setter
    def identifier_text(self, value):
        self._set_element_text('identifier', value)

    @property
    def keywords_text(self):
        """
        The text in the `cp:keywords` child element.
        """
        return self._text_of_element('keywords')

    @keywords_text.setter
    def keywords_text(self, value):
        self._set_element_text('keywords', value)

    @property
    def language_text(self):
        """
        The text in the `dc:language` child element.
        """
        return self._text_of_element('language')

    @language_text.setter
    def language_text(self, value):
        self._set_element_text('language', value)

    @property
    def lastModifiedBy_text(self):
        """
        The text in the `cp:lastModifiedBy` child element.
        """
        return self._text_of_element('lastModifiedBy')

    @lastModifiedBy_text.setter
    def lastModifiedBy_text(self, value):
        self._set_element_text('lastModifiedBy', value)

    @property
    def lastPrinted_datetime(self):
        """
        Value of the `cp:lastPrinted` child element as a |datetime|, or
        |None| if the element is absent, empty, or not parseable.
        """
        return self._datetime_of_element('lastPrinted')

    @lastPrinted_datetime.setter
    def lastPrinted_datetime(self, value):
        self._set_element_datetime('lastPrinted', value)

    @property
    def modified_datetime(self):
        """
        Value of the `dcterms:modified` child element as a |datetime|, or
        |None| if the element is absent, empty, or not parseable.
        """
        return self._datetime_of_element('modified')

    @modified_datetime.setter
    def modified_datetime(self, value):
        self._set_element_datetime('modified', value)

    @property
    def revision_number(self):
        """
        Integer value of revision property.

        Resolves to 0 when the `cp:revision` element is absent, is empty,
        contains a non-integer string, or contains a negative integer.
        """
        revision = self.revision
        if revision is None:
            return 0
        try:
            number = int(revision.text)
        except (TypeError, ValueError):
            # TypeError: element present but empty, so its text is None.
            # ValueError: non-integer revision strings also resolve to 0.
            return 0
        # as do negative integers
        return max(number, 0)

    @revision_number.setter
    def revision_number(self, value):
        """
        Set revision property to string value of integer *value*.

        Raises |ValueError| if *value* is not an int or is less than 1.
        """
        if not isinstance(value, int) or value < 1:
            tmpl = "revision property requires positive int, got '%s'"
            raise ValueError(tmpl % value)
        revision = self.get_or_add_revision()
        revision.text = str(value)

    @property
    def subject_text(self):
        """
        The text in the `dc:subject` child element.
        """
        return self._text_of_element('subject')

    @subject_text.setter
    def subject_text(self, value):
        self._set_element_text('subject', value)

    @property
    def title_text(self):
        """
        The text in the `dc:title` child element.
        """
        return self._text_of_element('title')

    @title_text.setter
    def title_text(self, value):
        self._set_element_text('title', value)

    @property
    def version_text(self):
        """
        The text in the `cp:version` child element.
        """
        return self._text_of_element('version')

    @version_text.setter
    def version_text(self, value):
        self._set_element_text('version', value)

    def _datetime_of_element(self, property_name):
        """
        Return the |datetime| parsed from the text of the child element
        named *property_name*, or |None| if that element is absent, has no
        text, or its text is not a parseable W3CDTF string.
        """
        element = getattr(self, property_name)
        if element is None:
            return None
        datetime_str = element.text
        if datetime_str is None:
            # element present but empty, e.g. ``<dcterms:created/>``;
            # slicing None in the parser would raise TypeError
            return None
        try:
            return self._parse_W3CDTF_to_datetime(datetime_str)
        except ValueError:
            # invalid datetime strings are ignored
            return None

    def _get_or_add(self, prop_name):
        """
        Return element returned by 'get_or_add_' method for *prop_name*.
        """
        get_or_add_method = getattr(self, 'get_or_add_%s' % prop_name)
        return get_or_add_method()

    @classmethod
    def _offset_dt(cls, dt, offset_str):
        """
        Return a |datetime| instance that is offset from datetime *dt* by
        the timezone offset specified in *offset_str*, a string like
        ``'-07:00'``. A ``'+'`` offset is subtracted from *dt* and a ``'-'``
        offset is added to it. Raises |ValueError| if *offset_str* does not
        match the expected pattern.
        """
        match = cls._offset_pattern.match(offset_str)
        if match is None:
            raise ValueError(
                "'%s' is not a valid offset string" % offset_str
            )
        sign, hours_str, minutes_str = match.groups()
        # the sign is inverted because the offset is being removed from a
        # local time, not applied to one
        sign_factor = -1 if sign == '+' else 1
        hours = int(hours_str) * sign_factor
        minutes = int(minutes_str) * sign_factor
        return dt + timedelta(hours=hours, minutes=minutes)

    # sign, two-digit hours and two-digit minutes of an offset like '-07:00'
    _offset_pattern = re.compile(r'([+-])(\d\d):(\d\d)')

    @classmethod
    def _parse_W3CDTF_to_datetime(cls, w3cdtf_str):
        """
        Return a |datetime| parsed from *w3cdtf_str*. When the string ends
        with a six-character numeric timezone offset, the result is adjusted
        by that offset using `_offset_dt()`. Raises |ValueError| if the
        string matches none of the supported templates.
        """
        # valid W3CDTF date cases:
        # yyyy e.g. '2003'
        # yyyy-mm e.g. '2003-12'
        # yyyy-mm-dd e.g. '2003-12-31'
        # UTC timezone e.g. '2003-12-31T10:14:55Z'
        # numeric timezone e.g. '2003-12-31T10:14:55-08:00'
        templates = (
            '%Y-%m-%dT%H:%M:%S',
            '%Y-%m-%d',
            '%Y-%m',
            '%Y',
        )
        # strptime isn't smart enough to parse literal timezone offsets like
        # '-07:30', so we have to do it ourselves
        parseable_part = w3cdtf_str[:19]
        offset_str = w3cdtf_str[19:]
        for tmpl in templates:
            try:
                dt = datetime.strptime(parseable_part, tmpl)
            except ValueError:
                continue
            # first matching template wins; no need to try the rest
            break
        else:
            tmpl = "could not parse W3CDTF datetime string '%s'"
            raise ValueError(tmpl % w3cdtf_str)
        if len(offset_str) == 6:
            return cls._offset_dt(dt, offset_str)
        return dt

    def _set_element_datetime(self, prop_name, value):
        """
        Set date/time value of child element having *prop_name* to *value*.

        *value* must be a |datetime| instance, otherwise |ValueError| is
        raised. A timezone-aware *value* is converted to UTC before being
        written, since the stored string always carries a 'Z' suffix.
        """
        if not isinstance(value, datetime):
            tmpl = (
                "property requires <type 'datetime.datetime'> object, got %s"
            )
            raise ValueError(tmpl % type(value))
        utc_offset = value.utcoffset()
        if utc_offset is not None:
            # aware datetime: shift wall-clock time to UTC and drop tzinfo so
            # the literal 'Z' written below is truthful
            value = (value - utc_offset).replace(tzinfo=None)
        element = self._get_or_add(prop_name)
        dt_str = value.strftime('%Y-%m-%dT%H:%M:%SZ')
        element.text = dt_str
        if prop_name in ('created', 'modified'):
            # These two require an explicit 'xsi:type="dcterms:W3CDTF"'
            # attribute. The first and last line are a hack required to add
            # the xsi namespace to the root element rather than each child
            # element in which it is referenced
            self.set(qn('xsi:foo'), 'bar')
            element.set(qn('xsi:type'), 'dcterms:W3CDTF')
            del self.attrib[qn('xsi:foo')]

    def _set_element_text(self, prop_name, value):
        """
        Set string value of *prop_name* property to *value*.

        A non-string *value* is converted with `str()` first. Raises
        |ValueError| if the resulting string is longer than 255 characters.
        """
        if not is_string(value):
            value = str(value)
        if len(value) > 255:
            tmpl = (
                "exceeded 255 char limit for property, got:\n\n'%s'"
            )
            raise ValueError(tmpl % value)
        element = self._get_or_add(prop_name)
        element.text = value

    def _text_of_element(self, property_name):
        """
        Return the text in the element matching *property_name*, or an empty
        string if the element is not present or contains no text.
        """
        element = getattr(self, property_name)
        if element is None:
            return ''
        if element.text is None:
            return ''
        return element.text
|