1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105
|
"""Implements the PRECIS string classes."""
from precis_i18n.context import context_rule_error
from precis_i18n.derived import CONTEXTJ, CONTEXTO, FREE_PVAL, PVALID, derived_property
class BaseClass:
    """Common machinery shared by every PRECIS string class.

    A concrete subclass declares which derived properties it accepts by
    overriding `_allowed` with a tuple of derived property names, e.g.
    `_allowed = (PVALID,)`.

    Args:
        ucd (UnicodeData): Unicode character database.
        name (str): String class name. When omitted (or otherwise falsy),
            the name of the instance's class is used.

    Attributes:
        ucd (UnicodeData): Unicode character database.
        name (str): String class name.
    """

    # No derived property is accepted unconditionally by the base class.
    _allowed = ()

    def __init__(self, ucd, name=None):
        self.ucd = ucd
        self.name = name if name else self.__class__.__name__

    def enforce(self, value, codec_name=None):
        """Check every character of `value` against this string class.

        Each character's derived property is looked up. A character whose
        property is listed in `_allowed` passes. A CONTEXTJ/CONTEXTO
        character passes only when `context_rule_error` reports no failing
        rule for its position. Anything else is reported via `raise_error`.

        Args:
            value (str): String value to enforce.
            codec_name (Optional[str]): Codec name to report in exceptions.
                If None, `self.name` is reported instead.

        Returns:
            str: `value`, unchanged, when every character passes.

        Raises:
            UnicodeEncodeError: Value is disallowed by the string class.
        """
        reported_name = self.name if codec_name is None else codec_name

        for index, char in enumerate(value):
            prop, detail = derived_property(ord(char), self.ucd)
            if prop in self._allowed:
                continue

            if prop in (CONTEXTJ, CONTEXTO):
                # Swap the generic detail ('exceptions', 'join_control') for
                # the name of the specific context rule that failed; a falsy
                # result means the rule is satisfied at this position.
                failure = context_rule_error(value, index, self.ucd)
                if not failure:
                    continue
            else:
                failure = detail

            raise_error(reported_name, value, index, failure)

        return value
class IdentifierClass(BaseClass):
    """PRECIS IdentifierClass string class, as defined by RFC 8264.

    Only code points whose derived property is PVALID are accepted
    unconditionally; all checking is done by the inherited `enforce`.

    Args:
        ucd (UnicodeData): Unicode character database.
        name (str): String class name.
    """

    # Derived properties accepted without consulting a context rule.
    _allowed = (PVALID,)
class FreeFormClass(BaseClass):
    """PRECIS FreeFormClass string class, as defined by RFC 8264.

    Code points whose derived property is PVALID or FREE_PVAL are accepted
    unconditionally; all checking is done by the inherited `enforce`.

    Args:
        ucd (UnicodeData): Unicode character database.
        name (str): String class name.
    """

    # Derived properties accepted without consulting a context rule.
    _allowed = (PVALID, FREE_PVAL)
def raise_error(encoding, value, offset, error):
    """Raise a UnicodeEncodeError whose reason is "DISALLOWED/<error>".

    Args:
        encoding (str): Name of the encoding/codec.
        value (str): Value being encoded.
        offset (int): Offset in `value` where the error was detected. A
            negative offset (conventionally -1) marks the entire string.
        error (str): Subtype of error detected.

    Raises:
        UnicodeEncodeError: Always.
    """
    # A negative offset flags the whole string; otherwise flag exactly the
    # single character at `offset`.
    start, end = (0, len(value)) if offset < 0 else (offset, offset + 1)
    raise UnicodeEncodeError(encoding, value, start, end, "DISALLOWED/%s" % error)
|