<?xml version="1.0" encoding="UTF-8"?>
<patterns type="Common Error">
<!-- OCR fix: a letter "o"/"O" that sits next to a digit is replaced with a
     zero. The replacement token \060 is presumably the octal escape for the
     character "0" (Python-style replacement syntax assumed; confirm against
     the consumer). Quotes and "<" inside attribute values are written as
     entities so the file stays well-formed. -->
<pattern _name="Letter &quot;O&quot; in a number"
         _description="Replace letter &quot;O&quot; with a zero in a number"
         classes="OCR;">
  <!-- "o"/"O" directly after a digit (optionally followed by "," or "."),
       where that digit is not preceded by a non-digit word character. -->
  <rule regex="((?&lt;![^\W\d])(\d|\d[,.]))[oO]"
        flags="DOTALL;MULTILINE;UNICODE;"
        replacement="\1\060"
        repeat="True"/>
  <!-- "o"/"O" directly before a digit, or before "," or "." plus a digit,
       where the match is not followed by a non-digit word character. Skipped
       when the next characters are "2" and a non-word character. -->
  <rule regex="[oO]((?!2\W)(\d|[,.]\d)(?![^\W\d]))"
        flags="DOTALL;MULTILINE;UNICODE;"
        replacement="\060\1"
        repeat="True"/>
</pattern>
<!-- OCR fix: two consecutive apostrophes are replaced with one double
     quotation mark. The replacement is written as an entity because a raw
     double quote would terminate the attribute value. -->
<pattern _name="Double apostrophe"
         _description="Replace a double apostrophe with a quotation mark"
         classes="OCR;">
  <rule regex="''"
        flags="DOTALL;MULTILINE;UNICODE;"
        replacement="&quot;"
        repeat="False"/>
</pattern>
<!-- Normalizes spacing around "(", "[", ")" and "]": a space is inserted on
     the outer side where it is missing, and one space directly inside a
     bracket is dropped. -->
<pattern _name="Spaces around brackets"
         _description="Add or remove spaces around parentheses and square brackets"
         classes="Human;OCR;">
  <!-- Insert a space between a non-whitespace character and the opening
       bracket that follows it. -->
  <rule regex="(\S)(\(|\[)"
        flags="DOTALL;MULTILINE;UNICODE;"
        replacement="\1 \2"
        repeat="False"/>
  <!-- Insert a space between a closing bracket and the word character that
       follows it. -->
  <rule regex="(\)|\])(\w)"
        flags="DOTALL;MULTILINE;UNICODE;"
        replacement="\1 \2"
        repeat="False"/>
  <!-- Drop one space right after an opening bracket. -->
  <rule regex="(\(|\[)( )"
        flags="DOTALL;MULTILINE;UNICODE;"
        replacement="\1"
        repeat="False"/>
  <!-- Drop one space right before a closing bracket. -->
  <rule regex="( )(\)|\])"
        flags="DOTALL;MULTILINE;UNICODE;"
        replacement="\2"
        repeat="False"/>
</pattern>
<!-- A hyphen at the start of a line that is immediately followed by a
     character which is neither a hyphen nor whitespace gets one space
     inserted after it. -->
<pattern _name="Space after a dialogue dash"
         _description="Add space after a dialogue dash"
         classes="Human;OCR;">
  <rule regex="^-([^-\s])"
        flags="DOTALL;MULTILINE;UNICODE;"
        replacement="- \1"
        repeat="False"/>
</pattern>
<!-- Removes one or more spaces that precede a punctuation mark
     (, ; : . ? !), optionally with a quote character in between. The match is
     skipped when two periods follow at that point or when the punctuation
     mark is followed by a digit. The double quote inside the character class
     is written as an entity so it does not end the attribute value. The
     leading space in the regex is significant and must be kept. -->
<pattern _name="Space before punctuation marks"
         _description="Remove space before various punctuation marks"
         classes="Human;OCR;">
  <rule regex=" +([&quot;'«»]?(?!\.\.)([,;:.?!])(?!\d))"
        flags="DOTALL;MULTILINE;UNICODE;"
        replacement="\1"
        repeat="False"/>
</pattern>
<!-- Inserts a space after one of , ; : ? ! when the mark is preceded by a
     word character, a line start or a quote character (with an optional
     space in between), is not followed by a quote character, and is directly
     followed by a non-digit word character plus another word or whitespace
     character. The period is not handled by this rule. Double quotes inside
     the character classes are written as entities so they do not end the
     attribute value. -->
<pattern _name="Space after punctuation marks"
         _description="Add space after various punctuation marks"
         classes="Human;OCR;">
  <rule regex="((\w|^|[&quot;'«»]) ?[,;:?!])(?![&quot;'«»])([^\W\d][\w\s])"
        flags="DOTALL;MULTILINE;UNICODE;"
        replacement="\1 \3"
        repeat="False"/>
</pattern>
<!-- Inserts a space between an ellipsis ("...") that follows a word (with
     optional non-word, non-space characters in between) and the next word
     character, which may be preceded by one quote character. The double
     quote inside the character class is written as an entity so it does not
     end the attribute value. -->
<pattern _name="Space after an ellipsis"
         _description="Add space after an ellipsis"
         classes="Human;OCR;">
  <rule regex="(\w[^\w\s]*?\.\.\.)([&quot;'«»]?\w)"
        flags="DOTALL;MULTILINE;UNICODE;"
        replacement="\1 \2"
        repeat="False"/>
</pattern>
<!-- Removes the spaces that follow an ellipsis ("...") when only non-word
     characters come before that ellipsis on its line. -->
<pattern _name="Space after a starting ellipsis"
         _description="Remove space after an ellipsis that starts a line"
         classes="Human;OCR;">
  <rule regex="(^\W*?\.\.\.)( +)"
        flags="DOTALL;MULTILINE;UNICODE;"
        replacement="\1"
        repeat="False"/>
</pattern>
<!-- Removes spaces on the inner side of quotation marks. Every double quote
     inside a regex or replacement is written as an entity, because a raw
     double quote would end the attribute value and break the document. -->
<pattern _name="Spaces around a quotation mark"
         _description="Remove space after a starting- and before an ending quotation mark"
         classes="OCR;">
  <!-- Whole text (\A to \Z) holding exactly two double quotes: drop the
       spaces after the first one. -->
  <rule regex="(\A[^&quot;]*)(&quot; +)([^&quot;]*&quot;[^&quot;]*\Z)"
        flags="DOTALL;MULTILINE;UNICODE;"
        replacement="\1&quot;\3"
        repeat="False"/>
  <!-- Whole text holding exactly two double quotes: drop the spaces before
       the second one. -->
  <rule regex="(\A[^&quot;]*&quot;[^&quot;]*)( +&quot;)([^&quot;]*\Z)"
        flags="DOTALL;MULTILINE;UNICODE;"
        replacement="\1&quot;\3"
        repeat="False"/>
  <!-- Drop the spaces after a double quote or apostrophe that is preceded
       on its line only by non-word characters. -->
  <rule regex="^(\W*?[&quot;'])( +)"
        flags="DOTALL;MULTILINE;UNICODE;"
        replacement="\1"
        repeat="False"/>
  <!-- Drop the spaces before a double quote or apostrophe that is followed
       on its line only by non-word characters. -->
  <rule regex="( +)([&quot;']\W*?)$"
        flags="DOTALL;MULTILINE;UNICODE;"
        replacement="\2"
        repeat="False"/>
</pattern>
<!-- Collapses a run of two or more identical marks into one. Each rule needs
     a word character (plus any non-word characters) before the run, and two
     lookbehinds require that the run does not begin at a line start and is
     not itself preceded by the same mark. The "<" of each lookbehind is
     written as an entity, since a raw "<" is not allowed inside an attribute
     value. -->
<pattern _name="Multiple question- and exclamation marks"
         _description="Replace multiple consecutive question- and exclamation marks with only one"
         classes="Human;">
  <!-- Two or more "?" become a single "?". -->
  <rule regex="(\w\W*)(?&lt;!^)(?&lt;!\?)\?{2,}"
        flags="DOTALL;MULTILINE;UNICODE;"
        replacement="\1?"
        repeat="False"/>
  <!-- Two or more "!" become a single "!". -->
  <rule regex="(\w\W*)(?&lt;!^)(?&lt;!\!)\!{2,}"
        flags="DOTALL;MULTILINE;UNICODE;"
        replacement="\1!"
        repeat="False"/>
</pattern>
<!-- Removes a stray period next to ":", "?" or "!". The "<" of each
     lookbehind is written as an entity, since a raw "<" is not allowed
     inside an attribute value. -->
<pattern _name="Periods around a punctuation mark"
         _description="Remove period before or after various punctuation marks"
         classes="OCR;">
  <!-- A period right after ":", "?" or "!" is deleted, unless another
       period follows it. -->
  <rule regex="(?&lt;=[:?!])\.(?!\.)"
        flags="DOTALL;MULTILINE;UNICODE;"
        replacement=""
        repeat="False"/>
  <!-- A period right before ":", "?" or "!" is deleted, unless it is
       preceded by another period or by a period plus one word character. -->
  <rule regex="(?&lt;!\.\w)(?&lt;!\.)\.(?=[:?!])"
        flags="DOTALL;MULTILINE;UNICODE;"
        replacement=""
        repeat="False"/>
</pattern>
</patterns>