File: Latn.common-error.se-pattern.in

package info (click to toggle)
subtitleeditor 0.56.1-2
  • links: PTS, VCS
  • area: main
  • in suites:
  • size: 8,908 kB
  • sloc: cpp: 26,446; makefile: 1,713; perl: 434; sh: 259; xml: 149
file content (45 lines) | stat: -rw-r--r-- 4,021 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
<?xml version="1.0" encoding="UTF-8"?>
<patterns type="Common Error">
  <!--
    Letter "O" in a number: turns a letter o/O that sits inside a number into the digit 0.
    Rule 1: o/O directly after a digit (or a digit plus "," or "."), where that digit is
            not preceded by a word character other than a digit (a letter or "_").
    Rule 2: o/O directly before a digit (or "," / "." plus a digit), where that digit is
            not followed by a letter or "_". The (?!2\b) guard leaves "O2" alone whenever
            the 2 ends a word, including at the very end of the text (presumably to
            protect the formula O2; verify the intent upstream).
    "\060" in the replacements is the octal escape for the character "0". It is presumably
    written this way so the digit cannot be read as part of the group reference; confirm
    against the replacement syntax of the regex engine in use.
  -->
  <pattern _name="Letter &quot;O&quot; in a number"
           _description="Replace letter &quot;O&quot; with a zero in a number"
           classes="OCR;">
    <rule regex="((?&lt;![^\W\d])(\d|\d[,.]))[oO]"
          flags="DOTALL;MULTILINE;UNICODE;"
          replacement="\1\060"
          repeat="True"/>
    <rule regex="[oO]((?!2\b)(\d|[,.]\d)(?![^\W\d]))"
          flags="DOTALL;MULTILINE;UNICODE;"
          replacement="\060\1"
          repeat="True"/>
  </pattern>
  <!--
    Double apostrophe: every pair of adjacent apostrophes ('') is replaced by a single
    double quotation mark.
  -->
  <pattern _name="Double apostrophe"
           _description="Replace a double apostrophe with a quotation mark"
           classes="OCR;">
    <rule regex="''"
          flags="DOTALL;MULTILINE;UNICODE;"
          replacement="&quot;"
          repeat="False"/>
  </pattern>
  <!--
    Spaces around brackets: normalises spacing around "(", "[", ")" and "]".
    Rule 1: inserts a space between any non-whitespace character and a following
            opening bracket.
    Rule 2: inserts a space between a closing bracket and a following word character.
    Rule 3: removes every space that directly follows an opening bracket.
    Rule 4: removes every space that directly precedes a closing bracket.
    Rules 3 and 4 match a run of spaces (" +") so that several stray spaces are removed
    in one pass; the rules are not repeated (repeat="False").
  -->
  <pattern _name="Spaces around brackets"
           _description="Add or remove spaces around parentheses and square brackets"
           classes="Human;OCR;">
    <rule regex="(\S)(\(|\[)"
          flags="DOTALL;MULTILINE;UNICODE;"
          replacement="\1 \2"
          repeat="False"/>
    <rule regex="(\)|\])(\w)"
          flags="DOTALL;MULTILINE;UNICODE;"
          replacement="\1 \2"
          repeat="False"/>
    <rule regex="(\(|\[)( +)"
          flags="DOTALL;MULTILINE;UNICODE;"
          replacement="\1"
          repeat="False"/>
    <rule regex="( +)(\)|\])"
          flags="DOTALL;MULTILINE;UNICODE;"
          replacement="\2"
          repeat="False"/>
  </pattern>
  <!--
    Space after a dialogue dash: when a line starts with "-" immediately followed by a
    character that is neither another "-" nor whitespace, a single space is inserted
    after the dash (for example "-Hello" becomes "- Hello").
  -->
  <pattern _name="Space after a dialogue dash"
           _description="Add space after a dialogue dash"
           classes="Human;OCR;">
    <rule regex="^-([^-\s])"
          flags="DOTALL;MULTILINE;UNICODE;"
          replacement="- \1"
          repeat="False"/>
  </pattern>
  <!--
    Space before punctuation marks: drops one or more spaces that precede one of
    , ; : . ? ! (optionally with a quotation character " ' « » in front of the mark).
    Nothing is matched when the text at that point starts with two periods, or when
    the punctuation mark is followed by a digit.
    Note that the regex value intentionally begins with a literal space.
  -->
  <pattern _name="Space before punctuation marks"
           _description="Remove space before various punctuation marks"
           classes="Human;OCR;">
    <rule regex=" +([&quot;'«»]?(?!\.\.)([,;:.?!])(?!\d))"
          flags="DOTALL;MULTILINE;UNICODE;"
          replacement="\1"
          repeat="False"/>
  </pattern>
  <!--
    Space after punctuation marks: inserts a space after one of , ; : ? ! when the mark
    is directly followed by a letter (or "_") which in turn is followed by a word
    character or whitespace. The mark itself must come after a word character, a line
    start or a quotation character, with at most one space in between. The period is
    not part of the handled set.
  -->
  <pattern _name="Space after punctuation marks"
           _description="Add space after various punctuation marks"
           classes="Human;OCR;">
    <rule regex="((\w|^|[&quot;'«»]) ?[,;:?!])(?![&quot;'«»])([^\W\d][\w\s])"
          flags="DOTALL;MULTILINE;UNICODE;"
          replacement="\1 \3"
          repeat="False"/>
  </pattern>
  <!--
    Space after an ellipsis: inserts a space between "..." and the text that follows it
    (a word character, optionally preceded by one quotation character). The ellipsis
    must come after a word character, with only non-word, non-space characters allowed
    in between.
  -->
  <pattern _name="Space after an ellipsis"
           _description="Add space after an ellipsis"
           classes="Human;OCR;">
    <rule regex="(\w[^\w\s]*?\.\.\.)([&quot;'«»]?\w)"
          flags="DOTALL;MULTILINE;UNICODE;"
          replacement="\1 \2"
          repeat="False"/>
  </pattern>
  <!--
    Space after a starting ellipsis: removes the run of spaces that follows "..." when
    only non-word characters stand between the start of the line and the ellipsis.
  -->
  <pattern _name="Space after a starting ellipsis"
           _description="Remove space after an ellipsis that starts a line"
           classes="Human;OCR;">
    <rule regex="(^\W*?\.\.\.)( +)"
          flags="DOTALL;MULTILINE;UNICODE;"
          replacement="\1"
          repeat="False"/>
  </pattern>
  <!--
    Spaces around a quotation mark: removes spaces just inside quotation marks.
    Rule 1: the whole text (\A to \Z) holds exactly two double quotes; spaces directly
            after the first one are removed.
    Rule 2: same two-quote condition; spaces directly before the second one are removed.
    Rule 3: a " or ' preceded on its line only by non-word characters; the spaces that
            follow it are removed.
    Rule 4: a " or ' followed only by non-word characters up to the end of the line; the
            spaces in front of it are removed.
  -->
  <pattern _name="Spaces around a quotation mark"
           _description="Remove space after a starting- and before an ending quotation mark"
           classes="OCR;">
    <rule regex="(\A[^&quot;]*)(&quot; +)([^&quot;]*&quot;[^&quot;]*\Z)"
          flags="DOTALL;MULTILINE;UNICODE;"
          replacement="\1&quot;\3"
          repeat="False"/>
    <rule regex="(\A[^&quot;]*&quot;[^&quot;]*)( +&quot;)([^&quot;]*\Z)"
          flags="DOTALL;MULTILINE;UNICODE;"
          replacement="\1&quot;\3"
          repeat="False"/>
    <rule regex="^(\W*?[&quot;'])( +)"
          flags="DOTALL;MULTILINE;UNICODE;"
          replacement="\1"
          repeat="False"/>
    <rule regex="( +)([&quot;']\W*?)$"
          flags="DOTALL;MULTILINE;UNICODE;"
          replacement="\2"
          repeat="False"/>
  </pattern>
  <!--
    Multiple question- and exclamation marks: collapses a run of two or more "?" (rule 1)
    or "!" (rule 2) into a single mark. The run must come after a word character, with
    any number of non-word characters in between, and must not begin at the start of a
    line. The second lookbehind keeps the greedy \W* from swallowing part of the run.
  -->
  <pattern _name="Multiple question- and exclamation marks"
           _description="Replace multiple consecutive question- and exclamation marks with only one"
           classes="Human;">
    <rule regex="(\w\W*)(?&lt;!^)(?&lt;!\?)\?{2,}"
          flags="DOTALL;MULTILINE;UNICODE;"
          replacement="\1?"
          repeat="False"/>
    <rule regex="(\w\W*)(?&lt;!^)(?&lt;!\!)\!{2,}"
          flags="DOTALL;MULTILINE;UNICODE;"
          replacement="\1!"
          repeat="False"/>
  </pattern>
  <!--
    Periods around a punctuation mark: deletes a stray period next to ":", "?" or "!".
    Rule 1: a period directly after one of those marks, unless another period follows
            it (so a following ellipsis is kept).
    Rule 2: a period directly before one of those marks, unless it is preceded by a
            period or by a period plus one word character (for example the last period
            of "a.m." is kept).
    Both replacements are the empty string.
  -->
  <pattern _name="Periods around a punctuation mark"
           _description="Remove period before or after various punctuation marks"
           classes="OCR;">
    <rule regex="(?&lt;=[:?!])\.(?!\.)"
          flags="DOTALL;MULTILINE;UNICODE;"
          replacement=""
          repeat="False"/>
    <rule regex="(?&lt;!\.\w)(?&lt;!\.)\.(?=[:?!])"
          flags="DOTALL;MULTILINE;UNICODE;"
          replacement=""
          repeat="False"/>
  </pattern>
</patterns>