1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
|
# localslackirc
# Copyright (C) 2018-2022 Salvo "LtWorf" Tomaselli
#
# localslackirc is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# author Salvo "LtWorf" Tomaselli <tiposchi@tiscali.it>
from typing import Iterable
from itertools import count
# Names exported by `from <module> import *`; only seddiff is public.
__all__ = [
'seddiff',
]
# Characters at which wordsplit() starts a new chunk.
_SEPARATORS = frozenset(' .,:;\t\n()[]{}')
def wordsplit(word: str) -> Iterable[str]:
    """
    Split word into chunks, starting a new chunk at every
    separator character.

    A separator stays attached to the beginning of the chunk that
    follows it, so joining the chunks gives back the input. When the
    input begins with a separator, the first chunk is an empty string.
    """
    start = 0
    for pos, char in enumerate(word):
        if char in _SEPARATORS:
            # Emit everything collected so far; the separator itself
            # opens the next chunk.
            yield word[start:pos]
            start = pos
    # Whatever remains after the last separator (or the whole input
    # when there was none) is the final chunk.
    if start < len(word):
        yield word[start:]
def _common_prefix_length(left: Iterable[str], right: Iterable[str]) -> int:
    """
    Count the leading items that are pairwise equal in left and right.

    Counting stops at the first mismatch, or when the shorter of the
    two inputs is exhausted.
    """
    length = 0
    for x, y in zip(left, right):
        if x != y:
            break
        length += 1
    return length


def seddiff(a: str, b: str) -> str:
    """
    Original string, changed string

    This is meant to operate on simple word changes
    or similar. Returns the IRC style correction
    format, 's/old/new/'.

    An empty string is returned when a and b are equal. When the
    replaced part is empty, '$' is used in its place.
    """
    if a == b:
        return ''

    l1 = list(wordsplit(a))
    l2 = list(wordsplit(b))

    # Number of chunks shared at the start and at the end of both strings.
    prefix = _common_prefix_length(l1, l2)
    postfix = _common_prefix_length(reversed(l1), reversed(l2))

    # When the number of chunks changed and both ends matched, widen the
    # changed span by one chunk on each side (presumably to give the
    # correction some surrounding context).
    if prefix and postfix and len(l1) != len(l2):
        prefix -= 1
        postfix -= 1

    # With repeated chunks ("ha ha ha ha" -> "ha ha ha ha ha") the shared
    # start and shared end can cover the same chunks. Limit the suffix to
    # what is left after the prefix, otherwise the slices below would drop
    # part (or all) of the change.
    postfix = min(postfix, min(len(l1), len(l2)) - prefix)

    # A suffix of 0 must slice to the end; [x:-0] would be empty.
    px = None if postfix == 0 else -postfix
    old = ''.join(l1[prefix:px]).strip()
    new = ''.join(l2[prefix:px]).strip()
    return 's/%s/%s/' % (old or '$', new)
|