1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
|
"""Natural sort.
Compares numeric parts numerically.
Rules:
* String consists of numeric and non-numeric parts.
* Parts are compared pairwise, if both are numeric then numerically,
otherwise textually. (Only first fragment can have different type)
* If strings are different, they should compare differnet in natsort.
Extra rules for version numbers:
* In textual comparision, '~' is less than anything else, including end-of-string.
* In numeric comparision, numbers that start with '0' are compared as decimal
fractions, thus number that starts with more zeroes is smaller.
"""
import re
from typing import List, Sequence
__all__ = (
'natsort_key', 'natsort', 'natsorted',
'natsort_key_icase', 'natsort_icase', 'natsorted_icase'
)
_rc = re.compile(r'\d+|\D+', re.A)
def natsort_key(s: str) -> str:
"""Returns string that sorts according to natsort rules.
"""
# generates four types of fragments:
# 1) strings < "0", stay as-is
# 2) numbers starting with 0, fragment starts with "A".."Z"
# 3) numbers starting with 1..9, fragment starts with "a".."z"
# 4) strings > "9", fragment starts with "|"
if "~" in s:
s = s.replace("~", "\0")
key: List[str] = []
key_append = key.append
for frag in _rc.findall(s):
if frag < "0":
key_append(frag)
key_append("\1")
elif frag < "1":
nzeros = len(frag) - len(frag.lstrip('0'))
mag = str(nzeros)
mag = str(10**len(mag) - nzeros)
key_append(chr(0x5B - len(mag))) # Z, Y, X, ...
key_append(mag)
key_append(frag)
elif frag < ":":
mag = str(len(frag))
key_append(chr(0x60 + len(mag))) # a, b, c, ...
key_append(mag)
key_append(frag)
else:
key_append("|")
key_append(frag)
key_append("\1")
if not (key and key[-1] == "\1"):
key_append("\1")
return "".join(key)
def natsort(lst: List[str]) -> None:
"""Natural in-place sort, case-sensitive."""
lst.sort(key=natsort_key)
def natsorted(lst: Sequence[str]) -> List[str]:
"""Return copy of list, sorted in natural order, case-sensitive.
"""
return sorted(lst, key=natsort_key)
# case-insensitive api
def natsort_key_icase(s: str) -> str:
"""Split string to numeric and non-numeric fragments."""
return natsort_key(s.lower())
def natsort_icase(lst: List[str]) -> None:
"""Natural in-place sort, case-sensitive."""
lst.sort(key=natsort_key_icase)
def natsorted_icase(lst: Sequence[str]) -> List[str]:
"""Return copy of list, sorted in natural order, case-sensitive.
"""
return sorted(lst, key=natsort_key_icase)
|