File: sorting.py

package info (click to toggle)
python-tksheet 7.4.16%2Bdfsg-1
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 1,560 kB
  • sloc: python: 25,406; sh: 27; makefile: 2
file content (543 lines) | stat: -rw-r--r-- 16,959 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
from __future__ import annotations

from collections.abc import Callable, Iterator
from datetime import datetime
from pathlib import Path
from re import split
from typing import Any

# Possible date formats to try for the entire string
date_formats = (
    # Common formats
    "%d/%m/%Y",  # Day/Month/Year
    "%m/%d/%Y",  # Month/Day/Year (US format)
    "%Y/%m/%d",  # Year/Month/Day
    "%d.%m.%Y",  # Day.Month.Year (European format)
    "%d-%m-%Y",  # Day-Month-Year
    "%m-%d-%Y",  # Month-Day-Year
    "%Y-%m-%d",  # Year-Month-Day (ISO format without time)
    "%d/%m/%y",  # Day/Month/2-digit year
    "%m/%d/%y",  # Month/Day/2-digit year
    "%y/%m/%d",  # 2-digit year/Month/Day
    "%d,%m,%Y",  # Day,Month,Year
    "%m,%d,%Y",  # Month,Day,Year
    "%Y,%m,%d",  # Year,Month,Day
    "%d %m %Y",  # Day Month Year (with space)
    "%m %d %Y",  # Month Day Year (with space)
    "%Y %d %m",  # Year Month Day (with space)
    # With month names
    "%d %b %Y",  # Day Abbreviated Month Year
    "%b %d, %Y",  # Abbreviated Month Day, Year
    "%d %B %Y",  # Day Full Month Name Year
    "%B %d, %Y",  # Full Month Name Day, Year
    # ISO 8601 with/without time
    "%Y-%m-%dT%H:%M:%S",  # With time
    "%Y-%m-%d",  # Without time
    # Regional or less common formats
    # "%Y年%m月%d日",  # Japanese-style date
    "%Y%m%d",  # YYYYMMDD format, often used in logs or filenames
    "%y%m%d",  # YYMMDD
    "%d%m%Y",  # DDMMYYYY, sometimes used in Europe
    # Additional formats
    "%d/%m/%y %H:%M",  # Day/Month/Year Hour:Minute
    "%m/%d/%y %H:%M",  # Month/Day/Year Hour:Minute
    "%Y-%m-%d %H:%M:%S",  # Year-Month-Day Hour:Minute:Second
)


def _string_fallback(item: str) -> tuple[int, ...]:
    """
    In order to have reasonable file path sorting:
    - Split by path separators
    - Determine depth, more separators more depth
    - Deal with every dir by splitting by digits
    - Deal with file name by splitting by digits
    """
    components = split(r"[/\\]", item)
    if components[-1]:
        return (
            5,
            len(components),
            tuple(int(e) if e.isdigit() else e.lower() for comp in components[:-1] for e in split(r"(\d+)", comp) if e),
            tuple(int(e) if e.isdigit() else e.lower() for e in split(r"(\d+)", components[-1])),
        )
    else:
        return (
            5,
            len(components),
            tuple(int(e) if e.isdigit() else e.lower() for comp in components[:-1] for e in split(r"(\d+)", comp) if e),
            (),
        )


def version_sort_key(item: Any) -> tuple[int, ...]:
    """
    A key for natural sorting of various Python types.

    - Won't convert strings to floats
    - Will sort string version numbers

    0. None
    1. Empty strings
    2. bool
    3. int, float
    4. datetime (inc. strings that are dates)
    5. strings (including string file paths and paths as POSIX strings) & unknown objects with __str__
    6. unknown objects
    """
    if isinstance(item, str):
        if item:
            for date_format in date_formats:
                try:
                    return (4, datetime.strptime(item, date_format).timestamp())
                except ValueError:
                    continue
            # the same as _string_fallback
            components = split(r"[/\\]", item)
            if components[-1]:
                return (
                    5,
                    len(components),
                    tuple(
                        int(e) if e.isdigit() else e.lower()
                        for comp in components[:-1]
                        for e in split(r"(\d+)", comp)
                        if e
                    ),
                    tuple(int(e) if e.isdigit() else e.lower() for e in split(r"(\d+)", components[-1])),
                )
            else:
                return (
                    5,
                    len(components),
                    tuple(
                        int(e) if e.isdigit() else e.lower()
                        for comp in components[:-1]
                        for e in split(r"(\d+)", comp)
                        if e
                    ),
                    (),
                )
        else:
            return (1, item)

    elif item is None:
        return (0,)

    elif isinstance(item, bool):
        return (2, item)

    elif isinstance(item, (int, float)):
        return (3, item)

    elif isinstance(item, datetime):
        return (4, item.timestamp())

    elif isinstance(item, Path):
        return _string_fallback(item.as_posix())

    else:
        try:
            return _string_fallback(f"{item}")
        except Exception:
            return (6, item)


def natural_sort_key(item: Any) -> tuple[int, ...]:
    """
    A key for natural sorting of various Python types.

    - Won't sort string version numbers
    - Will convert strings to floats
    - Will sort strings that are file paths

    0. None
    1. Empty strings
    2. bool
    3. int, float (inc. strings that are numbers)
    4. datetime (inc. strings that are dates)
    5. strings (including string file paths and paths as POSIX strings) & unknown objects with __str__
    6. unknown objects
    """
    if isinstance(item, str):
        if item:
            for date_format in date_formats:
                try:
                    return (4, datetime.strptime(item, date_format).timestamp())
                except ValueError:
                    continue

            try:
                return (3, float(item))
            except ValueError:
                # the same as _string_fallback
                components = split(r"[/\\]", item)
                if components[-1]:
                    return (
                        5,
                        len(components),
                        tuple(
                            int(e) if e.isdigit() else e.lower()
                            for comp in components[:-1]
                            for e in split(r"(\d+)", comp)
                            if e
                        ),
                        tuple(int(e) if e.isdigit() else e.lower() for e in split(r"(\d+)", components[-1])),
                    )
                else:
                    return (
                        5,
                        len(components),
                        tuple(
                            int(e) if e.isdigit() else e.lower()
                            for comp in components[:-1]
                            for e in split(r"(\d+)", comp)
                            if e
                        ),
                        (),
                    )
        else:
            return (1, item)

    elif item is None:
        return (0,)

    elif isinstance(item, bool):
        return (2, item)

    elif isinstance(item, (int, float)):
        return (3, item)

    elif isinstance(item, datetime):
        return (4, item.timestamp())

    elif isinstance(item, Path):
        return _string_fallback(item.as_posix())

    else:
        try:
            return _string_fallback(f"{item}")
        except Exception:
            return (6, item)


def fast_sort_key(item: Any) -> tuple[int, ...]:
    """
    A faster key for natural sorting of various Python types.

    - Won't sort strings that are dates very well
    - Won't convert strings to floats
    - Won't sort string file paths very well
    - Will do ok with string version numbers

    0. None
    1. Empty strings
    2. bool
    3. int, float
    4. datetime
    5. strings (including paths as POSIX strings) & unknown objects with __str__
    6. unknown objects
    """
    if isinstance(item, str):
        if item:
            return (5, tuple(int(e) if e.isdigit() else e.lower() for e in split(r"(\d+)", item)))
        else:
            return (1, item)

    elif item is None:
        return (0,)

    elif isinstance(item, bool):
        return (2, item)

    elif isinstance(item, (int, float)):
        return (3, item)

    elif isinstance(item, datetime):
        return (4, item.timestamp())

    elif isinstance(item, Path):
        return _string_fallback(item.as_posix())

    else:
        try:
            return _string_fallback(f"{item}")
        except Exception:
            return (6, item)


def sort_selection(
    data: list[list[Any]],
    reverse: bool = False,
    key: Callable | None = None,
    row_wise: bool = False,
) -> list[list[Any]]:
    if not data or not isinstance(data[0], list):
        raise ValueError("Data must be a list of lists.")

    if key is None:
        key = natural_sort_key

    if row_wise:
        return [sorted(row, key=key, reverse=reverse) for row in data]
    else:
        return list(
            zip(
                *(
                    sorted(
                        (row[col] for row in data),
                        key=key,
                        reverse=reverse,
                    )
                    for col in range(len(data[0]))
                )
            )
        )


def sort_column(
    data: list[list[Any]] | list[Any] | Iterator[Any],
    column: int = 0,
    reverse: bool = False,
    key: Callable | None = None,
) -> list[list[Any]] | list[Any]:
    if not data:
        return data

    if key is None:
        key = natural_sort_key

    if isinstance(data, list) and isinstance(data[0], list):
        return sorted(data, key=lambda row: key(row[column]) if len(row) > column else key(None), reverse=reverse)
    else:
        return sorted(data, reverse=reverse, key=key)


def sort_row(
    data: list[list[Any]] | list[Any] | Iterator[Any],
    row: int = 0,
    reverse: bool = False,
    key: Callable | None = None,
) -> list[list[Any]]:
    if not data:
        return data

    if key is None:
        key = natural_sort_key

    if isinstance(data, list) and isinstance(data[0], list):
        if 0 <= row < len(data):
            data[row] = sorted(data[row], key=key, reverse=reverse)
            return data
        else:
            raise IndexError(f"Row index {row} out of range for data with {len(data)} rows.")
    else:
        return sorted(data, reverse=reverse, key=key)


def sort_rows_by_column(
    data: list[list[Any]],
    column: int = 0,
    reverse: bool = False,
    key: Callable | None = None,
) -> tuple[list[tuple[int, list[Any]]], dict[int, int]]:
    if not data:
        return data, {}

    # Check if data is a list of lists
    if not isinstance(data[0], list):
        raise ValueError("Data must be a list of lists for row sorting.")

    if key is None:
        key = natural_sort_key

    # Use a generator expression for sorting to avoid creating an intermediate list
    sorted_indexed_data = sorted(
        ((i, row) for i, row in enumerate(data)),
        key=lambda item: key(item[1][column]) if len(item[1]) > column else key(None),
        reverse=reverse,
    )

    # Return sorted rows [(old index, row), ...] and create the mapping dictionary
    return sorted_indexed_data, {old: new for new, (old, _) in enumerate(sorted_indexed_data)}


def sort_columns_by_row(
    data: list[list[Any]],
    row: int = 0,
    reverse: bool = False,
    key: Callable | None = None,
) -> tuple[list[int], dict[int, int]]:
    if not data:
        return data, {}

    # Check if data is a list of lists
    if not isinstance(data[0], list):
        raise ValueError("Data must be a list of lists for column sorting.")

    if row >= len(data) or row < 0:
        raise IndexError(f"Row index {row} out of range for data with {len(data)} rows.")

    if key is None:
        key = natural_sort_key

    # Get sorting indices based on the elements of the specified row
    sort_indices = sorted(range(len(data[row])), key=lambda i: key(data[row][i]), reverse=reverse)
    sort_indices_set = set(sort_indices)

    new_data = []
    for row_data in data:
        # Sort the columns based on sort_indices, then append any additional elements from longer rows
        sorted_part = [row_data[i] for i in sort_indices if i < len(row_data)]
        unsorted_part = [elem for idx, elem in enumerate(row_data) if idx not in sort_indices_set]
        new_data.append(sorted_part + unsorted_part)

    return sort_indices, dict(zip(range(len(data[row])), sort_indices))


def sort_tree_rows_by_column(
    data: list[list[Any]],
    column: int,
    index: list[Any],
    rns: dict[str, int],
    reverse: bool = False,
    key: Callable | None = None,
) -> tuple[list[Any], dict[int, int]]:
    """
    Sorts tree rows by a specified column in depth-first order, returning sorted nodes and a row mapping.

    Args:
        data: List of rows, where each row is a list of column values.
        column: Index of the column to sort by.
        index: List of nodes, where each node has 'iid', 'parent', and 'children' attributes.
        rns: Dictionary mapping item IDs (iid) to original row numbers in data.
        reverse: If True, sort in descending order; otherwise, ascending.
        key: Optional function to compute sort keys; defaults to natural_sort_key if None.

    Returns:
        Tuple containing:
        - List of nodes in sorted order.
        - Dictionary mapping original row numbers to new row numbers.
    """
    if not index or not rns:
        return [], {}

    if key is None:
        key = natural_sort_key  # Assuming natural_sort_key is defined elsewhere

    # Define the sort_reverse parameter to avoid unnecessary reversals
    sort_reverse = not reverse

    # Helper function to compute the sorting key for a node
    def get_key(node):
        row = data[rns[node.iid]]
        return key(row[column] if len(row) > column else None)

    # Initialize stack with sorted top-level nodes for efficiency
    stack = sorted(
        (node for node in index if node.parent == ""),
        key=get_key,
        reverse=sort_reverse,
    )

    # Initialize output structures
    sorted_nodes = []
    mapping = {}
    new_rn = 0

    # Process nodes iteratively in depth-first order
    while stack:
        current = stack.pop()  # Pop from the right end
        sorted_nodes.append(current)
        mapping[rns[current.iid]] = new_rn
        new_rn += 1
        if current.children:
            # Sort children
            sorted_children = sorted(
                (index[rns[ciid]] for ciid in current.children if ciid in rns),
                key=get_key,
                reverse=sort_reverse,
            )
            # Extend stack with sorted children
            stack.extend(sorted_children)  # Adds to the right end

    return sorted_nodes, mapping


# if __name__ == "__main__":
#     test_cases = {
#         "Filenames": ["file10.txt", "file2.txt", "file1.txt"],
#         "Versions": ["1.10", "1.2", "1.9", "1.21"],
#         "Mixed": ["z1.doc", "z10.doc", "z2.doc", "z100.doc"],
#         "Paths": [
#             "/folder/file.txt",
#             "/folder/file (1).txt",
#             "/folder (1)/file.txt",
#             "/folder (10)/file.txt",
#             "/dir/subdir/file1.txt",
#             "/dir/subdir/file10.txt",
#             "/dir/subdir/file2.txt",
#             "/dir/file.txt",
#             "/dir/sub/file123.txt",
#             "/dir/sub/file12.txt",
#             # New challenging cases
#             "/a/file.txt",
#             "/a/file1.txt",
#             "/a/b/file.txt",
#             "/a/b/file1.txt",
#             "/x/file-v1.2.txt",
#             "/x/file-v1.10.txt",
#             # My own new challenging cases
#             "/a/zzzzz.txt",
#             "/a/b/a.txt",
#         ],
#         "Case": ["Apple", "banana", "Corn", "apple", "Banana", "corn"],
#         "Leading Zeros": ["001", "010", "009", "100"],
#         "Complex": ["x-1-y-10", "x-1-y-2", "x-2-y-1", "x-10-y-1"],
#         "Lengths": ["2 ft 9 in", "2 ft 10 in", "10 ft 1 in", "10 ft 2 in"],
#         "Floats": [
#             "1.5",
#             "1.25",
#             "1.025",
#             "10.5",
#             "-10.2",
#             "-2.5",
#             "5.7",
#             "-1.25",
#             "0.0",
#             "1.5e3",
#             "2.5e2",
#             "1.23e4",
#             "1e-2",
#             "file1.2.txt",
#             "file1.5.txt",
#             "file1.10.txt",
#         ],
#         "Strings": [
#             "123abc",
#             "abc123",
#             "123abc456",
#             "abc123def",
#         ],
#     }

#     print("FAST SORT KEY:")

#     for name, data in test_cases.items():
#         sorted1 = sorted(data, key=fast_sort_key)
#         print(f"\n{name}:")
#         print(sorted1)

#     print("\nNATURAL SORT KEY:")

#     for name, data in test_cases.items():
#         sorted1 = sorted(data, key=natural_sort_key)
#         print(f"\n{name}:")
#         print(sorted1)

#     print("\nVERSION SORT KEY:")

#     for name, data in test_cases.items():
#         sorted1 = sorted(data, key=version_sort_key)
#         print(f"\n{name}:")
#         print(sorted1)