File: textreader.py

package info (click to toggle)
python-sigima 1.0.3-1
  • links: PTS, VCS
  • area: main
  • in suites: sid
  • size: 24,956 kB
  • sloc: python: 33,326; makefile: 3
file content (58 lines) | stat: -rw-r--r-- 1,539 bytes parent folder | download
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
# Copyright (c) DataLab Platform Developers, BSD 3-Clause license, see LICENSE file.

"""I/O utility functions."""

# pylint: disable=invalid-name  # Allows short reference names like x, y...

from __future__ import annotations

import os
from itertools import islice

from sigima.io.enums import FileEncoding


def count_lines(filename: str | os.PathLike[str]) -> int:
    """Return how many lines a text file contains.

    The file is opened in text mode with each member of ``FileEncoding`` in
    turn. The count obtained with the first encoding that decodes the whole
    file is returned.

    Args:
        filename: File name or path.

    Returns:
        The number of lines in the file.

    Raises:
        IOError: If the file cannot be read, i.e. every encoding raised a
         ``UnicodeDecodeError``.
    """
    for candidate in FileEncoding:
        total = 0
        try:
            with open(filename, "r", encoding=candidate) as stream:
                # Iterate lazily so the file is never held in memory at once.
                for _line in stream:
                    total += 1
        except UnicodeDecodeError:
            # This encoding does not fit the file content: try the next one.
            continue
        return total
    raise IOError(f"Cannot read file {filename}.")


def read_first_n_lines(filename: str | os.PathLike[str], n: int = 100000) -> str:
    """Return the beginning of a text file, limited to `n` lines.

    The file is opened in text mode with each member of ``FileEncoding`` in
    turn. The text obtained with the first encoding that decodes the
    requested lines is returned.

    Args:
        filename: File name or path.
        n: Maximum number of lines to read.

    Returns:
        The first `n` lines of the file, concatenated into a single string
        (the whole content if the file has fewer than `n` lines).

    Raises:
        IOError: If the file cannot be read, i.e. every encoding raised a
         ``UnicodeDecodeError``.
    """
    for candidate in FileEncoding:
        try:
            with open(filename, "r", encoding=candidate) as stream:
                # islice stops after `n` lines without reading the rest.
                head = "".join(islice(stream, n))
        except UnicodeDecodeError:
            # This encoding does not fit the file content: try the next one.
            continue
        else:
            return head
    raise IOError(f"Cannot read file {filename}.")