File: LineReader.pm

package info (click to toggle)
naturaldocs 1.51-4
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 2,304 kB
  • sloc: perl: 17,534; javascript: 1,925; makefile: 6; sh: 1
file content (166 lines) | stat: -rw-r--r-- 4,486 bytes parent folder | download | duplicates (4)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
###############################################################################
#
#   Class: NaturalDocs::LineReader
#
###############################################################################
#
#   An object to handle reading text files line by line in a cross platform manner.  Using this class instead of the standard
#	angle brackets approach has the following benefits:
#
#	- It strips all three types of line breaks automatically: CR/LF (Windows), LF (Unix), and CR (Classic Mac).  You do not need to
#	  call chomp().  Perl's chomp() fails when parsing Windows-format line breaks on a Unix platform anyway.  It leaves the \r on,
#	  which screws everything up.
#	- It reads Classic Mac files line by line correctly, whereas the Perl version returns it all as one line.
#	- It abstracts away ignoring the Unicode BOM on the first line, if present.
#
###############################################################################

# This file is part of Natural Docs, which is Copyright  2003-2010 Greg Valure
# Natural Docs is licensed under version 3 of the GNU Affero General Public License (AGPL)
# Refer to License.txt for the complete details

use strict;
use integer;


package NaturalDocs::LineReader;

#
#	Constants: Members
#
#	LINEREADER_FILEHANDLE - The file handle being used to read the file.  Has the LINEREADER_ prefix to make sure it doesn't
#											 conflict with any actual filehandles named FILEHANDLE in the program.
#	CACHED_LINES - An arrayref of lines already read into memory.
#
use NaturalDocs::DefineMembers 'LINEREADER_FILEHANDLE',
                                                 'CACHED_LINES';

#
#   Function: New
#
#   Creates and returns a new object.
#
#   The start of the file is checked for a Unicode byte order mark.  If a UTF-8, UTF-16BE, or UTF-16LE BOM is found it is
#   skipped and the matching decoding layer is applied to the handle.  Otherwise the handle is rewound and left undecoded.  CR/LF
#   translation is enabled in every case.
#
#   Parameters:
#
#       filehandle - The file handle being used to read the file.  The BOM check reads from the current position and then seeks
#                        to an absolute offset, so the handle is expected to be seekable and positioned at the start of the file.
#
sub New #(filehandle)
    {
    my ($selfPackage, $filehandle) = @_;

    my $object = [ ];

    $object->[LINEREADER_FILEHANDLE] = $filehandle;
    $object->[CACHED_LINES] = [ ];

    # Look at the leading bytes with no translation or decoding layers in the way.
    binmode($filehandle, ':raw');

    my $possibleBOM = undef;
    read($filehandle, $possibleBOM, 2);

    if ($possibleBOM eq "\xEF\xBB")
        {
        # These are the first two bytes of the UTF-8 BOM.  It only counts if the third byte matches too.
        read($filehandle, $possibleBOM, 1);
        if ($possibleBOM eq "\xBF")
            {
            seek($filehandle, 3, 0);
            binmode($filehandle, ':crlf:encoding(UTF-8)');  # Strict UTF-8, not Perl's lax version.
            }
        else
            {
            seek($filehandle, 0, 0);
            binmode($filehandle, ':crlf');
            }
        }
    elsif ($possibleBOM eq "\xFE\xFF")
        {
        seek($filehandle, 2, 0);

        # For UTF-16 the CR/LF translation has to sit on top of the decoder.  If it ran on the undecoded bytes, any 0x0D byte
        # that happens to be followed by a 0x0A byte (U+0D0A, for example) would be mistaken for a CR/LF pair and a byte
        # would be dropped, corrupting everything after it.
        binmode($filehandle, ':raw:encoding(UTF-16BE):crlf');
        }
    elsif ($possibleBOM eq "\xFF\xFE")
        {
        seek($filehandle, 2, 0);

        # Same layer order as above.  In little endian, a U+0Dxx character followed by a line feed looks like CR/LF at the
        # byte level.
        binmode($filehandle, ':raw:encoding(UTF-16LE):crlf');
        }
    else
        {
        # No BOM, so go back to the beginning and read the content undecoded.
        seek($filehandle, 0, 0);
        binmode($filehandle, ':crlf');
        }

    bless $object, $selfPackage;
    return $object;
    };


#
#   Function: Chomp
#
#   Removes a single line break from the very end of a value, whether it is CR/LF, CR, or LF.  It does not remove any that are
#   in the middle of it.  If the value doesn't end with a line break it is left alone.
#
#   Parameters:
#
#       lineRef - A *reference* to the line to chomp.  The string it points to is modified in place.
#
sub Chomp #(lineRef)
    {
    my ($self, $lineRef) = @_;

    # Anchor with \z instead of $.  $ also matches right before a final line feed, which would let the pattern remove a
    # line break that is followed by one more LF instead of removing the one that is actually last.
    $$lineRef =~ s/(?:\r\n|\r|\n)\z//;
    };


#
#	Function: Get
#
#	Returns the next line of text from the file, or undef if there are no more.  The line break will be removed automatically.  A
#	Unicode BOM at the start of the file is skipped when the object is created, so it never appears in the first line.
#
#	Content that uses CR alone as the line break (Classic Mac) comes back from Perl as one long line.  It is split here and the
#	extra lines are kept in CACHED_LINES to be returned by later calls.  Blank lines are returned as empty strings.
#
sub Get
	{
	my $self = shift;
	my $line = undef;

	if (scalar @{$self->[CACHED_LINES]} == 0)
		{
		my $filehandle = $self->[LINEREADER_FILEHANDLE];
		my $rawLine = <$filehandle>;

		if (!defined $rawLine)
			{  return undef;  }

		$self->Chomp(\$rawLine);

		if ($rawLine =~ /\r/)
			{
			# Split for Classic Mac.  The -1 limit keeps trailing empty fields.  Without it a chunk made up of nothing but
			# blank lines would split into an empty list, and the undef shifted off of it would look like the end of the file
			# to the caller.
			push @{$self->[CACHED_LINES]}, split(/\r/, $rawLine, -1);
			$line = shift @{$self->[CACHED_LINES]};
			}
		else
			{  $line = $rawLine;  }
		}
	else
		{  $line = shift @{$self->[CACHED_LINES]};  }

	return $line;
	}


#
#	Function: GetAll
#
#	Returns an array of all the remaining lines from the file.  The line breaks will be removed automatically.  A Unicode BOM at
#	the start of the file is skipped when the object is created, so it never appears in the first line.
#
#	Lines that an earlier call to <Get()> read from the file but has not returned yet are included first.  Because of how
#	split() treats trailing empty fields, blank lines at the very end of the remaining content are not included.
#
sub GetAll
	{
	my $self = shift;

	my $filehandle = $self->[LINEREADER_FILEHANDLE];

	# Start with anything Get() already pulled out of the file but hasn't handed out yet.  This happens with Classic Mac
	# content, where a single read can return many lines.
	my @lines = @{$self->[CACHED_LINES]};
	$self->[CACHED_LINES] = [ ];

	# Read through to the end of the file.  This doesn't rely on -s, which only reports a usable size for regular files and
	# would make this return nothing for in-memory handles.
	my $rawContent = do { local $/ = undef; <$filehandle> };

	if (defined $rawContent)
		{  push @lines, split(/\r\n|\n|\r/, $rawContent);  }

	return @lines;
	}

1;