1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166
|
###############################################################################
#
# Class: NaturalDocs::LineReader
#
###############################################################################
#
# An object to handle reading text files line by line in a cross platform manner. Using this class instead of the standard
# angle brackets approach has the following benefits:
#
# - It strips all three types of line breaks automatically: CR/LF (Windows), LF (Unix), and CR (Classic Mac). You do not need to
# call chomp(). Perl's chomp() fails when parsing Windows-format line breaks on a Unix platform anyway. It leaves the \r on,
# which screws everything up.
# - It reads Classic Mac files line by line correctly, whereas the Perl version returns it all as one line.
# - It abstracts away ignoring the Unicode BOM on the first line, if present.
#
###############################################################################
# This file is part of Natural Docs, which is Copyright 2003-2010 Greg Valure
# Natural Docs is licensed under version 3 of the GNU Affero General Public License (AGPL)
# Refer to License.txt for the complete details
use strict;
use integer;
package NaturalDocs::LineReader;
#
# Constants: Members
#
# LINEREADER_FILEHANDLE - The file handle being used to read the file. Has the LINEREADER_ prefix to make sure it doesn't
# conflict with any actual filehandles named FILEHANDLE in the program.
# CACHED_LINES - An arrayref of lines already read into memory.
#
use NaturalDocs::DefineMembers 'LINEREADER_FILEHANDLE',
'CACHED_LINES';
#
# Function: New
#
# Creates and returns a new object wrapped around an already opened file handle.
#
# The start of the file is inspected for a Unicode byte order mark.  If one is found the handle is positioned just past it and
# the matching decoding layer is applied; otherwise the handle is rewound to the start.  In every case the :crlf layer is applied.
#
# Parameters:
#
# filehandle - The file handle being used to read the file.  Its layers and position are changed by this function.
#
sub New #(filehandle)
    {
    my ($selfPackage, $filehandle) = @_;

    my $object = [ ];
    $object->[LINEREADER_FILEHANDLE] = $filehandle;
    $object->[CACHED_LINES] = [ ];

    # The BOM has to be examined as plain bytes, so drop any layers first.
    binmode($filehandle, ':raw');

    # Defaults for a file without a recognized BOM: start at the beginning, no decoding.
    my $bomLength = 0;
    my $layers = ':crlf';

    my $signature = undef;
    read($filehandle, $signature, 2);

    if ($signature eq "\xEF\xBB")
        {
        # Possibly UTF-8.  Its BOM is three bytes long, so the third one decides.
        my $thirdByte = undef;
        read($filehandle, $thirdByte, 1);

        if ($thirdByte eq "\xBF")
            {
            $bomLength = 3;
            $layers = ':crlf:encoding(UTF-8)';  # Strict UTF-8, not Perl's lax version.
            }
        }
    elsif ($signature eq "\xFE\xFF")
        {
        $bomLength = 2;
        $layers = ':crlf:encoding(UTF-16BE)';
        }
    elsif ($signature eq "\xFF\xFE")
        {
        $bomLength = 2;
        $layers = ':crlf:encoding(UTF-16LE)';
        }

    # Skip the BOM, or undo the peek if there wasn't one, then switch to the final layers.
    seek($filehandle, $bomLength, 0);
    binmode($filehandle, $layers);

    return bless $object, $selfPackage;
    }
#
# Function: Chomp
#
# Removes a single line break from the very end of a value, if there is one.  CR/LF, CR, and LF are all recognized.  Line
# breaks anywhere else in the value are left alone.
#
# Parameters:
#
# lineRef - A *reference* to the line to chomp.  The referenced value is modified in place.
#
sub Chomp #(lineRef)
    {
    my ($self, $lineRef) = @_;

    # \z is used rather than $ because $ can also match just before a trailing "\n".  With $, a value like "text\r\n\n" would
    # lose the "\r\n" in the middle instead of the "\n" at the end.
    $$lineRef =~ s/(?:\r\n|\r|\n)\z//;
    }
#
# Function: Get
#
# Returns the next line of text from the file, or undef if there are no more.  The line break will be removed automatically.
# Any Unicode BOM is skipped when the object is created, so it never appears in the first line.
#
# A chunk read from the file can hold several Classic Mac lines, since they are only separated by CRs.  Such a chunk is split
# and the pieces are cached, then returned one at a time by this and subsequent calls.
#
sub Get
    {
    my $self = shift;

    my $cachedLines = $self->[CACHED_LINES];

    if (scalar @$cachedLines == 0)
        {
        my $filehandle = $self->[LINEREADER_FILEHANDLE];
        my $rawLine = <$filehandle>;

        if (!defined $rawLine)
            {  return undef;  }

        $self->Chomp(\$rawLine);

        if ($rawLine !~ /\r/)
            {  return $rawLine;  }

        # Split for Classic Mac.  The limit of -1 keeps trailing empty fields.  Without it, blank lines at the end of the chunk
        # are dropped, and a chunk made only of CRs splits into nothing, which would make this function return undef as if the
        # file had ended.
        push @$cachedLines, split(/\r/, $rawLine, -1);
        }

    return shift @$cachedLines;
    }
#
# Function: GetAll
#
# Returns an array of all the remaining lines from the file.  The line breaks will be removed automatically.  Any Unicode BOM
# is skipped when the object is created, so it never appears in the first line.
#
# Lines that an earlier call to <Get()> read from the file but has not returned yet are included at the front of the array.
# Empty lines at the very end of the file are not returned.
#
sub GetAll
    {
    my $self = shift;

    my $filehandle = $self->[LINEREADER_FILEHANDLE];

    # Get() may have split a Classic Mac chunk and still be holding part of it.  Those lines precede whatever is left in the
    # file, so take them first and leave the cache empty.
    my @lines = splice(@{$self->[CACHED_LINES]});

    # Slurp the rest by clearing the record separator rather than sizing a read() with -s.  The file size isn't available for
    # handles that aren't plain files, and it isn't needed just to read to the end.
    my $rawContent = do { local $/ = undef; <$filehandle> };

    # The read yields undef when the handle is already at the end of the file.
    if (defined $rawContent)
        {  push @lines, split(/\r\n|\n|\r/, $rawContent);  }

    return @lines;
    }
1;
|