File: InFileStream.cpp

package info (click to toggle)
clustalx 2.1%2Blgpl-8
  • links: PTS, VCS
  • area: main
  • in suites: buster
  • size: 3,320 kB
  • sloc: cpp: 40,050; sh: 163; xml: 102; makefile: 16
file content (143 lines) | stat: -rw-r--r-- 3,689 bytes parent folder | download | duplicates (12)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
/**
 * Author: Nigel Brown
 * 
 * Copyright (c) 2007 Des Higgins, Julie Thompson and Toby Gibson.  
 */
/**
 * InFileStream subclasses std::ifstream, adding a check for the end-of-line
 * character convention in the input file. This is then used by the getline()
 * member as the line delimiter, unless the caller supplies an explicit
 * delimiter.
 *
 * Note: This is an ugly workaround; at present various operations repeatedly
 * construct/destruct an instance and open/close a sequence file up to 12
 * times! A cleaner class will probably derive this class from something like
 * 'istream' aggregating a 'filebuf' under control of istream::seekg().
 *
 * Created: 09-02-07,Nigel Brown(EMBL)
 * 
 * Changes:
 * Mark Larkin 13-2-07: I removed the dynamic cast from the getline functions. 
 ***************************************************************************/
#ifdef HAVE_CONFIG_H
    #include "config.h"
#endif
#include <string>
#include <fstream>
#include <iostream>
#include "InFileStream.h"
using namespace std;

// End-of-line bytes that findDelimiter() looks for, written as explicit
// character codes.
const char LF = '\x0a';  // line feed (decimal 10)
const char CR = '\x0d';  // carriage return (decimal 13)

/**
 * Default constructor: creates a stream that is not attached to any file.
 * The line delimiter starts out as '\n'; open() replaces it with the
 * delimiter detected in the opened file.
 */
InFileStream::InFileStream() :
    std::ifstream(), delim('\n')
{
}

/**
 * Construct the stream and open the named file for reading.
 *
 * The line delimiter is given a defined default ('\n') before anything else,
 * so it is never left unset when the file cannot be opened. Delimiter
 * detection is only attempted when the open succeeded, mirroring open().
 *
 * @param filename path of the file to open; also remembered so that the
 *                 delimiter scan can reopen the same file.
 */
InFileStream::InFileStream(const char *filename) :
    ifstream(filename, ios::in), filename(filename)
{
    delim = '\n'; // default, same as the no-argument constructor

    // Nothing to scan if the file could not be opened; the caller sees the
    // failure through the usual stream state (fail()/is_open()).
    if (ifstream::fail())
        return;

    delim = findDelimiter();
}

//- copy-constructor: can't copy superclass private members
//- InFileStream::InFileStream(const InFileStream &copy) :
//-     ifstream(static_cast<const ifstream&>(copy))
//- {
//-     cout << "InFileStream() constructor 3" << endl;
//-     delim = copy.delim;
//- }

void InFileStream::open(const char *filename) 
{

    this->filename = filename;
    ifstream::open(filename, ios::in);
    if  (ifstream::fail())
        return;
    delim = findDelimiter();
}

//not necessary, but for symmetry to open()
void InFileStream::close() 
{
    ifstream::close();   
}


//getline with stored delimiter
std::istream& InFileStream::getline(char *s, streamsize n) 
{
    return ifstream::getline(s, n, delim);
}

//getline with caller supplied delimiter
std::istream& InFileStream::getline(char *s, streamsize n, char delim) 
{
    return ifstream::getline(s, n, delim);
}


/**
 * Determine which character getline() should use as the line delimiter for
 * the file named by 'filename', store it in 'delim' and return it.
 *
 * The file is opened a second time through a private stream and read up to
 * the first character that follows the first run of end-of-line characters.
 * Which of CR and LF occurred in that run decides the result:
 *   - CR only                     -> '\r'  (Mac System 9)
 *   - LF only                     -> '\n'  (UNIX)
 *   - both CR and LF              -> '\n'  (DOS/Windows)
 *   - neither (short/empty file)  -> '\n'  (default)
 *
 * If the file cannot be opened, 'delim' falls back to '\n' as well, so the
 * function always returns a defined value, even when it is called before
 * 'delim' has been assigned.
 *
 * History:
 * Mark 24-1-2007: added findDelimiter to determine if '\r' or '\n' will be
 * used as the line delimiter when parsing the file.
 * 25-01-07,Nigel Brown(EMBL): changed body of loop to check successive chars
 * in case of DOS/Windows.
 * 09-02-07,Nigel Brown(EMBL): moved member into new InFileStream subclassed
 * from std::ifstream, so this is called automatically for any file reader
 * that uses InFileStream in place of std::ifstream.
 *
 * @return the delimiter now stored in 'delim'.
 */
char InFileStream::findDelimiter()
{
    const int SEEN_CR = 1;
    const int SEEN_LF = 2;

    // Every outcome except "CR only" uses '\n', including failure to open.
    delim = '\n';

    // A freshly opened input stream is already positioned at the start of
    // the file, and is open whenever fail() is false.
    ifstream in(filename.c_str(), ios::in);
    if (in.fail())
        return delim;

    // Collect the end-of-line characters of the first line break; stop at
    // the first ordinary character after at least one of them was seen.
    int seen = 0;
    char c;
    while (in.get(c)) {
        if (c == CR)
            seen |= SEEN_CR;
        else if (c == LF)
            seen |= SEEN_LF;
        else if (seen)
            break;
    }

    if (seen == SEEN_CR)
        delim = '\r';

    return delim; // 'in' is closed by its destructor
}