File: char_stream.hh

package info (click to toggle)
ruby-unf-ext 0.0.6-2
  • links: PTS, VCS
  • area: main
  • in suites: jessie, jessie-kfreebsd
  • size: 2,128 kB
  • ctags: 147
  • sloc: cpp: 14,043; ruby: 79; makefile: 2
file content (150 lines) | stat: -rw-r--r-- 4,149 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#ifndef UNF_TRIE_CHAR_STREAM_HH
#define UNF_TRIE_CHAR_STREAM_HH

#include <vector>
#include <string>
#include "../util.hh"

namespace UNF {
  namespace Trie {
    // Forward reader over a NUL-terminated byte string.
    // The stream keeps only a pointer into the string; it does not copy or own it.
    class CharStream {
    public:
      CharStream(const char* str) : cur_(str) {}

      // Returns the byte at the cursor and advances past it.
      // At the terminating NUL it returns '\0' and leaves the cursor in place.
      unsigned char read() {
        if (eos())
          return '\0';
        const unsigned char byte = *cur_;
        ++cur_;
        return byte;
      }

      // Byte immediately before the cursor. Reads cur()[-1] unconditionally,
      // so the cursor must not be at the very start of the string.
      unsigned char prev() const { return *(cur_ - 1); }

      // Byte at the cursor, without advancing.
      unsigned char peek() const { return cur_[0]; }

      // Current cursor position.
      const char* cur() const { return cur_; }

      // True once the cursor sits on the terminating NUL.
      bool eos() const { return cur_[0] == '\0'; }

      // Moves the cursor to new_cur (no validation is performed).
      void setCur(const char* new_cur) { cur_ = new_cur; }

    private:
      const char* cur_;
    };

    // Forward reader over the half-open byte range [beg, end).
    // The range is not copied or owned, and it does not have to be NUL-terminated.
    class RangeCharStream {
    public:
      RangeCharStream(const char* beg, const char* end) : cur_(beg), end_(end) {}

      // Returns the byte at the cursor and advances past it.
      // At the end of the range it returns '\0' and leaves the cursor in place.
      unsigned char read() { return eos() ? '\0' : *cur_++; }

      // Byte immediately before the cursor. Reads cur()[-1] unconditionally,
      // so the cursor must not be at the start of the range.
      unsigned char prev() const { return cur_[-1]; }

      // Byte at the cursor, without advancing. Returns '\0' at the end of the
      // range, matching read(), so that *end is never dereferenced: the range
      // may not be followed by a readable byte, and a caller that loops on
      // peek()/read() must see the same end-of-input value from both.
      unsigned char peek() const { return eos() ? '\0' : *cur_; }

      // Current cursor position.
      const char*   cur() const { return cur_; }

      // One-past-the-last position of the range.
      const char*   end() const { return end_; }

      // True once the cursor has reached end().
      bool          eos() const { return cur_ == end_; }

    private:
      const char* cur_;
      const char* end_;
    };

    // Reader that presents two NUL-terminated strings as one logical stream:
    // all bytes of `first` are produced, then all bytes of `second`.
    // Neither string is copied or owned.
    class CompoundCharStream {
    public:
      CompoundCharStream(const char* first, const char* second)
        : beg1(first), beg2(second), cur1(first), cur2(second) {}

      // Returns the next byte and advances; returns '\0' without advancing
      // once both strings are exhausted.
      unsigned char read() { return !eos1() ? read1() : read2(); }

      // Byte at the logical cursor, without advancing.
      unsigned char peek() const { return !eos1() ? *cur1 : *cur2; }

      // Byte immediately before the logical cursor. While still inside the
      // first string, or when nothing has been consumed from the second one,
      // that byte lives in the first string; otherwise in the second.
      // Reads index -1 unconditionally, so at least one byte must have been read.
      unsigned char prev() const { return !eos1() || beg2==cur2 ? cur1[-1] : cur2[-1]; }

      // Pointer to the logical cursor (into the first string until it is
      // exhausted, into the second afterwards).
      const char* cur() const { return !eos1() ? cur1 : cur2; }

      // True when both strings have been fully consumed.
      bool eos() const { return eos1() && eos2(); }

      // True while the cursor is still inside the first string.
      bool within_first() const { return !eos1(); }

      // Total number of bytes consumed from both strings.
      // Each difference is taken inside its own string before the two are
      // added; the unparenthesised form `cur1-beg1 + cur2-beg2` would first
      // compute `cur2 + (cur1-beg1)`, a pointer that can fall outside the
      // second string.
      unsigned offset() const {
        return static_cast<unsigned>((cur1 - beg1) + (cur2 - beg2));
      }

      // Repositions the cursor at p.
      // If p lies in the already-visited part of the first string
      // ([beg1, cur1]) the first cursor is moved there and the second string
      // is rewound to its start; otherwise p is taken as a position in the
      // second string.
      // NOTE(review): the range test compares p with pointers into the first
      // string even when p points into the second one; this presumably relies
      // on the two strings not overlapping in memory -- confirm with callers.
      void setCur(const char* p) {
        if(beg1 <= p && p <= cur1) {
          cur1=p;
          cur2=beg2;
        } else {
          cur2=p;
        }
      }

    protected:
      // Per-string read helpers: return '\0' without advancing at the terminator.
      unsigned char read1() { return eos1() ? '\0' : *cur1++; }
      unsigned char read2() { return eos2() ? '\0' : *cur2++; }

      // Per-string end tests: true when the cursor is on the terminating NUL.
      bool eos1() const { return *cur1=='\0'; }
      bool eos2() const { return *cur2=='\0'; }

    protected:
      const char* beg1;  // start of the first string
      const char* beg2;  // start of the second string
      const char* cur1;  // cursor inside the first string
      const char* cur2;  // cursor inside the second string
    };

    // CompoundCharStream extended with the bookkeeping used while composing
    // characters: a table of canonical combining classes indexed by offset(),
    // a caller-supplied buffer that collects skipped bytes, and a "last valid
    // point" that the cursor can be rewound to.
    class CharStreamForComposition : public CompoundCharStream {
    public:
      // first / second      : the two NUL-terminated strings to read.
      // canonical_classes   : combining classes, looked up by offset(); kept by
      //                       reference, so it must outlive this stream.
      // buf                 : buffer that receives skipped bytes; kept by
      //                       reference and modified by this stream.
      // The mark starts at the beginning of `first` with no skipped bytes, so
      // reset_at_marked_point() and append_skipped_chars_to_str() are well
      // defined even before the first mark is taken.
      CharStreamForComposition (const char* first, const char* second,
                                const std::vector<unsigned char>& canonical_classes,
                                std::string& buf)
        : CompoundCharStream(first, second), classes(canonical_classes), skipped(buf),
          skipped_tail(0), marked_point(first)
      {}

      // Empties the skipped-bytes buffer and forgets the marked length.
      void init_skipinfo() {
        skipped.clear();
        skipped_tail = 0;
      }

      // Remembers the current cursor and the current length of the
      // skipped-bytes buffer.
      void mark_as_last_valid_point() {
        skipped_tail = skipped.size();
        marked_point = cur();
      }

      // Moves the cursor back to the position remembered by the last mark.
      void reset_at_marked_point() {
        setCur(marked_point);
      }

      // Appends to s the bytes consumed since beg. While the cursor is still in
      // the first string that is [beg, cur()); once the first string is
      // exhausted it is [beg, end of first) followed by the consumed part of
      // the second string.
      void append_read_char_to_str(std::string& s, const char* beg) const {
        if(eos1()==false) {
          s.append(beg, cur());
        } else {
          s.append(beg,  cur1);
          s.append(beg2, cur());
        }
      }

      // Appends to s the skipped bytes that had been collected when the last
      // mark was taken.
      void append_skipped_chars_to_str(std::string& s) const {
        s.append(skipped.begin(), skipped.begin()+skipped_tail);
      }

      // Combining class recorded for the current offset, or 0 when the offset
      // is beyond the table.
      unsigned char get_canonical_class() const {
        const unsigned pos = offset();
        return pos < classes.size() ? classes[pos] : 0;
      }

      // Advances to a later position whose combining class is greater than
      // both prev_class and the class recorded for the byte just before that
      // position. On success the bytes [ppp, cur()) are appended to the
      // skipped buffer and true is returned. Returns false when the search is
      // abandoned (class 0 reached, or the blocked case below).
      //
      // Written as a loop rather than recursion so that a long run of
      // non-matching combining characters cannot grow the call stack.
      bool next_combining_char(unsigned char prev_class, const char* ppp) {
        for(;;) {
          // Move to the first byte of the next UTF-8 character.
          while(Util::is_utf8_char_start_byte(peek()) == false)
            read();

          const unsigned char mid_class = get_prev_canonical_class();
          const unsigned char cur_class = get_canonical_class();

          if(prev_class==0 && mid_class==0 && cur_class!=0)
            return false;

          if(prev_class < cur_class && mid_class < cur_class) {
            skipped.append(ppp, cur());
            return true;
          }

          if(cur_class == 0)
            return false;

          // Not usable: consume one byte and examine the following character.
          read();
        }
      }

    private:
      // Combining class recorded for the byte just before the current offset,
      // or 0 when there is no such byte or it is beyond the table.
      unsigned char get_prev_canonical_class() const {
        const unsigned pos = offset();
        if(pos == 0)
          return 0;  // nothing precedes the start; do not rely on unsigned wrap-around
        return pos-1 < classes.size() ? classes[pos-1] : 0;
      }

    private:
      const std::vector<unsigned char>& classes;  // combining classes by offset()
      std::string& skipped;                       // collected skipped bytes (caller's buffer)
      unsigned skipped_tail;                      // length of `skipped` at the last mark
      const char* marked_point;                   // cursor position at the last mark
    };
  }
}

#endif