1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232
|
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
//
// rbbitblb.h
//
/*
**********************************************************************
* Copyright (c) 2002-2016, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
*/
#ifndef RBBITBLB_H
#define RBBITBLB_H
#include <_foundation_unicode/utypes.h>
#if !UCONFIG_NO_BREAK_ITERATION
#include <_foundation_unicode/uobject.h>
#include <_foundation_unicode/rbbi.h>
#include "rbbidata.h"
#include "rbbirb.h"
#include "rbbinode.h"
U_NAMESPACE_BEGIN
class RBBIRuleScanner;
class RBBIRuleBuilder;
class UVector32;
//
// class RBBITableBuilder is part of the RBBI rule compiler.
// It builds the state transition table used by the RBBI runtime
// from the expression syntax tree generated by the rule scanner.
//
// This class is part of the RBBI implementation only.
// There is no user-visible public API here.
//
class RBBITableBuilder : public UMemory {
public:
RBBITableBuilder(RBBIRuleBuilder *rb, RBBINode **rootNode, UErrorCode &status);
~RBBITableBuilder();
void buildForwardTable();
/** Return the runtime size in bytes of the built state table. */
int32_t getTableSize() const;
/** Fill in the runtime state table. Sufficient memory must exist at the specified location.
*/
void exportTable(void *where);
/** Use 8 bits to encode the forward table */
bool use8BitsForTable() const;
/**
* Find duplicate (redundant) character classes. Begin looking with categories.first.
* Duplicate, if found are returned in the categories parameter.
* This is an iterator-like function, used to identify character classes
* (state table columns) that can be eliminated.
* @param categories in/out parameter, specifies where to start looking for duplicates,
* and returns the first pair of duplicates found, if any.
* @return true if duplicate char classes were found, false otherwise.
*/
bool findDuplCharClassFrom(IntPair *categories);
/** Remove a column from the state table. Used when two character categories
* have been found equivalent, and merged together, to eliminate the unneeded table column.
*/
void removeColumn(int32_t column);
/**
* Check for, and remove duplicate states (table rows).
* @return the number of states removed.
*/
int32_t removeDuplicateStates();
/** Build the safe reverse table from the already-constructed forward table. */
void buildSafeReverseTable(UErrorCode &status);
/** Return the runtime size in bytes of the built safe reverse state table. */
int32_t getSafeTableSize() const;
/** Fill in the runtime safe state table. Sufficient memory must exist at the specified location.
*/
void exportSafeTable(void *where);
/** Use 8 bits to encode the safe reverse table */
bool use8BitsForSafeTable() const;
private:
void calcNullable(RBBINode *n);
void calcFirstPos(RBBINode *n);
void calcLastPos(RBBINode *n);
void calcFollowPos(RBBINode *n);
void calcChainedFollowPos(RBBINode *n, RBBINode *endMarkNode);
void bofFixup();
void buildStateTable();
void mapLookAheadRules();
void flagAcceptingStates();
void flagLookAheadStates();
void flagTaggedStates();
void mergeRuleStatusVals();
/**
* Merge redundant state table columns, eliminating character classes with identical behavior.
* Done after the state tables are generated, just before converting to their run-time format.
*/
int32_t mergeColumns();
void addRuleRootNodes(UVector *dest, RBBINode *node);
/**
* Find duplicate (redundant) states, beginning at the specified pair,
* within this state table. This is an iterator-like function, used to
* identify states (state table rows) that can be eliminated.
* @param states in/out parameter, specifies where to start looking for duplicates,
* and returns the first pair of duplicates found, if any.
* @return true if duplicate states were found, false otherwise.
*/
bool findDuplicateState(IntPair *states);
/** Remove a duplicate state.
* @param duplStates The duplicate states. The first is kept, the second is removed.
* All references to the second in the state table are retargeted
* to the first.
*/
void removeState(IntPair duplStates);
/** Find the next duplicate state in the safe reverse table. An iterator function.
* @param states in/out parameter, specifies where to start looking for duplicates,
* and returns the first pair of duplicates found, if any.
* @return true if a duplicate pair of states was found.
*/
bool findDuplicateSafeState(IntPair *states);
/** Remove a duplicate state from the safe table.
* @param duplStates The duplicate states. The first is kept, the second is removed.
* All references to the second in the state table are retargeted
* to the first.
*/
void removeSafeState(IntPair duplStates);
// Set functions for UVector.
// TODO: make a USet subclass of UVector
void setAdd(UVector *dest, UVector *source);
UBool setEquals(UVector *a, UVector *b);
void sortedAdd(UVector **dest, int32_t val);
public:
#ifdef RBBI_DEBUG
void printSet(UVector *s);
void printPosSets(RBBINode *n /* = nullptr */);
void printStates();
void printRuleStatusTable();
void printReverseTable();
#else
#define printSet(s)
#define printPosSets(n)
#define printStates()
#define printRuleStatusTable()
#define printReverseTable()
#endif
private:
RBBIRuleBuilder *fRB;
RBBINode *&fTree; // The root node of the parse tree to build a
// table for.
UErrorCode *fStatus;
/** State Descriptors, UVector<RBBIStateDescriptor> */
UVector *fDStates; // D states (Aho's terminology)
// Index is state number
// Contents are RBBIStateDescriptor pointers.
/** Synthesized safe table, UVector of UnicodeString, one string per table row. */
UVector *fSafeTable;
/** Map from rule number (fVal in look ahead nodes) to sequential lookahead index. */
UVector32 *fLookAheadRuleMap = nullptr;
/* Counter used when assigning lookahead rule numbers.
* Contains the last look-ahead number already in use.
* The first look-ahead number is 2; Number 1 (ACCEPTING_UNCONDITIONAL) is reserved
* for non-lookahead accepting states. See the declarations of RBBIStateTableRowT. */
int32_t fLASlotsInUse = ACCEPTING_UNCONDITIONAL;
RBBITableBuilder(const RBBITableBuilder &other) = delete; // forbid copying of this class
RBBITableBuilder &operator=(const RBBITableBuilder &other) = delete; // forbid copying of this class
};
//
// RBBIStateDescriptor - The DFA is constructed as a set of these descriptors,
// one for each state.
class RBBIStateDescriptor : public UMemory {
public:
UBool fMarked;
uint32_t fAccepting;
uint32_t fLookAhead;
UVector *fTagVals;
int32_t fTagsIdx;
UVector *fPositions; // Set of parse tree positions associated
// with this state. Unordered (it's a set).
// UVector contents are RBBINode *
UVector32 *fDtran; // Transitions out of this state.
// indexed by input character
// contents is int index of dest state
// in RBBITableBuilder.fDStates
RBBIStateDescriptor(int maxInputSymbol, UErrorCode *fStatus);
~RBBIStateDescriptor();
private:
RBBIStateDescriptor(const RBBIStateDescriptor &other) = delete; // forbid copying of this class
RBBIStateDescriptor &operator=(const RBBIStateDescriptor &other) = delete; // forbid copying of this class
};
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
#endif
|