1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170
|
/** @file
Declaration of Tokenizer, a string tokenizer with array-like and iterator access to its tokens.
@section license License
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
/***************************************/
#ifndef _TOKENIZER_H_
#define _TOKENIZER_H_
/****************************************************************************
*
* Tokenizer.h - A string tokenizer
*
*
*
****************************************************************************/
/**********************************************************
* class Tokenizer
*
* Tokenizes a string, and then allows array like access
*
* The delimiters are determined by the string passed to
* the constructor.
*
* There are three options; the first two control memory handling.
* SHARE_TOKS - this modifies the original string passed in
* through Initialize() and shares its space. NUL characters
* are inserted into the string after each token. Choosing
* this option means the user is responsible for not
* deallocating the string storage before deallocating
* the tokenizer object
* COPY_TOKS - this option copies the original string and
* leaves the original unchanged. The deallocation of the
* original string and the deallocation of the Tokenizer
* object are now independent.
* Note: If neither SHARE_TOKS nor COPY_TOKS is selected, COPY_TOKS
* is the default
* ALLOW_EMPTY_TOKS - If multiple delimiters appear next to each
* other, each delimiter creates a token, some of which
* will be zero length. The default is to skip repeated
* delimiters
*
* Tokenizer(const char* StrOfDelimit) - a string that contains
* the delimiters for tokenizing. This string is copied.
*
* Initialize(char* str, unsigned options) - Submits a string
* to be tokenized according to the memory options listed above
*
* ReUse() - Allows the object to be reused for a new string
* After ReUse() is called, Initialize() can be called safely
* again
*
* operator[index] - returns a pointer to the token at position
* index. If index > numTokens-1, NULL is returned.
* Because of the way tokens are stored, this is an O(n) operation.
* It is very fast though for the first 16 tokens and
* is intended to be used on a small number of tokens
*
* iterFirst(tok_iter_state* state) - Returns the first
* token and initializes the state argument for subsequent
* calls to iterNext. If no tokens exist, NULL is
* returned
*
* iterNext(tok_iter_state* state) - Returns the token after
* the one returned by the previous iterFirst/iterNext call that
* used state. Returns NULL if no more tokens exist.
*
* Note: To iterate through a list using operator[] takes O(n^2) time
* Using iterFirst, iterNext the running time is O(n), so use
* the iteration where possible
*
* count() - returns the number of tokens
*
* setMaxTokens() - sets the maximum number of tokens. Once maxTokens
* is reached, delimiters are ignored and the
* last token is the rest of the string. The argument is
* unsigned, so a negative number converts to a very large
* value, which means no limit on the number of tokens
*
* getMaxTokens() - returns maxTokens. UINT_MAX means no limit
*
* Print() - Debugging method to print out the tokens
*
*******************************************************************/
#include "ts/ink_apidefs.h"
// Option flags for Tokenizer::Initialize(). Each flag occupies its own bit.
// COPY_TOKS: tokenize a private copy of the input, leaving the caller's string unchanged.
#define COPY_TOKS (1u << 0)
// SHARE_TOKS: tokenize the caller's string in place; the caller must keep it alive
// for as long as the Tokenizer is in use.
#define SHARE_TOKS (1u << 1)
// ALLOW_EMPTY_TOKS: adjacent delimiters produce zero-length tokens instead of being skipped.
#define ALLOW_EMPTY_TOKS (1u << 2)
// NOTE(review): ALLOW_SPACES is not described in the file comment above; its exact
// effect is determined by the implementation file - confirm before relying on it.
#define ALLOW_SPACES (1u << 3)
// Number of token pointers stored in each tok_node.
#define TOK_NODE_ELEMENTS 16
// One node of a singly linked list of token pointers. Each node has room for
// TOK_NODE_ELEMENTS tokens; further tokens go into the node reached through `next`.
struct tok_node {
// Token pointer slots held by this node.
char *el[TOK_NODE_ELEMENTS];
// Following node in the list (NOTE(review): presumably NULL on the last node - confirm).
tok_node *next;
};
// Caller-owned cursor passed to Tokenizer::iterFirst() and Tokenizer::iterNext().
struct tok_iter_state {
// List node the cursor currently refers to.
tok_node *node;
// NOTE(review): presumably the slot within node->el that the cursor is at - confirm
// against the implementation.
int index;
};
/**
 * Splits a string into tokens and gives access to them by index
 * (operator[]) or by iteration (iterFirst / iterNext).
 *
 * Indexing is O(n) per lookup, so walking all tokens with operator[] is
 * O(n^2); prefer the iterator pair, which is O(n) overall.
 */
class Tokenizer
{
public:
  /// Creates a tokenizer using the characters in StrOfDelimiters as delimiters.
  /// Per the file comment, the delimiter string is copied.
  inkcoreapi Tokenizer(const char *StrOfDelimiters);
  inkcoreapi ~Tokenizer();

  /// Tokenizes str according to `options` (COPY_TOKS, SHARE_TOKS,
  /// ALLOW_EMPTY_TOKS, ALLOW_SPACES). With SHARE_TOKS the string is modified
  /// in place and must outlive this object.
  /// NOTE(review): the return value is presumably the number of tokens found -
  /// confirm against the implementation.
  unsigned Initialize(char *str, unsigned options);
  inkcoreapi unsigned Initialize(const char *str); // Automatically sets option to copy

  /// Returns the token at position `index`, or NULL when index is past the
  /// last token.
  const char *operator[](unsigned index) const;

  /// Sets the maximum number of tokens. Once the limit is reached, delimiters
  /// are ignored and the last token is the rest of the string.
  void
  setMaxTokens(unsigned max)
  {
    maxTokens = max;
  }

  /// Returns the current token limit; UINT_MAX means no limit.
  unsigned
  getMaxTokens() const
  {
    return maxTokens;
  }

  /// Returns the number of tokens.
  unsigned count() const;
  void Print(); // Debugging print out

  /// Returns the first token and initializes *state for later iterNext()
  /// calls; returns NULL if there are no tokens.
  inkcoreapi const char *iterFirst(tok_iter_state *state);
  /// Returns the token following the one last returned through *state, or
  /// NULL when no tokens remain.
  inkcoreapi const char *iterNext(tok_iter_state *state);

private:
  // Copy assignment and copy construction are declared private and have no
  // definition in this header, so code outside the class cannot copy a
  // Tokenizer.
  Tokenizer &operator=(const Tokenizer &);
  Tokenizer(const Tokenizer &);

  // NOTE(review): presumably returns non-zero when c is one of the
  // characters in strOfDelimit - confirm against the implementation.
  int isDelimiter(char c);
  // NOTE(review): presumably records the token of `length` characters that
  // starts at startAddr - confirm against the implementation.
  void addToken(char *startAddr, int length);
  // Described in the file comment as resetting the object so Initialize()
  // can be called again; note that it is private here.
  void ReUse();

  char *strOfDelimit;      // delimiter characters supplied to the constructor
  tok_node start_node;     // first list node, stored inside the object itself
  unsigned numValidTokens; // NOTE(review): presumably the value reported by count() - confirm
  unsigned maxTokens;      // limit set via setMaxTokens()
  // NOTE(review): declared int although Initialize() takes `unsigned options`;
  // left unchanged because the implementation file may depend on the type.
  int options;
  bool quoteFound; // NOTE(review): meaning not visible from this header - confirm

  // State about where to add the next token
  tok_node *add_node;
  int add_index;
};
#endif
|