/*
* token.c -- tokenize strings, a la strtok(3)
*
* Copyright (C) 2007 Oracle. All rights reserved.
* Copyright (C) 2007 Chuck Lever <chuck.lever@oracle.com>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program; if not, write to the
* Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
/*
* We've constructed a simple string tokenizer that is better than
* strtok(3) in several ways:
*
* 1. It doesn't interfere with ongoing tokenizations using strtok(3).
* 2. It's re-entrant so we can nest tokenizations, if needed.
* 3. It can handle delimiters that appear inside double-quoted strings
* (needed for 'context="sd,fslj"').
* 4. It doesn't alter the string we're tokenizing, so it can work
* on write-protected strings as well as writable strings.
*/
#include <ctype.h>
#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <errno.h>
#include "token.h"
/*
 * Tokenizer context object: records how far tokenization has progressed
 * through the string handed to init_tokenizer().
 */
struct tokenizer_state {
/* current scan position; next_token() sets this to NULL once it stops */
char *pos;
/* the single character that separates tokens */
char delimiter;
/* 0 if no error; otherwise the errno value (EINVAL or ENOMEM) recorded */
int error;
};
/*
 * Move the tokenizer's position past any run of consecutive delimiter
 * characters.  The position is left on the first character that is not
 * the delimiter, or on the terminating NUL if the string runs out first.
 */
static void find_next_nondelimiter(struct tokenizer_state *tstate)
{
	const char delim = tstate->delimiter;
	char *cursor = tstate->pos;

	for (; *cursor != '\0'; cursor++) {
		if (*cursor != delim)
			break;
	}

	tstate->pos = cursor;
}
/*
 * Scan forward from the current position until reaching a delimiter
 * that is not enclosed in double quotes, or the end of the string.
 *
 * Returns the number of characters scanned.  If the string ends while
 * a double quote is still open, EINVAL is recorded in the tokenizer
 * context and zero is returned.
 *
 * Side effect: tstate->pos is left on the delimiter that ended the scan,
 * or on the terminating NUL.
 */
static size_t find_next_delimiter(struct tokenizer_state *tstate)
{
	char *const start = tstate->pos;
	char *scan;
	int in_quotes = 0;

	for (scan = start; *scan != '\0'; scan++) {
		/* every double quote toggles the "inside quotes" state */
		if (*scan == '"')
			in_quotes = !in_quotes;

		/* a delimiter only ends the token when outside quotes */
		if (!in_quotes && *scan == tstate->delimiter)
			break;
	}
	tstate->pos = scan;

	/* the string ended before the closing quote was seen */
	if (in_quotes) {
		tstate->error = EINVAL;
		return 0;
	}

	return (size_t)(scan - start);
}
/**
* next_token - find the next token in a string and return it
* @tstate: pointer to tokenizer context object
*
* Returns the next token found in the current string.
* Returns NULL if there are no more tokens in the string,
* or if an error occurs.
*
* Side effect: tstate is updated
*/
char *next_token(struct tokenizer_state *tstate)
{
char *token;
size_t len;
if (!tstate || !tstate->pos || tstate->error)
return NULL;
find_next_nondelimiter(tstate);
if (*tstate->pos == '\0')
goto fail;
token = tstate->pos;
len = find_next_delimiter(tstate);
if (len) {
token = strndup(token, len);
if (token)
return token;
tstate->error = ENOMEM;
}
fail:
tstate->pos = NULL;
return NULL; /* no tokens found in this string */
}
/**
* init_tokenizer - return an initialized tokenizer context object
* @string: pointer to C string
* @delimiter: single character that delimits tokens in @string
*
* Returns an initialized tokenizer context object
*/
struct tokenizer_state *init_tokenizer(char *string, char delimiter)
{
struct tokenizer_state *tstate;
tstate = malloc(sizeof(*tstate));
if (tstate) {
tstate->pos = string;
tstate->delimiter = delimiter;
tstate->error = 0;
}
return tstate;
}
/**
* tokenizer_error - digs error value out of tokenizer context
* @tstate: pointer to tokenizer context object
*
*/
int tokenizer_error(struct tokenizer_state *tstate)
{
return tstate ? tstate->error : 0;
}
/**
 * end_tokenizer - free a tokenizer context object
 * @tstate: pointer to tokenizer context object; may be NULL
 *
 * Releases only the context object itself.  Neither the string being
 * tokenized nor any token previously returned by next_token() is freed
 * here.
 */
void end_tokenizer(struct tokenizer_state *tstate)
{
/* free(3) accepts NULL, so no guard is needed */
free(tstate);
}
|