1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<title>Tutorial on ustr APIs</title>
<link rel="stylesheet" type="text/css" href="f_c-1.0.css">
<style>
A { background: #FFF; color: #44F; }
A:hover { color: #20b2aa; }
A.anchor { color: #000; }
A.anchor:hover { color: #000; }
P { text-indent: 2em; }
body { background: #FFF; }
tr.heading { background: #DDD; }
tr.heading:hover { background: #ccc; }
table.conv { border-bottom: solid; margin-bottom: 2em; }
table.conv tr.r1 { background: #eee; }
table.conv tr.r2 { }
table.conv tr.r1:hover { background: #ccc; }
table.conv tr.r2:hover { background: #ccc; }
table.fig1 { text-align: center; }
td.ent1 { color: #080; }
td.ent2 { color: #800; }
span.blk a.anc { display: none; }
span.blk:hover a.anc { display: inline; color: #d7f; }
span.lastmod { text-size: 50%; }
</style>
</head>
<body>
<h1>Tutorial on ustr APIs</h1>
<p>
ustr is a string library designed to be very space efficient, and easily integrate with "normal" C string handling code. This means that it is designed to allow the programmer to create the "strings" from allocated memory, the stack and read-only memory.
</p>
<p>
Also note that all error checking is included in every example. The examples might be somewhat easier to read if it weren't included; however, including error checking is what the code must look like in a real application.
</p>
<ul>
<li><a href="#skel">Skel</a>: Skeleton template for tutorial snippets.</li>
<li><a href="#hw_c">Hello World - const</a>: A very simple hello world, using constant strings.</li>
<li><a href="#hw_d">Hello World - dup</a>: Create the string.</li>
<li><a href="#hw_m">Hello World - multiple</a>: Create the string from multiple functions.</li>
<li><a href="#hw_s">Hello World - stack</a>: Use the stack instead of the heap.</li>
<li><a href="#fgrep">fgrep</a>: Simple fgrep.</li>
<li><a href="#mkdir">mkdir</a>: Simple mkdir -p.</li>
<li><a href="#txt2html">Text to html converter</a>: Convert plain text into similar html.</li>
</ul>
<a class="anchor" id="skel">
<h2> Skeleton code required to run the tutorial examples </h2>
</a>
<p> This is the skeleton code required to run all the tutorial examples,
<a href="http://www.and.org/ustr/src/examples/">the full versions</a> may
require a few more headers though. </p>
<pre class="c2html">
#<span class="cppinclude">include</span> <span class="str">"ustr.h"</span>
#<span class="cppinclude">include</span> &lt;errno.h&gt;
<span class="static">static</span> <span class="void">void</span> die(<span class="const">const</span> <span class="char">char</span> *prog_name, <span class="const">const</span> <span class="char">char</span> *msg)
{
fprintf(<span class="stderr">stderr</span>, <span class="str">"%s: %s\n"</span>, prog_name, msg);
<span class="exit">exit</span> (<span class="exitfail">EXIT_FAILURE</span>);
}
</pre>
<a class="anchor" id="hw_c">
<h2> Hello World - const </h2>
</a>
<p> This example is the simplest possible, basically being the same as
calling puts(). Note however that the length of the string is passed
through to the IO function without any compiler magic by using the
<a href="functions.html#USTR1_CHK">USTR1_CHK (checked
constant ustr creation)</a> macro (if it looks a little weird at first you
can use the
<a href="http://www.and.org/ustr/src/examples/custr.c">custr</a> example
program, to create constant ustr's automatically).
</p>
<p> Note: Although the USTR1_CHK() macro is handed constant data and should be
able to determine at compile time whether the length check is correct, GCC
doesn't think this is constant and so for file scope variables you'll need to
use the <a href="functions.html#USTR1">USTR1 (non-checking constant ustr
creation)</a> macro. Or even the simple
<a href="functions.html#USTR">USTR("")</a> macro for empty ustr strings.</p>
<pre class="c2html">
<span class="comment">/* simplest, just "create" a Ustr from a constant string */</span>
<span class="static">static</span> <span class="void">void</span> hello_world_one(<span class="void">void</span>)
{
Ustr *hello_world = USTR1_CHK(\xC, <span class="str">"Hello world!"</span>);
<span class="if">if</span> (!ustr_io_putfileline(&hello_world, <span class="stdout">stdout</span>))
die(<span class="str">"hello_world"</span>, strerror(<span class="errno">errno</span>));
}
</pre>
<a class="anchor" id="hw_d">
<h2> Hello World - dup </h2>
</a>
<p> This example creates the string before writing it out. Note that although
the ustr is allocated, it doesn't need to be free'd once it has been written
out: ustr_io_putfileline() removes the data it writes from the ustr, and a
default configured string of zero length is represented by "".</p>
<pre class="c2html">
<span class="static">static</span> <span class="void">void</span> hello_world_two(<span class="void">void</span>)
{ <span class="comment">/* next, create a Ustr from a printf() like format. This needs to be
* free'd. */</span>
Ustr *out = ustr_dup_fmt(<span class="str">"%s %s"</span>, <span class="str">"Hello"</span>, <span class="str">"world!"</span>);
<span class="if">if</span> (!out || !ustr_io_putfileline(&out, <span class="stdout">stdout</span>))
die(<span class="str">"hello_world"</span>, strerror(<span class="errno">errno</span>));
}
</pre>
<a class="anchor" id="hw_m">
<h2> Hello World - multiple parts</h2>
</a>
<p> This example creates the string using multiple functions. The interesting
part to notice is the call to ustr_enomem(), this checks if any allocation
failures have happened to this particular ustr. Also note that we create
a specifically configured ustr, which includes a size and a 4 byte reference
count.</p>
<pre class="c2html">
<span class="static">static</span> <span class="void">void</span> hello_world_three(<span class="void">void</span>)
{ <span class="comment">/* manually create a Ustr, from multiple parts. Often "significantly faster"
* than using ustr_*_fmt(), due to printf overhead. Still needs to allocate
* memory, and maybe resize it. Still need to free it. */</span>
Ustr *hello = USTR1(\5, <span class="str">"Hello"</span>);
Ustr *sp = USTR1(\1, <span class="str">" "</span>);
Ustr *world = USTR1(\6, <span class="str">"world!"</span>);
Ustr *out = ustr_dupx_empty(1, 4, <span class="false">USTR_FALSE</span>, <span class="false">USTR_FALSE</span>);
<span class="if">if</span> (!out)
die(<span class="str">"hello_world"</span>, strerror(ENOMEM));
ustr_add(&out, hello);
ustr_add(&out, sp);
ustr_add(&out, world);
<span class="if">if</span> (ustr_enomem(out)) <span class="comment">/* check all 3 additions at once */</span>
die(<span class="str">"hello_world"</span>, strerror(ENOMEM));
<span class="if">if</span> (!ustr_io_putfileline(&out, <span class="stdout">stdout</span>))
die(<span class="str">"hello_world"</span>, strerror(<span class="errno">errno</span>));
ustr_free(out);
}
</pre>
<a class="anchor" id="hw_s">
<h2> Hello World - stack based allocation </h2>
</a>
<p> This example also creates a string from multiple functions, however the
storage for the ustr is on the stack and so we don't need to de-allocate the
ustr (although, as with the constant ustr, it does no harm).</p>
<pre class="c2html">
<span class="static">static</span> <span class="void">void</span> hello_world_four(<span class="void">void</span>)
{ <span class="comment">/* manually create a Ustr, but use "auto" allocated storage
* (stack instead of heap). As long as you don't use more than ustr_size()
* you don't need to free. Also note that ustr_dup() will now always copy. */</span>
Ustr *sp = USTR1(\1, <span class="str">" "</span>);
Ustr *world = USTR1(\6, <span class="str">"world!"</span>);
<span class="char">char</span> buf_out[1024] = USTR_BEG_FIXED2 <span class="str">"Hello"</span>;
Ustr *out = USTR_SC_INIT_AUTO(buf_out, <span class="true">USTR_TRUE</span>, 5);
ustr_add(&out, sp);
ustr_add(&out, world);
<span class="comment">/* in this case we know !ustr_enomem() as there is more than enough space */</span>
<span class="if">if</span> (!ustr_io_putfileline(&out, <span class="stdout">stdout</span>))
die(<span class="str">"hello_world"</span>, strerror(<span class="errno">errno</span>));
<span class="comment">/* ustr_free() not needed, because nothing was allocated.
* Although it's often good to call it anyway, as it does no harm. */</span>
}
</pre>
<a class="anchor" id="fgrep">
<h2> fgrep </h2>
</a>
<p> This example works like GNU's fgrep --color. There are two main things to
notice here. The first is that searching, or searching and replacing with a
colourized variant, and reading lines are all just ustr API calls. The second
is that the line data is allocated on the stack, by default.
</p>
<p> Both of these significantly improve the ease of use and speed of the
resulting code, as ustr_io_getline() is significantly faster than fgets()
(because fgets() does not return the length of the data it read) and
allocating from the stack can be a huge performance win (note that all
functions, including ustr_free(), do the correct thing ... so we don't suffer
complexity for that performance).
</p>
<pre class="c2html">
<span class="static">static</span> <span class="int">int</span> fgrep(Ustr **ps1, Ustr *fgrep_srch, Ustr *fgrep_repl,
<span class="int">int</span> first_only)
{
<span class="sizet">size_t</span> num = <span class="num0">0</span>;
<span class="if">if</span> (fgrep_repl)
num = ustr_srch_fwd(*ps1, <span class="num0">0</span>, fgrep_srch);
<span class="else">else</span> <span class="if">if</span> (!(num = ustr_replace(ps1, fgrep_srch, fgrep_repl,
!first_only)) && <span class="errno">errno</span>)
die(<span class="str">"fgrep"</span>, strerror(ENOMEM));
<span class="if">if</span> (!num)
ustr_sc_del(ps1);
<span class="return">return</span> (!!num);
}
<span class="static">static</span> <span class="void">void</span> fgrep_fp_loop(FILE *in,
Ustr *fgrep_srch, Ustr *fgrep_repl)
{
<span class="char">char</span> buf[USTR_SIZE_FIXED(160)]; <span class="comment">/* enough for two "normal" lines,
after that we alloc. */</span>
Ustr *line = USTR_SC_INIT_AUTO(buf, <span class="false">USTR_FALSE</span>, <span class="num0">0</span>);
<span class="while">while</span> (ustr_io_getline(&line, in))
{
<span class="if">if</span> (!fgrep(&line, fgrep_srch, fgrep_repl, <span class="false">USTR_FALSE</span>))
ustr_sc_del(&line);
else <span class="if">if</span> (!ustr_io_putfile(&line, <span class="stdout">stdout</span>))
die(<span class="str">"fgrep"</span>, strerror(<span class="errno">errno</span>));
<span class="if">if</span> (line != USTR(buf)) <span class="comment">/* re-init */</span>
ustr_sc_free2(&line, USTR_SC_INIT_AUTO(buf, <span class="false">USTR_FALSE</span>, <span class="num0">0</span>));
}
<span class="if">if</span> (<span class="errno">errno</span>)
die(<span class="str">"fgrep"</span>, strerror(<span class="errno">errno</span>));
ustr_free(line);
}
</pre>
<a class="anchor" id="mkdir">
<h2> mkdir -p function</h2>
</a>
<p> This example shows how a single function can take both an allocated (and
modifiable) ustr and a constant string ustr, to efficiently modify data.</p>
<pre class="c2html">
<span class="static">static</span> <span class="int">int</span> fu__mkdir_p(<span class="const">const</span> Ustr *s1, <span class="int">int</span> mode, <span class="sizet">size_t</span> off, <span class="int">int</span> ret)
{
Ustr *allocd = USTR_NULL;
<span class="char">char</span> *ptr = <span class="null">NULL</span>;
<span class="if">if</span> (mkdir(ustr_cstr(s1), mode) != <span class="numm1">-1</span>)
<span class="return">return</span> (ret + 1);
<span class="switch">switch</span> (<span class="errno">errno</span>)
{
<span class="case">case</span> EEXIST: <span class="return">return</span> (ret);
<span class="case">case</span> ENOENT: <span class="break">break</span>;
<span class="default">default</span>: <span class="return">return</span> (<span class="numm1">-1</span>);
}
<span class="if">if</span> ((off = ustr_srch_chr_rev(s1, off, <span class="chr">'/'</span>)) <= 1)
{
<span class="errno">errno</span> = EINVAL;
<span class="return">return</span> (<span class="numm1">-1</span>);
}
--off; <span class="comment">/* NOTE: offset moves from beg. to end */</span>
<span class="if">if</span> (!ustr_owner(s1))
{ <span class="comment">/* do it this way, so we can pass constant Ustr's to this function
* and don't use ustr_sc_ensure_owner() so that we don't release a
* reference */</span>
<span class="if">if</span> (!(allocd = ustr_dup_buf(ustr_cstr(s1), ustr_len(s1))))
<span class="return">return</span> (<span class="numm1">-1</span>); <span class="comment">/* errno == ENOMEM, done by ustr */</span>
s1 = allocd;
}
ptr = ustr_wstr((Ustr *)s1);
ptr[off] = <span class="num0">0</span>;
<span class="if">if</span> ((ret = fu__mkdir_p(s1, mode, ustr_len(s1) - off, ret + 1)) != <span class="numm1">-1</span>)
{
ptr[off] = <span class="chr">'/'</span>;
<span class="if">if</span> (mkdir(ustr_cstr(s1), mode) == <span class="numm1">-1</span>)
ret = <span class="numm1">-1</span>;
}
ustr_free(allocd);
<span class="return">return</span> (ret);
}
<span class="comment">/* This returns -1, on error, or the number of directories created. */</span>
<span class="static">static</span> <span class="int">int</span> mkdir_p(<span class="const">const</span> Ustr *s1, <span class="int">int</span> mode)
{
<span class="return">return</span> (fu__mkdir_p(s1, mode, <span class="num0">0</span>, <span class="num0">0</span>));
}
</pre>
<p> In other words:</p>
<pre class="c2html">
mkdir_p(USTR1(\x9, <span class="str">"12/45/789"</span>), 0700)
</pre>
<p>...will create/free a single ustr for all 3 of the separate paths
required.</p>
<a class="anchor" id="txt2html">
<h2> text into similar looking html converter</h2>
</a>
<p> This is a functioning program showing how you can do the equivalent of the following perl code (written by tchrist@perl.com - Sunday, December 19th, 1999):</p>
<pre class="c2html">
<span class="comment">/*
# first kill all the tabs
1 while s{ \t + }
{ " " x (length($&)*8 - length($`)%8) }ex;
# then the four standard naughty bits
s/&amp;/&amp;amp;/g; # must remember to do this one first!
s/&lt;/&amp;lt;/g; # this is the most important one
s/&gt;/&amp;gt;/g; # don't close too early
s/"/&amp;quot;/g; # only in embedded tags, i guess
# make lines break where they should
s/^\s*$/&lt;P&gt;/ || s/$/&lt;BR&gt;/;
# make sure spaces aren't squishticated so we
# can do indentation and properly align comments
s/( {2,})/'&amp;nbsp;' x length($1)/ge;
*/</span>
</pre>
<p> The interesting points here are how simple most of the ustr code is, tab
conversion is 2 lines in the original perl and 5 (1 could be dropped for a
speed loss) with ustr. The "four std. naughty bits" take a single line in
both perl and ustr. The only major difference is in the final perl 1 liner,
which is 8 interesting lines (due to the two or more spaces constraint, or
we could just use <a href="functions.html#ustr_replace">ustr_replace()</a>).
</p>
<p> It's also worth noting that the ustr version is <b>significantly</b> faster
than the smaller perl code.
</p>
<pre class="c2html">
<span class="static">static</span> <span class="void">void</span> txt2html(Ustr **pline)
{
Ustr *line = *pline;
<span class="sizet">size_t</span> tab_pos = <span class="num0">0</span>;
<span class="sizet">size_t</span> tab_off = <span class="num0">0</span>;
<span class="int">int</span> has_ret = <span class="num0">0</span>;
<span class="comment">/* convert tabs to spaces */</span>
<span class="while">while</span> ((tab_pos = ustr_srch_chr_fwd(line, tab_off, <span class="chr">'\t'</span>)))
{
<span class="sizet">size_t</span> tabs_len = ustr_spn_chr_fwd(line, tab_pos - 1, <span class="chr">'\t'</span>);
<span class="sizet">size_t</span> spcs_len = (tabs_len * 8) - ((tab_pos - 1) % 8);
ustr_sc_sub_rep_chr(&line, tab_pos, tabs_len, <span class="chr">' '</span>, spcs_len);
tab_off = tab_pos + spcs_len - 1;
}
<span class="if">if</span> (ustr_cstr(line)[ustr_len(line) - 1] == <span class="chr">'\n'</span>)
has_ret = 1;
<span class="if">if</span> (ustr_spn_chr_fwd(line, <span class="num0">0</span>, <span class="chr">' '</span>) == (ustr_len(line) - has_ret))
ustr_set(&line, USTR1(\3, <span class="str">"&lt;P&gt;"</span>)); <span class="comment">/* blank lines start new paragraph */</span>
<span class="else">else</span>
{
<span class="sizet">size_t</span> spcs_off = <span class="num0">0</span>;
<span class="sizet">size_t</span> spcs_pos = <span class="num0">0</span>;
<span class="char">char</span> buf_rep[USTR_SIZE_FIXED(40 * 6)] = USTR_BEG_FIXED2 <span class="str">"&amp;nbsp;"</span>;
ustr_replace_cstr(&line, <span class="str">"&amp;"</span>, <span class="str">"&amp;amp;"</span>, <span class="num0">0</span>);
ustr_replace_cstr(&line, <span class="str">"&lt;"</span>, <span class="str">"&amp;lt;"</span>, <span class="num0">0</span>);
ustr_replace_cstr(&line, <span class="str">"&gt;"</span>, <span class="str">"&amp;gt;"</span>, <span class="num0">0</span>);
ustr_replace_cstr(&line, <span class="str">"\""</span>, <span class="str">"&amp;quot;"</span>, <span class="num0">0</span>);
ustr_del(&line, has_ret);
ustr_add_cstr(&line, <span class="str">"&lt;BR&gt;\n"</span>);
<span class="comment">/* convert runs of two or more spaces into runs of &amp;nbsp; */</span>
<span class="while">while</span> ((spcs_pos = ustr_srch_cstr_fwd(line, spcs_off, <span class="str">" "</span>)))
{
<span class="sizet">size_t</span> spcs_len = ustr_spn_chr_fwd(line, spcs_pos - 1, <span class="chr">' '</span>);
<span class="sizet">size_t</span> rep = spcs_len;
<span class="sizet">size_t</span> more = <span class="num0">0</span>;
Ustr *rep_nbsp = USTR_SC_INIT_AUTO(buf_rep, USTR_FALSE, <span class="num0">0</span>);
<span class="while">while</span> (rep--)
ustr_add_cstr(&rep_nbsp, <span class="str">"&amp;nbsp;"</span>);
<span class="if">if</span> (ustr_enomem(rep_nbsp) ||
!ustr_sc_sub(&line, spcs_pos, spcs_len, rep_nbsp))
die(prog_name, strerror(ENOMEM));
spcs_off = spcs_pos + (spcs_len * strlen(<span class="str">"&amp;nbsp;"</span>)) - 1;
ustr_free(rep_nbsp);
}
}
<span class="if">if</span> (ustr_enomem(line))
die(<span class="str">"txt2html"</span>, strerror(<span class="errno">errno</span>));
*pline = line;
}
</pre>
<hr>
<address><a href="mailto:james@and.org">James Antill</a></address>
<!-- Created: Sat Oct 13 16:41:12 EDT 2007 -->
<!-- hhmts start -->
Last modified: Tue Oct 30 01:20:07 EDT 2007
<!-- hhmts end -->
</body>
</html>
|