File: search_thread.c

package info (click to toggle)
swish++ 6.1.5-2
  • links: PTS
  • area: main
  • in suites: squeeze
  • size: 2,256 kB
  • ctags: 1,759
  • sloc: ansic: 11,931; lisp: 804; sh: 629; perl: 366; makefile: 80
file content (277 lines) | stat: -rw-r--r-- 8,709 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
/*
**      SWISH++
**      search_thread.c
**
**      Copyright (C) 1998  Paul J. Lucas
**
**      This program is free software; you can redistribute it and/or modify
**      it under the terms of the GNU General Public License as published by
**      the Free Software Foundation; either version 2 of the License, or
**      (at your option) any later version.
**
**      This program is distributed in the hope that it will be useful,
**      but WITHOUT ANY WARRANTY; without even the implied warranty of
**      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
**      GNU General Public License for more details.
**
**      You should have received a copy of the GNU General Public License
**      along with this program; if not, write to the Free Software
**      Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#ifdef  SEARCH_DAEMON

// standard
#include <cctype>
#include <cerrno>
#include <climits>                      /* for ARG_MAX */
#include <cstring>
#include <fcntl.h>
#include <iostream>
#include <sys/socket.h>                 /* for recv(3) */
#include <time.h>
#include <unistd.h>                     /* for close(2) */

//
// We need to know the maximum number of command-line arguments so we can split
// a command-line string into individual arguments.  If the OS defines the
// POSIX.1 ARG_MAX macro, see if it's insanely large (Solaris's limit is over a
// million!) because we don't want to allocate that much space for argument
// pointers since it would probably blow our thread stack space; however, if
// it's small, we might as well use that number since there's no reason to
// exceed it.
//
// See also: W. Richard Stevens.  "Advanced Programming in the Unix
// Environment," Addison-Wesley, Reading, MA, 1993.  pp. 32-40.
//
//
// Cap on the number of argument pointers we are willing to put on a thread's
// stack.  After this sequence ARG_MAX is always defined and is never larger
// than REASONABLE_ARG_MAX: a system-supplied value is kept only if it does
// not exceed the cap, otherwise (or if the system supplies none) the cap
// itself is used.
//
// NOTE(review): POSIX specifies ARG_MAX as the maximum length, in bytes, of
// the arguments to exec (including environment data), not as a count of
// arguments; here it merely serves as an upper bound for the size of the
// argument-pointer array -- confirm that this is the intent.
//
#define REASONABLE_ARG_MAX 50
#ifdef  ARG_MAX
#   if ARG_MAX > REASONABLE_ARG_MAX
#       undef ARG_MAX
#   endif
#endif
#ifndef ARG_MAX
#   define ARG_MAX REASONABLE_ARG_MAX
#endif

// local
#include "fdbuf.h"
#include "platform.h"
#include "search.h"
#include "search_thread.h"
#include "util.h"

using namespace PJL;
using namespace std;

// Definition of the static member; search_thread::main() passes it to
// timed_read_line() as the number of seconds to wait for a client's request.
unsigned search_thread::socket_timeout;

// Defined elsewhere; called by search_thread::main() on the client socket
// when a request was not serviced successfully.
extern void reset_socket( int fd );

// File-local helpers defined later in this file.
static int  split_args( char *s, char *argv[], int arg_max );
static bool timed_read_line( int fd, char *buf, int buf_size, int seconds );

//*****************************************************************************
//
// SYNOPSIS
//
        void search_thread::main( argument_type arg )
//
// DESCRIPTION
//
//      Handle one client connection: read a single "command-line" from the
//      client's socket, split it into arguments, service the request, and
//      write the response back over the same socket.  The socket is always
//      closed before returning; if the request was not serviced successfully,
//      the connection is reset first.
//
// PARAMETERS
//
//      arg     The 'i' member is the socket file descriptor.
//
//*****************************************************************************
{
#define SEARCH_DAEMON_OPTIONS_ONLY
#include "search_options.c"             /* defines opt_spec */

#   ifdef DEBUG_threads
    cerr << "in search_thread::main()\n";
#   endif

    char request_line[ 1024 ];
    bool serviced = false;

    if ( timed_read_line(
            arg.i, request_line, sizeof request_line, socket_timeout
    ) ) {

#       ifdef DEBUG_threads
        cerr << "query=" << request_line << "\n";
#       endif

        //
        // Break the request into an argv-like vector, then wrap the socket in
        // a stream so that everything below can simply write to it.
        //
        char*   arg_slots[ ARG_MAX ];
        char**  argv = arg_slots;
        int     argc = split_args( request_line, argv, ARG_MAX );
        fdbuf   socket_buf( arg.i );
        ostream reply( &socket_buf );

        if ( argc != 0 && argc != ARG_MAX ) {
            //
            // A plausible request: parse its options and, if they're OK,
            // service it.  Results and errors both go back to the client.
            //
            search_options const opt( &argc, &argv, opt_spec, reply );
            if ( opt )
                serviced = service_request( argv, opt, reply, reply );
        }
        else if ( argc == 0 ) {
            reply << usage;             // nothing but whitespace was sent
        }
        else {
            reply   << error << "more than " << ARG_MAX
                    << " arguments" << endl;
        }

        reply << flush;
    }

    //
    // A request is "bad" if it (a) timed out, (b) had too few or too many
    // arguments, (c) had an error in usage, or (d) was malformed.  For a bad
    // request, reset the TCP connection so that we don't potentially have a
    // socket lingering in TIME-WAIT on behalf of a client that couldn't be
    // bothered to send a valid request.  This helps alleviate denial-of-
    // service attacks (if that's what's going on).
    //
    if ( !serviced )
        reset_socket( arg.i );

    ::close( arg.i );
}

//*****************************************************************************
//
// SYNOPSIS
//
        int split_args( char *s, char *argv[], int arg_max )
//
// DESCRIPTION
//
//      Split a string, in place, into individual, argv-like arguments at
//      whitespace.  Each run of separating whitespace is collapsed: its first
//      character is overwritten with a null and the rest is skipped.  This
//      code is based on buf_args() in [Stevens 1993], p. 495, except that it:
//
//          1. Is thread-safe by not using strtok().
//          2. Discards leading and trailing whitespace in the buffer (so
//             trailing whitespace, e.g., a leftover carriage return, does not
//             yield an empty argument).
//          3. Just does the split and doesn't call any function.
//
// PARAMETERS
//
//      s           The string to be split; it is modified.
//
//      argv        The array to deposit the pointers to arguments in.  On
//                  success, the pointer after the last argument is null.
//
//      arg_max     The maximum number of arguments to allow, i.e., the number
//                  of elements in argv.
//
// RETURN VALUE
//
//      Returns 0 if the string is empty or contains only whitespace.
//      Otherwise, upon success, returns the number of elements of argv that
//      were filled in, which includes the terminating null pointer (hence one
//      more than the number of arguments).  Upon failure (too many
//      arguments), returns arg_max; argv is not null-terminated in that case.
//
//      NOTE(review): counting the null terminator looks unintentional, but
//      the caller passes this value on as argc, so it is preserved as is.
//
// SEE ALSO
//
//      W. Richard Stevens.  "Advanced Programming in the Unix Environment,"
//      Addison-Wesley, Reading, MA, 1993.  p. 495.
//
//*****************************************************************************
{
    for ( ; *s && is_space( *s ); ++s ) ;   // skip leading whitespace
    if ( !*s )
        return 0;

    int argc = 0;

    //
    // Each iteration stores the start of the next argument; the loop ends by
    // storing the terminating null pointer once "s" has been set to null.
    //
    while ( (argv[ argc++ ] = s) != 0 ) {
        if ( argc >= arg_max - 1 )      // -1 to allow for null at end
            return arg_max;
        if ( (s = ::strpbrk( s, " \t\n\r" )) != 0 ) {
            *s = '\0';
            //
            // We must skip *ALL* whitespace characters separating arguments.
            //
            while ( *++s && is_space( *s ) ) ;
            //
            // If only whitespace remained, there is no further argument:
            // don't deposit a pointer to an empty string.
            //
            if ( !*s )
                s = 0;
        }
    }
    return argc;
}

//*****************************************************************************
//
// SYNOPSIS
//
        bool timed_read_line( int fd, char *buf, int buf_size, int seconds )
//
// DESCRIPTION
//
//      Read a line of text (a string of characters ending in either a carriage
//      return or a newline) from a Unix file descriptor and store it in the
//      given buffer, null-terminated; but time-out if we don't get it in a
//      certain amount of time.  The line is terminated at the first carriage
//      return or newline received; that character and anything received after
//      it in the same read are discarded.
//
//      If the buffer fills up before a line terminator is seen, the line is
//      truncated to buf_size - 1 characters and is treated as complete.
//
// PARAMETERS
//
//      fd          The Unix file descriptor to read from.
//
//      buf         The buffer to read into.
//
//      buf_size    The size of the buffer.
//
//      seconds     The number of seconds until a time-out.
//
// RETURN VALUE
//
//      Returns true only if a line was read in the time allotted.  Returns
//      false upon time-out, upon a read error, if the peer closes the
//      connection before sending a complete line, or if either fd or buf_size
//      is unusable.
//
// SEE ALSO
//
//      W. Richard Stevens.  "Unix Network Programming, Vol 1, 2nd ed."
//      Prentice-Hall, Upper Saddle River, NJ, 1998.  pp. 352-353.
//
//*****************************************************************************
{
    //
    // A descriptor outside [0, FD_SETSIZE) can't be used with FD_SET(), and
    // there must be room for at least the terminating null.
    //
    if ( fd < 0 || fd >= FD_SETSIZE || buf_size < 1 )
        return false;

    //
    // In a single-threaded application, we could simply use alarm(2) to set a
    // time-out before reading; however, in a multi-threaded application, we
    // can't since there can be at most one alarm set for an entire process:
    // individual threads can not have independent alarms.
    //
    // Therefore, what we do instead is to use select(2) to do the blocking,
    // but with a time-out specified.
    //
    time_t const start_time = ::time( 0 );
    int seconds_remaining = seconds;
    while ( seconds_remaining > 0 ) {
        fd_set rset;
        FD_ZERO( &rset );
        FD_SET( fd, &rset );

        struct timeval tv;
        tv.tv_sec  = seconds_remaining;
        tv.tv_usec = 0;

        int const ready = ::select( fd + 1, &rset, 0, 0, &tv );
        if ( ready > 0 ) {
            if ( !FD_ISSET( fd, &rset ) )   // shouldn't happen, but...
                break;

            ssize_t const bytes_read = ::recv( fd, buf, buf_size, 0 );
            if ( bytes_read > 0 ) {
                //
                // Look for a line terminator anywhere in what we just got,
                // not only in the last byte: a client may send "\r\n" or send
                // more data right after the line.
                //
                for ( char *const end = buf + bytes_read; buf < end; ++buf )
                    if ( *buf == '\r' || *buf == '\n' ) {
                        *buf = '\0';
                        return true;        // got a line: woohoo!
                    }
                buf_size -= bytes_read;
                if ( buf_size <= 0 ) {
                    //
                    // The buffer is full: truncate and call it a line.
                    //
                    buf[-1] = '\0';
                    return true;
                }
            }
            else if ( bytes_read == 0 || errno != EINTR ) {
                //
                // Either the peer closed the connection (no more data will
                // ever arrive, so waiting is pointless) or a real error.
                //
                break;
            }
        }
        else if ( ready == 0 || errno != EINTR ) {
            break;                          // timed-out or real error
        }

        //
        // We haven't gotten a complete line yet (or were interrupted by a
        // signal): see how much time has elapsed and, if there's more time
        // left before the time-out expires, try to read some more.
        //
        time_t const elapsed_time = ::time( 0 ) - start_time;
        seconds_remaining = seconds - static_cast<int>( elapsed_time );
    }

    return false;
}

#endif  /* SEARCH_DAEMON */
/* vim:set et sw=4 ts=4: */