1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242
|
/* STREAM TO GUESS CONTENT-TYPE HTGuess.c
** ============================
**
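** This version of the stream object buffers the first SAMPLE_SIZE
** characters of its input, guesses the content type and content
** encoding from that sample, records them in the request, and then
** passes all of the data on to an output stream built for the
** guessed type.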
**
** HISTORY:
** 8 Jul 94 FM Insulate free() from _free structure element.
**
*/
#define SAMPLE_SIZE 200 /* Number of chars to look at */
#include "HTGuess.h"
#include "HTFormat.h"
#include "HTAlert.h"
#include "HTList.h"
/* Stream Object
** ------------
*/
/*
** State of one guessing stream.  Input is collected in `buffer' and
** classified character by character until either SAMPLE_SIZE characters
** have been seen or the stream is freed; at that point the content type
** is guessed and `output_stream' is created.
*/
struct _HTStream {
CONST HTStreamClass * isa; /* Method table; set to &HTGuessClass */
HTRequest * req; /* Request that receives the guessed type/encoding */
HTStream * output_stream; /* Downstream stream; NULL while still sampling */
BOOL discard; /* YES once no output stream could be built: drop input */
int cnt; /* Number of characters sampled so far */
int text_cnt; /* Tabs and characters with codes 32..127 */
int lf_cnt; /* Line feeds */
int cr_cnt; /* Carriage returns */
int pg_cnt; /* Form feeds (code 12) */
int ctrl_cnt; /* Remaining characters with codes below 32 */
int high_cnt; /* Characters with codes 128 and above */
char * write_ptr; /* Next free position in buffer */
char buffer[ SAMPLE_SIZE + 1 ]; /* The sample, plus room for a terminator */
};
/*	Does the sample look like HTML?
**	-------------------------------
**
** Looks at the first '<' in the NUL-terminated string `buf' and reports
** YES if it starts one of a few well-known HTML tags (compared without
** regard to case), or a three-character tag of the form <H?> such as
** <H1> or <HR>.  Only the first '<' is examined.
**
** Returns YES if the text at the first '<' looks like HTML, NO otherwise
** (including when there is no '<' at all).
*/
PRIVATE BOOL is_html ARGS1(char *, buf)
{
    char * p = strchr(buf,'<');

    if (!p)
        return NO;

    if (!strncasecomp(p, "<HTML>", 6) ||
        !strncasecomp(p, "<HEAD", 5) ||
        !strncasecomp(p, "<TITLE>", 7) ||
        !strncasecomp(p, "<BODY>", 6) ||
        !strncasecomp(p, "<PLAINTEXT>", 11))
        return YES;

    /*
    ** <H?> pattern.  p[2] must be checked before p[3] is read: if the
    ** string ends right after "<H", p[3] lies beyond the terminator and
    ** possibly beyond the caller's buffer.
    */
    if (TOUPPER(p[1]) == 'H' && p[2] != '\0' && p[3] == '>')
        return YES;

    return NO;
}
/*
** Shorthands used by the stream methods below.  All of them expect a
** local variable `me' (HTStream *) to be in scope.
*/
/* Forward one character to the output stream (must be non-NULL) */
#define PUT_CHAR(c) \
(*me->output_stream->isa->put_character)(me->output_stream,c)
/* Forward a NUL-terminated string to the output stream */
#define PUT_STRING(s) \
(*me->output_stream->isa->put_string)(me->output_stream,s)
/* Forward a block of l characters starting at b to the output stream */
#define PUT_BLOCK(b,l) \
(*me->output_stream->isa->put_block)(me->output_stream,b,l)
/* Record the atom for content type name t in the request */
#define CONTENT_TYPE(t) \
me->req->content_type = HTAtom_for(t)
/* Record the atom for content encoding name t in the request */
#define CONTENT_ENCODING(t) \
me->req->content_encoding = HTAtom_for(t)
/*	Guess the type and flush the sample
**	-----------------------------------
**
** Decides from the character statistics and the buffered sample what the
** content type and content encoding are, stores them in the request,
** builds the output stream for the guessed type with HTStreamStack() and
** writes the buffered sample to it.
**
** Returns YES if an output stream was created and the sample forwarded,
** NO if no output stream could be built; in that case the stream is
** switched to discard mode.
*/
PRIVATE BOOL header_and_flush ARGS1(HTStream *, me)
{
    CTRACE(stderr,"GUESSING.... text=%d newlines=%d ctrl=%d high=%d\n",
           me->text_cnt, me->lf_cnt, me->ctrl_cnt, me->high_cnt);
    if (me->cnt) {
        /*
        ** Compute in floating point so that the + 0.5 really rounds;
        ** with integer division the quotient is truncated first.
        */
        CTRACE(stderr,
               "Percentages. text=%d%% newlines=%d%% ctrl=%d%% high=%d%%\n",
               (int)(100.0*me->text_cnt/me->cnt + 0.5),
               (int)(100.0*me->lf_cnt  /me->cnt + 0.5),
               (int)(100.0*me->ctrl_cnt/me->cnt + 0.5),
               (int)(100.0*me->high_cnt/me->cnt + 0.5));
    }

    /*
    ** Terminate the sample before any string function looks at it, so
    ** that neither branch depends on the buffer having been zero-filled
    ** when the stream was allocated.
    */
    *me->write_ptr = 0;

    if (!me->ctrl_cnt ||
        me->text_cnt + me->lf_cnt >= 16 * (me->ctrl_cnt + me->high_cnt)) {

        /* Some kind of text: no control characters, or text dominates */
        if (me->high_cnt > 0) {
            CONTENT_ENCODING("8bit");
        } else {
            CONTENT_ENCODING("7bit");
        }

        if (is_html(me->buffer)) {
            CONTENT_TYPE("text/html");
        } else if (!strncmp(me->buffer, "%!", 2)) {
            CONTENT_TYPE("application/postscript");
        } else if (strstr(me->buffer, "#define") &&
                   strstr(me->buffer, "_width") &&
                   strstr(me->buffer, "_bits")) {
            CONTENT_TYPE("image/x-xbitmap");
        } else {
            CONTENT_TYPE("text/plain");
        }
    }
    else {
        /* Binary data: look for well-known leading signatures */
        if (!strncmp(me->buffer, "GIF", 3)) {
            CONTENT_TYPE("image/gif");
        } else if (!strncmp(me->buffer, "\377\330\377\340", 4)) {
            CONTENT_TYPE("image/jpeg");
        } else if (!strcmp(me->buffer, "MM")) {  /* MM followed by a zero */
            CONTENT_TYPE("image/tiff");
        } else if (!strncmp(me->buffer, ".snd", 4)) {
            CONTENT_TYPE("audio/basic");
        } else if (!strncmp(me->buffer, "\037\235", 2)) {
            /* Only the encoding is known; the type is defaulted below */
            CONTENT_ENCODING("x-compress");
        } else if (!strncmp(me->buffer, "\037\213", 2)) {
            CONTENT_ENCODING("x-gzip");
        } else {
            CONTENT_TYPE("application/octet-stream");
        }
    }

    /* Defaults for whatever is still unset in the request */
    if (!me->req->content_type) CONTENT_TYPE("www/unknown");
    if (!me->req->content_encoding) CONTENT_ENCODING("binary");

    CTRACE(stderr,"Guessed..... %s\n", HTAtom_name(me->req->content_type));
    CTRACE(stderr,"Encoding.... %s\n", HTAtom_name(me->req->content_encoding));

    me->output_stream = HTStreamStack(me->req->content_type, me->req, NO);
    if (!me->output_stream) {
        me->discard = YES;              /* Turning into a black hole */
        return NO;
    }

    PUT_BLOCK(me->buffer, me->cnt);
    return YES;
}
/*	Accept one character
**	--------------------
**
** In discard mode the character is dropped.  Once an output stream
** exists the character is passed straight through.  Otherwise it is
** classified, appended to the sample buffer, and the guess is made as
** soon as SAMPLE_SIZE characters have been collected.
*/
PRIVATE void HTGuess_put_character ARGS2(HTStream *, me, char, c)
{
    unsigned char code = (unsigned char)c;

    if (me->discard)
        return;

    if (me->output_stream) {
        PUT_CHAR(c);
        return;
    }

    /* Still sampling: update the statistics for this character */
    me->cnt++;
    if (c == LF)
        me->lf_cnt++;
    else if (c == CR)
        me->cr_cnt++;
    else if (c == 12)
        me->pg_cnt++;
    else if (c == '\t' || (code >= 32 && code < 128))
        me->text_cnt++;
    else if (code < 32)
        me->ctrl_cnt++;
    else
        me->high_cnt++;

    *me->write_ptr = c;
    me->write_ptr++;

    if (me->cnt >= SAMPLE_SIZE)
        header_and_flush(me);
}
/*	Accept a NUL-terminated string
**	------------------------------
**
** Passed straight through when an output stream already exists;
** otherwise fed to HTGuess_put_character() one character at a time.
** Nothing happens in discard mode.
*/
PRIVATE void HTGuess_put_string ARGS2(HTStream *, me, CONST char*, s)
{
    CONST char * next;

    if (me->discard)
        return;

    if (me->output_stream) {
        PUT_STRING(s);
        return;
    }

    for (next = s; *next; next++)
        HTGuess_put_character(me, *next);
}
/*	Accept a block of l characters starting at b
**	--------------------------------------------
**
** Characters are fed one at a time to HTGuess_put_character() for as
** long as no output stream exists; whatever is left of the block once
** the output stream has appeared is forwarded in a single call.
** Nothing happens in discard mode.
*/
PRIVATE void HTGuess_put_block ARGS3(HTStream *, me, CONST char*, b, int, l)
{
    if (me->discard)
        return;

    for ( ; l > 0 && !me->output_stream; b++, l--)
        HTGuess_put_character(me, *b);

    if (l > 0)
        PUT_BLOCK(b,l);
}
/*	Free the stream
**	---------------
**
** If the input ended before the guess was made, the guess is made now
** from whatever has been sampled.  The output stream, if there is one,
** is freed through its own _free method before this object is released.
*/
PRIVATE void HTGuess_free ARGS1(HTStream *, me)
{
    HTStream * sink;

    if (!(me->discard || me->output_stream))
        header_and_flush(me);

    sink = me->output_stream;
    if (sink)
        (*sink->isa->_free)(sink);

    free(me);
}
/*	Abort the stream
**	----------------
**
** Propagates the abort to the output stream, if one has been created,
** and releases this object.  Data still held in the sample buffer is
** dropped.
*/
PRIVATE void HTGuess_abort ARGS2(HTStream *, me, HTError, e)
{
    /*
    ** The downstream method must be handed the downstream object, not
    ** this guessing stream, exactly as HTGuess_free does for _free.
    */
    if (me->output_stream)
        (*me->output_stream->isa->abort)(me->output_stream, e);
    free(me);
}
/* Guessing stream
** ---------------
*/
/*
** Method table of the guessing stream; HTGuess_new() installs it in
** the `isa' member of every stream it creates.
*/
PRIVATE CONST HTStreamClass HTGuessClass =
{
"Guess", /* Name of the stream class */
HTGuess_free, /* Guess if not yet done, free downstream and self */
HTGuess_abort, /* Abort downstream and free self */
HTGuess_put_character, /* Sample or forward one character */
HTGuess_put_string, /* Sample or forward a string */
HTGuess_put_block /* Sample or forward a block */
};
/*	Create a guessing stream
**	------------------------
**
** Allocates a zero-filled stream object bound to request `req', with
** an empty sample buffer and no output stream yet.  outofmem() is
** called if the allocation fails.
**
** Returns the new stream.
*/
PUBLIC HTStream * HTGuess_new ARGS1(HTRequest *, req)
{
    HTStream * me;

    me = (HTStream *) calloc(1, sizeof(*me));
    if (!me)
        outofmem(__FILE__, "HTGuess_new");

    me->write_ptr = me->buffer;         /* Sample buffer starts empty */
    me->req = req;
    me->isa = &HTGuessClass;
    return me;
}
|