1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224
|
#include "tmbstr.h"
#include "httpio.h"
/*
 * Resolve pHttp->pHostName and open a TCP (stream) connection to it on
 * pHttp->nPort.  The socket descriptor is stored in pHttp->s.
 *
 * Returns 0 on success, ECONNREFUSED when connect() fails with that errno,
 * and -1 for every other failure (host lookup, unusable address, socket
 * creation, other connect errors).
 *
 * When connect() fails the socket created here is left in pHttp->s;
 * closeURL() closes any positive descriptor it finds there.
 */
int
makeConnection ( HTTPInputSource *pHttp )
{
    struct sockaddr_in sock;
    struct hostent *pHost;

    /* Get internet address of the host. */
    pHost = gethostbyname( pHttp->pHostName );
    if (!pHost)
        return -1;

    /* Only accept an IPv4 address that actually fits into sin_addr, so the
       copy below can never write past the end of the structure. */
    if (pHost->h_addrtype != AF_INET
        || pHost->h_length <= 0
        || pHost->h_length > (int) sizeof( sock.sin_addr ))
        return -1;

    /* Start from an all-zero address so the padding bytes of sockaddr_in
       are not handed to connect() uninitialised. */
    memset( &sock, 0, sizeof( sock ) );

    /* Copy the address of the host to socket description. */
    memcpy( &sock.sin_addr, pHost->h_addr, pHost->h_length );

    /* Set port and protocol */
    sock.sin_family = AF_INET;
    sock.sin_port = htons( pHttp->nPort );

    /* Make an internet socket, stream type. */
    if ((pHttp->s = socket( AF_INET, SOCK_STREAM, 0 )) == -1)
        return -1;

    /* Connect the socket to the remote host. */
    if (connect( pHttp->s, (struct sockaddr *) &sock, sizeof( sock ) ))
    {
        /* A refused connection is reported separately from other errors. */
        if (errno == ECONNREFUSED)
            return ECONNREFUSED;
        return -1;
    }

    return 0;
}
/*
 * Split a URL of the form [http://]host[:port][/resource] into the
 * pHostName, nPort and pResource fields of pHttp.
 *
 *  - A "scheme://" prefix is accepted only when it is "http://"
 *    (compared case-insensitively); with no prefix, http is assumed.
 *  - The host name runs up to the first ':' or '/' (or the end of url)
 *    and must not be empty.  It is stored via tmbstrndup().
 *  - An explicit port must consist of decimal digits only and lie in the
 *    range 1..65535; without one, port 80 is used.
 *  - Leading '/' characters of the resource are dropped; the remainder is
 *    stored via tmbstrdup().
 *
 * Returns 0 on success and -1 on any malformed input.  If the failure is
 * in the port part, pHostName has already been assigned and is left set,
 * and nPort is left at 0.
 */
int parseURL( HTTPInputSource *pHttp, tmbstr url )
{
    int i, j = 0;
    ctmbstr pStr;

    pStr = tmbsubstr( url, "://" );

    /* If protocol is there, but not http, bail out, else assume http. */
    if (NULL != pStr)
    {
        if (tmbstrncasecmp( url, "http://", 7 ))
            return -1;
        /* The host name starts right after the "://" separator. */
        j = pStr - url + 3;
    }

    /* Find the end of the host name. */
    for (i = j; url[i] && url[i] != ':' && url[i] != '/'; i++) {}
    if (i == j)
        return -1;

    /* Get the hostname. */
    pHttp->pHostName = tmbstrndup( &url[j], i - j );

    if (url[i] == ':')
    {
        /* We have a colon delimiting the hostname.  It should mean that
           a port number is following it */
        unsigned long port = 0;

        pHttp->nPort = 0;

        /* <ctype.h> functions require a value representable as
           unsigned char, hence the casts. */
        if (!isdigit( (unsigned char) url[++i] ))
            return -1;              /* just a misformed port number */

        for (; url[i] && url[i] != '/'; i++)
        {
            if (!isdigit( (unsigned char) url[i] ))
                return -1;
            port = 10 * port + (unsigned long) (url[i] - '0');
            /* Reject early so an over-long digit string can never
               overflow the accumulator or wrap the stored port. */
            if (port > 65535)
                return -1;
        }
        if (!port)
            return -1;

        pHttp->nPort = port;
    }
    else
    {
        /* Assume default port. */
        pHttp->nPort = 80;
    }

    /* skip past the delimiting slash (we'll add it later ) */
    while (url[i] == '/')
        i++;
    pHttp->pResource = tmbstrdup( url + i );
    return 0;
}
/*
 * Refill in->buffer with the next chunk of data from the socket in->s.
 *
 * On a positive socket descriptor, recv() is asked for up to
 * sizeof(in->buffer) bytes, the read position is reset to 0 and, when the
 * buffer was not filled completely, a '\0' is stored after the last byte
 * received.  Without a usable socket nothing is read.
 *
 * in->nBufSize is set to the number of bytes now available (0 when the
 * peer closed the connection, when recv() failed, or when there is no
 * socket), so it never holds an error code.
 *
 * Returns the number of bytes received, 0 at end of stream, or the
 * negative recv() result on error.
 */
int fillBuffer( HTTPInputSource *in )
{
    int received = 0;

    if (0 < in->s)
    {
        received = recv( in->s, in->buffer, sizeof( in->buffer ), 0 );
        in->nextBytePos = 0;

        if (received < 0)
        {
            /* Failed read: expose an empty buffer to the readers but
               still report the error to the caller. */
            in->nBufSize = 0;
            in->buffer[0] = '\0';
            return received;
        }

        if (received < (int) sizeof( in->buffer ))
            in->buffer[received] = '\0';
    }

    in->nBufSize = received;
    return received;
}
/*
 * Initialise `in` as an input source for the document at pUrl: install the
 * byte callbacks, parse the URL, connect, send an HTTP/1.0 GET request and
 * consume the response header so that the next byte read is the first
 * non-whitespace byte of the body.
 *
 * Returns -1 when the URL cannot be parsed or the request cannot be sent,
 * the makeConnection() error code when connecting fails, and otherwise the
 * result of the last fillBuffer() call made while skipping the header
 * (positive when body data is buffered, 0 or negative when the stream
 * ended or failed first).
 * NOTE(review): a successful open therefore returns a positive byte count
 * rather than 0 -- confirm that callers expect this.
 */
int openURL( HTTPInputSource *in, tmbstr pUrl )
{
    int rc = -1;
#ifdef WIN32
    WSADATA wsaData;

    /* 514 == 0x0202, the version word passed to WSAStartup. */
    rc = WSAStartup( 514, &wsaData );
#endif

    in->tis.getByte = (TidyGetByteFunc) HTTPGetByte;
    in->tis.ungetByte = (TidyUngetByteFunc) HTTPUngetByte;
    in->tis.eof = (TidyEOFFunc) HTTPIsEOF;
    in->tis.sourceData = (uint) in;
    in->nextBytePos = in->nextUnGotBytePos = in->nBufSize = 0;

    /* No socket yet; keeps closeURL() from acting on an uninitialised
       descriptor if we fail before makeConnection() creates one. */
    in->s = -1;

    /* Do not try to connect with host/port/resource fields that were
       never filled in. */
    if (0 != parseURL( in, pUrl ))
        return -1;

    if (0 == (rc = makeConnection( in )))
    {
        char ch, lastCh = '\0';
        int blanks = 0;

        /* 48 bytes cover the fixed part of the request (37 characters)
           plus the terminating NUL. */
        char *getCmd = MemAlloc( 48 + strlen( in->pResource ));

        sprintf( getCmd, "GET /%s HTTP/1.0\r\nAccept: text/html\r\n\r\n", in->pResource );
        if (send( in->s, getCmd, strlen( getCmd ), 0 ) < 0)
        {
            /* The request never went out, so there is nothing to read. */
            MemFree( getCmd );
            return -1;
        }
        MemFree( getCmd );

        /* skip past the header information */
        while ( in->nextBytePos >= in->nBufSize
                && 0 < (rc = fillBuffer( in )))
        {
            /* Header end was found exactly at the end of the previous
               buffer; the fresh buffer already holds body data. */
            if (1 < blanks)
                break;

            for (; in->nextBytePos < sizeof( in->buffer )
                   && 0 != in->buffer[ in->nextBytePos ];
                 in->nextBytePos++ )
            {
                ch = in->buffer[ in->nextBytePos ];
                if (ch == '\r' || ch == '\n')
                {
                    if (ch == lastCh)
                    {
                        /* Two carriage returns or two newlines in a row,
                           that's good enough */
                        blanks++;
                    }
                    if (lastCh == '\r' || lastCh == '\n')
                    {
                        blanks++;
                    }
                }
                else
                    blanks = 0;
                lastCh = ch;

                if (1 < blanks)
                {
                    /* end of header, scan to first non-white and return.
                       The scan is bounded by the buffer size so it cannot
                       run off the end of a completely filled buffer. */
                    while (in->nextBytePos < sizeof( in->buffer )
                           && isspace( (unsigned char) in->buffer[ in->nextBytePos ] ))
                        in->nextBytePos++;
                    break;
                }
            }
        }
    }
    return rc;
}
/*
 * Shut down an HTTP input source: close its socket when the descriptor is
 * positive, mark the descriptor as invalid (-1) and clear the sourceData
 * field of the embedded input-source record.  On WIN32 builds WSACleanup()
 * is called as well.
 */
void closeURL( HTTPInputSource *source )
{
    /* Only a positive descriptor is treated as an open socket. */
    if (source->s > 0)
    {
        closesocket( source->s );
    }

    source->tis.sourceData = 0;
    source->s = -1;

#ifdef WIN32
    WSACleanup();
#endif
}
/*
 * Return the next byte of the stream, or EndOfStream when no more data is
 * available.
 *
 * Bytes pushed back with HTTPUngetByte() are returned first, most recent
 * first.  Otherwise the byte comes from the receive buffer, which is
 * refilled through fillBuffer() once it has been consumed.
 *
 * Bytes are returned as values in the range 0..255: converting through
 * unsigned char keeps a data byte such as 0xFF from turning into a
 * negative int that a caller could mistake for an end-of-stream marker.
 */
int HTTPGetByte( HTTPInputSource *source )
{
    if (source->nextUnGotBytePos)
        return (unsigned char) source->unGetBuffer[ --source->nextUnGotBytePos ];

    if (0 != source->nBufSize && source->nextBytePos >= source->nBufSize)
    {
        fillBuffer( source );
    }

    /* Nothing buffered, or nothing left unread after the refill attempt:
       never hand out a byte from beyond the received data. */
    if (0 == source->nBufSize || source->nextBytePos >= source->nBufSize)
        return EndOfStream;

    return (unsigned char) source->buffer[ source->nextBytePos++ ];
}
/*
 * Push byteValue back onto the source so that the next HTTPGetByte() call
 * returns it.  At most 16 bytes can be pending; once that limit has been
 * reached, further bytes are silently dropped.
 */
void HTTPUngetByte( HTTPInputSource *source, uint byteValue )
{
    /* Only you can prevent buffer overflows */
    if (source->nextUnGotBytePos >= 16)
        return;

    source->unGetBuffer[ source->nextUnGotBytePos ] = (char) byteValue;
    source->nextUnGotBytePos++;
}
/*
 * Report whether the source has run out of data.
 *
 * Returns no while pushed-back bytes are pending or unread bytes remain in
 * the receive buffer.  If the buffer has been fully consumed, one refill is
 * attempted through fillBuffer() before deciding.  Returns yes otherwise.
 */
Bool HTTPIsEOF( HTTPInputSource *source )
{
    Bool atEnd;

    /* pending ungot bytes, not done */
    if (source->nextUnGotBytePos)
        return no;

    /* We've consumed the existing buffer, get another */
    if (source->nBufSize != 0 && source->nBufSize <= source->nextBytePos)
    {
        fillBuffer( source );
    }

    /* Unread data in the buffer means we are not done; an empty buffer
       after the refill attempt means the stream is exhausted. */
    atEnd = (source->nextBytePos < source->nBufSize) ? no : yes;
    return atEnd;
}
|