1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219
|
/* HTParse: URI parsing in the WWW Library
HTPARSE
This module is a part of the CERN Common WWW Library. It contains code to parse URIs
and various related things such as:
Parse a URI relative to another URI
Get the URI relative to another URI
Remove redundant data from the URI (HTSimplify and HTCanon)
Expand a local host name into a full domain name (HTExpand)
Search a URI for illegal characters in order to prevent security holes
Escape and Unescape a URI for reserved characters in URLs
Implemented by HTParse.c.
*/
#ifndef HTPARSE_H
#define HTPARSE_H
#include "HTUtils.h"
/*
HTParse: Parse a URI relative to another URI
HTParse returns those parts of a name which are both given and requested, filling in
the missing bits from the related name where necessary.
CONTROL FLAGS FOR HTPARSE
Each flag below is a single bit. OR the flags together to build the number passed as
the 'wanted' argument to HTParse. PARSE_ALL is the union of all five flags.
*/
#define PARSE_PUNCTUATION 0x01
#define PARSE_ANCHOR 0x02
#define PARSE_PATH 0x04
#define PARSE_HOST 0x08
#define PARSE_ACCESS 0x10
#define PARSE_ALL (PARSE_ACCESS | PARSE_HOST | PARSE_PATH | PARSE_ANCHOR | PARSE_PUNCTUATION)
/*
ON ENTRY
aName        The (file) name to be parsed
relatedName  The name relative to which aName is to be parsed
wanted       A mask of the PARSE_xxx bits selecting the parts which are wanted
ON EXIT
returns      A pointer to a malloc'd string; the caller MUST FREE it
*/
PUBLIC char *HTParse PARAMS((const char *aName, const char *relatedName, int wanted));
/*
HTStrip: Strip white space off a string
ON ENTRY
s        The string to strip. It is modified: see ON EXIT.
ON EXIT
returns  Pointer to the first non-white character of s, or to 0 if there is none.
         All trailing white space in s has been OVERWRITTEN with zero.
*/
#if defined(__STDC__)
extern char *HTStrip(char *s);
#else
extern char *HTStrip();
#endif
/*
HTSimplify: Simplify a URI
A URI may contain the sequence "xxx/../", which can be replaced by "", and the sequence
"/./", which can be replaced by "/". Simplifying helps us recognize duplicate URIs.
The following transformations are therefore done:
    /etc/junk/../fred       becomes   /etc/fred
    /etc/junk/./fred        becomes   /etc/junk/fred
but we should NOT change
    http://fred.xxx.edu/../..
or
    ../../albert.html
In the same manner, the following prefixes are preserved:
    ./
    //
In order to avoid empty URIs, the following URIs become:
    /fred/..                becomes   /fred/..
    /fred/././..            becomes   /fred/..
    /fred/.././junk/.././   becomes   /fred/..
If more than one `://' is found (several proxies in cascade), only the part after the
last `://' is simplified.
NOTE(review): this header does not say whether filename is rewritten in place or what
the return value points to -- confirm against HTParse.c.
*/
PUBLIC char *HTSimplify PARAMS((char *filename));
/*
HTRelative: Make Relative (Partial) URI
Creates and returns a string which expresses one address relative to another. Where
the two addresses are unrelated, an absolute address is returned.
ON ENTRY
aName        Absolute, fully qualified name of a node (no anchor bits)
relatedName  Absolute, fully qualified name of a node (no anchor bits)
ON EXIT
returns      A newly allocated name which, if parsed by HTParse relative to
             relatedName, will yield aName. The caller is responsible for freeing
             the returned name later.
*/
#if defined(__STDC__)
extern char *HTRelative(const char *aName, const char *relatedName);
#else
extern char *HTRelative();
#endif
/*
HTCanon: Expand a Local Host Name Into a Full Domain Name
Expands the host name of the URI from a local name to a full domain name, converts the
host name to lower case and takes away `:80', `:70' and `:21'. The advantage of doing
this is that we only have one entry in the host cache and one entry in the document
cache.
NOTE(review): the roles of the two arguments and the meaning of the return value are
not described in this header -- confirm against HTParse.c.
*/
PUBLIC char *HTCanon PARAMS((char **filename, char *host));
/*
HTEscape: Encode unacceptable characters in a string
Takes a string containing any sequence of ASCII characters and returns a malloc'd
string containing the same information, but with every "unacceptable" character
represented in the form %xy, where x and y are two hex digits.
str      The string to encode
mask     One of the valid mask values (URL_XALPHAS, URL_XPALPHAS, URL_PATH)
returns  The malloc'd, encoded string (presumably to be freed by the caller -- confirm)
*/
PUBLIC char *HTEscape PARAMS((CONST char *str, unsigned char mask));
/*
The following are valid mask values for the 'mask' argument of HTEscape. The terms are
the BNF names in the URI document.
Each expansion is fully parenthesized so that the cast cannot be split apart by a
neighbouring operator (for example `sizeof URL_PATH').
*/
#define URL_XALPHAS ((unsigned char) 1)
#define URL_XPALPHAS ((unsigned char) 2)
#define URL_PATH ((unsigned char) 4)
/*
HTUnEscape: Decode %xx escaped characters
Takes a pointer to a string in which characters may have been encoded in %xy form,
where xy is the ASCII hex code for character 16x+y. The string is converted in place,
as it will never grow.
str      The string to decode; it is overwritten with the decoded text
NOTE(review): the return value is not described in this header -- confirm against
HTParse.c.
*/
extern char *HTUnEscape PARAMS((char *str));
/*
Prevent Security Holes
HTCleanTelnetString() makes sure that the given string doesn't contain characters that
could cause security holes, such as newlines in ftp, gopher, news or telnet URLs. More
specifically, it allows everything in the hexadecimal ASCII ranges 20-7E and A0-FE,
inclusive.
str      The string, which is *modified* if necessary: it is truncated at the first
         illegal character that is encountered.
returns  YES if the string was modified, NO otherwise.
*/
PUBLIC BOOL HTCleanTelnetString PARAMS((char *str));
/*
*/
#endif /* HTPARSE_H */
/*
End of HTParse Module */
|