1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453
|
(in-package :araneida)
;;;; Handle URLs in a CLOS object way, after the pattern of PATHNAMEs
;;;; See also url-class.lisp
;;; TODO:
;;; - clean it up - it's a mess
;;; - print URLs in #u format
;;; - Tidy for non-HTTP schemes someday too
;;; This could usefully all be seriously revisited in the light of
;;; rfc2396, which I haven't read yet but looks at a glance to be
;;; impressively more sensible than 1738
(defvar *default-url-defaults* nil
"Default URL to use as context when PARSE-URL is given a relative urlstring")
(defmethod make-load-form ((url url) &optional environment)
(make-load-form-saving-slots url :environment environment))
;;; basic URL about which we know little
(defgeneric url-p (url)
(:documentation "Predicate determining if url is a url or not."))
(defmethod url-p ((url url)) #+nil (declare (ignore url)) t)
(defmethod url-p ((anything t)) #+nil (declare (ignore anything)) nil)
(defgeneric parse-url (url)
(:documentation "Creates a url instance from a general url string, such as 'http://example.com:8080'"))
(defmethod parse-url ((url url))
(destructuring-bind (scheme rest) (split (url-unparsed url) 2 '(#\:))
(setf (url-scheme url) scheme
(url-unparsed url) rest)))
(defmethod parse-url ((url mailto-url))
(let ((unparsed (second (split (url-unparsed url) 2 '(#\:)))))
(destructuring-bind (user host) (split unparsed 2 '(#\@))
(setf (url-username url) user
(url-host url) host)
url)))
(defgeneric copy-url (url &optional extra-slots)
(:documentation "Makes a copy of a URL object"))
(defmethod copy-url ((url url) &optional extra-slots)
(let* ((class (class-of url))
(new (make-instance class)))
(loop for i in (append extra-slots '(string unparsed scheme))
when (slot-boundp url i)
do (setf (slot-value new i) (slot-value url i)))
new))
(defmethod copy-url ((url mailto-url) &optional extra-slots)
(call-next-method url (append extra-slots '(username host))))
(defmethod copy-url ((url internet-url) &optional extra-slots)
(call-next-method url (append extra-slots '(username password host port))))
(defmethod copy-url ((url httplike-url) &optional extra-slots)
(call-next-method url (append extra-slots '(path query fragment))))
(defgeneric url-equal-p (url1 url2)
(:documentation "Checks if url1 equals url2."))
(defmethod url-equal-p ((url1 url) (url2 url))
(and (eql (class-of url1) (class-of url2))
(let ((class (class-of url1)))
(loop for slot in (class-slots class)
for name = (slot-definition-name slot)
always
(or (eql name 'string) (eql name 'unparsed)
(equal (slot-value url1 (slot-definition-name slot))
(slot-value url2 (slot-definition-name slot))))))))
(defmethod url-equal-p ((u1 t) (u2 t))
(and (eq u1 :wild) (eq u2 :wild)))
;;; internet-url
(defmethod parse-url ((url internet-url))
(call-next-method url) ;parse bits that the parent knows
(let* ((string (subseq (url-unparsed url) 2)) ; skip "//"
(dir-s (position #\/ string))
(at-s (position #\@ string :end dir-s))
(colon1-s (position #\: string :end at-s))
(colon2-s (if at-s (position #\: string :start (1+ at-s)) nil))
(colon-s (if at-s colon2-s colon1-s)))
(setf (url-unparsed url) (if dir-s (subseq string dir-s) nil)
(url-host url) (subseq string (1+ (or at-s -1)) (or colon-s dir-s))
(url-port url)
(if colon-s (parse-integer (subseq string (1+ colon-s) dir-s)))
(url-username url) (if at-s (subseq string 0 (or colon1-s at-s)) nil)
(url-password url) (if (and at-s colon1-s)
(subseq string (1+ colon1-s) at-s) nil))))
(defgeneric url-endpoint (url)
(:documentation "Returns \`hostname:port\' for this URL. The colon and port
number are omitted if the port is the default for this URL class (not true in practice for HTTPS)"))
(defmethod url-endpoint ((url internet-url))
(let ((default-port (url-port (make-instance (class-of url)))))
(if (eql (url-port url) default-port)
(url-host url)
(s. (url-host url) ":" (princ-to-string (url-port url))))))
;;; httplike-url
(defun parse-http-path (url string)
(let* ((frag-s (position #\# string :from-end t))
(query-s (position #\? string :end frag-s :from-end t)))
(setf (url-query url) (if query-s (subseq string (1+ query-s) frag-s) nil)
(url-port url) (or (url-port url)
(url-port (make-instance (class-of url))))
(url-fragment url) (if frag-s (subseq string (1+ frag-s)) nil)
(url-path url)
(or (subseq string 0 (or query-s frag-s)) "/"))))
(defmethod parse-url ((url httplike-url))
(call-next-method url) ;parse bits that the parent knows
(parse-http-path url (url-unparsed url))
(setf (url-unparsed url) nil)) ;we've finished parsing
(defgeneric urlstring (url &key query-parameters)
(:documentation "Returns a URL string from a url object (ie something like 'http://example.com')"))
;;; watch us assemble a URL backwards ...
;;; XXX half of this should be in the internet-url method
(defmethod urlstring ((url httplike-url) &key (query-parameters t))
(let ((out '()))
(awhen (url-fragment url) (push it out) (push "#" out))
(and query-parameters
(awhen (url-query url) (push it out) (push "?" out)))
(aif (url-path url) (push it out) (push "/" out))
(let ((default-port (url-port (make-instance (class-of url)))))
(unless (eql (url-port url) default-port)
(push (princ-to-string (url-port url)) out) (push ":" out)))
(awhen (url-host url) (push it out))
(awhen (url-username url)
(push "@" out)
(awhen (url-password url)
(push it out)
(push ":" out))
(push it out))
(push "://" out)
(push (url-scheme url) out)
(apply #'concatenate 'string out)))
(defmethod urlstring ((url mailto-url) &key (query-parameters t))
(declare (ignore query-parameters))
(format nil "mailto:~A@~A" (url-username url)
(url-host url)))
(defgeneric merge-url (url string)
(:documentation "Merge a string onto a url. FIXME: needs serious clarification"))
(defmethod merge-url ((template httplike-url) string)
(let ((url (copy-url template)))
;; Find the 'leftmost' bit present in string, and replace everything in
;; url to the right of that
(cond ((zerop (length string)) url)
((let ((colon (position #\: string))
(slash (position #\/ string)))
(and colon
(or (not slash)
(< colon slash))))
;; XXX this is probably wrong if STRING is
;; e.g. "foo.bar.com:80/" but I can't be bothered to
;; figure it out now
(parse-urlstring string))
((eql (elt string 0) #\/)
(parse-http-path url string)
url)
((eql (elt string 0) #\?)
(let ((hash (position #\# string)))
(setf (url-query url) (subseq string 1 hash))
(setf (url-fragment url) (if hash (subseq string (1+ hash)) nil)))
url)
((eql (elt string 0) #\#)
(setf (url-fragment url) (subseq string 1))
url)
(t ;; it's a partial path, then
(let* ((p (url-path url))
(c (reverse (split p nil '(#\/)))))
(rplaca c string)
;; we should really check for .. components too
(merge-url template (join "/" (reverse c))))))))
(defgeneric append-url (url string)
(:documentation "Appends a string onto the end of a url - only 'intelligence' is merging leading and trailing /'s"))
(defmethod append-url (url string)
(let ((urlstring (urlstring url)))
(parse-urlstring (concatenate 'string
(if (and (eql (elt urlstring (1- (length urlstring))) #\/)
(eql (elt string 0) #\/))
(subseq urlstring 0 (1- (length urlstring)))
urlstring)
string))))
(define-condition using-untainted-values (warning)
((with-url :initarg :with-url
:reader using-untainted-values-with-url)
(with-call :initarg :with-call
:reader using-untainted-values-with-call))
(:report (lambda (condition stream)
(format stream "Using untainted values with URL ~A, in function ~A"
(using-untainted-values-with-url condition)
(using-untainted-values-with-call condition)))))
(defvar *warn-when-using-untainted-values* nil
"When set to a true value, will signal using-untainted-values (a warning) when functions
that return parameters from the outside are called without tainting.
See CL-TAINT for more about tainting and untainting.")
(defgeneric url-query-alist (url &key prefix case-sensitive)
(:documentation "Return the URL query segment as a ( NAME VALUE ) alist. NAME=VALUE pairs may be separated by ampersand or semicolons. If PREFIX is supplied, select only the parameters that start with that text, and remove it from the keys before returning the list"))
(defmethod url-query-alist ((url t) &key prefix case-sensitive )
(when *warn-when-using-untainted-values*
(warn 'using-untainted-values :with-url url :with-call "url-query-alist"))
(_url-query-alist url :prefix prefix :case-sensitive case-sensitive))
(defgeneric _url-query-alist (url &key prefix case-sensitive)
(:documentation "Call that doesn't warn about using untainted values"))
(defmethod _url-query-alist ((url httplike-url) &key prefix case-sensitive )
(let ((s-eq (if case-sensitive #'string= #'string-equal)))
(remove-if-not
(if prefix
(lambda (x)
(destructuring-bind (k v) x
(if (and (>= (length k) (length prefix))
(funcall s-eq prefix k :end2 (length prefix)))
(list (subseq k (length prefix)) v)
nil)))
#'identity)
(mapcar (lambda (x)
(destructuring-bind (k &optional (v "")) (split x 2 '(#\=) )
(list (urlstring-unescape k) (urlstring-unescape v ))))
(split (url-query url) nil '(#\& #\;))))))
(defun tainted-url-query-alist (url &key prefix case-sensitive)
"Return tainted values (see documentation for CL-TAINT), otherwise exactly the same as url-query-alist"
(loop for (key value) in (_url-query-alist url :prefix prefix :case-sensitive case-sensitive)
collect (list key (taint value))))
(defgeneric url-query-param (url parameter-name &key case-sensitive)
(:documentation "Return the values of the query parameter NAME, or NIL if not present"))
(defmethod url-query-param ((url t) name &key case-sensitive )
(when *warn-when-using-untainted-values*
(warn 'using-untainted-values :with-url url :with-call "url-query-param"))
(_url-query-param url name :case-sensitive case-sensitive))
(defgeneric _url-query-param (url parameter-name &key case-sensitive)
(:documentation "Call that doesn't warn about using untainted values"))
(defmethod _url-query-param ((url httplike-url) name &key case-sensitive )
(mapcar #'cadr
(remove-if-not
(lambda (x) (funcall (if case-sensitive #'string= #'string-equal)
name (car x)))
(url-query-alist url))))
(defun tainted-url-query-param (url parameter-name &key case-sensitive)
"Return the parameter value tainted (see CL-TAINT), otherwise the same as url-query-param"
(mapcar #'taint (url-query-param url parameter-name :case-sensitive case-sensitive)))
;;; How to choose the right URL class: add its scheme here
(defparameter *url-schemes*
'(("HTTP" http-url)
("FTP" ftp-url)
("HTTPS" https-url)
("MAILTO" mailto-url)))
(defun url-class-for-scheme (scheme)
(aif (cadr (assoc scheme *url-schemes* :test #'string-equal))
(find-class it)
nil))
(defun make-url (&rest rest &key scheme &allow-other-keys)
(apply #'make-instance (url-class-for-scheme scheme) rest))
(defun parse-urlstring (string &optional (error-if-unparseable-p t))
(let* ((scheme (string-upcase (car (split string 2 '(#\:)))))
(class (url-class-for-scheme scheme)))
(if (and scheme class)
(let ((url (make-instance (url-class-for-scheme scheme)
:string string :unparsed string)))
(parse-url url)
url)
;; no scheme. maybe it's relative. We can try and merge it
;; onto our default *default-url-defaults*
(if (and (boundp '*default-url-defaults*)
*default-url-defaults*
(url-p *default-url-defaults*))
(merge-url *default-url-defaults* string)
(if error-if-unparseable-p
(error "Relative URL and no ~A" '*default-url-defaults*))))))
;; This can be used to set up #u as a reader macro to read in a URL
;; object. Note that this only works for absolute URLs
(defun url-reader (stream subchar arg)
(declare (ignore subchar arg))
(list 'parse-urlstring (read stream t nil t)))
(set-dispatch-macro-character #\# #\u #'url-reader)
(defun urlstring-unescape (url-string)
(do* ((n 0 (+ n 1))
(out '()))
((not (< n (length url-string))) (coerce (reverse out) 'string ))
(let ((c (elt url-string n)))
(setf out
(cond ((eql c #\%)
(progn (setf n (+ 2 n))
(cons (code-char
(or (parse-integer
url-string :start (- n 1)
:end (+ n 1)
:junk-allowed t
:radix 16) 32))
out)))
((eql c #\+)
(cons #\Space out))
(t (cons c out)))))))
;;; This escapes URIs according to the generic URI syntax described in
;;; rfc2396, or at least it does if the character set in use on this host
;;; is close enough to US-ASCII
;;; Allowed characters are
;;; unreserved = alphanum | mark
;;; mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
;;; All others must be escaped
(defparameter +allowed-url-symbols+
(let* ((lowercase "abcdefghijklmnopqrstuvwxyz")
(uppercase (string-upcase lowercase))
(numerical "0123456789")
(extrasyms "-_.!~*'()"))
(map 'list #'identity (concatenate 'string lowercase uppercase numerical extrasyms)))
"List of allowed characters in URLs")
(defun urlstring-reserved-p (c)
(not (member c +allowed-url-symbols+)))
;; Someone reported some speed issues with urlstring-escape. This version seems to work
;; much faster on the normal case and deals better with large strings.
;; Of course, I have no idea if that helps anything at all.
;; -- Alan Shields [15 November 2005]
(defun urlstring-escape (to-be-encoded)
(declare (type string to-be-encoded))
(if (every (complement #'urlstring-reserved-p) to-be-encoded)
to-be-encoded
(urlstring-escape/guts (coerce to-be-encoded 'cons) nil)))
(defun urlstring-escape/guts (input output)
(declare (type (or null (cons character)) input output)
(optimize (speed 3)))
(if (null input)
(coerce (nreverse output) 'string)
(let ((c (car input)))
(urlstring-escape/guts (cdr input) (if (urlstring-reserved-p c)
(append (nreverse (coerce (format nil "%~2,'0X" (char-code c)) 'cons))
output)
(cons c output))))))
(defun link (url &rest attribs)
"Returns a string from a url with options attribs being passed.
Example:
(let ((url (parse-urlstring \"http://localhost\")))
(link url :a 7 :b \"squid\"))
would be:
\"http://localhost/?a=7&b=squid\"
If the URL already has parameters, they are appended as well
If a parameter value is nil, the parameter is skipped."
(declare (type araneida:url url))
(if (null attribs)
(urlstring url)
(format nil "~A?~A=~A~{~A~}"
(urlstring url :query-parameters nil)
(urlstring-escape (string-downcase (symbol-name (first attribs))))
(urlstring-escape (princ-to-string (second attribs)))
(let ((current-params (when (url-query url)
(loop for (attr val . ignore) in (url-query-alist url)
appending (list attr val)))))
(loop for (attrib val . rest) on (append (cddr attribs) current-params) by #'cddr
when (not (null val))
collect (format nil "&~A=~A"
(urlstring-escape (string-downcase (princ-to-string attrib)))
(urlstring-escape (princ-to-string val))))))))
(defun url-query-string-from-alist (alist)
"Creates a properly-url-escaped query string from an alist as from URL-QUERY-ALIST.
This is a good function to use when you need to, say, take a url, modify a few parameters, then re-assemble the url.
For example:
(defun strip-from-url (url &rest params-to-strip)
(let ((current-query (url-query-alist url))
(new-url (copy-url url)))
(setf (url-query new-url)
(araneida:url-query-string-from-alist
(remove-if (lambda (pair)
(member (car pair) params-to-strip :test #'string-equal))
current-query)))
new-url))
Also fun at parties."
(join "&" (mapcar (lambda (x)
(format nil "~A=~A" (urlstring-escape (first x)) (urlstring-escape (second x))))
alist)))
;;; perhaps this should be a setf method on url-query-param
#+nil (defun update-query-param (name value query-string)
"Return a new query string based on QUERY-STRING but with the additional or updated parameter NAME=VALUE"
(let ((pairs (mapcar (lambda (x) (split x 2 '(#\=) ))
(split query-string nil '(#\& #\;)))))
(aif (assoc name pairs :test #'string=)
(rplacd it (list value))
(setf pairs (acons name (list value) pairs)))
(join "&" (mapcar (lambda (x) (s. (car x) "=" (cadr x))) pairs))))
(defmethod print-object ((u url) stream)
(if *print-escape*
(print-unreadable-object (u stream :type t :identity t)
(format stream "\"~A\"" (urlstring u)))
(princ (urlstring u) stream)))
(defmacro with-url-params ((&rest parameters) url-place &body body)
"binds parameters to the values of the url parameters.
See WITH-TAINTED-URL-PARAMETERS for the tainted variety.
Non-present values will be bound to nil.
NB: matching is case insensitive."
(once-only (url-place)
(with-gensyms (alist)
`(let ((,alist (araneida:url-query-alist ,url-place)))
(let (,@(mapcar (lambda (param)
`(,param (second (assoc ,(symbol-name param) ,alist :test #'string-equal))))
parameters))
,@body)))))
(defmacro with-tainted-url-params ((&rest parameters) url-place &body body)
"binds parameters to the tainted values of the url parameters.
See WITH-URL-PARAMETERS for the untainted variety.
Non-present values will be bound to nil. Note that that is NOT (taint nil), it's just nil.
NB: matching is case insensitive."
(once-only (url-place)
(with-gensyms (alist)
`(let ((,alist (araneida:tainted-url-query-alist ,url-place)))
(let (,@(mapcar (lambda (param)
`(,param (second (assoc ,(symbol-name param) ,alist :test #'string-equal))))
parameters))
,@body)))))
|