1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
|
#lang racket/base
(require (only-in html
read-html-as-xml)
racket/file
racket/function
racket/match
scribble/xref
setup/xref
(only-in xml
xml->xexpr
element
xexpr->string))
(provide scribble-doc/html
binding->path+anchor)
;;; Extract Scribble documentation as modified HTML suitable for
;;; Emacs' shr renderer.
(define (scribble-doc/html stx)
(define xexpr (scribble-doc/xexpr stx))
(and xexpr (xexpr->string xexpr)))
(define (scribble-doc/xexpr stx)
(define xexpr (scribble-doc/xexpr-raw stx))
(and xexpr (massage-xexpr xexpr)))
(define (scribble-doc/xexpr-raw stx)
(define-values (path anchor) (binding->path+anchor stx))
(and path anchor (scribble-get-xexpr path anchor)))
(define (binding->path+anchor stx)
(define xref (load-collections-xref))
(define tag (and (identifier? stx)
(xref-binding->definition-tag xref stx 0)))
(cond [tag (xref-tag->path+anchor xref tag)]
[else (values #f #f)]))
(define (scribble-get-xexpr path anchor)
(match (let loop ([es (main-elements (html-file->xexpr path))])
(match es
[(list) (list)]
[(cons (? (curryr anchored-element anchor) this) more)
;; Accumulate until another intrapara with an anchor
(cons this
(let get ([es more])
(match es
[(list) (list)]
[(cons (? anchored-element) _) (list)] ;stop
[(cons this more) (cons this (get more))])))]
[(cons _ more) (loop more)]))
[(list) #f]
[xs `(div () ,@xs)]))
(module+ test
(require rackunit)
(test-case "procedure"
(check-not-false (scribble-doc/xexpr #'print)))
(test-case "syntax"
(check-not-false (scribble-doc/xexpr #'match)))
(test-case "parameter"
(check-not-false (scribble-doc/xexpr #'current-eval)))
(test-case "indented sub-item"
(check-not-false (scribble-doc/xexpr #'struct-out)))
(test-case "deftogether"
(test-case "1 of 2"
(check-not-false (scribble-doc/xexpr #'lambda)))
(test-case "2 of 2"
(check-not-false (scribble-doc/xexpr #'λ))))
(check-not-false (scribble-doc/xexpr #'xref-binding->definition-tag)))
(define (main-elements x)
(match x
[`(x () "\n"
(html ()
(head ,_ . ,_)
(body ,_
(div ([class "tocset"]) . ,_)
(div ([class "maincolumn"])
(div ([class "main"]) . ,es))
. ,_)))
es]
[_ '()]))
;; anchored-element : xexpr? (or/c #f string?) -> (or/c #f string?)
;; When `name` is #f, return the first anchor having any name.
;; Otherwise, return the first anchor having `name`.
(define (anchored-element x [name #f])
(define (anchor xs)
(for/or ([x (in-list xs)])
(match x
[`(a ((name ,a)) . ,_) (or (not name) (equal? name a))]
[`(,tag ,attrs . ,es) (anchor es)]
[_ #f])))
(match x
[`(div ((class "SIntrapara"))
(blockquote ((class "SVInsetFlow"))
(table ,(list-no-order `(class "boxed RBoxed") _ ...)
. ,es)))
;; That's likely sufficient to say we're in HTML resulting from a
;; Scribble defXXX form. From here on out, there can be some
;; variation, so just look recursively for anchors within `es'.
(anchor es)]
[`(blockquote ((class "leftindent"))
(p ())
(div ((class "SIntrapara"))
(blockquote ((class "SVInsetFlow"))
(table ,(list-no-order `(class "boxed RBoxed") _ ...)
. ,es)))
,_ ...)
(anchor es)]
[_ #f]))
(define (html-file->xexpr pathstr)
(xml->xexpr
(element #f #f 'x '()
(read-html-as-xml (open-input-string (file->string pathstr))))))
;; This is a big ole pile of poo, attempting to simplify and massage
;; the HTML so that Emacs shr renders it in the least-worst way.
;;
;; Note: Emacs shr renderer removes leading spaces and nbsp from <td>
;; elements -- which messes up the alignment of s-expressions
;; including contracts. But actually, the best place to address that
;; is up in Elisp, not here -- replace in the HTML with some
;; temporary character, then replace that character in the shr output.
(define (massage-xexpr x)
(define kind-xexprs '())
(define provide-xexprs '())
(define (walk x)
(match x
;; The "Provided" title/tooltip. Set aside for later.
[`(span ([title ,(and s (pregexp "^Provided from:"))]) . ,xs)
(set! provide-xexprs (list s))
`(span () ,@(map walk xs))]
;; The HTML for the "kind" (e.g. procedure or syntax or
;; parameter) comes before the rest of the bluebox. Simple HTML
;; renderers like shr don't handle this well. Set aside for
;; later.
[`(div ([class "RBackgroundLabel SIEHidden"])
(div ([class "RBackgroundLabelInner"]) (p () . ,xs)))
(set! kind-xexprs xs)
""]
;; Bold RktValDef, which is the name of the thing.
[`(a ([class ,(pregexp "RktValDef|RktStxDef")] . ,_) . ,xs)
`(b () ,@(map walk xs))]
;; Kill links. (Often these won't work anyway -- e.g. due to
;; problems with "open" and file: links on macOS.)
[`(a ,_ . ,xs)
`(span () ,@(map walk xs))]
;; Kill "see also" notes, since they're N/A w/o links.
[`(div ([class "SIntrapara"])
(blockquote ([class "refpara"]) . ,_))
`(span ())]
;; Delete some things that produce unwanted blank lines and/or
;; indents in simple rendering engines like Emacs' shr.
[`(blockquote ([class ,(or "SVInsetFlow" "SubFlow")]) . ,xs)
`(span () ,@(map walk xs))]
[`(p ([class "RForeground"]) . ,xs)
`(div () ,@(map walk xs))]
;; Let's italicize all RktXXX classes except RktPn.
[`(span ([class ,(pregexp "^Rkt(?!Pn)")]) . ,xs)
`(i () ,@(map walk xs))]
;; Misc element: Just walk kids.
[`(,tag ,attrs . ,xs)
`(,tag ,attrs ,@(map walk xs))]
[x x]))
(match (walk x)
[`(div () . ,xs)
`(div ()
(span ([style "color: #C0C0C0"])
(i () ,@kind-xexprs)
'nbsp
,@provide-xexprs)
,@xs)]))
|