#lang racket/base (require (only-in html read-html-as-xml) racket/file racket/function racket/match scribble/xref setup/xref (only-in xml xml->xexpr element xexpr->string)) (provide scribble-doc/html binding->path+anchor) ;;; Extract Scribble documentation as modified HTML suitable for ;;; Emacs' shr renderer. (define (scribble-doc/html stx) (define xexpr (scribble-doc/xexpr stx)) (and xexpr (xexpr->string xexpr))) (define (scribble-doc/xexpr stx) (define xexpr (scribble-doc/xexpr-raw stx)) (and xexpr (massage-xexpr xexpr))) (define (scribble-doc/xexpr-raw stx) (define-values (path anchor) (binding->path+anchor stx)) (and path anchor (scribble-get-xexpr path anchor))) (define (binding->path+anchor stx) (define xref (load-collections-xref)) (define tag (and (identifier? stx) (xref-binding->definition-tag xref stx 0))) (cond [tag (xref-tag->path+anchor xref tag)] [else (values #f #f)])) (define (scribble-get-xexpr path anchor) (match (let loop ([es (main-elements (html-file->xexpr path))]) (match es [(list) (list)] [(cons (? (curryr anchored-element anchor) this) more) ;; Accumulate until another intrapara with an anchor (cons this (let get ([es more]) (match es [(list) (list)] [(cons (? anchored-element) _) (list)] ;stop [(cons this more) (cons this (get more))])))] [(cons _ more) (loop more)])) [(list) #f] [xs `(div () ,@xs)])) (module+ test (require rackunit) (test-case "procedure" (check-not-false (scribble-doc/xexpr #'print))) (test-case "syntax" (check-not-false (scribble-doc/xexpr #'match))) (test-case "parameter" (check-not-false (scribble-doc/xexpr #'current-eval))) (test-case "indented sub-item" (check-not-false (scribble-doc/xexpr #'struct-out))) (test-case "deftogether" (test-case "1 of 2" (check-not-false (scribble-doc/xexpr #'lambda))) (test-case "2 of 2" (check-not-false (scribble-doc/xexpr #'λ)))) (check-not-false (scribble-doc/xexpr #'xref-binding->definition-tag))) (define (main-elements x) (match x [`(x () "\n" (html () (head ,_ . ,_) (body ,_ (div ([class "tocset"]) . ,_) (div ([class "maincolumn"]) (div ([class "main"]) . ,es)) . ,_))) es] [_ '()])) ;; anchored-element : xexpr? (or/c #f string?) -> (or/c #f string?) ;; When `name` is #f, return the first anchor having any name. ;; Otherwise, return the first anchor having `name`. (define (anchored-element x [name #f]) (define (anchor xs) (for/or ([x (in-list xs)]) (match x [`(a ((name ,a)) . ,_) (or (not name) (equal? name a))] [`(,tag ,attrs . ,es) (anchor es)] [_ #f]))) (match x [`(div ((class "SIntrapara")) (blockquote ((class "SVInsetFlow")) (table ,(list-no-order `(class "boxed RBoxed") _ ...) . ,es))) ;; That's likely sufficient to say we're in HTML resulting from a ;; Scribble defXXX form. From here on out, there can be some ;; variation, so just look recursively for anchors within `es'. (anchor es)] [`(blockquote ((class "leftindent")) (p ()) (div ((class "SIntrapara")) (blockquote ((class "SVInsetFlow")) (table ,(list-no-order `(class "boxed RBoxed") _ ...) . ,es))) ,_ ...) (anchor es)] [_ #f])) (define (html-file->xexpr pathstr) (xml->xexpr (element #f #f 'x '() (read-html-as-xml (open-input-string (file->string pathstr)))))) ;; This is a big ole pile of poo, attempting to simplify and massage ;; the HTML so that Emacs shr renders it in the least-worst way. ;; ;; Note: Emacs shr renderer removes leading spaces and nbsp from