#lang racket/base (require (only-in html read-html-as-xml) racket/contract racket/file racket/format racket/function racket/match racket/path racket/promise racket/string (only-in scribble/core tag?) scribble/xref scribble/blueboxes setup/xref (only-in xml xml->xexpr element xexpr->string)) (provide binding->path+anchor path+anchor->html identifier->bluebox) (module+ test (require rackunit)) (define xref (delay/thread (load-collections-xref))) (define/contract (binding->path+anchor stx) (-> identifier? (or/c #f (cons/c path-string? (or/c #f string?)))) (let* ([xref (force xref)] [tag (xref-binding->definition-tag xref stx 0)] [p+a (and tag (tag->path+anchor xref tag))]) p+a)) (define (tag->path+anchor xref tag) (define-values (path anchor) (xref-tag->path+anchor xref tag)) (and path anchor (cons path anchor))) ;;; Scribble docs as HTML suitable for Emacs' shr renderer (define/contract (path+anchor->html path+anchor) (-> (or/c #f (cons/c path-string? (or/c #f string?))) (or/c #f string?)) (match path+anchor [(cons path anchor) (let* ([xexpr (get-raw-xexpr path anchor)] [xexpr (and xexpr (massage-xexpr path xexpr))] [html (and xexpr (xexpr->string xexpr))]) html)] [_ #f])) (define (get-raw-xexpr path anchor) (define (heading-element? x) (match x [(cons (or 'h1 'h2 'h3 'h4 'h5 'h6) _) #t] [_ #f])) (match (let loop ([es (main-elements (html-file->xexpr path))]) (match es [(list) (list)] [(cons (? (curryr anchored-element anchor) this) more) ;; Accumulate until another intrapara with an anchor, or ;; until a heading element indicating a new subsection. (cons this (let get ([es more]) (match es [(list) (list)] [(cons (? heading-element?) _) (list)] ;stop [(cons (? anchored-element) _) (list)] ;stop [(cons this more) (cons this (get more))])))] [(cons _ more) (loop more)])) [(list) #f] [xs `(div () ,@xs)])) (module+ test (test-case "procedure" (check-not-false (path+anchor->html (binding->path+anchor #'print)))) (test-case "syntax" (check-not-false (path+anchor->html (binding->path+anchor #'match)))) (test-case "parameter" (check-not-false (path+anchor->html (binding->path+anchor #'current-eval)))) (test-case "indented sub-item" (check-not-false (path+anchor->html (binding->path+anchor #'struct-out)))) (test-case "deftogether" (test-case "1 of 2" (check-not-false (path+anchor->html (binding->path+anchor #'lambda)))) (test-case "2 of 2" (check-not-false (path+anchor->html (binding->path+anchor #'λ))))) (check-not-false (path+anchor->html (binding->path+anchor #'xref-binding->definition-tag)))) (define (main-elements x) (match x [`(x () "\n" (html () (head ,_ . ,_) (body ,_ (div ([class "tocset"]) . ,_) (div ([class "maincolumn"]) (div ([class "main"]) . ,es)) . ,_))) es] [_ '()])) ;; anchored-element : xexpr? (or/c #f string?) -> (or/c #f string?) ;; When `name` is #f, return the first anchor having any name. ;; Otherwise, return the first anchor having `name`. (define (anchored-element x [name #f]) (define (anchor xs) (for/or ([x (in-list xs)]) (match x [`(a ((name ,a)) . ,_) (or (not name) (equal? name a))] [`(,tag ,attrs . ,es) (anchor es)] [_ #f]))) (match x [`(div ((class "SIntrapara")) (blockquote ((class "SVInsetFlow")) (table ,(list-no-order `(class "boxed RBoxed") _ ...) . ,es))) ;; That's likely sufficient to say we're in HTML resulting from a ;; Scribble defXXX form. From here on out, there can be some ;; variation, so just look recursively for anchors within `es'. (anchor es)] [`(blockquote ((class "leftindent")) (p ()) (div ((class "SIntrapara")) (blockquote ((class "SVInsetFlow")) (table ,(list-no-order `(class "boxed RBoxed") _ ...) . ,es))) ,_ ...) (anchor es)] [_ #f])) (define (html-file->xexpr pathstr) (xml->xexpr (element #f #f 'x '() (read-html-as-xml (open-input-string (file->string pathstr)))))) ;; This is a big ole pile of poo, attempting to simplify and massage ;; the HTML so that Emacs shr renders it in the least-worst way. ;; ;; Note: Emacs shr renderer removes leading spaces and nbsp from