Finish updating wordpress importer.

This commit is contained in:
Brit Butler 2012-08-20 22:36:48 -04:00
parent 9ab56bddd1
commit ca37d63df8
2 changed files with 31 additions and 23 deletions

3
TODO
View file

@ -13,10 +13,11 @@ Plugins? Injection support for HEAD and BODY. What about predicate-based injecti
How is it served? Hunchentoot, Lighttpd, S3, whomever!
TODO:
; fix plugins: s3
; doc themes and plugins
; populate prev and next links
; implement plugins: atom, markdown, pygment/highlighting
; actually start using this for redlinernotes.com? Move over May-2007 to present.
; fix plugins: s3
;; Incremental compile: only "touched" posts+tags+months. By-20 must be redone, of course.
;; ^^ Long as we're overhauling, maybe come up with a commithook scheme that runs main
;; and main becomes load-config, compile-theme, compile-blog?

View file

@ -1,4 +1,4 @@
(eval-when (:load-toplevel)
(eval-when (:compile-toplevel :load-toplevel)
(ql:quickload '(coleslaw cxml cl-ppcre local-time)))
(defpackage :coleslaw-import
@ -7,29 +7,37 @@
#:load-config
#:*config*
#:repo)
(:import-from :local-time #:+short-month-names+
#:encode-timestamp)
(:import-from :local-time #:+short-month-names+)
(:import-from :cl-ppcre #:regex-replace-all))
(in-package :coleslaw-import)
(defun node-val (name post)
(flet ((value (node)
(let ((child (dom:last-child node)))
(when child (dom:data child)))))
(let ((nodes (dom:get-elements-by-tag-name post name)))
(if (string= "category" name)
(loop for node across nodes collecting (value node))
(when (plusp (length nodes)) (value (elt nodes 0)))))))
(defun get-timestamp (post)
(destructuring-bind (day date month year time tz)
(cl-ppcre:split " " (node-val "pubDate" post))
(format nil "~a-~2,'0d-~2,'0d ~a" year (position month +short-month-names+
:test #'string=) date time)))
(defun import-post (post)
(labels ((nodes (name)
(dom:get-elements-by-tag-name post name))
(value (node)
(let ((child (dom:last-child node)))
(when child (dom:data child))))
(node-val (name)
(let ((nodes (nodes name)))
(if (string= "category" name)
(loop for node across nodes collecting (value node))
(when (plusp (length nodes)) (value (elt nodes 0)))))))
(when (and (string= "publish" (node-val "wp:status")) ; is it public?
(string= "post" (node-val "wp:post_type"))) ; is it a post?
(export-post (node-val "title") (node-val "category") (node-val "pubDate")
(regex-replace-all (string #\Newline)
(node-val "content:encoded") "<br>")
(format nil "~a.post" (slugify (node-val "title")))))))
(when (and (string= "publish" (node-val "wp:status" post)) ; is it public?
(string= "post" (node-val "wp:post_type" post)) ; is it a post?
(string>= (get-timestamp post) "2007-05"))
(let ((content (node-val "content:encoded" post))
(slug (slugify (node-val "title" post))))
(when (string= "" slug)
(error "No valid slug-title for post ~a." (get-timestamp post)))
(export-post (node-val "title" post) (node-val "category" post)
(get-timestamp post) content
(format nil "~a.post" slug)))))
(defun export-post (title tags date content path)
(with-open-file (out (merge-pathnames path (repo *config*))
@ -43,12 +51,11 @@
(format out "date: ~A~%" date)
(format out "format: html~%") ; post format: html, md, rst, etc
(format out ";;;;;~%")
(format out "~A~%" (post-content post))))
(format out "~A~%" (regex-replace-all (string #\Newline) content "<br>"))))
(defun import-posts (filepath)
(let* ((xml (cxml:parse-file filepath (cxml-dom:make-dom-builder)))
(posts (dom:get-elements-by-tag-name xml "item")))
(load-config)
(ensure-directories-exist (repo *config*))
(dolist (post posts)
(import-post post))))
(loop for post across posts do (import-post post))))