diff --git a/coleslaw.asd b/coleslaw.asd index fe13837..24d224d 100644 --- a/coleslaw.asd +++ b/coleslaw.asd @@ -13,7 +13,8 @@ :inferior-shell :cl-fad :cl-ppcre - :closer-mop) + :closer-mop + :cl-unicode) :serial t :components ((:file "packages") (:file "util") diff --git a/src/content.lisp b/src/content.lisp index a19be2d..87698c3 100644 --- a/src/content.lisp +++ b/src/content.lisp @@ -22,16 +22,24 @@ ;; Slugs -(defun slug-char-p (char) +(defun slug-char-p (char &key (allowed-chars (list #\- #\~))) "Determine if CHAR is a valid slug (i.e. URL) character." - (or (char<= #\0 char #\9) - (char<= #\a char #\z) - (char<= #\A char #\Z) - (member char '(#\_ #\-)))) + ; use the first char of the general unicode category as kind of + ; hyper general category + (let ((cat (aref (cl-unicode:general-category char) 0)) + (allowed-cats (list #\L #\N))) ; allowed Unicode categories in URLs + (cond + ((member cat allowed-cats) t) + ((member char allowed-chars) t) + (t nil)))) + +(defun unicode-space-p (char) + "Determine if CHAR is a kind of whitespace by unicode category means" + (char= (aref (cl-unicode:general-category char) 0) #\Z)) (defun slugify (string) "Return a version of STRING suitable for use as a URL." - (remove-if-not #'slug-char-p (substitute #\- #\Space string))) + (remove-if-not #'slug-char-p (substitute-if #\- #'unicode-space-p string))) ;; Content Types