md4tj/md4tj_parse.el

222 lines
7.9 KiB
EmacsLisp

;;; md4tj_parse --- Summary
;;; Commentary:
;;; Code:
(require 'cl-lib)
(defun getline ()
"Get current line from loaded buffer."
(buffer-substring-no-properties (line-beginning-position) (line-end-position)))
(defun md4tj-begin-tag (tag &optional attrs)
"Return beginning html tag for TAG with optional ATTRS."
(concat "<" tag
(mapconcat 'identity (cl-map 'listp (lambda (l) (concat " " (nth 0 l) "=" "\"" (nth 1 l) "\"")) attrs) "")
">"))
(defun md4tj-end-tag (tag)
"Return end html tag for TAG."
(concat "</" tag ">"))
(defun md4tj-process-header (line)
"Process LINE known to be header, return HTML."
(let ((level (length (nth 0 (split-string line)))))
(if (or (< level 0) (> level 6))
(error (concat "Error parsing: " line "\n"))
(concat (md4tj-begin-tag (concat "h" (number-to-string level)))
(mapconcat 'identity (cdr (split-string line)) " ")
(md4tj-end-tag (concat "h" (number-to-string level)))))))
(defun md4tj-process-paragraph (line)
"Process LINE that is paragraph, return HTML."
(concat (md4tj-begin-tag "p") line (md4tj-end-tag "p")))
(defun md4tj-process-line (line)
"Process all inline elements of the LINE, return HTML."
;; Finally strikethrough
(replace-regexp-in-string
"~~\\(.*\\)~~"
"<s>\\1</s>"
;; Then highlight
(replace-regexp-in-string
"==\\(.*\\)=="
"<mark>\\1</mark>"
;; Then links
(replace-regexp-in-string
"\\[\\([^\\[]*\\)](\\([^\\[]*\\))"
(concat (md4tj-begin-tag "a" (list '("href" "\\2"))) "\\1" (md4tj-end-tag "a"))
;; Then images
(replace-regexp-in-string
"!\\[\\([^\\[]*\\)](\\([^\\[]*\\))"
(md4tj-begin-tag "img" (list '("src" "\\2") '("alt" "\\1")))
;; Then videos
(replace-regexp-in-string
"!!\\[\\([^\\[]*\\)](\\([^\\[]*\\))"
(concat (md4tj-begin-tag "video" (list '("src" "\\2") '("type" "video/webm") '("controls" "true"))) "\\1" (md4tj-end-tag "video"))
;; Then emphasis
(replace-regexp-in-string
"\\*\\(.*\\)\\*"
"<em>\\1</em>"
;; Then strong
(replace-regexp-in-string
"\\*\\*\\(.*\\)\\*\\*"
"<strong>\\1</strong>"
;; First code
(replace-regexp-in-string
"`\\(.*\\)`"
"<code>\\1</code>" line)))))))))
(defun md4tj-clean-multiline (line)
"Clean LINE of markdown syntax for ul."
(replace-regexp-in-string
"^```" ""
(replace-regexp-in-string
"^[0-9]+\\. " ""
(replace-regexp-in-string "^- " "" line))))
(defun md4tj-clean-code-for-html (line)
"Escape characters in LINE that would be misinterpreted by the browser."
(string-replace
"<" "&lt;"
(string-replace ">" "&gt;" line)))
(defun md4tj-convert-line-to-html (line state)
"Process LINE with STATE and return html."
(let ((cleanline (md4tj-clean-multiline line)))
;; If this is a signal to include another file
(cond ((string-match "^@@INCLUDE" line) (md4tj-parse-to-string (nth 1 (split-string line))))
((string-match "^@@LASTUPDATED" line) (concat "Last updated: " (current-time-string)))
;; If this is some other signal, ignore
((string-match "^@@" line) "")
;; Otherwise, process as normal
(t
(concat
;; Beginning of multiline block
(cond ((eq state 'beginul) "<ul>\n<li>")
((eq state 'beginol) "<ol>\n<li>")
((eq state 'begincode) "<pre>\n<code>")
((eq state 'ul) "<li>")
((eq state 'ol) "<li>")
((eq state 'code) "")
((eq state 'endul) "</ul>\n")
((eq state 'endol) "</ol>\n")
((eq state 'endcode) "</code>\n</pre>\n")
(t ""))
;; Body
(cond ((or (eq state 'code) (eq state 'begincode)) (md4tj-clean-code-for-html cleanline))
((string-match "^#+ " cleanline) (md4tj-process-header (md4tj-process-line cleanline)))
((string= "---" cleanline) "<hr>") ;; horizontal line
((= (length cleanline) 0) "<br/>") ;; blank line
(t (md4tj-process-paragraph (md4tj-process-line cleanline))))
;; End of multiline block
(cond ((or (eq state 'ul) (eq state 'beginul)) "</li>")
((or (eq state 'ol) (eq state 'beginol)) "</li>")
((eq state 'code) "")
(t "")))))))
(defun md4tj-next-state (currline prevstate)
"Return the state based on CURRLINE and PREVSTATE."
(cond ((and (string-match "^- " currline) (not (or (eq prevstate 'beginul) (eq prevstate 'ul))) 'beginul))
((and (string-match "^- " currline) (or (eq prevstate 'beginul) (eq prevstate 'ul)) 'ul))
((and (not (string-match "^- " currline)) (or (eq prevstate 'ul) (eq prevstate 'beginul))) 'endul)
((and (string-match "^[0-9]+\\. " currline) (not (or (eq prevstate 'beginol) (eq prevstate 'ol))) 'beginol))
((and (string-match "^[0-9]+\\. " currline) (or (eq prevstate 'beginol) (eq prevstate 'ol)) 'ol))
((and (not (string-match "^[0-9]+\\. " currline)) (or (eq prevstate 'ol) (eq prevstate 'beginol))) 'endol)
((and (string-match "^```" currline) (not (or (eq prevstate 'begincode) (eq prevstate 'code))) 'begincode))
((and (not (string-match "```$" currline)) (or (eq prevstate 'begincode) (eq prevstate 'code)) 'code))
((and (string-match "```$" currline) (or (eq prevstate 'code) (eq prevstate 'begincode))) 'endcode)
(t 'normal)))
(defun md4tj-begin ()
"Insert beginning code for all html."
(concat "<!DOCTYPE html>\n"
(md4tj-begin-tag "html" (list (list "lang" "en-us"))) "\n"))
(defun md4tj-finalize (state)
"Finalizes HTML document by inserting missing end tags based on STATE."
(concat
(cond ((or (eq state 'beginul) (eq state 'ul)) "</ul>")
((or (eq state 'beginol) (eq state 'ol)) "</ol>")
((or (eq state 'begincode) (eq state 'code)) "</code>")
(t ""))
"</body>\n"
"</html>"))
(defun md4tj-find-metas ()
"Return all lines starting with @@ as list of split strings."
(save-excursion
(let ((meta-list nil))
(goto-char (point-min))
(while (re-search-forward "^@@" nil t)
(setq meta-list (cons (split-string (getline)) meta-list)))
meta-list)))
(defun md4tj-list-to-tuple-list (list)
"Convert LIST to tuple list."
(let ((tuples nil))
(dolist (i (number-sequence 0 (- (length list) 1) 2))
(setq tuples (cons (cons (nth i list) (list (nth (+ i 1) list))) tuples)))
(reverse tuples)))
(defun md4tj-meta-to-html (meta)
"Convert META to html."
(cond ((string= (nth 0 meta) "@@TITLE")
(concat "<title>" (mapconcat 'identity (cdr meta) " ") "</title>\n"))
((string= (nth 0 meta) "@@META")
(concat (md4tj-begin-tag "meta" (md4tj-list-to-tuple-list (cdr meta)))))
((string= (nth 0 meta) "@@CSS")
(concat (md4tj-begin-tag "link" (list (list "rel" "stylesheet") (list "href" (nth 1 meta))))))))
(defun md4tj-head ()
"Return text for head element on current buffer."
(concat
(md4tj-begin-tag "head") "\n"
(mapconcat 'md4tj-meta-to-html (md4tj-find-metas) "\n")
"\n"
(md4tj-end-tag "head") "\n"))
(defun md4tj-parse (mdfile outfile)
"Entry point to parse MDFILE and output to OUTFILE."
(let ((inbuf (generate-new-buffer " in"))
(outbuf (generate-new-buffer " out"))
(state 'normal)
(line nil))
(set-buffer inbuf)
(insert-file-contents mdfile)
(goto-char (point-min))
(set-buffer outbuf)
(insert (md4tj-begin))
(insert (with-current-buffer inbuf (md4tj-head)))
(insert (md4tj-begin-tag "body"))
(while (with-current-buffer inbuf (< (point) (point-max)))
(setq line (with-current-buffer inbuf (getline)))
(setq state (md4tj-next-state line state))
;; Insert next line(s) into output file
(insert (concat (md4tj-convert-line-to-html line state) "\n"))
;; Advance input file by a line
(with-current-buffer inbuf (forward-line)))
(insert (md4tj-finalize state))
;; Write outbuf to outfile
(write-region nil nil outfile nil)))
(defun md4tj-parse-to-string (mdfile)
"Parse MDFILE, return conversion to HTML as string."
(with-temp-buffer
(let ((acc nil)
(line nil)
(state 'normal))
(insert-file-contents mdfile)
(goto-char (point-min))
(while (< (point) (point-max))
(setq line (getline))
(setq state (md4tj-next-state line state))
(setq acc (concat acc (md4tj-convert-line-to-html line state) "\n"))
(forward-line))
acc)))
(provide 'md4tj_parse)
;;; md4tj_parse.el ends here