;;; md4tj_parse --- Summary ;;; Commentary: ;;; Code: (require 'cl-lib) (defun getline () "Get current line from loaded buffer." (buffer-substring-no-properties (line-beginning-position) (line-end-position))) (defun md4tj-begin-tag (tag &optional attrs) "Return beginning html tag for TAG with optional ATTRS." (concat "<" tag (mapconcat 'identity (cl-map 'listp (lambda (l) (concat " " (nth 0 l) "=" "\"" (nth 1 l) "\"")) attrs) "") ">")) (defun md4tj-end-tag (tag) "Return end html tag for TAG." (concat "")) (defun md4tj-process-header (line) "Process LINE known to be header, return HTML." (let ((level (length (nth 0 (split-string line))))) (if (or (< level 0) (> level 6)) (error (concat "Error parsing: " line "\n")) (concat (md4tj-begin-tag (concat "h" (number-to-string level))) (mapconcat 'identity (cdr (split-string line)) " ") (md4tj-end-tag (concat "h" (number-to-string level))))))) (defun md4tj-process-paragraph (line) "Process LINE that is paragraph, return HTML." (concat (md4tj-begin-tag "p") line (md4tj-end-tag "p"))) (defun md4tj-process-line (line) "Process all inline elements of the LINE, return HTML." ;; Finally strikethrough (replace-regexp-in-string "~~\\(.*\\)~~" "\\1" ;; Then highlight (replace-regexp-in-string "==\\(.*\\)==" "\\1" ;; Then links (replace-regexp-in-string "\\[\\([^\\[]*\\)](\\([^\\[(]*\\))" (concat (md4tj-begin-tag "a" (list '("href" "\\2"))) "\\1" (md4tj-end-tag "a")) ;; Then images (replace-regexp-in-string "!\\[\\([^\\[]*\\)](\\([^\\[(]*\\))" (md4tj-begin-tag "img" (list '("src" "\\2") '("alt" "\\1"))) ;; Then videos (replace-regexp-in-string "!!\\[\\([^\\[]*\\)](\\([^\\[(]*\\))" (concat (md4tj-begin-tag "video" (list '("src" "\\2") '("type" "video/webm") '("controls" "true"))) "\\1" (md4tj-end-tag "video")) ;; Then emphasis (replace-regexp-in-string "\\*\\(.*\\)\\*" "\\1" ;; Then strong (replace-regexp-in-string "\\*\\*\\(.*\\)\\*\\*" "\\1" ;; First code (replace-regexp-in-string "`\\(.*\\)`" "\\1" line))))))))) (defun md4tj-clean-multiline (line) "Clean LINE of markdown syntax for ul." (replace-regexp-in-string "^```" "" (replace-regexp-in-string "^[0-9]+\\. " "" (replace-regexp-in-string "^- " "" line)))) (defun md4tj-clean-code-for-html (line) "Escape characters in LINE that would be misinterpreted by the browser." (string-replace "<" "<" (string-replace ">" ">" line))) (defun md4tj-convert-line-to-html (line state) "Process LINE with STATE and return html." (let ((cleanline (md4tj-clean-multiline line))) ;; If this is a signal to include another file (cond ((string-match "^@@INCLUDE" line) (md4tj-parse-to-string (nth 1 (split-string line)))) ((string-match "^@@LASTUPDATED" line) (concat "Last updated: " (current-time-string))) ;; If this is some other signal, ignore ((string-match "^@@" line) "") ;; Otherwise, process as normal (t (concat ;; Beginning multiline block/ending prev multiline block (mapconcat #'md4tj-state-to-html state "\n") ;; Body (cond ((or (eq (nth 1 state) 'code) (eq (nth 1 state) 'begincode)) (md4tj-clean-code-for-html cleanline)) ((string-match "^#+ " cleanline) (md4tj-process-header (md4tj-process-line cleanline))) ((string= "---" cleanline) "
") ;; horizontal line ((= (length cleanline) 0) "
") ;; blank line (t (md4tj-process-paragraph (md4tj-process-line cleanline)))) ;; End of multiline block (cond ((or (eq (nth 1 state) 'ul) (eq (nth 1 state) 'beginul)) "") ((or (eq (nth 1 state) 'ol) (eq (nth 1 state) 'beginol)) "") ((eq (nth 1 state) 'code) "") (t ""))))))) (defun md4tj-state-to-html (state) "Convert STATE to html." (cond ((eq state 'beginul) "\n") ((eq state 'endol) "\n") ((eq state 'endcode) "\n\n") (t ""))) (defun md4tj-next-state (currline prevstate) "Return the state based on CURRLINE and PREVSTATE." (list ;; End state (cond ((and (string-match "^- " currline) (or (eq prevstate 'ol) (eq prevstate 'beginol))) 'endol) ((and (string-match "^[0-9]+\\. " currline) (or (eq prevstate 'ul) (eq prevstate 'beginul))) 'endul) ((and (not (string-match "^- " currline)) (or (eq prevstate 'beginul) (eq prevstate 'ul))) 'endul) ((and (not (string-match "[0-9]+\\. " currline)) (or (eq prevstate 'beginol) (eq prevstate 'ol))) 'endul) ((and (string-match "```$" currline) (or (eq prevstate 'code) (eq prevstate 'begincode))) 'endcode) (t 'nothing)) ;; Begin state (or next line's prevstate) (cond ((and (string-match "^- " currline) (not (or (eq prevstate 'beginul) (eq prevstate 'ul))) 'beginul)) ((and (string-match "^- " currline) (or (eq prevstate 'beginul) (eq prevstate 'ul)) 'ul)) ((and (string-match "^[0-9]+\\. " currline) (not (or (eq prevstate 'beginol) (eq prevstate 'ol))) 'beginol)) ((and (string-match "^[0-9]+\\. " currline) (or (eq prevstate 'beginol) (eq prevstate 'ol)) 'ol)) ((and (string-match "^```" currline) (not (or (eq prevstate 'begincode) (eq prevstate 'code))) 'begincode)) ((and (not (string-match "```$" currline)) (or (eq prevstate 'begincode) (eq prevstate 'code)) 'code)) (t 'normal)))) (defun md4tj-begin () "Insert beginning code for all html." (concat "\n" (md4tj-begin-tag "html" (list (list "lang" (or (nth 1 (car (cl-remove-if-not (lambda (meta) (string= (car meta) "@@LANG")) (md4tj-find-metas)))) "en")))) "\n")) (defun md4tj-finalize (state) "Finalizes HTML document by inserting missing end tags based on STATE." (concat (cond ((or (eq state 'beginul) (eq state 'ul)) "") ((or (eq state 'beginol) (eq state 'ol)) "") ((or (eq state 'begincode) (eq state 'code)) "") (t "")) "\n" "")) (defun md4tj-find-metas () "Return all lines starting with @@ as list of split strings." (save-excursion (let ((meta-list nil)) (goto-char (point-min)) (while (re-search-forward "^@@" nil t) (setq meta-list (cons (split-string (getline)) meta-list))) meta-list))) (defun md4tj-list-to-tuple-list (list) "Convert LIST to tuple list." (let ((tuples nil)) (dolist (i (number-sequence 0 (- (length list) 1) 2)) (setq tuples (cons (cons (nth i list) (list (nth (+ i 1) list))) tuples))) (reverse tuples))) (defun md4tj-meta-to-html (meta) "Convert META to html." (cond ((string= (nth 0 meta) "@@TITLE") (concat "" (mapconcat 'identity (cdr meta) " ") "\n")) ((string= (nth 0 meta) "@@META") (concat (md4tj-begin-tag "meta" (md4tj-list-to-tuple-list (cdr meta))))) ((string= (nth 0 meta) "@@CSS") (concat (md4tj-begin-tag "link" (list (list "rel" "stylesheet") (list "href" (nth 1 meta)))))))) (defun md4tj-head () "Return text for head element on current buffer." (concat (md4tj-begin-tag "head") "\n" (mapconcat 'md4tj-meta-to-html (md4tj-find-metas) "\n") "\n" (md4tj-end-tag "head") "\n")) (defun md4tj-parse (mdfile outfile) "Entry point to parse MDFILE and output to OUTFILE." (let ((inbuf (generate-new-buffer " in")) (outbuf (generate-new-buffer " out")) (fullstate (list 'nothing 'normal)) (line nil)) (set-buffer inbuf) (insert-file-contents mdfile) (goto-char (point-min)) (set-buffer outbuf) (insert (md4tj-begin)) (insert (with-current-buffer inbuf (md4tj-head))) (insert (md4tj-begin-tag "body")) (while (with-current-buffer inbuf (< (point) (point-max))) (setq line (with-current-buffer inbuf (getline))) (setq fullstate (md4tj-next-state line (nth 1 fullstate))) ;; Insert next line(s) into output file (insert (concat (md4tj-convert-line-to-html line fullstate) "\n")) ;; Advance input file by a line (with-current-buffer inbuf (forward-line))) (insert (md4tj-finalize (nth 1 fullstate))) ;; Write outbuf to outfile (write-region nil nil outfile nil))) (defun md4tj-parse-to-string (mdfile) "Parse MDFILE, return conversion to HTML as string." (with-temp-buffer (let ((acc nil) (line nil) (fullstate (list 'nothing 'normal))) (insert-file-contents mdfile) (goto-char (point-min)) (while (< (point) (point-max)) (setq line (getline)) (setq fullstate (md4tj-next-state line (nth 1 fullstate))) (setq acc (concat acc (md4tj-convert-line-to-html line fullstate) "\n")) (forward-line)) acc))) (provide 'md4tj_parse) ;;; md4tj_parse.el ends here