;;; md4tj_parse --- Summary ;;; Commentary: ;;; Code: (require 'cl-lib) (defun getline () "Get current line from loaded buffer." (buffer-substring-no-properties (line-beginning-position) (line-end-position))) (defun md4tj-begin-tag (tag &optional attrs) "Return beginning html tag for TAG with optional ATTRS." (concat "<" tag (mapconcat 'identity (cl-map 'listp (lambda (l) (concat " " (nth 0 l) "=" "\"" (nth 1 l) "\"")) attrs) "") ">")) (defun md4tj-end-tag (tag) "Return end html tag for TAG." (concat "")) (defun md4tj-process-header (line) "Process LINE known to be header, return HTML." (let ((level (length (nth 0 (split-string line))))) (if (or (< level 0) (> level 6)) (error (concat "Error parsing: " line "\n")) (concat (md4tj-begin-tag (concat "h" (number-to-string level))) (mapconcat 'identity (cdr (split-string line)) " ") (md4tj-end-tag (concat "h" (number-to-string level))))))) (defun md4tj-process-paragraph (line) "Process LINE that is paragraph, return HTML." (concat (md4tj-begin-tag "p") line (md4tj-end-tag "p"))) (defun md4tj-process-line (line) "Process all inline elements of the LINE, return HTML." ;; Finally links (replace-regexp-in-string "\\[\\(.*\\)\\](\\(.*\\))" (concat (md4tj-begin-tag "a" (list '("href" "\\2"))) "\\1" (md4tj-end-tag "a")) ;; Then images (replace-regexp-in-string "!\\[\\(.*\\)\\](\\(.*\\))" (md4tj-begin-tag "img" (list '("src" "\\2") '("alt" "\\1"))) ;; Then emphasis (replace-regexp-in-string "\\*\\(.*\\)\\*" "\\1" ;; Then strong (replace-regexp-in-string "\\*\\*\\(.*\\)\\*\\*" "\\1" ;; First code (replace-regexp-in-string "`\\(.*\\)`" "\\1" line)))))) (defun md4tj-clean-multiline (line) "Clean LINE of markdown syntax for ul." (replace-regexp-in-string "^```" "" (replace-regexp-in-string "^[0-9]+\\. " "" (replace-regexp-in-string "^- " "" line)))) (defun md4tj-clean-code-for-html (line) "Escape characters in LINE that would be misinterpreted by the browser." (string-replace "<" "<" (string-replace ">" ">" line))) (defun md4tj-convert-line-to-html (line state) "Process LINE with STATE and return html." (let ((cleanline (md4tj-clean-multiline line))) (concat ;; Beginning of multiline block (cond ((eq state 'beginul) "\n") ((eq state 'endol) "\n") ((eq state 'endcode) "\n\n") (t "")) ;; Body (cond ((or (eq state 'code) (eq state 'begincode)) (md4tj-clean-code-for-html cleanline)) ((string-match "^#+ " cleanline) (md4tj-process-header (md4tj-process-line cleanline))) ((string= "---" cleanline) "
") ;; horizontal line ((= (length cleanline) 0) "
") ;; blank line (t (md4tj-process-paragraph (md4tj-process-line cleanline)))) ;; End of multiline block (cond ((or (eq state 'ul) (eq state 'beginul)) "") ((or (eq state 'ol) (eq state 'beginol)) "") ((eq state 'code) "") (t ""))))) (defun md4tj-next-state (currline prevstate) "Return the state based on CURRLINE and PREVSTATE." (cond ((and (string-match "^- " currline) (not (or (eq prevstate 'beginul) (eq prevstate 'ul))) 'beginul)) ((and (string-match "^- " currline) (or (eq prevstate 'beginul) (eq prevstate 'ul)) 'ul)) ((and (not (string-match "^- " currline)) (or (eq prevstate 'ul) (eq prevstate 'beginul))) 'endul) ((and (string-match "^[0-9]+\\. " currline) (not (or (eq prevstate 'beginol) (eq prevstate 'ol))) 'beginol)) ((and (string-match "^[0-9]+\\. " currline) (or (eq prevstate 'beginol) (eq prevstate 'ol)) 'ol)) ((and (not (string-match "^[0-9]+\\. " currline)) (or (eq prevstate 'ol) (eq prevstate 'beginol))) 'endol) ((and (string-match "^```" currline) (not (or (eq prevstate 'begincode) (eq prevstate 'code))) 'begincode)) ((and (not (string-match "```$" currline)) (or (eq prevstate 'begincode) (eq prevstate 'code)) 'code)) ((and (string-match "```$" currline) (or (eq prevstate 'code) (eq prevstate 'begincode))) 'endcode) (t 'normal))) (defun md4tj-finalize (state) "Finalizes HTML document by inserting missing end tags based on STATE." (concat (cond ((or (eq state 'beginul) (eq state 'ul)) "") ((or (eq state 'beginol) (eq state 'ol)) "") ((or (eq state 'begincode) (eq state 'code)) "") (t "")) "\n" "")) (defun md4tj-parse (mdfile outfile) "Entry point to parse MDFILE and output to OUTFILE." (let ((inbuf (generate-new-buffer " in")) (outbuf (generate-new-buffer " out")) (state 'normal) (line nil)) (set-buffer inbuf) (insert-file-contents mdfile) (goto-char (point-min)) (set-buffer outbuf) (insert "\n") (while (with-current-buffer inbuf (< (point) (point-max))) (setq line (with-current-buffer inbuf (getline))) (setq state (md4tj-next-state line state)) ;; Insert next line(s) into output file (insert (concat (md4tj-convert-line-to-html line state) "\n")) ;; Advance input file by a line (with-current-buffer inbuf (forward-line))) (insert (md4tj-finalize state)) ;; Write outbuf to outfile (write-region nil nil outfile nil))) (provide 'md4tj_parse) ;;; md4tj_parse.el ends here