Edit

thodg/cl-css-parser

Branch :

  • Properties
  • Git HTTP https://git.kmx.io/thodg/cl-css-parser.git
    Git SSH git@git.kmx.io:thodg/cl-css-parser.git
    Public ? true
    Name
    Description

    CSS parser using cl-stream

    Users
    -
    +
    thodg
    Tags

  • parser.lisp
  • (in-package :common-lisp-user)
    
    (defpackage :parse-css
      (:use :common-lisp)
      (:export #:parser))
    
    (in-package :parse-css)
    
    ;;  Input buffers
    
    (defun make-input-buffer (&optional string (start 0))
      (let* ((length (when string (- (length string) start)))
    	 (ib (make-array (if string
    			     (* 64 (ceiling length 64))
    			     64)
    			 :element-type 'fixnum
    			 :adjustable t
    			 :fill-pointer 0)))
        (when string
          (setf (fill-pointer ib) length)
          (replace ib string :start2 start))
        ib))
    
    ;;  Parser
    
    (defclass token ()
      ((string :initarg :string
    	   :accessor token-string
    	   :type string)
       (line :initarg :line
    	 :initform 0
    	 :accessor token-line
    	 :type fixnum)
       (character :initarg :character
    	      :initform 0
    	      :accessor token-character
    	      :type fixnum)))
    
    (defclass parser-token (token)
      ((start :initarg :start
    	  :accessor token-start
    	  :type fixnum)))
    
    (defclass parser ()
      ((input :initarg :stream
    	   :reader parser-input
    	   :type stream)
       (input-line :initarg :input-line
    	       :initform 0
    	       :accessor parser-input-line
    	       :type fixnum)
       (input-character :initarg :input-character
    		    :initform -1
    		    :accessor parser-input-character
    		    :type fixnum)
       (eof :initform nil
    	:accessor parser-eof)
       (ib :initform (make-input-buffer)
           :accessor parser-ib
           :type array)
       (match-start :initform 0
    		:accessor parser-match-start
    		:type fixnum)
       (token-stack :initform ()
    		:accessor parser-token-stack
    		:type list)))
    
    (defgeneric push-token (parser))
    (defgeneric pop-token (parser))
    (defgeneric make-token (parser class &rest initargs))
    (defgeneric discard-token (parser))
    (defgeneric ib-push-extend (parser character))
    (defgeneric input-char (parser))
    (defgeneric input-length (parser length))
    (defgeneric parser-match-char (parser &optional char-index))
    (defgeneric match (parser item))
    (defgeneric match-until (parser item))
    (defgeneric match-option (parser function))
    (defgeneric match-times (parser function min max))
    (defgeneric match-comment (parser))
    (defgeneric match-newline (parser))
    (defgeneric match-whitespace (parser))
    (defgeneric match-hex-digit (parser))
    (defgeneric match-escape (parser))
    (defgeneric whitespace-token (parser))
    (defgeneric match-ws* (parser))
    (defgeneric match-ident-char (parser))
    (defgeneric match-ident-char* (parser))
    (defgeneric ident-token (parser))
    (defgeneric function-token (parser))
    (defgeneric at-keyword-token (parser))
    (defgeneric match-string-char (parser end-char))
    (defgeneric match-string (parser end-char))
    (defgeneric string-token (parser))
    (defgeneric match-non-printable (parser))
    (defgeneric match-url-unquoted-char (parser))
    (defgeneric match-url-unquoted (parser))
    (defgeneric url-token (parser))
    (defgeneric match-digit (parser))
    (defgeneric match-digit+ (parser))
    (defgeneric number-token (parser))
    (defgeneric dimension-token (parser))
    (defgeneric percentage-token (parser))
    (defgeneric unicode-range-token (parser))
    (defgeneric include-match-token (parser))
    (defgeneric dash-match-token (parser))
    (defgeneric prefix-match-token (parser))
    (defgeneric suffix-match-token (parser))
    (defgeneric substring-match-token (parser))
    (defgeneric column-token (parser))
    (defgeneric cdo-token (parser))
    (defgeneric cdc-token (parser))
    
    ;;  Parser input
    
    (defmethod ib-string ((ib array) (start fixnum) (end fixnum))
      (let* ((length (- end start))
    	 (s (make-string length)))
        (labels ((at (i j)
    	       (let ((ib-char (aref ib i)))
    		 (unless (or (= length j)
    			     (= ib-char -1))
    		   (setf (char s j) (code-char ib-char))
    		   (at (1+ i) (1+ j))))))
          (at start 0))))
        
    (defmethod ib-push-extend ((p parser) (c fixnum))
      (let ((ib (parser-ib p)))
        (let* ((fill-pointer (fill-pointer ib))
    	   (new-fill-pointer (1+ fill-pointer)))
          (if (= fill-pointer (array-dimension ib 0))
    	  (setf (parser-ib p) (adjust-array ib (+ fill-pointer 64)
    					    :fill-pointer new-fill-pointer))
    	  (setf (fill-pointer ib) new-fill-pointer))
          (locally (declare (optimize (safety 0)))
    	(setf (aref ib fill-pointer) c))
          fill-pointer)))
    
    (defun read-char-code (stream)
      (let ((c (read-char stream nil nil)))
        (if c (char-code c) -1)))
    
    (defmethod input-char ((p parser))
      (let* ((in (parser-input p))
    	 (c (read-char-code in))
    	 (pos (ib-push-extend p c)))
        (cond ((or (and (= #x000A c)
    		    (not (and (< 0 pos)
    			      (= #x000D (aref (parser-ib p) (1- pos))))))
    	       (= #x000D c)
    	       (= #x000C c))
    	   (setf (parser-input-character p) 0)
    	   (incf (parser-input-line p)))
    	  (t
    	   (incf (parser-input-character p))))
        c))
    
    (defmethod input-length ((p parser) (length fixnum))
      (when (< (- (fill-pointer (parser-ib p))
    	      (parser-match-start p))
    	   length)
        (input-char p)
        (input-length p length)))
    
    (defmethod parser-match-char ((p parser) &optional (index 0))
      (input-length p (1+ index))
      (aref (parser-ib p) (+ (parser-match-start p) index)))
    
    ;;  Token stack
    
    (defmethod push-token ((p parser))
      (let ((token (make-instance 'parser-token
    			      :start (parser-match-start p)
    			      :line (parser-input-line p)
    			      :character (parser-input-character p))))
        (push token (parser-token-stack p))))
    
    (defmethod pop-token ((p parser))
      (assert (parser-token-stack p))
      (let* ((ib (parser-ib p))
    	 (fill-pointer (fill-pointer ib))
    	 (token (pop (parser-token-stack p)))
    	 (match-start (parser-match-start p)))
        (setf (token-string token) (ib-string ib
    					  (token-start token)
    					  match-start))
        (when (endp (parser-token-stack p))
          (replace ib ib :start2 match-start :end2 fill-pointer)
          (setf (parser-match-start p) 0
    	    (fill-pointer (parser-ib p)) (- fill-pointer match-start)))
        token))
    
    (defmethod make-token ((p parser) (class symbol) &rest initargs)
      (let ((pt (pop-token p)))
        (apply #'make-instance class
    	   :string (token-string pt)
    	   :line (token-line pt)
    	   :character (token-character pt)
    	   initargs)))
    
    (defmethod discard-token ((p parser))
      (pop-token p)
      nil)
    
    ;;  Pattern matching
    
    (defmethod ib= ((p parser) (s string) &key (start1 0))
      (let ((ib (parser-ib p))
    	(end (length s)))
        (when (<= end (- (fill-pointer ib) start1))
          (locally (declare (optimize (safety 0)))
    	(labels ((at (m i)
    		   (declare (type fixnum m i))
    		   (cond ((= end i)
    			  t)
    			 ((= (aref ib m) (char-code (char s i)))
    			  (at (the fixnum (1+ m)) (the fixnum (1+ i))))
    			 (t
    			  nil))))
    	  (at start1 0))))))
    
    (defmethod match ((p parser) (s string))
      (input-length p (length s))
      (when (ib= p s :start1 (parser-match-start p))
        (incf (parser-match-start p) (length s))))
    
    (defmethod match ((p parser) (c fixnum))
      (when (= (the fixnum (parser-match-char p)) c)
        (incf (parser-match-start p))))
    
    (defmethod match ((p parser) (c character))
      (match p (char-code c)))
    
    (defmethod match-until ((p parser) (s string))
      (input-length p (length s))
      (labels ((maybe-eat ()
    	     (or (match p s)
    		 (and (not (match p -1))
    		      (progn
    			(input-char p)
    			(incf (parser-match-start p))
    			(maybe-eat))))))
        (maybe-eat)))
    
    (defmethod match-option ((p parser) (f function))
      (or (funcall f p)
          (parser-match-start p)))
    
    (defmacro match-not (p &body body)
      (let ((parser (gensym "PARSER-"))
    	(match-start (gensym "MATCH-START-"))
    	(result (gensym "RESULT-")))
        `(let* ((,parser ,p)
    	    (,match-start (parser-match-start ,parser))
    	    (,result (progn ,@body)))
           (cond ((or ,result
    		  (match p -1))
    	      (setf (parser-match-start ,parser) ,match-start)
    	      nil)
    	     (t
    	      (incf (parser-match-start p)))))))
    
    (defmacro match-sequence (p &body body)
      (let ((parser (gensym "PARSER-"))
    	(match-start (gensym "MATCH-START-"))
    	(result (gensym "RESULT-")))
        `(let* ((,parser ,p)
    	    (,match-start (parser-match-start ,parser))
    	    (,result (progn ,@body)))
           (cond (,result
    	      ,result)
    	     (t
    	      (setf (parser-match-start ,parser) ,match-start)
    	      nil)))))
    
    (defmethod match-times ((p parser) (f function) (min fixnum) (max fixnum))
      (match-sequence p
        (labels ((match-min ()
    	       (cond ((= 0 min)
    		      (match-max))
    		     ((funcall f p)
    		      (decf min)
    		      (decf max)
    		      (match-min))
    		     (t
    		      nil)))
    	     (match-max ()
    	       (cond ((and (< 0 max) (funcall f p))
    		      (decf max)
    		      (match-max))
    		     (t
    		      (parser-match-start p)))))
          (match-min))))
    
    (defmethod match-times ((p parser) (f function) (min fixnum) (max null))
      (match-sequence p
        (labels ((match-min ()
    	       (cond ((= 0 min)
    		      (match-max))
    		     ((funcall f p)
    		      (decf min)
    		      (match-min))
    		     (t
    		      nil)))
    	     (match-max ()
    	       (cond ((funcall f p)
    		      (match-max))
    		     (t
    		      (parser-match-start p)))))
          (match-min))))
    
    ;;  Productions
    
    (defclass comment-token (token) ())
    
    (defmethod comment-token ((p parser))
      (push-token p)
      (if (match p "/*")
          (progn (match-until p "*/")
    	     (make-token p 'comment-token))
          (discard-token p)))
    
    (defmethod match-newline ((p parser))
      (or (match p #\Newline)
          (match p (coerce '(#\Return #\Newline) 'string))
          (match p #\Return)
          (match p #\Linefeed)))
    
    (defmethod match-whitespace ((p parser))
      (or (match p #\Space)
          (match p #\Tab)
          (match-newline p)))
    
    (defmethod match-hex-digit ((p parser))
      (let ((c (the fixnum (parser-match-char p))))
        (when (or (<= (char-code #\0) c (char-code #\9))
    	      (<= (char-code #\a) c (char-code #\f))
    	      (<= (char-code #\A) c (char-code #\F)))
          (incf (parser-match-start p)))))
           
    (defmethod match-escape ((p parser))
      (match-sequence p
        (and (match p #\\)
    	 (or (when (match-times p #'match-hex-digit 1 6)
    	       (match-whitespace p)
    	       (parser-match-start p))
    	     (when (not (match-newline p))
    	       (parser-match-start p))))))
    
    (defclass whitespace-token (token) ())
    
    (defmethod whitespace-token ((p parser))
      (push-token p)
      (if (match-times p #'match-whitespace 1 nil)
          (make-token p 'whitespace-token)
          (discard-token p)))
    
    (defmethod match-ws* ((p parser))
      (match-option p #'whitespace-token))
    
    (defclass ident-token (token) ())
    
    (defmethod match-ident-char ((p parser))
      (or (match-escape p)
          (let ((c (the fixnum (parser-match-char p))))
    	(when (or (<= (char-code #\a) c (char-code #\z))
    		  (<= (char-code #\A) c (char-code #\Z))
    		  (<= (char-code #\0) c (char-code #\9))
    		  (=  (char-code #\_) c)
    		  (=  (char-code #\-) c)
    		  (<  #x007F c))
    	  (incf (parser-match-start p))))))
    
    (defmethod match-ident-char* ((p parser))
      (match-times p #'match-ident-char 0 nil))
    
    (defmethod ident-token ((p parser))
      (match-sequence p
        (push-token p)
        (match p #\-)
        (cond ((or (match-escape p)
    	       (let ((c (the fixnum (parser-match-char p))))
    		 (when (or (<= (char-code #\a) c (char-code #\z))
    			   (<= (char-code #\A) c (char-code #\Z))
    			   (=  (char-code #\_) c)
    			   (<  #x007F c))
    		   (incf (parser-match-start p)))))
    	   (match-ident-char* p)
    	   (make-token p 'ident-token))
    	  (t
    	   (discard-token p)))))
    
    (defclass identified-token (token)
      ((ident :initarg :ident
    	  :reader token-ident
    	  :type ident-token)))
    
    (defclass function-token (identified-token) ())
    
    (defmethod function-token ((p parser))
      (match-sequence p
        (push-token p)
        (let ((ident (ident-token p)))
          (if (and ident (match p #\())
    	  (make-token p 'function-token :ident ident)
    	  (discard-token p)))))
    
    (defclass at-keyword-token (identified-token) ())
    
    (defmethod at-keyword-token ((p parser))
      (match-sequence p
        (push-token p)
        (if (match p #\@)
    	(let ((ident (ident-token p)))
    	  (if ident
    	      (make-token p 'at-keyword-token :ident ident)
    	      (discard-token p)))
    	(discard-token p))))
    
    (defclass hash-token (token) ())
    
    (defmethod hash-token ((p parser))
      (match-sequence p
        (push-token p)
        (if (match p #\#)
    	(and (match-ident-char* p)
    	     (make-token p 'hash-token))
    	(discard-token p))))
    
    (defclass string-token (token) ())
    
    (defgeneric string-token-string (string-token))
    
    (defmethod string-token-string ((s string-token))
      (let ((string (token-string s)))
        (subseq string 1 (1- (length string)))))
    
    (defmethod match-string-char ((p parser) (end-char character))
      (or (match-sequence p
    	(and (match p #\\)
    	     (match-newline p)))
          (match-escape p)
          (match-not p
    	(or (match p end-char)
    	    (match p #\\)
    	    (match-newline p)))))
    
    (defmethod match-string ((p parser) (end-char character))
      (match-sequence p
        (match p end-char)
        (match-times p (lambda (p) (match-string-char p end-char)) 0 nil)
        (match p end-char)))
    
    (defmethod string-token ((p parser))
      (push-token p)
      (if (or (match-string p #\")
    	  (match-string p #\'))
          (make-token p 'string-token)))
    
    (defclass url-token (identified-token)
      ((url :initarg :url
    	:reader token-url
    	:type token)))
    
    (defmethod match-non-printable ((p parser))
      (let ((c (the fixnum (parser-match-char p))))
        (when (or (<= #x0000 c #x0008)
    	      (=  #x000B c)
    	      (<= #x000E c #x001F)
    	      (=  #x007F c))
          (incf (parser-match-start p)))))
    
    (defmethod match-url-unquoted-char ((p parser))
      (or (match-escape p)
          (match-not p
    	(or (match p #\")
    	    (match p #\')
    	    (match p #\()
    	    (match p #\))
    	    (match p #\\)
    	    (match-whitespace p)
    	    (match-non-printable p)))))
    
    (defmethod match-url-unquoted ((p parser))
      (push-token p)
      (if (match-times p #'match-url-unquoted-char 1 nil)
          (make-token p 'token)
          (discard-token p)))
    
    (defmethod url-token ((p parser))
      (push-token p)
      (or (match-sequence p
    	(let ((ident (ident-token p)))
    	  (and (string= "url" (token-string ident))
    	       (match p #\()
    	       (match-ws* p)
    	       (let ((url (or (string-token p)
    			      (match-url-unquoted p))))
    		 (match-ws* p)
    		 (when (match p #\))
    		   (make-token p (url-token :ident ident :url url)))))))
          (discard-token p)))
    
    (defclass number-token (token) ())
    
    (defmethod match-digit ((p parser))
      (let ((c (the fixnum (parser-match-char p))))
        (when (<= (char-code #\0) c (char-code #\9))
          (incf (parser-match-start p)))))
    
    (defmethod match-digit+ ((p parser))
      (match-times p #'match-digit 1 nil))
    
    (defmethod number-token ((p parser))
      (push-token p)
      (if (match-sequence p
    	(and (or (match p #\-)
    		 (match p #\+)
    		 t)
    	     (or (match-sequence p
    		   (and (match p #\.)
    			(match-digit+ p)))
    		 (match-sequence p
    		   (match-digit+ p)
    		   (match p #\.)
    		   (match-digit+ p))
    		 (match-digit+ p))
    	     (or (match-sequence p
    		   (and (or (match p #\E)
    			    (match p #\e))
    			(or (match p #\-)
    			    (match p #\+)
    			    t)
    			(match-digit+ p)))
    		 (parser-match-start p))))
          (make-token p 'number-token)
          (discard-token p)))
    
    (defclass numbered-token (token)
      ((number :initarg :number
    	   :reader token-number
    	   :type number-token)))
    
    (defclass dimension-token (identified-token numbered-token) ())
    
    (defmethod dimension-token ((p parser))
      (push-token p)
      (or (match-sequence p
    	(let ((number (number-token p)))
    	  (when number
    	    (let ((ident (ident-token p)))
    	      (when ident
    		(make-token p 'dimension-token
    			    :number number
    			    :ident ident))))))
          (discard-token p)))
    
    (defclass percentage-token (numbered-token) ())
    
    (defmethod percentage-token ((p parser))
      (push-token p)
      (or (match-sequence p
    	(let ((number (number-token p)))
    	  (when number
    	    (when (match p #\%)
    	      (make-token p 'percentage-token
    			  :number number)))))
          (discard-token p)))
    
    (defclass unicode-range-token (token) ())
    
    (defmethod unicode-range-token ((p parser))
      (push-token p)
      (or (match-sequence p
    	(and (or (match p #\u)
    		 (match p #\U))
    	     (match p #\+)
    	     (or (match-sequence p
    		   (and (match-times p #'match-hex-digit 1 6)
    			(match p #\-)
    			(match-times p #'match-hex-digit 1 6)))
    		 (match-sequence p
    		   (let ((start (parser-match-start p)))
    		     (and (match-times p #'match-hex-digit 0 5)
    			  (let ((digits (- (parser-match-start p) start)))
    			    (match-times p (lambda (p) (match p #\?))
    					 1 (- 6 digits))))))
    		 (match-times p #'match-hex-digit 1 6))
    	     (make-token p 'unicode-range-token)))
          (discard-token p)))
    
    (defclass include-match-token (token) ())
    
    (defmethod include-match-token ((p parser))
      (push-token p)
      (if (match p "~=")
          (make-token p 'include-match-token)
          (discard-token p)))
    
    (defclass dash-match-token (token) ())
    
    (defmethod dash-match-token ((p parser))
      (push-token p)
      (if (match p "|=")
          (make-token p 'dash-match-token)
          (discard-token p)))
    
    (defclass prefix-match-token (token) ())
    
    (defmethod prefix-match-token ((p parser))
      (push-token p)
      (if (match p "^=")
          (make-token p 'prefix-match-token)
          (discard-token p)))
    
    (defclass suffix-match-token (token) ())
    
    (defmethod suffix-match-token ((p parser))
      (push-token p)
      (if (match p "$=")
          (make-token p 'suffix-match-token)
          (discard-token p)))
    
    (defclass substring-match-token (token) ())
    
    (defmethod substring-match-token ((p parser))
      (push-token p)
      (if (match p "*=")
          (make-token p 'substring-match-token)
          (discard-token p)))
    
    (defclass column-token (token) ())
    
    (defmethod column-token ((p parser))
      (push-token p)
      (if (match p "||")
          (make-token p 'column-token)
          (discard-token p)))
    
    (defclass cdo-token (token) ())
    
    (defmethod cdo-token ((p parser))
      (push-token p)
      (if (match p "<!--")
          (make-token p 'cdo-token)
          (discard-token p)))
    
    (defclass cdc-token (token) ())
    
    (defmethod cdc-token ((p parser))
      (push-token p)
      (if (match p "-->")
          (make-token p 'cdc-token)
          (discard-token p)))
    
    (defclass left-paren-token (token) ())
    
    (defmethod left-paren-token ((p parser))
      (push-token p)
      (if (match p #\()
          (make-token p 'left-paren-token)
          (discard-token p)))
    
    (defclass right-paren-token (token) ())
    
    (defmethod right-paren-token ((p parser))
      (push-token p)
      (if (match p #\))
          (make-token p 'right-paren-token)
          (discard-token p)))
    
    (defclass comma-token (token) ())
    
    (defmethod comma-token ((p parser))
      (push-token p)
      (if (match p #\,)
          (make-token p 'comma-token)
          (discard-token p)))
    
    (defclass colon-token (token) ())
    
    (defmethod colon-token ((p parser))
      (push-token p)
      (if (match p #\:)
          (make-token p 'colon-token)
          (discard-token p)))
    
    (defclass semicolon-token (token) ())
    
    (defmethod semicolon-token ((p parser))
      (push-token p)
      (if (match p #\;)
          (make-token p 'semicolon-token)
          (discard-token p)))
    
    (defclass [-token (token) ())
    
    (defmethod [-token ((p parser))
      (push-token p)
      (if (match p #\[)
          (make-token p '[-token)
          (discard-token p)))
    
    (defclass ]-token (token) ())
    
    (defmethod ]-token ((p parser))
      (push-token p)
      (if (match p #\])
          (make-token p ']-token)
          (discard-token p)))
    
    (defclass {-token (token) ())
    
    (defmethod {-token ((p parser))
      (push-token p)
      (if (match p #\{)
          (make-token p '{-token)
          (discard-token p)))
    
    (defclass }-token (token) ())
    
    (defmethod }-token ((p parser))
      (push-token p)
      (if (match p #\})
          (make-token p '}-token)
          (discard-token p)))
    
    (defclass eof-token (token) ())
    
    (defmethod eof-token ((p parser))
      (push-token p)
      (if (match p -1)
          (make-token p 'eof-token)
          (discard-token p)))
    
    (defclass delim-token (token) ())
    
    (defmethod delim-token ((p parser))
      (push-token p)
      (input-length p 1)
      (incf (parser-match-start p))
      (make-token p 'delim-token))
    
    ;;  Tokenizer
    
    (defmethod consume-token ((p parser))
      (or (whitespace-token p)
          (string-token p)
          (hash-token p)
          (suffix-match-token p)
          (left-paren-token p)
          (right-paren-token p)
          (substring-match-token p)
          (number-token p)
          (comma-token p)
          (cdc-token p)
          (comment-token p)
          (colon-token p)
          (semicolon-token p)
          (cdo-token p)
          (at-keyword-token p)
          ([-token p)
          (]-token p)
          (prefix-match-token p)
          ({-token p)
          (}-token p)
          (unicode-range-token p)
          (ident-token p)
          (dash-match-token p)
          (include-match-token p)
          (eof-token p)
          (delim-token p)))
    
    ;;  Lexer
    
    (defun lex-stream (stream)
      (let ((p (make-instance 'parser :stream stream))
    	(document nil))
        (labels ((push-tokens ()
    	       (let ((token (consume-token p)))
    		 (push token document)
    		 (unless (typep token 'eof-token)
    		   (push-tokens)))))
          (push-tokens))
        (nreverse document)))
    
    (defun lex-string (string)
      (with-input-from-string (stream string)
        (lex-stream stream)))
    
    (defun lex-file (filespec)
      (with-open-file (stream filespec
    			  :element-type 'character
    			  :external-format :utf-8)
        (lex-stream stream)))
    
    ;;  Tests
    
    #+test
    (lex-string "/* hello world */ body { color: #ff0000; }")
    
    ;;  Parsing