Edit

thodg/cl-css-lexer/css-lexer.lisp

Branch :

  • css-lexer.lisp
  • (in-package :css-lexer)
    
    (defclass css-lexer (lexer) ())
    
    (defgeneric match-comment (lexer))
    (defgeneric match-newline (lexer))
    (defgeneric match-whitespace (lexer))
    (defgeneric match-hex-digit (lexer))
    (defgeneric match-escape (lexer))
    (defgeneric whitespace-token (lexer))
    (defgeneric match-ws* (lexer))
    (defgeneric match-ident-char (lexer))
    (defgeneric match-ident-char* (lexer))
    (defgeneric ident-token (lexer))
    (defgeneric function-token (lexer))
    (defgeneric at-keyword-token (lexer))
    (defgeneric match-string-char (lexer end-char))
    (defgeneric match-string (lexer end-char))
    (defgeneric string-token (lexer))
    (defgeneric match-non-printable (lexer))
    (defgeneric match-url-unquoted-char (lexer))
    (defgeneric match-url-unquoted (lexer))
    (defgeneric url-token (lexer))
    (defgeneric match-digit (lexer))
    (defgeneric match-digit+ (lexer))
    (defgeneric number-token (lexer))
    (defgeneric dimension-token (lexer))
    (defgeneric percentage-token (lexer))
    (defgeneric unicode-range-token (lexer))
    (defgeneric include-match-token (lexer))
    (defgeneric dash-match-token (lexer))
    (defgeneric prefix-match-token (lexer))
    (defgeneric suffix-match-token (lexer))
    (defgeneric substring-match-token (lexer))
    (defgeneric column-token (lexer))
    (defgeneric cdo-token (lexer))
    (defgeneric cdc-token (lexer))
    (defgeneric consume-token (lexer))
    
    (defclass css-token (token) ())
    (defclass printable (token) ())
    
    (defclass identified-token (css-token)
      ((ident :initarg :ident
    	  :reader token-ident
    	  :type ident-token)))
    
    (defclass comment-token (css-token) ())
    (defclass whitespace-token (css-token) ())
    (defclass ident-token (printable css-token) ())
    (defclass function-token (identified-token) ())
    (defclass at-keyword-token (identified-token) ())
    (defclass hash-token (printable css-token) ())
    (defclass string-token (css-token) ())
    
    (defclass url-token (identified-token)
      ((url :initarg :url
    	:reader token-url
    	:type token)))
    
    (defclass number-token (printable css-token) ())
    
    (defclass numbered-token (css-token)
      ((number :initarg :number
    	   :reader token-number
    	   :type number-token)))
    
    (defclass dimension-token (identified-token numbered-token) ())
    (defclass percentage-token (numbered-token) ())
    (defclass unicode-range-token (css-token) ())
    (defclass include-match-token (css-token) ())
    (defclass dash-match-token (css-token) ())
    (defclass prefix-match-token (css-token) ())
    (defclass suffix-match-token (css-token) ())
    (defclass substring-match-token (css-token) ())
    (defclass column-token (css-token) ())
    (defclass cdo-token (css-token) ())
    (defclass cdc-token (css-token) ())
    (defclass left-paren-token (css-token) ())
    (defclass right-paren-token (css-token) ())
    (defclass comma-token (css-token) ())
    (defclass colon-token (css-token) ())
    (defclass semicolon-token (css-token) ())
    (defclass [-token (css-token) ())
    (defclass ]-token (css-token) ())
    (defclass {-token (css-token) ())
    (defclass }-token (css-token) ())
    (defclass eof-token (css-token) ())
    (defclass delim-token (printable css-token) ())
    
    (defmethod comment-token ((lx lexer))
      (push-token lx)
      (if (match lx "/*")
          (progn (match-until lx "*/")
    	     (make-token lx 'comment-token))
          (discard-token lx)))
    
    (let ((rn (coerce '(#\Return #\Newline) 'string)))
      (defmethod match-newline ((lx lexer))
        (or (match lx #\Newline)
            (match lx rn)
            (match lx #\Return)
            (match lx #\Linefeed))))
    
    (defmethod match-whitespace ((lx lexer))
      (or (match lx #\Space)
          (match lx #\Tab)
          (match-newline lx)))
    
    (defmethod match-hex-digit ((lx lexer))
      (let ((c (the fixnum (lexer-match-char lx 0))))
        (when (or (<= (char-code #\0) c (char-code #\9))
    	      (<= (char-code #\a) c (char-code #\f))
    	      (<= (char-code #\A) c (char-code #\F)))
          (incf (the fixnum (lexer-match-start lx))))))
           
    (defmethod match-escape ((lx lexer))
      (match-sequence lx
        (and (match lx #\\)
    	 (or (when (match-times lx #'match-hex-digit 1 6)
    	       (match-whitespace lx)
    	       (lexer-match-start lx))
    	     (when (not (match-newline lx))
    	       (lexer-match-start lx))))))
    
    (defmethod whitespace-token ((lx lexer))
      (push-token lx)
      (if (match-times lx #'match-whitespace 1 nil)
          (make-token lx 'whitespace-token)
          (discard-token lx)))
    
    (defmethod match-ws* ((lx lexer))
      (match-option lx #'whitespace-token))
    
    (defmethod match-ident-char ((lx lexer))
      (or (match-escape lx)
          (let ((c (the character (lexer-match-char lx 0))))
    	(when (or (char<= #\a c #\z)
    		  (char<= #\A c #\Z)
    		  (char<= #\0 c #\9)
    		  (char=  #\_ c)
    		  (char=  #\- c)
    		  (<  #x007F (char-code c)))
    	  (incf (the fixnum (lexer-match-start lx)))))))
    
    (defmethod match-ident-char* ((lx lexer))
      (match-times lx #'match-ident-char 0 nil))
    
    (defmethod ident-token ((lx lexer))
      (match-sequence lx
        (push-token lx)
        (match lx #\-)
        (cond ((or (match-escape lx)
    	       (let ((c (the character (lexer-match-char lx 0))))
    		 (when (or (char<= #\a c #\z)
    			   (char<= #\A c #\Z)
    			   (char=  #\_ c)
    			   (<  #x007F (char-code c)))
    		   (incf (the fixnum (lexer-match-start lx))))))
    	   (match-ident-char* lx)
    	   (make-token lx 'ident-token))
    	  (t
    	   (discard-token lx)))))
    
    (defmethod function-token ((lx lexer))
      (match-sequence lx
        (push-token lx)
        (let ((ident (ident-token lx)))
          (if (and ident (match lx #\())
    	  (make-token lx 'function-token :ident ident)
    	  (discard-token lx)))))
    
    (defmethod at-keyword-token ((lx lexer))
      (match-sequence lx
        (push-token lx)
        (if (match lx #\@)
    	(let ((ident (ident-token lx)))
    	  (if ident
    	      (make-token lx 'at-keyword-token :ident ident)
    	      (discard-token lx)))
    	(discard-token lx))))
    
    (defmethod hash-token ((lx lexer))
      (match-sequence lx
        (push-token lx)
        (if (match lx #\#)
    	(and (match-ident-char* lx)
    	     (make-token lx 'hash-token))
    	(discard-token lx))))
    
    (defgeneric string-token-string (string-token))
    
    (defmethod string-token-string ((s string-token))
      (let ((string (token-string s)))
        (declare (type (vector character) string))
        (subseq string 1 (1- (length string)))))
    
    (defmethod match-string-char ((lx lexer) (end-char character))
      (or (match-sequence lx
    	(and (match lx #\\)
    	     (match-newline lx)))
          (match-escape lx)
          (match-not lx (lambda (lx)
                          (or (match lx end-char)
                              (match lx #\\)
                              (match-newline lx))))))
    
    (defmethod match-string ((lx lexer) (end-char character))
      (match-sequence lx
        (and (match lx end-char)
             (match-times lx (lambda (lx) (match-string-char lx end-char)) 0 nil)
             (match lx end-char))))
    
    (defmethod string-token ((lx lexer))
      (push-token lx)
      (if (or (match-string lx #\")
    	  (match-string lx #\'))
          (make-token lx 'string-token)))
    
    (defmethod match-non-printable ((lx lexer))
      (let ((c (the fixnum (lexer-match-char lx 0))))
        (when (or (<= #x0000 c #x0008)
    	      (=  #x000B c)
    	      (<= #x000E c #x001F)
    	      (=  #x007F c))
          (incf (the fixnum (lexer-match-start lx))))))
    
    (defmethod match-url-unquoted-char ((lx lexer))
      (or (match-escape lx)
          (match-not lx
    	(or (match lx #\")
    	    (match lx #\')
    	    (match lx #\()
    	    (match lx #\))
    	    (match lx #\\)
    	    (match-whitespace lx)
    	    (match-non-printable lx)))))
    
    (defmethod match-url-unquoted ((lx lexer))
      (push-token lx)
      (if (match-times lx #'match-url-unquoted-char 1 nil)
          (make-token lx 'token)
          (discard-token lx)))
    
    (defmethod url-token ((lx lexer))
      (push-token lx)
      (or (match-sequence lx
    	(let ((ident (ident-token lx)))
    	  (and (string= "url" (the (vector character)
                                       (token-string ident)))
    	       (match lx #\()
    	       (match-ws* lx)
    	       (let ((url (or (string-token lx)
    			      (match-url-unquoted lx))))
    		 (match-ws* lx)
    		 (when (match lx #\))
    		   (make-token lx 'url-token :ident ident :url url))))))
          (discard-token lx)))
    
    (defmethod match-digit ((lx lexer))
      (let ((c (the character (lexer-match-char lx 0))))
        (when (char<= #\0 c #\9)
          (incf (the fixnum (lexer-match-start lx))))))
    
    (defmethod match-digit+ ((lx lexer))
      (match-times lx #'match-digit 1 nil))
    
    (defmethod number-token ((lx lexer))
      (push-token lx)
      (if (match-sequence lx
    	(and (or (match lx #\-)
    		 (match lx #\+)
    		 t)
    	     (or (match-sequence lx
    		   (and (match lx #\.)
    			(match-digit+ lx)))
    		 (match-sequence lx
    		   (match-digit+ lx)
    		   (match lx #\.)
    		   (match-digit+ lx))
    		 (match-digit+ lx))
    	     (or (match-sequence lx
    		   (and (or (match lx #\E)
    			    (match lx #\e))
    			(or (match lx #\-)
    			    (match lx #\+)
    			    t)
    			(match-digit+ lx)))
    		 (lexer-match-start lx))))
          (make-token lx 'number-token)
          (discard-token lx)))
    
    (defmethod dimension-token ((lx lexer))
      (push-token lx)
      (or (match-sequence lx
    	(let ((number (number-token lx)))
    	  (when number
    	    (let ((ident (ident-token lx)))
    	      (when ident
    		(make-token lx 'dimension-token
    			    :number number
    			    :ident ident))))))
          (discard-token lx)))
    
    (defmethod percentage-token ((lx lexer))
      (push-token lx)
      (or (match-sequence lx
    	(let ((number (number-token lx)))
    	  (when number
    	    (when (match lx #\%)
    	      (make-token lx 'percentage-token
    			  :number number)))))
          (discard-token lx)))
    
    (defmethod unicode-range-token ((lx lexer))
      (push-token lx)
      (or (match-sequence lx
    	(and (or (match lx #\u)
    		 (match lx #\U))
    	     (match lx #\+)
    	     (or (match-sequence lx
    		   (and (match-times lx #'match-hex-digit 1 6)
    			(match lx #\-)
    			(match-times lx #'match-hex-digit 1 6)))
    		 (match-sequence lx
    		   (let ((start (the fixnum (lexer-match-start lx))))
    		     (and (match-times lx #'match-hex-digit 0 5)
    			  (let ((digits (- (the fixnum
                                                    (lexer-match-start lx))
                                               start)))
                                (declare (type fixnum digits))
    			    (match-times lx (lambda (lx) (match lx #\?))
    					 1 (the fixnum (- 6 digits)))))))
    		 (match-times lx #'match-hex-digit 1 6))
    	     (make-token lx 'unicode-range-token)))
          (discard-token lx)))
    
    (defmethod include-match-token ((lx lexer))
      (push-token lx)
      (if (match lx "~=")
          (make-token lx 'include-match-token)
          (discard-token lx)))
    
    (defmethod dash-match-token ((lx lexer))
      (push-token lx)
      (if (match lx "|=")
          (make-token lx 'dash-match-token)
          (discard-token lx)))
    
    (defmethod prefix-match-token ((lx lexer))
      (push-token lx)
      (if (match lx "^=")
          (make-token lx 'prefix-match-token)
          (discard-token lx)))
    
    (defmethod suffix-match-token ((lx lexer))
      (push-token lx)
      (if (match lx "$=")
          (make-token lx 'suffix-match-token)
          (discard-token lx)))
    
    (defmethod substring-match-token ((lx lexer))
      (push-token lx)
      (if (match lx "*=")
          (make-token lx 'substring-match-token)
          (discard-token lx)))
    
    (defmethod column-token ((lx lexer))
      (push-token lx)
      (if (match lx "||")
          (make-token lx 'column-token)
          (discard-token lx)))
    
    (defmethod cdo-token ((lx lexer))
      (push-token lx)
      (if (match lx "<!--")
          (make-token lx 'cdo-token)
          (discard-token lx)))
    
    (defmethod cdc-token ((lx lexer))
      (push-token lx)
      (if (match lx "-->")
          (make-token lx 'cdc-token)
          (discard-token lx)))
    
    (defmethod left-paren-token ((lx lexer))
      (push-token lx)
      (if (match lx #\()
          (make-token lx 'left-paren-token)
          (discard-token lx)))
    
    (defmethod right-paren-token ((lx lexer))
      (push-token lx)
      (if (match lx #\))
          (make-token lx 'right-paren-token)
          (discard-token lx)))
    
    (defmethod comma-token ((lx lexer))
      (push-token lx)
      (if (match lx #\,)
          (make-token lx 'comma-token)
          (discard-token lx)))
    
    (defmethod colon-token ((lx lexer))
      (push-token lx)
      (if (match lx #\:)
          (make-token lx 'colon-token)
          (discard-token lx)))
    
    (defmethod semicolon-token ((lx lexer))
      (push-token lx)
      (if (match lx #\;)
          (make-token lx 'semicolon-token)
          (discard-token lx)))
    
    (defmethod [-token ((lx lexer))
      (push-token lx)
      (if (match lx #\[)
          (make-token lx '[-token)
          (discard-token lx)))
    
    (defmethod ]-token ((lx lexer))
      (push-token lx)
      (if (match lx #\])
          (make-token lx ']-token)
          (discard-token lx)))
    
    (defmethod {-token ((lx lexer))
      (push-token lx)
      (if (match lx #\{)
          (make-token lx '{-token)
          (discard-token lx)))
    
    (defmethod }-token ((lx lexer))
      (push-token lx)
      (if (match lx #\})
          (make-token lx '}-token)
          (discard-token lx)))
    
    (defmethod eof-token ((lx lexer))
      (push-token lx)
      (cond ((and (lexer-in-eof lx)
                  (= (lexer-match-start lx)
                     (fill-pointer (lexer-buffer lx))))
             (setf (lexer-eof-p lx) t)
             (make-token lx 'eof-token))
            (t
             (discard-token lx))))
    
    (defmethod delim-token ((lx lexer))
      (push-token lx)
      (lexer-input-n lx 1)
      (incf (the fixnum (lexer-match-start lx)))
      (make-token lx 'delim-token))
    
    ;;  CSS lexer
    
    (defmethod stream-element-type ((lx css-lexer))
      'css-token)
    
    (defmethod lexer-token ((lx css-lexer))
      (or (eof-token lx)
          (whitespace-token lx)
          (string-token lx)
          (hash-token lx)
          (suffix-match-token lx)
          (left-paren-token lx)
          (right-paren-token lx)
          (substring-match-token lx)
          (number-token lx)
          (comma-token lx)
          (cdc-token lx)
          (comment-token lx)
          (colon-token lx)
          (semicolon-token lx)
          (cdo-token lx)
          (at-keyword-token lx)
          ([-token lx)
          (]-token lx)
          (prefix-match-token lx)
          ({-token lx)
          (}-token lx)
          (unicode-range-token lx)
          (ident-token lx)
          (dash-match-token lx)
          (include-match-token lx)
          (delim-token lx)
          (error "no matching css token")))
    
    (defun css-lexer (stream)
      (assert (eq 'character (stream-element-type stream)))
      (make-instance 'css-lexer :stream stream))
    
    ;;  print-object
    
    (defmethod print-object ((object identified-token) stream)
      (declare (type cl:stream stream))
      (print-unreadable-object (object stream :type t)
        (format stream "~S" (token-ident object))))
    
    (defmethod print-object ((object numbered-token) stream)
      (declare (type cl:stream stream))
      (print-unreadable-object (object stream :type t)
        (format stream "~S" (token-number object))))
    
    (defmethod print-object ((object printable) stream)
      (declare (type cl:stream stream))
      (print-unreadable-object (object stream :type t)
        (format stream "~S" (token-string object))))
    
    (defmethod print-object ((object string-token) stream)
      (declare (type cl:stream stream))
      (print-unreadable-object (object stream :type t)
        (format stream "~S" (string-token-string object))))
    
    (defmethod print-object ((object token) stream)
      (declare (type cl:stream stream))
      (print-unreadable-object (object stream :type t)))