diff --git a/defpackage.lisp b/defpackage.lisp
new file mode 100644
index 0000000..90dbad5
--- /dev/null
+++ b/defpackage.lisp
@@ -0,0 +1,6 @@
+
+(in-package :common-lisp-user)
+
+(defpackage :parse-css
+ (:use :common-lisp)
+ (:export #:parser))
diff --git a/lexer.lisp b/lexer.lisp
new file mode 100644
index 0000000..709e180
--- /dev/null
+++ b/lexer.lisp
@@ -0,0 +1,795 @@
+
+(in-package :parse-css)
+
+;; Input buffers
+
+(defun make-input-buffer (&optional string (start 0))
+ (let* ((length (when string (- (length string) start)))
+ (ib (make-array (if string
+ (* 64 (ceiling length 64))
+ 64)
+ :element-type 'fixnum
+ :adjustable t
+ :fill-pointer 0)))
+ (when string
+ (setf (fill-pointer ib) length)
+ (replace ib string :start2 start))
+ ib))
+
+;; Parser
+
+(defclass token ()
+ ((string :initarg :string
+ :accessor token-string
+ :type string)
+ (line :initarg :line
+ :initform 0
+ :accessor token-line
+ :type fixnum)
+ (character :initarg :character
+ :initform 0
+ :accessor token-character
+ :type fixnum)))
+
+(defclass parser-token (token)
+ ((start :initarg :start
+ :accessor token-start
+ :type fixnum)))
+
+(defclass parser ()
+ ((input :initarg :stream
+ :reader parser-input
+ :type stream)
+ (input-line :initarg :input-line
+ :initform 0
+ :accessor parser-input-line
+ :type fixnum)
+ (input-character :initarg :input-character
+ :initform -1
+ :accessor parser-input-character
+ :type fixnum)
+ (eof :initform nil
+ :accessor parser-eof)
+ (ib :initform (make-input-buffer)
+ :accessor parser-ib
+ :type array)
+ (match-start :initform 0
+ :accessor parser-match-start
+ :type fixnum)
+ (token-stack :initform ()
+ :accessor parser-token-stack
+ :type list)))
+
+(defgeneric push-token (parser))
+(defgeneric pop-token (parser))
+(defgeneric make-token (parser class &rest initargs))
+(defgeneric discard-token (parser))
+(defgeneric ib-push-extend (parser character))
+(defgeneric input-char (parser))
+(defgeneric input-length (parser length))
+(defgeneric parser-match-char (parser &optional char-index))
+(defgeneric match (parser item))
+(defgeneric match-until (parser item))
+(defgeneric match-option (parser function))
+(defgeneric match-times (parser function min max))
+(defgeneric match-comment (parser))
+(defgeneric match-newline (parser))
+(defgeneric match-whitespace (parser))
+(defgeneric match-hex-digit (parser))
+(defgeneric match-escape (parser))
+(defgeneric whitespace-token (parser))
+(defgeneric match-ws* (parser))
+(defgeneric match-ident-char (parser))
+(defgeneric match-ident-char* (parser))
+(defgeneric ident-token (parser))
+(defgeneric function-token (parser))
+(defgeneric at-keyword-token (parser))
+(defgeneric match-string-char (parser end-char))
+(defgeneric match-string (parser end-char))
+(defgeneric string-token (parser))
+(defgeneric match-non-printable (parser))
+(defgeneric match-url-unquoted-char (parser))
+(defgeneric match-url-unquoted (parser))
+(defgeneric url-token (parser))
+(defgeneric match-digit (parser))
+(defgeneric match-digit+ (parser))
+(defgeneric number-token (parser))
+(defgeneric dimension-token (parser))
+(defgeneric percentage-token (parser))
+(defgeneric unicode-range-token (parser))
+(defgeneric include-match-token (parser))
+(defgeneric dash-match-token (parser))
+(defgeneric prefix-match-token (parser))
+(defgeneric suffix-match-token (parser))
+(defgeneric substring-match-token (parser))
+(defgeneric column-token (parser))
+(defgeneric cdo-token (parser))
+(defgeneric cdc-token (parser))
+
+;; Parser input
+
+(defmethod ib-string ((ib array) (start fixnum) (end fixnum))
+ (let* ((length (- end start))
+ (s (make-string length)))
+ (labels ((at (i j)
+ (let ((ib-char (aref ib i)))
+ (unless (or (= length j)
+ (= ib-char -1))
+ (setf (char s j) (code-char ib-char))
+ (at (1+ i) (1+ j))))))
+ (at start 0))))
+
+(defmethod ib-push-extend ((p parser) (c fixnum))
+ (let ((ib (parser-ib p)))
+ (let* ((fill-pointer (fill-pointer ib))
+ (new-fill-pointer (1+ fill-pointer)))
+ (if (= fill-pointer (array-dimension ib 0))
+ (setf (parser-ib p) (adjust-array ib (+ fill-pointer 64)
+ :fill-pointer new-fill-pointer))
+ (setf (fill-pointer ib) new-fill-pointer))
+ (locally (declare (optimize (safety 0)))
+ (setf (aref ib fill-pointer) c))
+ fill-pointer)))
+
+(defun read-char-code (stream)
+ (let ((c (read-char stream nil nil)))
+ (if c (char-code c) -1)))
+
+(defmethod input-char ((p parser))
+ (let* ((in (parser-input p))
+ (c (read-char-code in))
+ (pos (ib-push-extend p c)))
+ (cond ((or (and (= #x000A c)
+ (not (and (< 0 pos)
+ (= #x000D (aref (parser-ib p) (1- pos))))))
+ (= #x000D c)
+ (= #x000C c))
+ (setf (parser-input-character p) 0)
+ (incf (parser-input-line p)))
+ (t
+ (incf (parser-input-character p))))
+ c))
+
+(defmethod input-length ((p parser) (length fixnum))
+ (when (< (- (fill-pointer (parser-ib p))
+ (parser-match-start p))
+ length)
+ (input-char p)
+ (input-length p length)))
+
+(defmethod parser-match-char ((p parser) &optional (index 0))
+ (input-length p (1+ index))
+ (aref (parser-ib p) (+ (parser-match-start p) index)))
+
+;; Token stack
+
+(defmethod push-token ((p parser))
+ (let ((token (make-instance 'parser-token
+ :start (parser-match-start p)
+ :line (parser-input-line p)
+ :character (parser-input-character p))))
+ (push token (parser-token-stack p))))
+
+(defmethod pop-token ((p parser))
+ (assert (parser-token-stack p))
+ (let* ((ib (parser-ib p))
+ (fill-pointer (fill-pointer ib))
+ (token (pop (parser-token-stack p)))
+ (match-start (parser-match-start p)))
+ (setf (token-string token) (ib-string ib
+ (token-start token)
+ match-start))
+ (when (endp (parser-token-stack p))
+ (replace ib ib :start2 match-start :end2 fill-pointer)
+ (setf (parser-match-start p) 0
+ (fill-pointer (parser-ib p)) (- fill-pointer match-start)))
+ token))
+
+(defmethod make-token ((p parser) (class symbol) &rest initargs)
+ (let ((pt (pop-token p)))
+ (apply #'make-instance class
+ :string (token-string pt)
+ :line (token-line pt)
+ :character (token-character pt)
+ initargs)))
+
+(defmethod discard-token ((p parser))
+ (pop-token p)
+ nil)
+
+;; Pattern matching
+
+(defmethod ib= ((p parser) (s string) &key (start1 0))
+ (let ((ib (parser-ib p))
+ (end (length s)))
+ (when (<= end (- (fill-pointer ib) start1))
+ (locally (declare (optimize (safety 0)))
+ (labels ((at (m i)
+ (declare (type fixnum m i))
+ (cond ((= end i)
+ t)
+ ((= (aref ib m) (char-code (char s i)))
+ (at (the fixnum (1+ m)) (the fixnum (1+ i))))
+ (t
+ nil))))
+ (at start1 0))))))
+
+(defmethod match ((p parser) (s string))
+ (input-length p (length s))
+ (when (ib= p s :start1 (parser-match-start p))
+ (incf (parser-match-start p) (length s))))
+
+(defmethod match ((p parser) (c fixnum))
+ (when (= (the fixnum (parser-match-char p)) c)
+ (incf (parser-match-start p))))
+
+(defmethod match ((p parser) (c character))
+ (match p (char-code c)))
+
+(defmethod match-until ((p parser) (s string))
+ (input-length p (length s))
+ (labels ((maybe-eat ()
+ (or (match p s)
+ (and (not (match p -1))
+ (progn
+ (input-char p)
+ (incf (parser-match-start p))
+ (maybe-eat))))))
+ (maybe-eat)))
+
+(defmethod match-option ((p parser) (f function))
+ (or (funcall f p)
+ (parser-match-start p)))
+
+(defmacro match-not (p &body body)
+ (let ((parser (gensym "PARSER-"))
+ (match-start (gensym "MATCH-START-"))
+ (result (gensym "RESULT-")))
+ `(let* ((,parser ,p)
+ (,match-start (parser-match-start ,parser))
+ (,result (progn ,@body)))
+ (cond ((or ,result
+ (match p -1))
+ (setf (parser-match-start ,parser) ,match-start)
+ nil)
+ (t
+ (incf (parser-match-start p)))))))
+
+(defmacro match-sequence (p &body body)
+ (let ((parser (gensym "PARSER-"))
+ (match-start (gensym "MATCH-START-"))
+ (result (gensym "RESULT-")))
+ `(let* ((,parser ,p)
+ (,match-start (parser-match-start ,parser))
+ (,result (progn ,@body)))
+ (cond (,result
+ ,result)
+ (t
+ (setf (parser-match-start ,parser) ,match-start)
+ nil)))))
+
+(defmethod match-times ((p parser) (f function) (min fixnum) (max fixnum))
+ (match-sequence p
+ (labels ((match-min ()
+ (cond ((= 0 min)
+ (match-max))
+ ((funcall f p)
+ (decf min)
+ (decf max)
+ (match-min))
+ (t
+ nil)))
+ (match-max ()
+ (cond ((and (< 0 max) (funcall f p))
+ (decf max)
+ (match-max))
+ (t
+ (parser-match-start p)))))
+ (match-min))))
+
+(defmethod match-times ((p parser) (f function) (min fixnum) (max null))
+ (match-sequence p
+ (labels ((match-min ()
+ (cond ((= 0 min)
+ (match-max))
+ ((funcall f p)
+ (decf min)
+ (match-min))
+ (t
+ nil)))
+ (match-max ()
+ (cond ((funcall f p)
+ (match-max))
+ (t
+ (parser-match-start p)))))
+ (match-min))))
+
+;; Productions
+
+(defclass comment-token (token) ())
+
+(defmethod comment-token ((p parser))
+ (push-token p)
+ (if (match p "/*")
+ (progn (match-until p "*/")
+ (make-token p 'comment-token))
+ (discard-token p)))
+
+(defmethod match-newline ((p parser))
+ (or (match p #\Newline)
+ (match p (coerce '(#\Return #\Newline) 'string))
+ (match p #\Return)
+ (match p #\Linefeed)))
+
+(defmethod match-whitespace ((p parser))
+ (or (match p #\Space)
+ (match p #\Tab)
+ (match-newline p)))
+
+(defmethod match-hex-digit ((p parser))
+ (let ((c (the fixnum (parser-match-char p))))
+ (when (or (<= (char-code #\0) c (char-code #\9))
+ (<= (char-code #\a) c (char-code #\f))
+ (<= (char-code #\A) c (char-code #\F)))
+ (incf (parser-match-start p)))))
+
+(defmethod match-escape ((p parser))
+ (match-sequence p
+ (and (match p #\\)
+ (or (when (match-times p #'match-hex-digit 1 6)
+ (match-whitespace p)
+ (parser-match-start p))
+ (when (not (match-newline p))
+ (parser-match-start p))))))
+
+(defclass whitespace-token (token) ())
+
+(defmethod whitespace-token ((p parser))
+ (push-token p)
+ (if (match-times p #'match-whitespace 1 nil)
+ (make-token p 'whitespace-token)
+ (discard-token p)))
+
+(defmethod match-ws* ((p parser))
+ (match-option p #'whitespace-token))
+
+(defclass ident-token (token) ())
+
+(defmethod match-ident-char ((p parser))
+ (or (match-escape p)
+ (let ((c (the fixnum (parser-match-char p))))
+ (when (or (<= (char-code #\a) c (char-code #\z))
+ (<= (char-code #\A) c (char-code #\Z))
+ (<= (char-code #\0) c (char-code #\9))
+ (= (char-code #\_) c)
+ (= (char-code #\-) c)
+ (< #x007F c))
+ (incf (parser-match-start p))))))
+
+(defmethod match-ident-char* ((p parser))
+ (match-times p #'match-ident-char 0 nil))
+
+(defmethod ident-token ((p parser))
+ (match-sequence p
+ (push-token p)
+ (match p #\-)
+ (cond ((or (match-escape p)
+ (let ((c (the fixnum (parser-match-char p))))
+ (when (or (<= (char-code #\a) c (char-code #\z))
+ (<= (char-code #\A) c (char-code #\Z))
+ (= (char-code #\_) c)
+ (< #x007F c))
+ (incf (parser-match-start p)))))
+ (match-ident-char* p)
+ (make-token p 'ident-token))
+ (t
+ (discard-token p)))))
+
+(defclass identified-token (token)
+ ((ident :initarg :ident
+ :reader token-ident
+ :type ident-token)))
+
+(defclass function-token (identified-token) ())
+
+(defmethod function-token ((p parser))
+ (match-sequence p
+ (push-token p)
+ (let ((ident (ident-token p)))
+ (if (and ident (match p #\())
+ (make-token p 'function-token :ident ident)
+ (discard-token p)))))
+
+(defclass at-keyword-token (identified-token) ())
+
+(defmethod at-keyword-token ((p parser))
+ (match-sequence p
+ (push-token p)
+ (if (match p #\@)
+ (let ((ident (ident-token p)))
+ (if ident
+ (make-token p 'at-keyword-token :ident ident)
+ (discard-token p)))
+ (discard-token p))))
+
+(defclass hash-token (token) ())
+
+(defmethod hash-token ((p parser))
+ (match-sequence p
+ (push-token p)
+ (if (match p #\#)
+ (and (match-ident-char* p)
+ (make-token p 'hash-token))
+ (discard-token p))))
+
+(defclass string-token (token) ())
+
+(defgeneric string-token-string (string-token))
+
+(defmethod string-token-string ((s string-token))
+ (let ((string (token-string s)))
+ (subseq string 1 (1- (length string)))))
+
+(defmethod match-string-char ((p parser) (end-char character))
+ (or (match-sequence p
+ (and (match p #\\)
+ (match-newline p)))
+ (match-escape p)
+ (match-not p
+ (or (match p end-char)
+ (match p #\\)
+ (match-newline p)))))
+
+(defmethod match-string ((p parser) (end-char character))
+ (match-sequence p
+ (match p end-char)
+ (match-times p (lambda (p) (match-string-char p end-char)) 0 nil)
+ (match p end-char)))
+
+(defmethod string-token ((p parser))
+ (push-token p)
+ (if (or (match-string p #\")
+ (match-string p #\'))
+ (make-token p 'string-token)))
+
+(defclass url-token (identified-token)
+ ((url :initarg :url
+ :reader token-url
+ :type token)))
+
+(defmethod match-non-printable ((p parser))
+ (let ((c (the fixnum (parser-match-char p))))
+ (when (or (<= #x0000 c #x0008)
+ (= #x000B c)
+ (<= #x000E c #x001F)
+ (= #x007F c))
+ (incf (parser-match-start p)))))
+
+(defmethod match-url-unquoted-char ((p parser))
+ (or (match-escape p)
+ (match-not p
+ (or (match p #\")
+ (match p #\')
+ (match p #\()
+ (match p #\))
+ (match p #\\)
+ (match-whitespace p)
+ (match-non-printable p)))))
+
+(defmethod match-url-unquoted ((p parser))
+ (push-token p)
+ (if (match-times p #'match-url-unquoted-char 1 nil)
+ (make-token p 'token)
+ (discard-token p)))
+
+(defmethod url-token ((p parser))
+ (push-token p)
+ (or (match-sequence p
+ (let ((ident (ident-token p)))
+ (and (string= "url" (token-string ident))
+ (match p #\()
+ (match-ws* p)
+ (let ((url (or (string-token p)
+ (match-url-unquoted p))))
+ (match-ws* p)
+ (when (match p #\))
+ (make-token p (url-token :ident ident :url url)))))))
+ (discard-token p)))
+
+(defclass number-token (token) ())
+
+(defmethod match-digit ((p parser))
+ (let ((c (the fixnum (parser-match-char p))))
+ (when (<= (char-code #\0) c (char-code #\9))
+ (incf (parser-match-start p)))))
+
+(defmethod match-digit+ ((p parser))
+ (match-times p #'match-digit 1 nil))
+
+(defmethod number-token ((p parser))
+ (push-token p)
+ (if (match-sequence p
+ (and (or (match p #\-)
+ (match p #\+)
+ t)
+ (or (match-sequence p
+ (and (match p #\.)
+ (match-digit+ p)))
+ (match-sequence p
+ (match-digit+ p)
+ (match p #\.)
+ (match-digit+ p))
+ (match-digit+ p))
+ (or (match-sequence p
+ (and (or (match p #\E)
+ (match p #\e))
+ (or (match p #\-)
+ (match p #\+)
+ t)
+ (match-digit+ p)))
+ (parser-match-start p))))
+ (make-token p 'number-token)
+ (discard-token p)))
+
+(defclass numbered-token (token)
+ ((number :initarg :number
+ :reader token-number
+ :type number-token)))
+
+(defclass dimension-token (identified-token numbered-token) ())
+
+(defmethod dimension-token ((p parser))
+ (push-token p)
+ (or (match-sequence p
+ (let ((number (number-token p)))
+ (when number
+ (let ((ident (ident-token p)))
+ (when ident
+ (make-token p 'dimension-token
+ :number number
+ :ident ident))))))
+ (discard-token p)))
+
+(defclass percentage-token (numbered-token) ())
+
+(defmethod percentage-token ((p parser))
+ (push-token p)
+ (or (match-sequence p
+ (let ((number (number-token p)))
+ (when number
+ (when (match p #\%)
+ (make-token p 'percentage-token
+ :number number)))))
+ (discard-token p)))
+
+(defclass unicode-range-token (token) ())
+
+(defmethod unicode-range-token ((p parser))
+ (push-token p)
+ (or (match-sequence p
+ (and (or (match p #\u)
+ (match p #\U))
+ (match p #\+)
+ (or (match-sequence p
+ (and (match-times p #'match-hex-digit 1 6)
+ (match p #\-)
+ (match-times p #'match-hex-digit 1 6)))
+ (match-sequence p
+ (let ((start (parser-match-start p)))
+ (and (match-times p #'match-hex-digit 0 5)
+ (let ((digits (- (parser-match-start p) start)))
+ (match-times p (lambda (p) (match p #\?))
+ 1 (- 6 digits))))))
+ (match-times p #'match-hex-digit 1 6))
+ (make-token p 'unicode-range-token)))
+ (discard-token p)))
+
+(defclass include-match-token (token) ())
+
+(defmethod include-match-token ((p parser))
+ (push-token p)
+ (if (match p "~=")
+ (make-token p 'include-match-token)
+ (discard-token p)))
+
+(defclass dash-match-token (token) ())
+
+(defmethod dash-match-token ((p parser))
+ (push-token p)
+ (if (match p "|=")
+ (make-token p 'dash-match-token)
+ (discard-token p)))
+
+(defclass prefix-match-token (token) ())
+
+(defmethod prefix-match-token ((p parser))
+ (push-token p)
+ (if (match p "^=")
+ (make-token p 'prefix-match-token)
+ (discard-token p)))
+
+(defclass suffix-match-token (token) ())
+
+(defmethod suffix-match-token ((p parser))
+ (push-token p)
+ (if (match p "$=")
+ (make-token p 'suffix-match-token)
+ (discard-token p)))
+
+(defclass substring-match-token (token) ())
+
+(defmethod substring-match-token ((p parser))
+ (push-token p)
+ (if (match p "*=")
+ (make-token p 'substring-match-token)
+ (discard-token p)))
+
+(defclass column-token (token) ())
+
+(defmethod column-token ((p parser))
+ (push-token p)
+ (if (match p "||")
+ (make-token p 'column-token)
+ (discard-token p)))
+
+(defclass cdo-token (token) ())
+
+(defmethod cdo-token ((p parser))
+ (push-token p)
+ (if (match p "<!--")
+ (make-token p 'cdo-token)
+ (discard-token p)))
+
+(defclass cdc-token (token) ())
+
+(defmethod cdc-token ((p parser))
+ (push-token p)
+ (if (match p "-->")
+ (make-token p 'cdc-token)
+ (discard-token p)))
+
+(defclass left-paren-token (token) ())
+
+(defmethod left-paren-token ((p parser))
+ (push-token p)
+ (if (match p #\()
+ (make-token p 'left-paren-token)
+ (discard-token p)))
+
+(defclass right-paren-token (token) ())
+
+(defmethod right-paren-token ((p parser))
+ (push-token p)
+ (if (match p #\))
+ (make-token p 'right-paren-token)
+ (discard-token p)))
+
+(defclass comma-token (token) ())
+
+(defmethod comma-token ((p parser))
+ (push-token p)
+ (if (match p #\,)
+ (make-token p 'comma-token)
+ (discard-token p)))
+
+(defclass colon-token (token) ())
+
+(defmethod colon-token ((p parser))
+ (push-token p)
+ (if (match p #\:)
+ (make-token p 'colon-token)
+ (discard-token p)))
+
+(defclass semicolon-token (token) ())
+
+(defmethod semicolon-token ((p parser))
+ (push-token p)
+ (if (match p #\;)
+ (make-token p 'semicolon-token)
+ (discard-token p)))
+
+(defclass [-token (token) ())
+
+(defmethod [-token ((p parser))
+ (push-token p)
+ (if (match p #\[)
+ (make-token p '[-token)
+ (discard-token p)))
+
+(defclass ]-token (token) ())
+
+(defmethod ]-token ((p parser))
+ (push-token p)
+ (if (match p #\])
+ (make-token p ']-token)
+ (discard-token p)))
+
+(defclass {-token (token) ())
+
+(defmethod {-token ((p parser))
+ (push-token p)
+ (if (match p #\{)
+ (make-token p '{-token)
+ (discard-token p)))
+
+(defclass }-token (token) ())
+
+(defmethod }-token ((p parser))
+ (push-token p)
+ (if (match p #\})
+ (make-token p '}-token)
+ (discard-token p)))
+
+(defclass eof-token (token) ())
+
+(defmethod eof-token ((p parser))
+ (push-token p)
+ (if (match p -1)
+ (make-token p 'eof-token)
+ (discard-token p)))
+
+(defclass delim-token (token) ())
+
+(defmethod delim-token ((p parser))
+ (push-token p)
+ (input-length p 1)
+ (incf (parser-match-start p))
+ (make-token p 'delim-token))
+
+;; Tokenizer
+
+(defmethod consume-token ((p parser))
+ (or (whitespace-token p)
+ (string-token p)
+ (hash-token p)
+ (suffix-match-token p)
+ (left-paren-token p)
+ (right-paren-token p)
+ (substring-match-token p)
+ (number-token p)
+ (comma-token p)
+ (cdc-token p)
+ (comment-token p)
+ (colon-token p)
+ (semicolon-token p)
+ (cdo-token p)
+ (at-keyword-token p)
+ ([-token p)
+ (]-token p)
+ (prefix-match-token p)
+ ({-token p)
+ (}-token p)
+ (unicode-range-token p)
+ (ident-token p)
+ (dash-match-token p)
+ (include-match-token p)
+ (eof-token p)
+ (delim-token p)))
+
+;; Lexer
+
+(defun lex-stream (stream)
+ (let ((p (make-instance 'parser :stream stream))
+ (document nil))
+ (labels ((push-tokens ()
+ (let ((token (consume-token p)))
+ (push token document)
+ (unless (typep token 'eof-token)
+ (push-tokens)))))
+ (push-tokens))
+ (nreverse document)))
+
+(defun lex-string (string)
+ (with-input-from-string (stream string)
+ (lex-stream stream)))
+
+(defun lex-file (filespec)
+ (with-open-file (stream filespec
+ :element-type 'character
+ :external-format :utf-8)
+ (lex-stream stream)))
+
+;; Tests
+
+#+test
+(lex-string "/* hello world */ body { color: #ff0000; }")
diff --git a/parse-css.asd b/parse-css.asd
new file mode 100644
index 0000000..811d22e
--- /dev/null
+++ b/parse-css.asd
@@ -0,0 +1,18 @@
+
+(in-package :common-lisp-user)
+
+(defpackage :parse-css.system
+ (:use :common-lisp :asdf))
+
+(in-package :parse-css.system)
+
+(defsystem :parse-css
+ :name "parse-css"
+ :author "Thomas de Grivel <thoxdg@gmail.com>"
+ :version "0.1"
+ :description "CSS level 3 parser"
+ :depends-on ()
+ :components
+ ((:file "defpackage")
+ (:file "lexer" :depends-on ("defpackage"))
+ (:file "parser" :depends-on ("defpackage" "lexer"))))
diff --git a/parser.lisp b/parser.lisp
index 9739478..2aace0e 100644
--- a/parser.lisp
+++ b/parser.lisp
@@ -1,805 +1,2 @@
-(in-package :common-lisp-user)
-
-(defpackage :parse-css
- (:use :common-lisp)
- (:export #:parser))
-
(in-package :parse-css)
-
-;; Input buffers
-
-(defun make-input-buffer (&optional string (start 0))
- (let* ((length (when string (- (length string) start)))
- (ib (make-array (if string
- (* 64 (ceiling length 64))
- 64)
- :element-type 'fixnum
- :adjustable t
- :fill-pointer 0)))
- (when string
- (setf (fill-pointer ib) length)
- (replace ib string :start2 start))
- ib))
-
-;; Parser
-
-(defclass token ()
- ((string :initarg :string
- :accessor token-string
- :type string)
- (line :initarg :line
- :initform 0
- :accessor token-line
- :type fixnum)
- (character :initarg :character
- :initform 0
- :accessor token-character
- :type fixnum)))
-
-(defclass parser-token (token)
- ((start :initarg :start
- :accessor token-start
- :type fixnum)))
-
-(defclass parser ()
- ((input :initarg :stream
- :reader parser-input
- :type stream)
- (input-line :initarg :input-line
- :initform 0
- :accessor parser-input-line
- :type fixnum)
- (input-character :initarg :input-character
- :initform -1
- :accessor parser-input-character
- :type fixnum)
- (eof :initform nil
- :accessor parser-eof)
- (ib :initform (make-input-buffer)
- :accessor parser-ib
- :type array)
- (match-start :initform 0
- :accessor parser-match-start
- :type fixnum)
- (token-stack :initform ()
- :accessor parser-token-stack
- :type list)))
-
-(defgeneric push-token (parser))
-(defgeneric pop-token (parser))
-(defgeneric make-token (parser class &rest initargs))
-(defgeneric discard-token (parser))
-(defgeneric ib-push-extend (parser character))
-(defgeneric input-char (parser))
-(defgeneric input-length (parser length))
-(defgeneric parser-match-char (parser &optional char-index))
-(defgeneric match (parser item))
-(defgeneric match-until (parser item))
-(defgeneric match-option (parser function))
-(defgeneric match-times (parser function min max))
-(defgeneric match-comment (parser))
-(defgeneric match-newline (parser))
-(defgeneric match-whitespace (parser))
-(defgeneric match-hex-digit (parser))
-(defgeneric match-escape (parser))
-(defgeneric whitespace-token (parser))
-(defgeneric match-ws* (parser))
-(defgeneric match-ident-char (parser))
-(defgeneric match-ident-char* (parser))
-(defgeneric ident-token (parser))
-(defgeneric function-token (parser))
-(defgeneric at-keyword-token (parser))
-(defgeneric match-string-char (parser end-char))
-(defgeneric match-string (parser end-char))
-(defgeneric string-token (parser))
-(defgeneric match-non-printable (parser))
-(defgeneric match-url-unquoted-char (parser))
-(defgeneric match-url-unquoted (parser))
-(defgeneric url-token (parser))
-(defgeneric match-digit (parser))
-(defgeneric match-digit+ (parser))
-(defgeneric number-token (parser))
-(defgeneric dimension-token (parser))
-(defgeneric percentage-token (parser))
-(defgeneric unicode-range-token (parser))
-(defgeneric include-match-token (parser))
-(defgeneric dash-match-token (parser))
-(defgeneric prefix-match-token (parser))
-(defgeneric suffix-match-token (parser))
-(defgeneric substring-match-token (parser))
-(defgeneric column-token (parser))
-(defgeneric cdo-token (parser))
-(defgeneric cdc-token (parser))
-
-;; Parser input
-
-(defmethod ib-string ((ib array) (start fixnum) (end fixnum))
- (let* ((length (- end start))
- (s (make-string length)))
- (labels ((at (i j)
- (let ((ib-char (aref ib i)))
- (unless (or (= length j)
- (= ib-char -1))
- (setf (char s j) (code-char ib-char))
- (at (1+ i) (1+ j))))))
- (at start 0))))
-
-(defmethod ib-push-extend ((p parser) (c fixnum))
- (let ((ib (parser-ib p)))
- (let* ((fill-pointer (fill-pointer ib))
- (new-fill-pointer (1+ fill-pointer)))
- (if (= fill-pointer (array-dimension ib 0))
- (setf (parser-ib p) (adjust-array ib (+ fill-pointer 64)
- :fill-pointer new-fill-pointer))
- (setf (fill-pointer ib) new-fill-pointer))
- (locally (declare (optimize (safety 0)))
- (setf (aref ib fill-pointer) c))
- fill-pointer)))
-
-(defun read-char-code (stream)
- (let ((c (read-char stream nil nil)))
- (if c (char-code c) -1)))
-
-(defmethod input-char ((p parser))
- (let* ((in (parser-input p))
- (c (read-char-code in))
- (pos (ib-push-extend p c)))
- (cond ((or (and (= #x000A c)
- (not (and (< 0 pos)
- (= #x000D (aref (parser-ib p) (1- pos))))))
- (= #x000D c)
- (= #x000C c))
- (setf (parser-input-character p) 0)
- (incf (parser-input-line p)))
- (t
- (incf (parser-input-character p))))
- c))
-
-(defmethod input-length ((p parser) (length fixnum))
- (when (< (- (fill-pointer (parser-ib p))
- (parser-match-start p))
- length)
- (input-char p)
- (input-length p length)))
-
-(defmethod parser-match-char ((p parser) &optional (index 0))
- (input-length p (1+ index))
- (aref (parser-ib p) (+ (parser-match-start p) index)))
-
-;; Token stack
-
-(defmethod push-token ((p parser))
- (let ((token (make-instance 'parser-token
- :start (parser-match-start p)
- :line (parser-input-line p)
- :character (parser-input-character p))))
- (push token (parser-token-stack p))))
-
-(defmethod pop-token ((p parser))
- (assert (parser-token-stack p))
- (let* ((ib (parser-ib p))
- (fill-pointer (fill-pointer ib))
- (token (pop (parser-token-stack p)))
- (match-start (parser-match-start p)))
- (setf (token-string token) (ib-string ib
- (token-start token)
- match-start))
- (when (endp (parser-token-stack p))
- (replace ib ib :start2 match-start :end2 fill-pointer)
- (setf (parser-match-start p) 0
- (fill-pointer (parser-ib p)) (- fill-pointer match-start)))
- token))
-
-(defmethod make-token ((p parser) (class symbol) &rest initargs)
- (let ((pt (pop-token p)))
- (apply #'make-instance class
- :string (token-string pt)
- :line (token-line pt)
- :character (token-character pt)
- initargs)))
-
-(defmethod discard-token ((p parser))
- (pop-token p)
- nil)
-
-;; Pattern matching
-
-(defmethod ib= ((p parser) (s string) &key (start1 0))
- (let ((ib (parser-ib p))
- (end (length s)))
- (when (<= end (- (fill-pointer ib) start1))
- (locally (declare (optimize (safety 0)))
- (labels ((at (m i)
- (declare (type fixnum m i))
- (cond ((= end i)
- t)
- ((= (aref ib m) (char-code (char s i)))
- (at (the fixnum (1+ m)) (the fixnum (1+ i))))
- (t
- nil))))
- (at start1 0))))))
-
-(defmethod match ((p parser) (s string))
- (input-length p (length s))
- (when (ib= p s :start1 (parser-match-start p))
- (incf (parser-match-start p) (length s))))
-
-(defmethod match ((p parser) (c fixnum))
- (when (= (the fixnum (parser-match-char p)) c)
- (incf (parser-match-start p))))
-
-(defmethod match ((p parser) (c character))
- (match p (char-code c)))
-
-(defmethod match-until ((p parser) (s string))
- (input-length p (length s))
- (labels ((maybe-eat ()
- (or (match p s)
- (and (not (match p -1))
- (progn
- (input-char p)
- (incf (parser-match-start p))
- (maybe-eat))))))
- (maybe-eat)))
-
-(defmethod match-option ((p parser) (f function))
- (or (funcall f p)
- (parser-match-start p)))
-
-(defmacro match-not (p &body body)
- (let ((parser (gensym "PARSER-"))
- (match-start (gensym "MATCH-START-"))
- (result (gensym "RESULT-")))
- `(let* ((,parser ,p)
- (,match-start (parser-match-start ,parser))
- (,result (progn ,@body)))
- (cond ((or ,result
- (match p -1))
- (setf (parser-match-start ,parser) ,match-start)
- nil)
- (t
- (incf (parser-match-start p)))))))
-
-(defmacro match-sequence (p &body body)
- (let ((parser (gensym "PARSER-"))
- (match-start (gensym "MATCH-START-"))
- (result (gensym "RESULT-")))
- `(let* ((,parser ,p)
- (,match-start (parser-match-start ,parser))
- (,result (progn ,@body)))
- (cond (,result
- ,result)
- (t
- (setf (parser-match-start ,parser) ,match-start)
- nil)))))
-
-(defmethod match-times ((p parser) (f function) (min fixnum) (max fixnum))
- (match-sequence p
- (labels ((match-min ()
- (cond ((= 0 min)
- (match-max))
- ((funcall f p)
- (decf min)
- (decf max)
- (match-min))
- (t
- nil)))
- (match-max ()
- (cond ((and (< 0 max) (funcall f p))
- (decf max)
- (match-max))
- (t
- (parser-match-start p)))))
- (match-min))))
-
-(defmethod match-times ((p parser) (f function) (min fixnum) (max null))
- (match-sequence p
- (labels ((match-min ()
- (cond ((= 0 min)
- (match-max))
- ((funcall f p)
- (decf min)
- (match-min))
- (t
- nil)))
- (match-max ()
- (cond ((funcall f p)
- (match-max))
- (t
- (parser-match-start p)))))
- (match-min))))
-
-;; Productions
-
-(defclass comment-token (token) ())
-
-(defmethod comment-token ((p parser))
- (push-token p)
- (if (match p "/*")
- (progn (match-until p "*/")
- (make-token p 'comment-token))
- (discard-token p)))
-
-(defmethod match-newline ((p parser))
- (or (match p #\Newline)
- (match p (coerce '(#\Return #\Newline) 'string))
- (match p #\Return)
- (match p #\Linefeed)))
-
-(defmethod match-whitespace ((p parser))
- (or (match p #\Space)
- (match p #\Tab)
- (match-newline p)))
-
-(defmethod match-hex-digit ((p parser))
- (let ((c (the fixnum (parser-match-char p))))
- (when (or (<= (char-code #\0) c (char-code #\9))
- (<= (char-code #\a) c (char-code #\f))
- (<= (char-code #\A) c (char-code #\F)))
- (incf (parser-match-start p)))))
-
-(defmethod match-escape ((p parser))
- (match-sequence p
- (and (match p #\\)
- (or (when (match-times p #'match-hex-digit 1 6)
- (match-whitespace p)
- (parser-match-start p))
- (when (not (match-newline p))
- (parser-match-start p))))))
-
-(defclass whitespace-token (token) ())
-
-(defmethod whitespace-token ((p parser))
- (push-token p)
- (if (match-times p #'match-whitespace 1 nil)
- (make-token p 'whitespace-token)
- (discard-token p)))
-
-(defmethod match-ws* ((p parser))
- (match-option p #'whitespace-token))
-
-(defclass ident-token (token) ())
-
-(defmethod match-ident-char ((p parser))
- (or (match-escape p)
- (let ((c (the fixnum (parser-match-char p))))
- (when (or (<= (char-code #\a) c (char-code #\z))
- (<= (char-code #\A) c (char-code #\Z))
- (<= (char-code #\0) c (char-code #\9))
- (= (char-code #\_) c)
- (= (char-code #\-) c)
- (< #x007F c))
- (incf (parser-match-start p))))))
-
-(defmethod match-ident-char* ((p parser))
- (match-times p #'match-ident-char 0 nil))
-
-(defmethod ident-token ((p parser))
- (match-sequence p
- (push-token p)
- (match p #\-)
- (cond ((or (match-escape p)
- (let ((c (the fixnum (parser-match-char p))))
- (when (or (<= (char-code #\a) c (char-code #\z))
- (<= (char-code #\A) c (char-code #\Z))
- (= (char-code #\_) c)
- (< #x007F c))
- (incf (parser-match-start p)))))
- (match-ident-char* p)
- (make-token p 'ident-token))
- (t
- (discard-token p)))))
-
-(defclass identified-token (token)
- ((ident :initarg :ident
- :reader token-ident
- :type ident-token)))
-
-(defclass function-token (identified-token) ())
-
-(defmethod function-token ((p parser))
- (match-sequence p
- (push-token p)
- (let ((ident (ident-token p)))
- (if (and ident (match p #\())
- (make-token p 'function-token :ident ident)
- (discard-token p)))))
-
-(defclass at-keyword-token (identified-token) ())
-
-(defmethod at-keyword-token ((p parser))
- (match-sequence p
- (push-token p)
- (if (match p #\@)
- (let ((ident (ident-token p)))
- (if ident
- (make-token p 'at-keyword-token :ident ident)
- (discard-token p)))
- (discard-token p))))
-
-(defclass hash-token (token) ())
-
-(defmethod hash-token ((p parser))
- (match-sequence p
- (push-token p)
- (if (match p #\#)
- (and (match-ident-char* p)
- (make-token p 'hash-token))
- (discard-token p))))
-
-(defclass string-token (token) ())
-
-(defgeneric string-token-string (string-token))
-
-(defmethod string-token-string ((s string-token))
- (let ((string (token-string s)))
- (subseq string 1 (1- (length string)))))
-
-(defmethod match-string-char ((p parser) (end-char character))
- (or (match-sequence p
- (and (match p #\\)
- (match-newline p)))
- (match-escape p)
- (match-not p
- (or (match p end-char)
- (match p #\\)
- (match-newline p)))))
-
-(defmethod match-string ((p parser) (end-char character))
- (match-sequence p
- (match p end-char)
- (match-times p (lambda (p) (match-string-char p end-char)) 0 nil)
- (match p end-char)))
-
-(defmethod string-token ((p parser))
- (push-token p)
- (if (or (match-string p #\")
- (match-string p #\'))
- (make-token p 'string-token)))
-
-(defclass url-token (identified-token)
- ((url :initarg :url
- :reader token-url
- :type token)))
-
-(defmethod match-non-printable ((p parser))
- (let ((c (the fixnum (parser-match-char p))))
- (when (or (<= #x0000 c #x0008)
- (= #x000B c)
- (<= #x000E c #x001F)
- (= #x007F c))
- (incf (parser-match-start p)))))
-
-(defmethod match-url-unquoted-char ((p parser))
- (or (match-escape p)
- (match-not p
- (or (match p #\")
- (match p #\')
- (match p #\()
- (match p #\))
- (match p #\\)
- (match-whitespace p)
- (match-non-printable p)))))
-
-(defmethod match-url-unquoted ((p parser))
- (push-token p)
- (if (match-times p #'match-url-unquoted-char 1 nil)
- (make-token p 'token)
- (discard-token p)))
-
-(defmethod url-token ((p parser))
- (push-token p)
- (or (match-sequence p
- (let ((ident (ident-token p)))
- (and (string= "url" (token-string ident))
- (match p #\()
- (match-ws* p)
- (let ((url (or (string-token p)
- (match-url-unquoted p))))
- (match-ws* p)
- (when (match p #\))
- (make-token p (url-token :ident ident :url url)))))))
- (discard-token p)))
-
-(defclass number-token (token) ())
-
-(defmethod match-digit ((p parser))
- (let ((c (the fixnum (parser-match-char p))))
- (when (<= (char-code #\0) c (char-code #\9))
- (incf (parser-match-start p)))))
-
-(defmethod match-digit+ ((p parser))
- (match-times p #'match-digit 1 nil))
-
-(defmethod number-token ((p parser))
- (push-token p)
- (if (match-sequence p
- (and (or (match p #\-)
- (match p #\+)
- t)
- (or (match-sequence p
- (and (match p #\.)
- (match-digit+ p)))
- (match-sequence p
- (match-digit+ p)
- (match p #\.)
- (match-digit+ p))
- (match-digit+ p))
- (or (match-sequence p
- (and (or (match p #\E)
- (match p #\e))
- (or (match p #\-)
- (match p #\+)
- t)
- (match-digit+ p)))
- (parser-match-start p))))
- (make-token p 'number-token)
- (discard-token p)))
-
-(defclass numbered-token (token)
- ((number :initarg :number
- :reader token-number
- :type number-token)))
-
-(defclass dimension-token (identified-token numbered-token) ())
-
-(defmethod dimension-token ((p parser))
- (push-token p)
- (or (match-sequence p
- (let ((number (number-token p)))
- (when number
- (let ((ident (ident-token p)))
- (when ident
- (make-token p 'dimension-token
- :number number
- :ident ident))))))
- (discard-token p)))
-
-(defclass percentage-token (numbered-token) ())
-
-(defmethod percentage-token ((p parser))
- (push-token p)
- (or (match-sequence p
- (let ((number (number-token p)))
- (when number
- (when (match p #\%)
- (make-token p 'percentage-token
- :number number)))))
- (discard-token p)))
-
-(defclass unicode-range-token (token) ())
-
-(defmethod unicode-range-token ((p parser))
- (push-token p)
- (or (match-sequence p
- (and (or (match p #\u)
- (match p #\U))
- (match p #\+)
- (or (match-sequence p
- (and (match-times p #'match-hex-digit 1 6)
- (match p #\-)
- (match-times p #'match-hex-digit 1 6)))
- (match-sequence p
- (let ((start (parser-match-start p)))
- (and (match-times p #'match-hex-digit 0 5)
- (let ((digits (- (parser-match-start p) start)))
- (match-times p (lambda (p) (match p #\?))
- 1 (- 6 digits))))))
- (match-times p #'match-hex-digit 1 6))
- (make-token p 'unicode-range-token)))
- (discard-token p)))
-
-(defclass include-match-token (token) ())
-
-(defmethod include-match-token ((p parser))
- (push-token p)
- (if (match p "~=")
- (make-token p 'include-match-token)
- (discard-token p)))
-
-(defclass dash-match-token (token) ())
-
-(defmethod dash-match-token ((p parser))
- (push-token p)
- (if (match p "|=")
- (make-token p 'dash-match-token)
- (discard-token p)))
-
-(defclass prefix-match-token (token) ())
-
-(defmethod prefix-match-token ((p parser))
- (push-token p)
- (if (match p "^=")
- (make-token p 'prefix-match-token)
- (discard-token p)))
-
-(defclass suffix-match-token (token) ())
-
-(defmethod suffix-match-token ((p parser))
- (push-token p)
- (if (match p "$=")
- (make-token p 'suffix-match-token)
- (discard-token p)))
-
-(defclass substring-match-token (token) ())
-
-(defmethod substring-match-token ((p parser))
- (push-token p)
- (if (match p "*=")
- (make-token p 'substring-match-token)
- (discard-token p)))
-
-(defclass column-token (token) ())
-
-(defmethod column-token ((p parser))
- (push-token p)
- (if (match p "||")
- (make-token p 'column-token)
- (discard-token p)))
-
-(defclass cdo-token (token) ())
-
-(defmethod cdo-token ((p parser))
- (push-token p)
- (if (match p "<!--")
- (make-token p 'cdo-token)
- (discard-token p)))
-
-(defclass cdc-token (token) ())
-
-(defmethod cdc-token ((p parser))
- (push-token p)
- (if (match p "-->")
- (make-token p 'cdc-token)
- (discard-token p)))
-
-(defclass left-paren-token (token) ())
-
-(defmethod left-paren-token ((p parser))
- (push-token p)
- (if (match p #\()
- (make-token p 'left-paren-token)
- (discard-token p)))
-
-(defclass right-paren-token (token) ())
-
-(defmethod right-paren-token ((p parser))
- (push-token p)
- (if (match p #\))
- (make-token p 'right-paren-token)
- (discard-token p)))
-
-(defclass comma-token (token) ())
-
-(defmethod comma-token ((p parser))
- (push-token p)
- (if (match p #\,)
- (make-token p 'comma-token)
- (discard-token p)))
-
-(defclass colon-token (token) ())
-
-(defmethod colon-token ((p parser))
- (push-token p)
- (if (match p #\:)
- (make-token p 'colon-token)
- (discard-token p)))
-
-(defclass semicolon-token (token) ())
-
-(defmethod semicolon-token ((p parser))
- (push-token p)
- (if (match p #\;)
- (make-token p 'semicolon-token)
- (discard-token p)))
-
-(defclass [-token (token) ())
-
-(defmethod [-token ((p parser))
- (push-token p)
- (if (match p #\[)
- (make-token p '[-token)
- (discard-token p)))
-
-(defclass ]-token (token) ())
-
-(defmethod ]-token ((p parser))
- (push-token p)
- (if (match p #\])
- (make-token p ']-token)
- (discard-token p)))
-
-(defclass {-token (token) ())
-
-(defmethod {-token ((p parser))
- (push-token p)
- (if (match p #\{)
- (make-token p '{-token)
- (discard-token p)))
-
-(defclass }-token (token) ())
-
-(defmethod }-token ((p parser))
- (push-token p)
- (if (match p #\})
- (make-token p '}-token)
- (discard-token p)))
-
-(defclass eof-token (token) ())
-
-(defmethod eof-token ((p parser))
- (push-token p)
- (if (match p -1)
- (make-token p 'eof-token)
- (discard-token p)))
-
-(defclass delim-token (token) ())
-
-(defmethod delim-token ((p parser))
- (push-token p)
- (input-length p 1)
- (incf (parser-match-start p))
- (make-token p 'delim-token))
-
-;; Tokenizer
-
-(defmethod consume-token ((p parser))
- (or (whitespace-token p)
- (string-token p)
- (hash-token p)
- (suffix-match-token p)
- (left-paren-token p)
- (right-paren-token p)
- (substring-match-token p)
- (number-token p)
- (comma-token p)
- (cdc-token p)
- (comment-token p)
- (colon-token p)
- (semicolon-token p)
- (cdo-token p)
- (at-keyword-token p)
- ([-token p)
- (]-token p)
- (prefix-match-token p)
- ({-token p)
- (}-token p)
- (unicode-range-token p)
- (ident-token p)
- (dash-match-token p)
- (include-match-token p)
- (eof-token p)
- (delim-token p)))
-
-;; Lexer
-
-(defun lex-stream (stream)
- (let ((p (make-instance 'parser :stream stream))
- (document nil))
- (labels ((push-tokens ()
- (let ((token (consume-token p)))
- (push token document)
- (unless (typep token 'eof-token)
- (push-tokens)))))
- (push-tokens))
- (nreverse document)))
-
-(defun lex-string (string)
- (with-input-from-string (stream string)
- (lex-stream stream)))
-
-(defun lex-file (filespec)
- (with-open-file (stream filespec
- :element-type 'character
- :external-format :utf-8)
- (lex-stream stream)))
-
-;; Tests
-
-#+test
-(lex-string "/* hello world */ body { color: #ff0000; }")
-
-;; Parsing
-
-