Edit

thodg/re/re.lisp

Branch :

  • Show log

    Commit

  • Author : Thomas de Grivel
    Date : 2015-04-11 18:01:52
    Hash : 2a559c50
    Message : RE

  • re.lisp
  • ;;
    ;;  re  -  Simple syntax for regular expressions in Common Lisp
    ;;
    ;;  Copyright 2014 Thomas de Grivel <thomas@lowh.net>
    ;;
    ;;  Permission to use, copy, modify, and distribute this software for any
    ;;  purpose with or without fee is hereby granted, provided that the above
    ;;  copyright notice and this permission notice appear in all copies.
    ;;
    ;;  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
    ;;  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
    ;;  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
    ;;  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
    ;;  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
    ;;  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
    ;;  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
    ;;
    
    (in-package :cl-user)
    
    (defpackage #:re
      (:nicknames #:cl-re)
      (:use :cl)
      (:export
       #:disable-re-syntax
       #:do-re-groups
       #:do-re-matches
       #:enable-re-syntax
       #:in-re-readtable
       #:re
       #:re-bind
       #:re-match
       #:re-matches
       #:re-readtable
       #:re-split
       #:re-subst
       #:re-subst*
       #:re-quote))
    
    (in-package #:re)
    
    ;;  CL-PPCRE sugar. All exported symbols are prefixed with RE-
    ;;  so (USE-PACKAGE #:RE) makes sense.
    
    (defun re (re &key
    		case-insensitive-mode
    		multi-line-mode
    		single-line-mode
    		extended-mode
    		destructive)
      (cl-ppcre:create-scanner re
    			   :case-insensitive-mode case-insensitive-mode
    			   :multi-line-mode multi-line-mode
    			   :single-line-mode single-line-mode
    			   :extended-mode extended-mode
    			   :destructive destructive))
    
    (defun re-match (re string &key
    			     (start 0)
    			     (end (length string))
    			     real-start-pos)
      (cl-ppcre:scan re string
    		 :start start
    		 :end end
    		 :real-start-pos real-start-pos))
    
    (defun re-matches (re string &key
    			     (start 0)
    			     (end (length string))
    			     sharedp)
      (cl-ppcre:all-matches-as-strings re string
    		 :start start
    		 :end end
    		 :sharedp sharedp))
    
    (defmacro do-re-matches (re (var) string &body body)
      (let ((n (gensym "N-")))
        `(let ((,n 0))
           (cl-ppcre:do-matches-as-strings (,var ,re ,string (when (< 0 ,n)
    							   ,n))
    	 (incf ,n)
    	 ,@body))))
    
    (defmacro re-bind (re var-list string &body body)
      `(cl-ppcre:register-groups-bind ,var-list (,re ,string)
         ,@(or body `((values ,@var-list)))))
    
    (defmacro do-re-groups (re var-list string &body body)
      `(cl-ppcre:do-register-groups ,var-list (,re ,string)
         ,@body))
    
    (defun re-split (re string &key
                                 (start 0)
                                 (end (length string))
                                 limit
                                 registers
                                 unmatched
                                 shared)
      (cl-ppcre:split re string
                      :start start
                      :end end
                      :limit limit
                      :with-registers-p registers
                      :omit-unmatched-p (not unmatched)
                      :sharedp shared))
    
    (defun re-subst (re subst string &key
    				   (start 0)
    				   (end (length string))
    				   preserve-case
    				   (element-type 'character))
      (cl-ppcre:regex-replace-all re string subst
    			      :start start
    			      :end end
    			      :preserve-case preserve-case
    			      :element-type element-type))
    
    (defun re-subst* (string &rest re-subst*)
      (if (endp re-subst*)
          string
          (destructuring-bind (re subst &rest rest) re-subst*
    	(apply #'re-subst* (re-subst re subst string) rest))))
    
    ;;  Quoting
    
    (defun re-quote (string &key (start 0) (end (length string)))
      (cl-ppcre:quote-meta-chars string :start start :end end))
    
    ;;  Reader syntax
    
    (defun re-mode-match (re)
      `(lambda (str)
         (re-match ,re str)))
    
    (defun re-mode-subst (re subst)
      `(lambda (str)
         (re-subst ,re ,subst str)))
    
    (defvar *re-modes* '((nil identity 1)
    		     (#\m re-mode-match 1)
    		     (#\s re-mode-subst 2)))
    
    (defun read-delimited-parts (count delimiter stream)
      "Reads COUNT strings separated by a DELIMITER character, from STREAM.
    The delimiter and '\' can be escaped with '\'.
    Other characters will not be escaped and '\' will be part of the string."
      (loop repeat count
            collect (with-output-to-string (out)
    		  (loop for c = (read-char stream)
    		        until (char= c delimiter)
    		     do (write-char (if (and (char= #\\ c)
    					     (let ((p (peek-char nil stream)))
    					       (or (char= delimiter p)
    						   (char= #\\ p))))
    					(read-char stream)
    					c)
    				    out)))))
    
    (defun read-re (stream &optional char n)
      (declare (ignore char n))
      (let* ((char (read-char stream))
    	 (mode (assoc char *re-modes*))
    	 (delimiter (if mode (read-char stream) char)))
        (destructuring-bind (fun count) (cdr (or mode (assoc nil *re-modes*)))
          (apply fun (read-delimited-parts count delimiter stream)))))
    
    (defvar *previous-readtables* nil)
    
    (defun re-readtable (&key merge (char #\#) (subchar #\~))
      (let ((rt (copy-readtable merge)))
        (if subchar
    	(set-dispatch-macro-character char subchar 'read-re rt)
    	(set-macro-character char 'read-re nil rt))
        rt))
    
    (defmacro in-re-readtable (&key merge (char #\#) (subchar #\~))
      `(eval-when (:compile-toplevel :load-toplevel :execute)
         (setq *readtable* (re-readtable :merge ,merge
    				     :char ,char
    				     :subchar ,subchar))))
    
    (defmacro enable-re-syntax (&optional (char #\#) (subchar #\~))
      `(eval-when (:compile-toplevel :load-toplevel :execute)
        (push *readtable* *previous-readtables*)
        (setq *readtable* (re-readtable :char ,char :subchar ,subchar))))
    
    (defmacro disable-re-syntax ()
      '(eval-when (:compile-toplevel :load-toplevel :execute)
        (when *previous-readtables*
          (setq *readtable* (pop *previous-readtables*)))))