Round 2 based on the chained lexer from "Higher Order Perl"
Code:
(defun return-matched-and-unmatched-strings (input-string search-pattern token-label make-token-function)
(let ((outlist nil)
(flag nil)
(last-index 0))
;;Seperates the numeric indices return from all-matches into chunks of chars
(mapc (lambda (x)
(if flag
(progn (push
(funcall make-token-function (subseq input-string last-index x) token-label)
outlist)
(setf flag nil))
(progn (push (subseq input-string last-index x) outlist)
(setf flag t)))
(setf last-index x)) (cl-ppcre:all-matches search-pattern input-string))
(push (subseq input-string last-index) outlist)
;;return in the order tokens were found
(nreverse (remove-if (lambda (x)
(and (stringp x)
(string= x ""))) outlist))))
(defun lexer (input token-label search-pattern make-token-function)
(mapcan (lambda (x)
(cond ((listp x) (list x))
((stringp x) (return-matched-and-unmatched-strings x search-pattern token-label make-token-function))))
input))
;(tokenizer "string" '((:name pattern func)
; (:name2 pattern2 func2)))
; =>
;(lexer (lexer (list "string") :name pattern func) :name2 pattern2 func2)
;let input = (list string)
;mapc lambda patterns
;;(setf input (lexer input (car x) (cadr x) (caadr x)))
;input
(defun tokenizer (input-string token-lists)
"Takes a string and a list of token patterns and returns the results of chain lexing the patterns.
Patterns are in the format (:name regex make-token-function)
Examples:
(tokenizer \"1 * 6 + 7\"
(list (list :whitespace \"\\\\s+\" (lambda (x y) \"\"))
(list :number \"[0-9]+\" (lambda (x y) (list y x)))
(list :operator \"[*/+-]\" (lambda (x y) (list y x)))))
((:NUMBER \"1\") (:OPERATOR \"*\") (:NUMBER \"6\") (:OPERATOR \"+\") (:NUMBER \"7\"))"
(let ((input (list input-string)))
(mapc (lambda (x)
(setf input (lexer input (car x) (cadr x) (caddr x)))) token-lists)
input))