git.fiddlerwoaroof.com
string-utils/string-utils.lisp
3f1c9190
 (defpackage :fwoar.string-utils
   (:use :cl)
   (:export #:get-part-modifier #:split))
 
 (in-package :fwoar.string-utils)
0bdecfa7
 
 (deftype array-length ()
   `(integer 0 ,array-dimension-limit))
 (deftype array-index ()
   `(integer 0 ,(1- array-dimension-limit)))
 
 (defun get-part-modifier (char string &optional count)
   (declare (optimize #+dev (debug 3) #-dev (speed 3))
 	   (type character char)
 	   (type string string))
   (flet ((count-splits (string)
 	   (declare (optimize (speed 3))
 		    (type simple-string string))
 	   (do* ((x (the array-length 0) (1+ x))
 		 (cur-char #1=(aref string x) #1#)
 		 (result (the array-length 0) (if (char= cur-char char)
 						  (1+ result)
 						  result)))
 		((= x (1- (length string))) (1+ result))
 	     (declare (type array-length result)))))
     (typecase string
       ((and string (not simple-string))
        (setf string (copy-seq string))))
     (unless count
       (setf count (count-splits string))))
   (let ((parts (make-array count :initial-element nil :element-type '(or string null)))
 	(start-idx 0)
 	(target-spot 0))
     (prog1 parts
       (dotimes (end-idx (length string))
 	(when (typecase string
 		(simple-string (char= (aref string end-idx) char))
 		(string (char= (aref string end-idx) char)))
 	  (setf (aref parts target-spot)
 		(make-array (- end-idx start-idx) :displaced-to string :displaced-index-offset start-idx
 			    :element-type 'character))
 	  (incf target-spot)
 	  (setf start-idx (1+ end-idx))
 	  (when (= target-spot (1- count))
 	    (return))))
       (when (<= start-idx (length string))
 	(setf (aref parts target-spot)
 	      (make-array (- (length string) start-idx)
 			  :displaced-to string :displaced-index-offset start-idx
 			  :element-type 'character))))))
 
 ;; TODO: implement test
 (defun %split-on-char (divider string &key count (test nil))
   (declare (optimize #+dev (debug 3) (speed 3) (space 2))
 	   (type (or null array-length) count)
 	   (type (or null function symbol) test)
 	   (type character divider)
 	   (type string string))
   (typecase string
     ((and string (not simple-string))
      (setf string (copy-seq string))))
   (check-type string simple-string)
 
   (flet ((count-splits (string)
 	   (declare (optimize (speed 3))
 		    (type simple-string string))
 	   (do* ((x (the array-length 0) (1+ x))
 		 (cur-char #1=(aref string x) #1#)
 		 (result (the array-length 0) (if (char= cur-char divider)
 						  (1+ result)
 						  result)))
 		((= x (1- (length string))) (1+ result))
 	     (declare (type array-length result))))
 	 (find-pos (start-pos)
 	   (declare (optimize (speed 3))
 		    (type array-index start-pos))
 	   (etypecase test
 	     (function (position divider string :start start-pos :test test))
 	     (null (position divider string :start start-pos))
 	     (symbol (position divider string :start start-pos :test (symbol-function test))))))
 
     (unless count
       (setf count (count-splits string)))
 
     (check-type count array-length)
     (let ((parts (make-array (1+ count) :fill-pointer 0))
 	  (start-pos (the fixnum 0)))
       (declare (dynamic-extent start-pos))
       (prog1 parts
 	(loop 
 	   for end-pos = (find-pos start-pos)
 	   while end-pos 
3fd4e1b4
 	   for num-parts from 0 to count
0bdecfa7
 	   do
 	     (vector-push (subseq string start-pos end-pos) parts)
 	     (setf start-pos (1+ end-pos))
 	   finally
 	     (when (< start-pos (length string))
 	       (vector-push (subseq string start-pos)
3fd4e1b4
 			    parts))
 	     (when (eql divider (aref string (1- (length string))))
 	       (vector-push (make-string 0)
0bdecfa7
 			    parts)))))))
 
 (defmacro twice (&body body)
   `(progn ,@body
 	  ,@body))
 
 
 (defun %split-on-string (divider string &key count (test nil))
   (declare (optimize #+dev (debug 3) (speed 3))
 	   (type string divider string)
 	   (type (or null function symbol) test)
 	   (type (or null array-index) count))
   (flet ((%search (start-pos)
 	   (declare (optimize (speed 3))
 		    (type array-index start-pos)
 		    (inline))
 	   (typecase divider
 	     (simple-string (typecase string
 			      (simple-string (search divider string :start2 start-pos))
 			      (string (search divider string :start2 start-pos))))
 	     (string (search divider string :start2 start-pos))))
 	 (%search-with-test (start-pos test)
 	   (declare (optimize (speed 3))
 		    (type array-index start-pos)
 		    (type function test)
 		    (inline))
 	   (typecase divider
 	     (simple-string (typecase string
 			      (simple-string (search divider string :start2 start-pos :test test))
 			      (string (search divider string :start2 start-pos :test test))))
 	     (string (search divider string :start2 start-pos :test test)))))
     (declare (dynamic-extent (function %search)
 			     (function %search-with-test)))
     (let ((num-parts (the array-length 0))
 	  (pattern-length (the array-length (length divider)))
 	  (search-test (typecase test
 			 (function test)
 			 (null)
 			 (symbol (symbol-function test))))
 	  (parts (make-array (if count count 100)
 			     :adjustable t
 			     :fill-pointer 0))
 	  (start-pos 0))
       (loop 
 	 for end-pos = (typecase search-test
 			 (function (%search-with-test start-pos search-test))
 			 (null (%search start-pos)))
 	 do
 	   (vector-push-extend (subseq string start-pos end-pos) parts) 
 	   (incf (the array-length num-parts))
 	 while end-pos
 	 do (setf start-pos (the array-length (+ pattern-length end-pos)))
 	 until (and count (>= (1+ num-parts) count))
 	 finally
 	   (when (and count end-pos)
 	     (vector-push-extend (subseq string (+ pattern-length end-pos)) parts))
 	   (return parts)))))
 
0052b5eb
 (defun split (divider string &key count (test nil) (type nil type-p))
0bdecfa7
   (declare (optimize #+dev (debug 3) (speed 3) (space 3)))
   (unless test
     (setf test
 	  (typecase divider
 	    (string 'equal)
 	    (t 'eql))))
0052b5eb
   (let ((result (etypecase divider
 		  (character (%split-on-char divider string :count count :test test))
 		  (string (if (= 1 (length divider))
 			      (%split-on-char (aref divider 0) string :count count :test test)
 			      (%split-on-string divider string :count count :test test))))))
     (if type-p
 	(coerce result type)
 	result)))