git.fiddlerwoaroof.com
string-utils/string-utils.lisp
3f1c9190
 (in-package :fwoar.string-utils)
0bdecfa7
 
 (deftype array-length ()
   `(integer 0 ,array-dimension-limit))
 (deftype array-index ()
   `(integer 0 ,(1- array-dimension-limit)))
 
 (defun get-part-modifier (char string &optional count)
   (declare (optimize #+dev (debug 3) #-dev (speed 3))
25d4bb7c
            (type character char)
            (type string string))
 
0bdecfa7
   (flet ((count-splits (string)
25d4bb7c
            (declare (optimize (speed 3))
                     (type simple-string string))
            (do* ((x (the array-length 0) (1+ x))
                  (cur-char #1=(aref string x) #1#)
                  (result (the array-length 0) (if (char= cur-char char)
                                                   (1+ result)
                                                   result)))
                 ((= x (1- (length string))) (1+ result))
              (declare (type array-length result)))))
0bdecfa7
     (typecase string
       ((and string (not simple-string))
        (setf string (copy-seq string))))
     (unless count
       (setf count (count-splits string))))
25d4bb7c
 
0bdecfa7
   (let ((parts (make-array count :initial-element nil :element-type '(or string null)))
25d4bb7c
         (start-idx 0)
         (target-spot 0))
0bdecfa7
     (prog1 parts
       (dotimes (end-idx (length string))
25d4bb7c
         (when (typecase string
                 (simple-string (char= (aref string end-idx) char))
                 (string (char= (aref string end-idx) char)))
           (setf (aref parts target-spot)
                 (make-array (- end-idx start-idx) :displaced-to string :displaced-index-offset start-idx
                             :element-type 'character))
           (incf target-spot)
           (setf start-idx (1+ end-idx))
           (when (= target-spot (1- count))
             (return))))
0bdecfa7
       (when (<= start-idx (length string))
25d4bb7c
         (setf (aref parts target-spot)
               (make-array (- (length string) start-idx)
                           :displaced-to string :displaced-index-offset start-idx
                           :element-type 'character))))))
0bdecfa7
 
 ;; TODO: implement test
 (defun %split-on-char (divider string &key count (test nil))
7d20282a
   (declare (optimize #+dev (debug 3) #+dev (speed 0) #+dev (space 0)
                      #-dev (speed 3) #-dev(space 2))
25d4bb7c
            (type (or null array-length) count)
            (type (or null function symbol) test)
            (type character divider)
            (type string string))
0bdecfa7
   (typecase string
     ((and string (not simple-string))
      (setf string (copy-seq string))))
   (check-type string simple-string)
 
   (flet ((count-splits (string)
7d20282a
            (declare (optimize #+dev (debug 3) #+dev (speed 0) #+dev (space 0)
                               #-dev (speed 3) #-dev(space 2))
25d4bb7c
                     (type simple-string string))
7d20282a
            (if (/= 0 (length string))
                (do* ((x (the array-length 0) (1+ x))
                      (cur-char #1=(aref string x) #1#)
                      (result (the array-length
                                   (if (char= cur-char divider)
                                       1
                                       0))
                              (if (char= cur-char divider)
                                  (1+ result)
                                  result)))
                     ((= x (1- (length string))) (1+ result))
                  (declare (type array-length result)))
                0))
25d4bb7c
          (find-pos (start-pos)
7d20282a
            (declare (optimize #+dev (debug 3) #+dev (speed 0) #+dev (space 0)
                      #-dev (speed 3) #-dev(space 2))
25d4bb7c
                     (type array-index start-pos))
            (etypecase test
              (function (position divider string :start start-pos :test test))
              (null (position divider string :start start-pos))
              (symbol (position divider string :start start-pos :test (symbol-function test))))))
0bdecfa7
 
     (unless count
       (setf count (count-splits string)))
 
     (check-type count array-length)
7d20282a
     (let ((parts (make-array (max count 1) :fill-pointer 0))
25d4bb7c
           (start-pos (the fixnum 0)))
0bdecfa7
       (declare (dynamic-extent start-pos))
       (prog1 parts
25d4bb7c
         (loop 
            for end-pos = (find-pos start-pos)
            while end-pos 
            do
              (vector-push (subseq string start-pos end-pos) parts)
              (setf start-pos (1+ end-pos))
            while (< (length parts) (1- count))
            finally
e828dd49
              (cond ((or (and end-pos count)
                         (< start-pos (length string)))
7d20282a
                     (vector-push (subseq string start-pos)
                                  parts))
                    ((not end-pos)
                     (vector-push "" parts))))))))
0bdecfa7
 
 (defun %split-on-string (divider string &key count (test nil))
   (declare (optimize #+dev (debug 3) (speed 3))
25d4bb7c
            (type string divider string)
            (type (or null function symbol) test)
            (type (or null array-index) count))
0bdecfa7
   (flet ((%search (start-pos)
25d4bb7c
            (declare (optimize (speed 3))
                     (type array-index start-pos)
                     (inline))
            (typecase divider
              (simple-string (typecase string
                               (simple-string (search divider string :start2 start-pos))
                               (string (search divider string :start2 start-pos))))
              (string (search divider string :start2 start-pos))))
          (%search-with-test (start-pos test)
            (declare (optimize (speed 3))
                     (type array-index start-pos)
                     (type function test)
                     (inline))
            (typecase divider
              (simple-string (typecase string
                               (simple-string (search divider string :start2 start-pos :test test))
                               (string (search divider string :start2 start-pos :test test))))
              (string (search divider string :start2 start-pos :test test)))))
0bdecfa7
     (declare (dynamic-extent (function %search)
25d4bb7c
                              (function %search-with-test)))
0bdecfa7
     (let ((num-parts (the array-length 0))
25d4bb7c
           (pattern-length (the array-length (length divider)))
           (search-test (typecase test
                          (function test)
                          (null)
                          (symbol (symbol-function test))))
           (parts (make-array (if count count 100)
                              :adjustable t
                              :fill-pointer 0))
           (start-pos 0))
0bdecfa7
       (loop 
25d4bb7c
          for end-pos = (typecase search-test
                          (function (%search-with-test start-pos search-test))
                          (null (%search start-pos)))
          do
            (vector-push-extend (subseq string start-pos end-pos) parts) 
            (incf (the array-length num-parts))
          while end-pos
          do (setf start-pos (the array-length (+ pattern-length end-pos)))
          until (and count (>= (1+ num-parts) count))
          finally
            (when (and count end-pos)
              (vector-push-extend (subseq string (+ pattern-length end-pos)) parts))
            (return parts)))))
0bdecfa7
 
0052b5eb
 (defun split (divider string &key count (test nil) (type nil type-p))
0bdecfa7
   (declare (optimize #+dev (debug 3) (speed 3) (space 3)))
   (unless test
     (setf test
25d4bb7c
           (typecase divider
             (string 'equal)
             (t 'eql))))
0052b5eb
   (let ((result (etypecase divider
25d4bb7c
                   (character (%split-on-char divider string :count count :test test))
                   (string (if (= 1 (length divider))
                               (%split-on-char (aref divider 0) string :count count :test test)
                               (%split-on-string divider string :count count :test test))))))
0052b5eb
     (if type-p
25d4bb7c
         (coerce result type)
         result)))