git.fiddlerwoaroof.com
string-utils/split.lisp
7ad97f91
 (in-package :fwoar.string-utils)
 
 (deftype array-length ()
   `(integer 0 ,array-dimension-limit))
 (deftype array-index ()
   `(integer 0 ,(1- array-dimension-limit)))
 
 (defun get-part-modifier (char string &optional count)
   (declare (optimize #+dev (debug 3) #-dev (speed 3))
            (type character char)
            (type string string))
 
   (flet ((count-splits (string)
            (declare (optimize (speed 3))
                     (type simple-string string))
            (do* ((x (the array-length 0) (1+ x))
                  (cur-char #1=(aref string x) #1#)
                  (result (the array-length 0) (if (char= cur-char char)
                                                   (1+ result)
                                                   result)))
                 ((= x (1- (length string))) (1+ result))
              (declare (type array-length result)))))
     (typecase string
       ((and string (not simple-string))
        (setf string (copy-seq string))))
     (unless count
       (setf count (count-splits string))))
 
   (let ((parts (make-array count :initial-element nil :element-type '(or string null)))
         (start-idx 0)
         (target-spot 0))
     (prog1 parts
       (dotimes (end-idx (length string))
         (when (typecase string
                 (simple-string (char= (aref string end-idx) char))
                 (string (char= (aref string end-idx) char)))
           (setf (aref parts target-spot)
60766c0a
                 (make-array (- end-idx start-idx)
                             :displaced-to string :displaced-index-offset start-idx
7ad97f91
                             :element-type 'character))
           (incf target-spot)
           (setf start-idx (1+ end-idx))
           (when (= target-spot (1- count))
             (return))))
       (when (<= start-idx (length string))
         (setf (aref parts target-spot)
               (make-array (- (length string) start-idx)
                           :displaced-to string :displaced-index-offset start-idx
                           :element-type 'character))))))
 
 ;; TODO: implement test
 (defun %split-on-char (divider string &key count (test nil))
   (declare (optimize #+dev (debug 3) #+dev (speed 0) #+dev (space 0)
                      #-dev (speed 3) #-dev(space 2))
            (type (or null array-length) count)
            (type (or null function symbol) test)
            (type character divider)
            (type string string))
   (typecase string
     ((and string (not simple-string))
      (setf string (copy-seq string))))
   (check-type string simple-string)
 
   (flet ((count-splits (string)
            (declare (optimize #+dev (debug 3) #+dev (speed 0) #+dev (space 0)
                               #-dev (speed 3) #-dev(space 2))
                     (type simple-string string))
            (if (/= 0 (length string))
                (do* ((x (the array-length 0) (1+ x))
                      (cur-char #1=(aref string x) #1#)
                      (result (the array-length
                                   (if (char= cur-char divider)
                                       1
                                       0))
                              (if (char= cur-char divider)
                                  (1+ result)
                                  result)))
                     ((= x (1- (length string))) (1+ result))
                  (declare (type array-length result)))
                0))
          (find-pos (start-pos)
            (declare (optimize #+dev (debug 3) #+dev (speed 0) #+dev (space 0)
60766c0a
                               #-dev (speed 3) #-dev(space 2))
7ad97f91
                     (type array-index start-pos))
            (etypecase test
              (function (position divider string :start start-pos :test test))
              (null (position divider string :start start-pos))
              (symbol (position divider string :start start-pos :test (symbol-function test))))))
 
     (unless count
       (setf count (count-splits string)))
 
     (check-type count array-length)
60766c0a
     (let ((parts (make-array (max (1+ count) 1) :fill-pointer 0))
7ad97f91
           (start-pos (the fixnum 0)))
       (declare (dynamic-extent start-pos))
       (prog1 parts
         (loop 
60766c0a
           for end-pos = (find-pos start-pos)
           while end-pos 
           do
7ad97f91
              (vector-push (subseq string start-pos end-pos) parts)
              (setf start-pos (1+ end-pos))
60766c0a
           while (< (length parts) count)
           finally
7ad97f91
              (cond ((or (and end-pos count)
                         (< start-pos (length string)))
                     (vector-push (subseq string start-pos)
                                  parts))
                    ((not end-pos)
                     (vector-push "" parts))))))))
 
 (defun %split-on-string (divider string &key count (test nil))
   (declare (optimize #+dev (debug 3) (speed 3))
            (type string divider string)
            (type (or null function symbol) test)
            (type (or null array-index) count))
   (flet ((%search (start-pos)
            (declare (optimize (speed 3))
                     (type array-index start-pos)
                     (inline))
            (typecase divider
              (simple-string (typecase string
                               (simple-string (search divider string :start2 start-pos))
                               (string (search divider string :start2 start-pos))))
              (string (search divider string :start2 start-pos))))
          (%search-with-test (start-pos test)
            (declare (optimize (speed 3))
                     (type array-index start-pos)
                     (type function test)
                     (inline))
            (typecase divider
              (simple-string (typecase string
                               (simple-string (search divider string :start2 start-pos :test test))
                               (string (search divider string :start2 start-pos :test test))))
              (string (search divider string :start2 start-pos :test test)))))
     (declare (dynamic-extent (function %search)
                              (function %search-with-test)))
     (let ((num-parts (the array-length 0))
           (pattern-length (the array-length (length divider)))
           (search-test (typecase test
                          (function test)
                          (null)
                          (symbol (symbol-function test))))
           (parts (make-array (if count count 100)
                              :adjustable t
                              :fill-pointer 0))
           (start-pos 0))
       (loop 
60766c0a
         for end-pos = (typecase search-test
                         (function (%search-with-test start-pos search-test))
                         (null (%search start-pos)))
         do
7ad97f91
            (vector-push-extend (subseq string start-pos end-pos) parts) 
            (incf (the array-length num-parts))
60766c0a
         while end-pos
         do (setf start-pos (the array-length (+ pattern-length end-pos)))
         until (and count (>= num-parts count))
         finally
7ad97f91
            (when (and count end-pos)
              (vector-push-extend (subseq string (+ pattern-length end-pos)) parts))
            (return parts)))))
 
 (defun split (divider string &key count (test nil) (type nil type-p))
   (declare (optimize #+dev (debug 3) (speed 3) (space 3)))
   (unless test
     (setf test
           (typecase divider
             (string 'equal)
             (t 'eql))))
   (let ((result (etypecase divider
                   (character (%split-on-char divider string :count count :test test))
                   (string (if (= 1 (length divider))
                               (%split-on-char (aref divider 0) string :count count :test test)
                               (%split-on-string divider string :count count :test test))))))
     (if type-p
         (coerce result type)
         result)))