(defpackage :levehnstein (:shadowing-import-from :data-lens :pick) (:use :cl :fw.lu :data-lens)) (in-package :levehnstein) ;;; ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;; ;;; ;;; ;;; ;;; ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;; ;;; ;;; ;;; ;;; ;;; ;;; ;;; ;;; ;;; c at h at ;;; ;;; ;;; ;;; co t ho t ;;; ;;; ;;; ;;; oat coat ;;; ;;; ;;; ;;; ho g dog ;;; ;;; ;;; ;;; ;;; ;;;; ;;; ;;; ;;; ;;; ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;; ;;; ;;; ;;; ;;; ;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; ;;; (defun collect-by-distance (pivot items) (let ((items (sort (copy-seq items) '< :key (op (levehnstein _ pivot))))) (nreverse (reduce (destructuring-lambda (acc (distance item)) (if acc (if (= (caar acc) distance) (list* (list* (caar acc) item (cdar acc)) (cdr acc)) (list* (list distance item) acc)) (list (list distance item)))) (map 'list (juxt (op (levehnstein _ pivot)) 'identity) items) :initial-value ())))) (defun levehnstein-1-p (a b) (let-by-length (a b) (or (equal a b) (cond ((= (length b) (length a)) (= (loop for x from 0 below (length a) when (not (eql (elt a x) (elt b x))) count 1) 1)) ((= (- (length b) (length a)) 1) (some 'identity (map 'list (op (equal a (remove _1 b :start _2 :count 1))) b (alexandria:iota (length b))))))))) (defun build-chain (items) (if (null items) () (let* ((selection (find-if (op (levehnstein-1-p _1 (car items))) (fisher-yates (cdr items)))) (next (when selection (build-chain (cons selection (without selection (cdr items))))))) (if selection (list* (list* (car items) (car next)) (cdr next)) (list* (list (car items)) (build-chain (cdr items))))))) (defun get-edits (items) (mapcar (op (cons _1 (mapcar (lambda (x) (levehnstein-1 _1 x)) items))) items)) #+null (defgeneric levehnstein-1 (a b) (:method ((a list) (b list)) (if (or (null a) (listp (car a))) (call-next-method) (= (levehnstein (car (last a)) (car b)) 1))) (:method (a b) (= (levehnstein a b) 1))) (defun fisher-yates (seq) (let* ((shuffled (alexandria:copy-sequence 'vector seq)) (n (length shuffled))) (loop for i from (1- n) downto 1 for j = (random (1+ i)) do (rotatef (elt shuffled j) (elt shuffled i))) (alexandria:copy-sequence (type-of seq) shuffled))) (defun levehnstein-sum (seq) (apply #'+ (mapcar #'car (cdr (funcall (derive 'levehnstein) seq))))) (defun pick-sequence (seq1 seq2) (fw.lu:if-let* ((seq1-score (levehnstein-sum seq1)) (seq2-score (levehnstein-sum seq2)) (_ (< seq1-score seq2-score))) (values seq1 seq1-score) (values seq2 seq2-score))) (defun edit-distance-1-p (a b) (if (equal a b) t (if (eql (elt a 0) (elt b 0)) (edit-distance-1-p (subseq a 1) (subseq b 1)) (equal (subseq a 1) (subseq b 1))))) (defun find-highest-out-of-place (seq) (multiple-value-bind (value idx) (funcall (alexandria:compose (maximizing #'< #'car) #'cdr (derive 'levehnstein)) seq) (values (1+ idx) (cdr value)))) (defun swap-highest (seq) (let ((oop (find-highest-out-of-place seq))) (concatenate 'list (subseq seq oop) (subseq seq 0 oop)))) (defun crossover (seq) (if (null seq) seq (if (oddp (length seq)) (append (crossover (cdr seq)) (list (car seq))) (let ((candidate (1+ (random (1- (length seq)))))) (append (list (elt seq candidate)) (subseq seq 1 candidate) (list (elt seq 0)) (subseq seq (1+ candidate))))))) (defun evolve (seq steps fitness chance) (loop repeat steps for candidate = seq then (if (= 0 (random chance)) (pick-sequence (and i-seq (swap-highest i-seq)) (pick-sequence (crossover i-seq) i-seq)) i-seq) for (i-seq score) = (multiple-value-list (funcall fitness seq (fisher-yates seq))) finally (return (values i-seq score)))) (defun evolve-halves (seq steps fitness chance) (let* ((halfway (random (length seq))) (first-part (subseq seq 0 halfway)) (second-part (subseq seq halfway)) (result (append (pick-sequence first-part (evolve (fisher-yates first-part) steps fitness chance)) (pick-sequence second-part (evolve (fisher-yates second-part) steps fitness chance))))) (evolve (pick-sequence seq result) steps fitness chance))) (defun aggregate-items (items) (loop with candidates = items for (item . remainder) on items while remainder do (setf candidates (remove item candidates :test 'equal)) (format t "~&~s ~s~%" item remainder) when (and item candidates) collect (loop for match in candidates when (and match (levehnstein-1 item match)) do (format t "~&~s ~s~%" item match) (return (append (alexandria:ensure-list item) (alexandria:ensure-list match)))))) (defmacro let-order ((min max) (a b) &body body) (alexandria:once-only (a b) `(destructuring-bind (,min ,max) (if (<= (length ,a) (length ,b)) (list ,a ,b) (list ,b ,a)) ,@body))) (defmacro let-by-length ((a b) &body body) `(let-order (,a ,b) (,a ,b) ,@body)) (defun levehnstein-equal (a b) (if (and (= (length a) (length b)) (> (length a) 0)) (if (eql (elt a 0) (elt b 0)) (levehnstein-equal (subseq a 1 nil) (subseq b 1 nil)) (1+ (levehnstein-equal (subseq a 1 nil) (subseq b 1 nil)))) 0)) (defun window (seq len &optional acc) (if (= (length seq) len) (cons seq acc) (window (subseq seq 1 nil) len (cons (subseq seq 0 len) acc)))) (defun minimal-levehnstein (a b) (let-order (a b) (a b) (apply #'min (mapcar (op (levehnstein-equal a _)) (window b (length a)))))) (defun align-things (a b) (let-by-length (a b) (cond ((equal (length a) (length b)) (values (coerce a 'list) (coerce b 'list))) ((equal a "") (values (make-sequence 'list (length b) :initial-element nil) (coerce b 'list))) ((equal (elt a 0) (elt b 0)) (multiple-value-bind (as bs) (align-things (subseq a 1) (subseq b 1)) (values (cons (elt a 0) as) (cons (elt b 0) bs)))) (t (multiple-value-bind (as bs) (align-things a (subseq b 1)) (values (cons nil as) (cons (elt b 0) bs))))))) (defun levehnstein (a b) (min (+ (minimal-levehnstein (coerce a 'list) (coerce b 'list)) (abs (- (length a) (length b)))) (multiple-value-call 'levehnstein-equal (align-things a b)))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; #+nil (every 'identity (list (= (levehnstein "kitten" "sitting") 3) (= (levehnstein "closure" "clojure") (levehnstein "clojure" "closure") 1) (= (levehnstein "xyx" "xyyyx") 2) (= (levehnstein "" "123456") 6) (= (levehnstein "1234" "02345") 2) (= (levehnstein "abcd" "ad") 2) (= (levehnstein "Clojure" "Clojure") (levehnstein "" "") 0) (= (levehnstein "ttttattttctg" "tcaaccctaccat") 10) (= (levehnstein "gaattctaatctc" "caaacaaaaaattt") 9))) #+nil (defun levehnstein (a b) (labels ((window-internal (seq len acc) (if (= (length seq) len) (cons seq acc) (window-internal (subseq seq 1 nil) len (cons (subseq seq 0 len) acc)))) (min-offset-internal (haystack needle &optional (x 0) min-x min-dist) (if (null haystack) (list min-x min-dist (length needle)) (let* ((search (car haystack)) (dist (levehnstein needle search))) (destructuring-bind (min-x min-dist) (if (or (null min-dist) (< dist min-dist)) (list x dist) (list min-x min-dist)) (min-offset-internal (cdr haystack) needle (1+ x) min-x min-dist))))) (min-offset (needle haystack) (min-offset-internal (window-internal haystack (length needle) nil) needle)) (levehnstein-head (len a b) (levehnstein (subseq a 0 len) (subseq b 0 len))) (min-offset-tail (offset a b) (min-offset (subseq a offset) (subseq b offset))) (map-reduce (comb tr init seq1 seq2) (reduce (lambda (acc next) (funcall comb acc (apply tr next))) (map 'list 'list seq1 seq2) :initial-value init)) (difference-step (char-a char-b) (if (eql char-a char-b) 0 1)) (uneven-loop (a b &optional (x 1) (min-x x) min-total-dist) (destructuring-bind (tail-offset dist overlap) (min-offset-tail x a b) (let* ((head-distance (levehnstein-head x a b)) (leftover-tail (- (- (length b) x) (+ overlap tail-offset))) (total-dist (+ head-distance tail-offset dist leftover-tail))) (destructuring-bind (min-total-dist min-x) (if (< total-dist (or min-total-dist total-dist)) (list total-dist x) (list (or min-total-dist total-dist) min-x)) (if (= x (1- (length a))) min-total-dist (uneven-loop a b (1+ x) min-x min-total-dist))))))) (cond ((> (length a) (length b)) (levehnstein b a)) ((equal a "") (length b)) ((equal (subseq a 1) b) 1) ((equal (subseq b 1) a) 1) ((= (length a) (length b)) (map-reduce #'+ #'difference-step 0 a b)) (t (uneven-loop a b))))) (defun without (el items) (remove el items :test 'equal)) (defun all-pairs (items) (map 'list (lambda (item) (map 'list (lambda (item-2) (list item item-2)) (without item items))) items)) (defun make-word-graph (words) (funcall (alexandria:compose (op (alexandria:write-string-into-file _ "/tmp/foo.dot" :if-exists :supersede)) (op (format nil "~&graph {~%~{~a~%~}}~%" _)) (op (remove-duplicates _ :test 'equal)) (over (op (format nil "~{~s~^ -- ~};" (sort (subseq _ 0 2) 'string<)))) (op (apply 'concatenate 'list _)) (over (alexandria:compose (lambda (x) (remove-if-not (op (= _ 1)) x :key (op (car (last _))))) (lambda (x) (sort x '< :key (op (car (last _))))) (over (lambda (it) (concatenate 'list it (list (apply 'levehnstein it))))))) 'all-pairs) words))