(defpackage :levehnstein
(:shadowing-import-from :data-lens :pick)
(:use :cl
:fw.lu
:data-lens))
(in-package :levehnstein)
;;; ;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; ;;;
;;; ;;;
;;; ;;;
;;; ;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;
;;; ;;; ;;; ;;; ;;; ;;; ;;; ;;;
;;; ;;; c at h at ;;; ;;;
;;; ;;; co t ho t ;;; ;;;
;;; ;;; oat coat ;;; ;;;
;;; ;;; ho g dog ;;; ;;;
;;; ;;; ;;; ;;;; ;;; ;;; ;;; ;;;
;;; ;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;
;;; ;;;
;;; ;;;
;;; ;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; ;;;
(defun collect-by-distance (pivot items)
(let ((items (sort (copy-seq items) '<
:key (op (levehnstein _ pivot)))))
(nreverse
(reduce (destructuring-lambda (acc (distance item))
(if acc
(if (= (caar acc) distance)
(list* (list* (caar acc) item (cdar acc))
(cdr acc))
(list* (list distance item) acc))
(list (list distance item))))
(map 'list (juxt (op (levehnstein _ pivot))
'identity)
items)
:initial-value ()))))
(defun levehnstein-1-p (a b)
(let-by-length (a b)
(or (equal a b)
(cond ((= (length b)
(length a))
(= (loop
for x from 0 below (length a)
when (not (eql (elt a x)
(elt b x)))
count 1)
1))
((= (- (length b)
(length a))
1)
(some 'identity
(map 'list
(op (equal a
(remove _1 b
:start _2
:count 1)))
b (alexandria:iota (length b)))))))))
(defun build-chain (items)
(if (null items)
()
(let* ((selection (find-if (op (levehnstein-1-p _1 (car items)))
(fisher-yates (cdr items))))
(next (when selection
(build-chain (cons selection
(without selection
(cdr items)))))))
(if selection
(list* (list* (car items) (car next))
(cdr next))
(list* (list (car items))
(build-chain (cdr items)))))))
(defun get-edits (items)
(mapcar (op (cons _1
(mapcar (lambda (x) (levehnstein-1 _1 x))
items)))
items))
#+null
(defgeneric levehnstein-1 (a b)
(:method ((a list) (b list))
(if (or (null a) (listp (car a)))
(call-next-method)
(= (levehnstein (car (last a)) (car b))
1)))
(:method (a b)
(= (levehnstein a b)
1)))
(defun fisher-yates (seq)
(let* ((shuffled (alexandria:copy-sequence 'vector seq))
(n (length shuffled)))
(loop for i from (1- n) downto 1
for j = (random (1+ i))
do (rotatef (elt shuffled j)
(elt shuffled i)))
(alexandria:copy-sequence (type-of seq) shuffled)))
(defun levehnstein-sum (seq)
(apply #'+
(mapcar #'car
(cdr (funcall (derive 'levehnstein)
seq)))))
(defun pick-sequence (seq1 seq2)
(fw.lu:if-let* ((seq1-score (levehnstein-sum seq1))
(seq2-score (levehnstein-sum seq2))
(_ (< seq1-score seq2-score)))
(values seq1 seq1-score)
(values seq2 seq2-score)))
(defun edit-distance-1-p (a b)
(if (equal a b)
t
(if (eql (elt a 0)
(elt b 0))
(edit-distance-1-p (subseq a 1) (subseq b 1))
(equal (subseq a 1) (subseq b 1)))))
(defun find-highest-out-of-place (seq)
(multiple-value-bind (value idx)
(funcall (alexandria:compose (maximizing #'< #'car)
#'cdr
(derive 'levehnstein))
seq)
(values (1+ idx)
(cdr value))))
(defun swap-highest (seq)
(let ((oop (find-highest-out-of-place seq)))
(concatenate 'list
(subseq seq oop)
(subseq seq 0 oop))))
(defun crossover (seq)
(if (null seq)
seq
(if (oddp (length seq))
(append (crossover (cdr seq))
(list (car seq)))
(let ((candidate (1+ (random (1- (length seq))))))
(append (list (elt seq candidate))
(subseq seq 1 candidate)
(list (elt seq 0))
(subseq seq (1+ candidate)))))))
(defun evolve (seq steps fitness chance)
(loop repeat steps
for candidate = seq then (if (= 0 (random chance))
(pick-sequence
(and i-seq (swap-highest i-seq))
(pick-sequence (crossover i-seq)
i-seq))
i-seq)
for (i-seq score) = (multiple-value-list
(funcall fitness seq (fisher-yates seq)))
finally (return (values i-seq score))))
(defun evolve-halves (seq steps fitness chance)
(let* ((halfway (random (length seq)))
(first-part (subseq seq 0 halfway))
(second-part (subseq seq halfway))
(result (append (pick-sequence first-part
(evolve (fisher-yates first-part) steps fitness chance))
(pick-sequence second-part
(evolve (fisher-yates second-part) steps fitness chance)))))
(evolve (pick-sequence seq result)
steps fitness chance)))
(defun aggregate-items (items)
(loop
with candidates = items
for (item . remainder) on items
while remainder
do
(setf candidates (remove item candidates :test 'equal))
(format t "~&~s ~s~%" item remainder)
when (and item candidates)
collect (loop
for match in candidates
when (and match (levehnstein-1 item match))
do
(format t "~&~s ~s~%" item match)
(return (append (alexandria:ensure-list item)
(alexandria:ensure-list match))))))
(defmacro let-order ((min max) (a b) &body body)
(alexandria:once-only (a b)
`(destructuring-bind (,min ,max) (if (<= (length ,a) (length ,b))
(list ,a ,b)
(list ,b ,a))
,@body)))
(defmacro let-by-length ((a b) &body body)
`(let-order (,a ,b) (,a ,b)
,@body))
(defun levehnstein-equal (a b)
(if (and (= (length a) (length b))
(> (length a) 0))
(if (eql (elt a 0) (elt b 0))
(levehnstein-equal (subseq a 1 nil) (subseq b 1 nil))
(1+ (levehnstein-equal (subseq a 1 nil) (subseq b 1 nil))))
0))
(defun window (seq len &optional acc)
(if (= (length seq) len)
(cons seq acc)
(window (subseq seq 1 nil)
len
(cons (subseq seq 0 len) acc))))
(defun minimal-levehnstein (a b)
(let-order (a b) (a b)
(apply #'min
(mapcar (op (levehnstein-equal a _))
(window b (length a))))))
(defun align-things (a b)
(let-by-length (a b)
(cond
((equal (length a) (length b))
(values (coerce a 'list)
(coerce b 'list)))
((equal a "")
(values (make-sequence 'list (length b)
:initial-element nil)
(coerce b 'list)))
((equal (elt a 0) (elt b 0))
(multiple-value-bind (as bs) (align-things (subseq a 1) (subseq b 1))
(values (cons (elt a 0) as)
(cons (elt b 0) bs))))
(t (multiple-value-bind (as bs) (align-things a (subseq b 1))
(values (cons nil as)
(cons (elt b 0) bs)))))))
(defun levehnstein (a b)
(min (+ (minimal-levehnstein (coerce a 'list)
(coerce b 'list))
(abs (- (length a)
(length b))))
(multiple-value-call 'levehnstein-equal (align-things a b))))
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
#+nil
(every 'identity
(list (= (levehnstein "kitten" "sitting") 3)
(= (levehnstein "closure" "clojure") (levehnstein "clojure" "closure") 1)
(= (levehnstein "xyx" "xyyyx") 2)
(= (levehnstein "" "123456") 6)
(= (levehnstein "1234" "02345") 2)
(= (levehnstein "abcd" "ad") 2)
(= (levehnstein "Clojure" "Clojure") (levehnstein "" "") 0)
(= (levehnstein "ttttattttctg" "tcaaccctaccat") 10)
(= (levehnstein "gaattctaatctc" "caaacaaaaaattt") 9)))
#+nil
(defun levehnstein (a b)
(labels ((window-internal (seq len acc)
(if (= (length seq) len)
(cons seq acc)
(window-internal (subseq seq 1 nil)
len
(cons (subseq seq 0 len) acc))))
(min-offset-internal (haystack needle &optional (x 0) min-x min-dist)
(if (null haystack)
(list min-x min-dist (length needle))
(let* ((search (car haystack))
(dist (levehnstein needle search)))
(destructuring-bind (min-x min-dist)
(if (or (null min-dist) (< dist min-dist))
(list x dist)
(list min-x min-dist))
(min-offset-internal (cdr haystack)
needle
(1+ x)
min-x
min-dist)))))
(min-offset (needle haystack)
(min-offset-internal (window-internal haystack (length needle) nil)
needle))
(levehnstein-head (len a b)
(levehnstein (subseq a 0 len)
(subseq b 0 len)))
(min-offset-tail (offset a b)
(min-offset (subseq a offset)
(subseq b offset)))
(map-reduce (comb tr init seq1 seq2)
(reduce (lambda (acc next)
(funcall comb acc (apply tr next)))
(map 'list 'list seq1 seq2)
:initial-value init))
(difference-step (char-a char-b) (if (eql char-a char-b) 0 1))
(uneven-loop (a b &optional (x 1) (min-x x) min-total-dist)
(destructuring-bind (tail-offset dist overlap) (min-offset-tail x a b)
(let* ((head-distance (levehnstein-head x a b))
(leftover-tail (- (- (length b) x)
(+ overlap tail-offset)))
(total-dist (+ head-distance tail-offset dist leftover-tail)))
(destructuring-bind (min-total-dist min-x)
(if (< total-dist (or min-total-dist total-dist))
(list total-dist x)
(list (or min-total-dist total-dist) min-x))
(if (= x (1- (length a)))
min-total-dist
(uneven-loop a b (1+ x) min-x min-total-dist)))))))
(cond ((> (length a) (length b)) (levehnstein b a))
((equal a "") (length b))
((equal (subseq a 1) b) 1)
((equal (subseq b 1) a) 1)
((= (length a) (length b)) (map-reduce #'+ #'difference-step 0 a b))
(t (uneven-loop a b)))))
(defun without (el items)
(remove el items :test 'equal))
(defun all-pairs (items)
(map 'list
(lambda (item)
(map 'list
(lambda (item-2)
(list item item-2))
(without item items)))
items))
(defun make-word-graph (words)
(funcall (alexandria:compose
(op (alexandria:write-string-into-file _ "/tmp/foo.dot" :if-exists :supersede))
(op (format nil "~&graph {~%~{~a~%~}}~%" _))
(op (remove-duplicates _ :test 'equal))
(over (op (format nil "~{~s~^ -- ~};" (sort (subseq _ 0 2) 'string<))))
(op (apply 'concatenate 'list _))
(over (alexandria:compose
(lambda (x) (remove-if-not (op (= _ 1)) x
:key (op (car (last _)))))
(lambda (x) (sort x '<
:key (op (car (last _)))))
(over (lambda (it) (concatenate 'list it (list (apply 'levehnstein it)))))))
'all-pairs)
words))