git.fiddlerwoaroof.com
Browse code

feat: add more metadata to the file indexer

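- count: how many times a given file/shasum combination has been indexed (1 on first insert, incremented each time it is seen again)
- ts: the timestamp when the file/shasum row was most recently indexed (refreshed on every repeat)
- occasionally (at random, roughly 1 file in 10,000) print the name of the file currently being processed to stderr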

Edward Langley authored on 21/06/2023 07:04:45
Showing 2 changed files
... ...
@@ -37,26 +37,31 @@
37 37
                 "create table if not exists
38 38
                   files_shasums (file text,
39 39
                                  shasum text,
40
-                                 size bigint)")
40
+                                 size bigint,
41
+                                 count bigint,
42
+                                 ts datetime default current_timestamp)")
41 43
                (sqlite:execute-non-query
42 44
                 db
43 45
                 "create unique index if not exists shasums_files_unique_assuc on files_shasums(file,shasum)")
44 46
                (loop for raw-file in files
45 47
                      for file = (uiop:parse-native-namestring raw-file)
46 48
                      do
49
+                        (when (< (random 1000000)
50
+                                 100)
51
+                          (format *error-output* "~&processed: ~a~%" (uiop:native-namestring file)))
47 52
                         (sqlite:with-transaction db
48 53
                           (with-open-file (s file :element-type '(unsigned-byte 8))
49 54
                             (let* ((sum (ironclad:byte-array-to-hex-string
50 55
                                          (ironclad:digest-file :sha256 file)))
51 56
                                    (length (file-length s)))
52 57
                               (sqlite:execute-single db
53
-                                                     "insert into files_shasums (file,shasum,size)
54
-                                                      values (?,?,?)
55
-                                                      on conflict do nothing"
58
+                                                     "insert into files_shasums (file,shasum,size,count)
59
+                                                      values (?,?,?, 1)
60
+                                                      on conflict do update set count = (count + 1),
61
+                                                                                ts = current_timestamp"
56 62
                                                      (uiop:native-namestring (truename file))
57 63
                                                      sum
58
-                                                     length)))))))
59
-           (terpri)))))
64
+                                                     length)))))))))))
60 65
 
61 66
 (defun dump ()
62 67
   (setf net.didierverna.clon:*context* nil