git.fiddlerwoaroof.com
Browse code

feat: finish delta handling

Edward Langley authored on 31/10/2023 09:29:42
Showing 6 changed files
... ...
@@ -50,6 +50,9 @@
50 50
                #:fiveam
51 51
                #:co.fwoar.cl-git)
52 52
   :serial t
53
+  :perform (test-op (o c)
54
+                    (unless (symbol-call :fiveam '#:run! :fwoar.cl-git)
55
+                      (error "some tests failed")))
53 56
   :components ((:module "tests"
54 57
                 :components ((:file "tests")
55 58
                              (:file "branch-resolution" :depends-on ("tests"))
... ...
@@ -40,6 +40,26 @@
40 40
             :sum (expt 2 n))))
41 41
 
42 42
 (defun trace-bases (pack delta)
43
+  (assert (typep delta 'delta))
44
+  (let* ((offset (second (base delta)))
45
+         (o (extract-object-at-pos pack
46
+                                   offset
47
+                                   (make-instance 'git-ref
48
+                                                  :hash "00000000"
49
+                                                  :repo nil)))
50
+         (obj (serapeum:assocdr :object-data o))
51
+         (raw (serapeum:assocdr :raw-data o)))
52
+    (if (typep obj 'delta)
53
+        (let ((next (trace-bases pack obj)))
54
+          (length next)
55
+          (apply-commands next
56
+                          (commands delta)))
57
+        (let ((base (apply-commands raw
58
+                                    (commands delta))))
59
+          (length base)
60
+          base))))
61
+
62
+(defun get-bases (pack delta)
43 63
   (if (typep delta 'delta)
44 64
       (let* ((offset (second (base delta)))
45 65
              (o (extract-object-at-pos pack
... ...
@@ -47,14 +67,9 @@
47 67
                                        (make-instance 'git-ref
48 68
                                                       :hash "00000000"
49 69
                                                       :repo nil)))
50
-             (obj (serapeum:assocdr :object-data o))
51
-             (raw (serapeum:assocdr :raw-data o)))
52
-        (if (typep obj 'delta)
53
-            (apply-commands (trace-bases pack obj)
54
-                            (commands delta))
55
-            (apply-commands (trace-bases pack raw)
56
-                            (commands delta))))
57
-      delta))
70
+             (obj (serapeum:assocdr :object-data o)))
71
+        (cons delta (get-bases pack obj)))
72
+      (list delta)))
58 73
 
59 74
 (defun partition-commands (data)
60 75
   (let ((idx 0))
... ...
@@ -98,9 +113,13 @@
98 113
 (defun apply-commands (base commands)
99 114
   (flexi-streams:with-output-to-sequence (s)
100 115
     (flet ((do-copy (offset cnt)
116
+             #+(or)
117
+             (format t "DOING :COPY ~d ~d~%" offset cnt)
101 118
              (write-sequence (subseq base offset (+ offset cnt))
102 119
                              s))
103 120
            (do-add (data)
121
+             #+(or)
122
+             (format t "DOING :ADD ~d~%" (length data))
104 123
              (write-sequence data s)))
105 124
       (loop for (command . args) in commands
106 125
             when (eql command :copy) do
... ...
@@ -108,6 +127,19 @@
108 127
             when (eql command :add) do
109 128
               (apply #'do-add args)))))
110 129
 
130
+(defun get-ofs-delta-offset-streaming (buf)
131
+  (let* ((idx 0))
132
+    (flet ((advance ()
133
+             (read-byte buf)))
134
+      (loop
135
+        for c = (advance)
136
+        for ofs = (logand c 127) then (+ (ash (1+ ofs)
137
+                                              7)
138
+                                         (logand c 127))
139
+        while (> (logand c 128) 0)
140
+        finally
141
+           (return (values (- ofs) idx))))))
142
+
111 143
 (defun get-ofs-delta-offset (buf)
112 144
   (let* ((idx 0))
113 145
     (flet ((advance ()
... ...
@@ -138,23 +170,19 @@
138 170
       (values (bit-vector->int result)
139 171
               (length parts)))))
140 172
 
141
-(defmethod -extract-object-of-type ((type (eql :ofs-delta)) s repository &key offset-from packfile)
142
-  (multiple-value-bind (offset consumed) (get-ofs-delta-offset s)
143
-    (let ((compressed-data (chipz:decompress
144
-                            nil
145
-                            (chipz:make-dstate 'chipz:zlib)
146
-                            (subseq s consumed))))
147
-      (multiple-value-bind (src-size consumed-1) (decode-size compressed-data)
148
-        (multiple-value-bind (delta-size consumed-2) (decode-size (subseq compressed-data
149
-                                                                          consumed-1))
150
-          (make-ofs-delta (list packfile
151
-                                (+ offset-from offset))
152
-                          (partition-commands (subseq compressed-data
153
-                                                      (+ consumed-1
154
-                                                         consumed-2)))
155
-                          repository
156
-                          src-size
157
-                          delta-size))))))
173
+(defmethod -extract-object-of-type ((type (eql :ofs-delta)) s repository &key offset-from packfile base)
174
+  (multiple-value-bind (src-size consumed-1) (decode-size s)
175
+    (multiple-value-bind (delta-size consumed-2) (decode-size (subseq s
176
+                                                                      consumed-1))
177
+      (make-ofs-delta (list packfile
178
+                            (+ offset-from base))
179
+                      (partition-commands (subseq s
180
+                                                  (+ consumed-1
181
+                                                     consumed-2)))
182
+                      repository
183
+                      src-size
184
+                      delta-size))))
185
+
158 186
 (defmethod -extract-object-of-type ((type (eql :ref-delta)) s repository &key offset-from)
159 187
   (make-ref-delta offset-from
160 188
                   (partition-commands s)
... ...
@@ -46,23 +46,62 @@
46 46
         (return-from find-object-in-pack-files
47 47
           (values pack mid sha))))))
48 48
 
49
+(defun raw-object-for-ref (packed-ref)
50
+  (let ((pack (packed-ref-pack packed-ref)))
51
+    (with-pack-streams (i p) pack
52
+      (file-position p (read-4-byte-offset pack (packed-ref-offset packed-ref)))
53
+      (get-object-from-pack p))))
54
+
55
+(defun get-object-from-pack (s)
56
+  (let* ((metadata (fwoar.bin-parser:extract-high s))
57
+         (type (object-type->sym (get-object-type metadata)))
58
+         (size (get-object-size metadata)))
59
+    (case type
60
+      (:ref-delta (error ":ref-delta not implemented yet"))
61
+      (:ofs-delta (get-ofs-delta-offset-streaming s)))
62
+    (let ((decompressed (chipz:decompress nil (chipz:make-dstate 'chipz:zlib) s)))
63
+      (values (concatenate
64
+               '(vector fwoar.cl-git.types:octet)
65
+               (ecase type
66
+                 (:commit #.(babel:string-to-octets "commit" :encoding :ascii))
67
+                 (:blob #.(babel:string-to-octets "blob" :encoding :ascii))
68
+                 (:tree #.(babel:string-to-octets "tree" :encoding :ascii)))
69
+               #(32)
70
+               (babel:string-to-octets (prin1-to-string size ):encoding :ascii)
71
+               #(0)
72
+               decompressed)
73
+              size
74
+              (length decompressed)))))
75
+
76
+(defun get-ofs-delta-offset-streaming (buf)
77
+  (let* ((idx 0))
78
+    (flet ((advance ()
79
+             (read-byte buf)))
80
+      (loop
81
+        for c = (advance)
82
+        for ofs = (logand c 127) then (+ (ash (1+ ofs)
83
+                                              7)
84
+                                         (logand c 127))
85
+        while (> (logand c 128) 0)
86
+        finally
87
+           (return (values (- ofs) idx))))))
88
+
49 89
 (defun read-object-from-pack (s repository ref)
50 90
   (let* ((pos (file-position s))
51 91
          (metadata (fwoar.bin-parser:extract-high s))
52 92
          (type (object-type->sym (get-object-type metadata)))
53 93
          (size (get-object-size metadata))
54
-         (decompressed (if (member type '(:ofs-delta :ref-delta))
55
-                           (let ((buffer (make-array size :element-type '(unsigned-byte 8))))
56
-                             (read-sequence buffer s)
57
-                             buffer)
58
-                           (chipz:decompress nil (chipz:make-dstate 'chipz:zlib) s)))
59
-         (object-data (extract-object-of-type type decompressed repository pos (pathname s) ref)))
94
+         (delta-base (case type
95
+                       (:ref-delta (error ":ref-delta not implemented yet"))
96
+                       (:ofs-delta (get-ofs-delta-offset-streaming s))))
97
+         (decompressed (chipz:decompress nil (chipz:make-dstate 'chipz:zlib) s))
98
+         (object-data (extract-object-of-type type decompressed repository pos (pathname s) ref delta-base)))
60 99
     (list (cons :type (object-type->sym type))
61 100
           (cons :decompressed-size size)
62 101
           (cons :object-data object-data)
63 102
           (cons :raw-data decompressed))))
64 103
 
65
-(defun extract-object-of-type (type s repository pos packfile ref)
104
+(defun extract-object-of-type (type s repository pos packfile ref delta-base)
66 105
   (with-simple-restart (continue "Skip object of type ~s at position ~d"
67 106
                                  type
68 107
                                  pos)
... ...
@@ -71,7 +110,8 @@
71 110
                              repository
72 111
                              :offset-from pos
73 112
                              :packfile packfile
74
-                             :hash (ref-hash ref))))
113
+                             :hash (ref-hash ref)
114
+                             :base delta-base)))
75 115
 
76 116
 (defun pack-offset-for-object (index-file obj-number)
77 117
   (let ((offset-offset (getf index-file
... ...
@@ -111,7 +151,8 @@
111 151
                                 repo
112 152
                                 0
113 153
                                 nil
114
-                                ref)))))
154
+                                ref
155
+                                nil)))))
115 156
 
116 157
 (defgeneric extract-object (object)
117 158
   (:method ((object loose-ref))
... ...
@@ -1,17 +1,12 @@
1 1
 (in-package :fwoar.cl-git)
2 2
 
3
-(defmacro with-pack-streams ((idx-sym pack-sym) pack &body body)
4
-  (alexandria:once-only (pack)
5
-    `(with-open-file (,idx-sym (index-file ,pack) :element-type 'fwoar.cl-git.types:octet)
6
-       (with-open-file (,pack-sym (pack-file ,pack) :element-type 'fwoar.cl-git.types:octet)
7
-         ,@body))))
8
-
9 3
 (defun seek-to-object-in-pack (pack idx-stream pack-stream obj-number)
10 4
   (let* ((toc (idx-toc pack))
11 5
          (offset-offset (getf toc :4-byte-offsets)))
12 6
     (file-position idx-stream (+ offset-offset (* 4 obj-number)))
13 7
     (let ((object-offset-in-pack (read-bytes 4 'fwoar.bin-parser:be->int idx-stream)))
14
-      (file-position pack-stream object-offset-in-pack))))
8
+      (values (file-position pack-stream object-offset-in-pack)
9
+              object-offset-in-pack))))
15 10
 
16 11
 (defun extract-object-metadata-from-pack (pack obj-number)
17 12
   (with-pack-streams (s p) pack
... ...
@@ -32,10 +27,12 @@
32 27
           'vector))
33 28
 
34 29
 (defun get-object-size (bytes)
35
-  (let ((first (elt bytes 0))
36
-        (rest (subseq bytes 1)))
37
-    (logior (ash (fwoar.bin-parser:be->int rest) 4)
38
-            (logand first 15))))
30
+  (loop for c across bytes
31
+        for next = (logand c 15) then (logand c #x7f)
32
+        for shift = 0 then (if (= shift 0) 4 (+ shift 7))
33
+        for size = next then (+ size (ash next shift))
34
+        while (> (logand c #x80) 0)
35
+        finally (return size)))
39 36
 
40 37
 (defun get-object-type (bytes)
41 38
   (let ((first (elt bytes 0)))
... ...
@@ -98,6 +98,12 @@
98 98
 (defun pack (index pack repository)
99 99
   (fw.lu:new 'pack index pack repository))
100 100
 
101
+(defmacro with-pack-streams ((idx-sym pack-sym) pack &body body)
102
+  (alexandria:once-only (pack)
103
+    `(with-open-file (,idx-sym (index-file ,pack) :element-type 'fwoar.cl-git.types:octet)
104
+       (with-open-file (,pack-sym (pack-file ,pack) :element-type 'fwoar.cl-git.types:octet)
105
+         ,@body))))
106
+
101 107
 (defgeneric pack-files (repo)
102 108
   (:method ((repo git-repository))
103 109
     (mapcar (serapeum:op
... ...
@@ -158,8 +158,26 @@
158 158
 
159 159
 
160 160
 (defparameter *fake-repo-2* :fwoar.cl-git.git-objects.pack-2)
161
+(defclass fake-ref-2 (fake-ref)
162
+  ())
163
+(defun fake-ref-2 (repo hash)
164
+  (fwoar.lisputils:new 'fake-ref-2 repo hash))
165
+(defmethod fwoar.cl-git::packed-ref-pack ((ref fake-ref-2))
166
+  (let* ((pack-file (asdf:system-relative-pathname
167
+                     :co.fwoar.cl-git/tests
168
+                     "tests/sample-git-objects/pack-a0533639fdee4493fdbfc1b701872ace63b95e5f.pack"))
169
+         (index-file (asdf:system-relative-pathname
170
+                      :co.fwoar.cl-git/tests
171
+                      "tests/sample-git-objects/pack-a0533639fdee4493fdbfc1b701872ace63b95e5f.idx")))
172
+    (make-instance 'fwoar.cl-git::pack
173
+                   :repository nil
174
+                   :index index-file
175
+                   :pack pack-file)))
176
+(defmethod fwoar.cl-git::packed-ref-offset ((ref fake-ref-2))
177
+  (nth-value 1 (fwoar.cl-git::find-sha-in-pack (fwoar.cl-git::packed-ref-pack ref)
178
+                                               (fwoar.cl-git::ref-hash ref))))
161 179
 (defmethod fwoar.cl-git::ref ((repo (eql *fake-repo-2*)) hash)
162
-  (fake-ref repo hash))
180
+  (fake-ref-2 repo hash))
163 181
 (defmethod fwoar.cl-git::pack-files ((repo (eql *fake-repo-2*)))
164 182
   (list
165 183
    (let* ((pack-file (asdf:system-relative-pathname
... ...
@@ -187,6 +205,78 @@
187 205
                                (fwoar.cl-git::packed-ref *fake-repo-2* ref)))))))
188 206
     ))
189 207
 
208
+(fiveam:def-test test-pack-roundtrip ()
209
+  (let ((shas '("b7df27f1c873f5796462cdce8aabf46c1b3e3ff2"
210
+                "d468a84b54e73968d9426af96c1944c80ffa3a4f"
211
+                "4cc1ee4919056be337922f0a57e0bfe7281b8c57"
212
+                "4d4ea31b3d349ffd06e97469743f824578555edf"
213
+                "7df80f061ae5bf6177a1c0888d085281be2801e1"
214
+                "846489f7ae91bfaf0c78a6939b177697a89a81d0"
215
+                "bc7ccfbd98e684d9188b6833ec39f7d1d72edfdf"
216
+                "6089dc804725925c30d621c3d2f72c8b1b14bc17"
217
+                "500325f0022a9adc41929b58fbb5c2d55b60524b"
218
+                "72870f874f3ef712d9bea352e300b9b5f6aa60ee"
219
+                "0c24c8f931ad5c0d2e5add01710678abddd3ec03"
220
+                "e499f64d2ead6d14d74fe0f484d06f33bbd38261"
221
+                "efe60b9f578c4966cb2258ace1661edd080ca0dc"
222
+                "821ddf96c37e65ccc9a0f4bfe2b8ac6e255a2cb6"
223
+                "077088c8c359489ed1f6d8e441ec76438076542e"
224
+                "e1f7c67a8774d65bb941eeb2b41f71f333fa1a94"
225
+                "ff33293b415cc1907a6071650d045b3dffd8e5c0"
226
+                "e98a5866a0148fe573197e8c48a543fc3039f1bc"
227
+                "f09f6f1b30fd3579649f8abf23719901496accde"
228
+                "692f03101cd8ebf6830618805217b6348ddfd3a8"
229
+                "f710cf28a9f511911e1def85c4cb98bfbcfd9017"
230
+                "0b3ed8597e1968306c3732f7507256694357009f"
231
+                "88e003ecce9e9420632d0bab857270819e922674"
232
+                "5c205fb851671ff0938c86d7c0cc742f2ca2d32f"
233
+                "8f6a890959795d2b340615a074170ce404d7f2b9"
234
+                "e079ee4a351de0841c09f87dffdee333ef936673"
235
+                "347e97b1efa866e3bd00bbfb68c5b660e378f3b5"
236
+                "ad9b8a82065f70aac3da61e845ab2cd37a71e649"
237
+                "83674eea1c0a2f2df2886b38b9539ed1193b00c3"
238
+                "e623be68f6fd0c36dee0145a4c95dbbf85174774"
239
+                "a7c6e622cca243456481ffbeafaab739e4687681"
240
+                "b0bbceded2a17389a9a6ddc765398a257199c78e"
241
+                "82c268c1e7afe543ac14bf2748df53a729fa35cb"
242
+                "2fb0a2fc57327dc6a533b596a0643ad991847b3b"
243
+                "f1a12e8a19691afcd5ee08d615a1b4d14b5051f9"
244
+                "991d0162019ac2e21592553a10ab16eb337222d8"
245
+                "94acd859d12ae611e631cfd66b7ed164d6b5ac89"
246
+                "f115dea85d331cb5c01e247d77886bba2690e726"
247
+                "488cc8612e7b24a1737a260b10bff0037b55636e"
248
+                "be4ef77fd7da17393e02ef933e8d21e67be7fbec"
249
+                "a84a7f712398c1659f2e809d903ae51b44cf7f4a"
250
+                "8a9fe9f77149f74fed5c05388be8e5ffd4a31678"
251
+                "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"
252
+                "0306819e780fa57dc3bf6b99a0a059670b605ae0"
253
+                "a52be677adeda194bcdfdd12740f00535b6b0997"
254
+                "fb265bb344fee602dc175d1d5eac6bdc2d013a10"
255
+                "9db42f61f21e11529b9bc1c52ee118c03d663c04"
256
+                "197e10755343900cfbcb7fc6d863d4b3231e74d4"
257
+                "83324cbcb0ef5b778588cc6ba547c43c46bff8c6"
258
+                "88988d16b44fc03054807882783ed176162228f4"
259
+                "d2818bb88b8ec5235a8ae91309f31ba58d941d42"
260
+                "c1b83741c4dc3104f1686c20b143300db0a0e258"
261
+                "7e24a6a7a4349497fce06830fa132e9a8ef6fd06"
262
+                "9567a5825bf65b7e90d6f9a02574a00b53af9171"
263
+                "b757bb704b4c7a54622b7bd197ad5c1ea51ef2cc"
264
+                "ccccc07814249fc7a129bfffd07f09704d0f017b"
265
+                "a4b5b13466bb8e80d6f8015e2bf27667533ea441"
266
+                "3d894d70b6e1036034f22654408a382b6e303335"
267
+                "fed9d70ab2441d8c8abf19648668f885ed5a4986"
268
+                "b50c3a28d0bdab4d922d4b363cada4c582349178"
269
+                "4b825dc642cb6eb9a060e54bf8d69288fbee4904"
270
+                "71da880a8be0356b67d593fac348dfe429d1e0b6"
271
+                "cab7cafae3b61c5b101ee914cd4f5c8357e77fad"
272
+                "f03a8d1b4cea085ee9555037d09bca2dbfb990cb")))
273
+    (loop for commit in shas
274
+          for obj = (fwoar.cl-git::raw-object-for-ref
275
+                     (fwoar.cl-git::ref :fwoar.cl-git.git-objects.pack-2 commit))
276
+          do (5am:is (equal (crypto:byte-array-to-hex-string
277
+                             (crypto:digest-sequence :sha1 obj))
278
+                            commit)))))
279
+
190 280
 (fiveam:def-test pack-file-apply-delta-commands ()
191 281
   (flet ((test-ref (ref)
192 282
            (let* ((extracted-ref
... ...
@@ -209,14 +299,10 @@
209 299
 
210 300
     (test-ref "9776df71b5ddf298c56e99b7291f9e68906cf049")
211 301
 
212
-    #+(or) ;; broken
213 302
     (test-ref "31576396aff0fff28f69e0ef84571c0dc8cc43ec")
214 303
 
215
-    #+(or) ;; broken
216 304
     (test-ref "c516dfc248544509c3ae58e3a8c2ab81c225aa9c")
217 305
 
218
-    #+(or) ;; broken
219 306
     (test-ref "53d13ed284f8b57297d1b216e2bab7fb43f8db60")
220 307
 
221
-    #+(or) ;; broken
222 308
     (test-ref "912d31a169ddf1fca122d4c6fe1b1e6be7cd1176")))