Browse code
feat: continue implementing delta expansion
Edward Langley authored on 26/10/2023 09:53:59
Showing 8 changed files
Showing 8 changed files
- delta.lisp
- tests/git-objects.lisp
- tests/sample-git-objects/blob-3157639-fixture
- tests/sample-git-objects/blob-53d13ed-fixture
- tests/sample-git-objects/blob-87c2b9b-fixture
- tests/sample-git-objects/blob-912d31a-fixture
- tests/sample-git-objects/blob-9776df7-fixture
- tests/sample-git-objects/blob-c516dfc-fixture
... | ... |
@@ -5,13 +5,16 @@ |
5 | 5 |
(%base :initarg :base :reader base) |
6 | 6 |
(%commands :initarg :commands :reader commands) |
7 | 7 |
(%src-size :initarg :src-size :reader src-size) |
8 |
- (%delta-size :initarg :delta-size :reader delta-size))) |
|
8 |
+ (%delta-size :initarg :delta-size :reader delta-size)) |
|
9 |
+ (:documentation |
|
10 |
+ "The base type for deltified git objects")) |
|
9 | 11 |
|
10 | 12 |
(defclass+ ofs-delta (delta) |
11 | 13 |
()) |
12 | 14 |
|
13 | 15 |
(defclass+ ref-delta (delta) |
14 |
- ()) |
|
16 |
+ () |
|
17 |
+ (:documentation "TODO: mostly unimplemented/untested")) |
|
15 | 18 |
|
16 | 19 |
(defun make-ofs-delta (base commands repository src-size delta-size) |
17 | 20 |
(fw.lu:new 'ofs-delta base commands repository src-size delta-size)) |
... | ... |
@@ -36,19 +39,22 @@ |
36 | 39 |
:unless (zerop (aref bv ix)) |
37 | 40 |
:sum (expt 2 n)))) |
38 | 41 |
|
39 |
-(defun expand-copy (copy) |
|
40 |
- (destructuring-bind (command layout numbers) copy |
|
41 |
- (let* ((next-idx 0) |
|
42 |
- (parts (map '(vector (unsigned-byte 8)) |
|
43 |
- (lambda (layout-bit) |
|
44 |
- (if (= layout-bit 1) |
|
45 |
- (prog1 (elt numbers next-idx) |
|
46 |
- (incf next-idx)) |
|
47 |
- 0)) |
|
48 |
- (reverse layout)))) |
|
49 |
- (list command |
|
50 |
- (fwoar.bin-parser:le->int (subseq parts 0 4)) |
|
51 |
- (fwoar.bin-parser:le->int (subseq parts 4)))))) |
|
42 |
+(defun trace-bases (pack delta) |
|
43 |
+ (if (typep delta 'delta) |
|
44 |
+ (let* ((offset (second (base delta))) |
|
45 |
+ (o (extract-object-at-pos pack |
|
46 |
+ offset |
|
47 |
+ (make-instance 'git-ref |
|
48 |
+ :hash "00000000" |
|
49 |
+ :repo nil))) |
|
50 |
+ (obj (serapeum:assocdr :object-data o)) |
|
51 |
+ (raw (serapeum:assocdr :raw-data o))) |
|
52 |
+ (if (typep obj 'delta) |
|
53 |
+ (apply-commands (trace-bases pack obj) |
|
54 |
+ (commands delta)) |
|
55 |
+ (apply-commands (trace-bases pack raw) |
|
56 |
+ (commands delta)))) |
|
57 |
+ delta)) |
|
52 | 58 |
|
53 | 59 |
(defun partition-commands (data) |
54 | 60 |
(let ((idx 0)) |
... | ... |
@@ -72,10 +78,35 @@ |
72 | 78 |
(list :add |
73 | 79 |
(coerce (loop repeat (bit-vector->int insts) |
74 | 80 |
collect (advance)) |
75 |
- '(vector (unsigned-byte 8)))))))) |
|
81 |
+ '(vector (unsigned-byte 8))))))) |
|
82 |
+ (expand-copy (copy) |
|
83 |
+ (destructuring-bind (command layout numbers) copy |
|
84 |
+ (let* ((next-idx 0) |
|
85 |
+ (parts (map '(vector (unsigned-byte 8)) |
|
86 |
+ (lambda (layout-bit) |
|
87 |
+ (if (= layout-bit 1) |
|
88 |
+ (prog1 (elt numbers next-idx) |
|
89 |
+ (incf next-idx)) |
|
90 |
+ 0)) |
|
91 |
+ (reverse layout)))) |
|
92 |
+ (list command |
|
93 |
+ (fwoar.bin-parser:le->int (subseq parts 0 4)) |
|
94 |
+ (fwoar.bin-parser:le->int (subseq parts 4))))))) |
|
76 | 95 |
(loop while (< idx (length data)) |
77 | 96 |
collect (get-command))))) |
78 | 97 |
|
98 |
+(defun apply-commands (base commands) |
|
99 |
+ (flexi-streams:with-output-to-sequence (s) |
|
100 |
+ (flet ((do-copy (offset cnt) |
|
101 |
+ (write-sequence (subseq base offset (+ offset cnt)) |
|
102 |
+ s)) |
|
103 |
+ (do-add (data) |
|
104 |
+ (write-sequence data s))) |
|
105 |
+ (loop for (command . args) in commands |
|
106 |
+ when (eql command :copy) do |
|
107 |
+ (apply #'do-copy args) |
|
108 |
+ when (eql command :add) do |
|
109 |
+ (apply #'do-add args))))) |
|
79 | 110 |
|
80 | 111 |
(defun get-ofs-delta-offset (buf) |
81 | 112 |
(let* ((idx 0)) |
... | ... |
@@ -174,7 +174,6 @@ |
174 | 174 |
:pack pack-file)))) |
175 | 175 |
|
176 | 176 |
(fiveam:def-test pack-files-offsets () |
177 |
- |
|
178 | 177 |
(let* ((expectations-file |
179 | 178 |
(asdf:system-relative-pathname |
180 | 179 |
:co.fwoar.cl-git/tests |
... | ... |
@@ -187,3 +186,37 @@ |
187 | 186 |
(fwoar.cl-git::extract-object |
188 | 187 |
(fwoar.cl-git::packed-ref *fake-repo-2* ref))))))) |
189 | 188 |
)) |
189 |
+ |
|
190 |
+(fiveam:def-test pack-file-apply-delta-commands () |
|
191 |
+ (flet ((test-ref (ref) |
|
192 |
+ (let* ((extracted-ref |
|
193 |
+ (fwoar.cl-git::extract-object |
|
194 |
+ (fwoar.cl-git::packed-ref :fwoar.cl-git.git-objects.pack-2 ref))) |
|
195 |
+ (base-desc (fwoar.cl-git::base extracted-ref)) |
|
196 |
+ (pack (car (fwoar.cl-git::pack-files *fake-repo-2*))) |
|
197 |
+ (expectations-file |
|
198 |
+ (asdf:system-relative-pathname |
|
199 |
+ :co.fwoar.cl-git/tests |
|
200 |
+ (format nil "tests/sample-git-objects/blob-~a-fixture" |
|
201 |
+ (subseq ref 0 7)))) |
|
202 |
+ (expectations |
|
203 |
+ (alexandria:read-file-into-byte-vector expectations-file))) |
|
204 |
+ (5am:is |
|
205 |
+ (serapeum:vector= |
|
206 |
+ expectations |
|
207 |
+ (fwoar.cl-git::trace-bases pack extracted-ref)))))) |
|
208 |
+ (test-ref "87c2b9b2dfaa1fbf66b3fe88d3a925593886b159") |
|
209 |
+ |
|
210 |
+ (test-ref "9776df71b5ddf298c56e99b7291f9e68906cf049") |
|
211 |
+ |
|
212 |
+ #+(or) ;; broken |
|
213 |
+ (test-ref "31576396aff0fff28f69e0ef84571c0dc8cc43ec") |
|
214 |
+ |
|
215 |
+ #+(or) ;; broken |
|
216 |
+ (test-ref "c516dfc248544509c3ae58e3a8c2ab81c225aa9c") |
|
217 |
+ |
|
218 |
+ #+(or) ;; broken |
|
219 |
+ (test-ref "53d13ed284f8b57297d1b216e2bab7fb43f8db60") |
|
220 |
+ |
|
221 |
+ #+(or) ;; broken |
|
222 |
+ (test-ref "912d31a169ddf1fca122d4c6fe1b1e6be7cd1176"))) |
190 | 223 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,65 @@ |
1 |
+(in-package :fwoar.cl-git) |
|
2 |
+ |
|
3 |
+(defclass git-graph () |
|
4 |
+ ((%repo :initarg :repo :reader repo) |
|
5 |
+ (%depth :initarg :depth :reader depth) |
|
6 |
+ (%branches :reader branches) |
|
7 |
+ (%node-cache :reader node-cache :initform (make-hash-table :test 'equal)) |
|
8 |
+ (%edge-cache :reader edge-cache :initform (make-hash-table :test 'equal)))) |
|
9 |
+ |
|
10 |
+(defmethod initialize-instance :after ((object git-graph) &key) |
|
11 |
+ (setf (slot-value object '%branches) |
|
12 |
+ (fw.lu:alist-string-hash-table |
|
13 |
+ (funcall (data-lens:over |
|
14 |
+ (<>1 (data-lens:applying #'cons) |
|
15 |
+ (data-lens:transform-head |
|
16 |
+ (serapeum:op (subseq _1 0 (min (length _1) 7)))) |
|
17 |
+ #'reverse)) |
|
18 |
+ (branches (repo object)))))) |
|
19 |
+ |
|
20 |
+(defun git-graph (repo) |
|
21 |
+ (fw.lu:new 'git-graph repo)) |
|
22 |
+ |
|
23 |
+(defun get-commit-parents (repository commit) |
|
24 |
+ #+lispworks |
|
25 |
+ (declare (notinline mismatch serapeum:string-prefix-p)) |
|
26 |
+ (map 'list |
|
27 |
+ (serapeum:op (second (partition #\space _))) |
|
28 |
+ (remove-if-not (lambda (it) |
|
29 |
+ (serapeum:string-prefix-p "parent" it)) |
|
30 |
+ (nth-value 1 (parse-commit |
|
31 |
+ (split-object |
|
32 |
+ (chipz:decompress nil (chipz:make-dstate 'chipz:zlib) |
|
33 |
+ (loose-object repository |
|
34 |
+ commit)))))))) |
|
35 |
+ |
|
36 |
+(defmethod cl-dot:graph-object-node ((graph git-graph) (commit string)) |
|
37 |
+ (alexandria:ensure-gethash commit |
|
38 |
+ (node-cache graph) |
|
39 |
+ (make-instance 'cl-dot:node |
|
40 |
+ :attributes `(:label ,(gethash #1=(subseq commit 0 7) |
|
41 |
+ (branches graph) |
|
42 |
+ #1#))))) |
|
43 |
+ |
|
44 |
+(defmethod cl-dot:graph-object-points-to ((graph git-graph) (commit string)) |
|
45 |
+ #+nil |
|
46 |
+ (loop |
|
47 |
+ for cur = (list commit) then parents |
|
48 |
+ for parents = (let ((f (get-commit-parents (repo graph) (car cur)))) |
|
49 |
+ f) |
|
50 |
+ until (or (not parents) |
|
51 |
+ (cdr parents)) |
|
52 |
+ finally (return (or parents |
|
53 |
+ (when (not (equal commit (car cur))) |
|
54 |
+ cur)))) |
|
55 |
+ |
|
56 |
+ (funcall (data-lens:<>1 (data-lens:over (serapeum:op |
|
57 |
+ (setf (gethash (list commit _1) |
|
58 |
+ (edge-cache graph)) |
|
59 |
+ t) |
|
60 |
+ _1)) |
|
61 |
+ (data-lens:exclude (serapeum:op |
|
62 |
+ (gethash (list commit _1) |
|
63 |
+ (edge-cache graph)))) |
|
64 |
+ (data-lens:over (serapeum:op (subseq _ 0 7)))) |
|
65 |
+ (get-commit-parents (repo graph) commit))) |
0 | 66 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,34 @@ |
1 |
+;;; -*- Mode:Lisp; Syntax:ANSI-Common-Lisp; Package: ASDF-USER -*- |
|
2 |
+(in-package :asdf-user) |
|
3 |
+ |
|
4 |
+(defsystem :cl-git |
|
5 |
+ :description "A pure-Lisp git implementation" |
|
6 |
+ :author "Ed L <edward@elangley.org>" |
|
7 |
+ :license "MIT" |
|
8 |
+ :pathname #-fw.dev nil #+fw.dev #p"PROJECTS:cl-git;" |
|
9 |
+ :depends-on (:alexandria |
|
10 |
+ :chipz |
|
11 |
+ :cl-dot |
|
12 |
+ :data-lens |
|
13 |
+ :fwoar-lisputils |
|
14 |
+ :fwoar-lisputils/bin-parser |
|
15 |
+ :ironclad |
|
16 |
+ :serapeum |
|
17 |
+ :split-sequence |
|
18 |
+ :uiop) |
|
19 |
+ :components ((:file "package") |
|
20 |
+ (:file "util" :depends-on ("package")) |
|
21 |
+ |
|
22 |
+ ;; data model |
|
23 |
+ (:file "model" :depends-on ("package")) |
|
24 |
+ (:file "protocol" :depends-on ("package" "model")) |
|
25 |
+ (:file "repository" :depends-on ("package" "model")) |
|
26 |
+ (:file "tree" :depends-on ("package" "model")) |
|
27 |
+ (:file "commit" :depends-on ("package" "model")) |
|
28 |
+ |
|
29 |
+ (:file "extract" :depends-on ("package" "commit" "tree")) |
|
30 |
+ (:file "branch" :depends-on ("package" "extract")) |
|
31 |
+ (:file "git" :depends-on ("package" "util" "model" "branch")) |
|
32 |
+ |
|
33 |
+ ;; stable programmer interface |
|
34 |
+ (:file "porcelain" :depends-on ("package" "git" "commit")))) |
0 | 35 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,81 @@ |
1 |
+(in-package :fwoar.cl-git) |
|
2 |
+ |
|
3 |
+(fw.lu:defun-ct batch-4 (bytes) |
|
4 |
+ (mapcar 'fwoar.bin-parser:be->int |
|
5 |
+ (serapeum:batches bytes 4))) |
|
6 |
+ |
|
7 |
+(fw.lu:defun-ct batch-20 (bytes) |
|
8 |
+ (serapeum:batches bytes 20)) |
|
9 |
+ |
|
10 |
+(defmacro sym->plist (&rest syms) |
|
11 |
+ `(list ,@(loop for sym in syms |
|
12 |
+ append (list (alexandria:make-keyword sym) |
|
13 |
+ sym)))) |
|
14 |
+ |
|
15 |
+(defmacro inspect- (s form) |
|
16 |
+ `(let ((result ,form)) |
|
17 |
+ (format ,s "~&~s (~{~s~^ ~})~%~4t~s~%" |
|
18 |
+ ',form |
|
19 |
+ ,(typecase form |
|
20 |
+ (list `(list ',(car form) ,@(cdr form))) |
|
21 |
+ (t `(list ,form))) |
|
22 |
+ result) |
|
23 |
+ result)) |
|
24 |
+ |
|
25 |
+(defun inspect-* (fn) |
|
26 |
+ (lambda (&rest args) |
|
27 |
+ (declare (dynamic-extent args)) |
|
28 |
+ (inspect- *trace-output* |
|
29 |
+ (apply fn args)))) |
|
30 |
+ |
|
31 |
+(defun partition (char string &key from-end (with-offset nil wo-p)) |
|
32 |
+ (let ((pos (position char string :from-end from-end))) |
|
33 |
+ (if pos |
|
34 |
+ (if wo-p |
|
35 |
+ (list (subseq string 0 (+ pos with-offset 1)) |
|
36 |
+ (subseq string (+ pos 1 with-offset))) |
|
37 |
+ (list (subseq string 0 pos) |
|
38 |
+ (subseq string (1+ pos)))) |
|
39 |
+ (list string |
|
40 |
+ nil)))) |
|
41 |
+ |
|
42 |
+(defun partition-subseq (subseq string &key from-end) |
|
43 |
+ (let ((pos (search subseq string :from-end from-end))) |
|
44 |
+ (if pos |
|
45 |
+ (list (subseq string 0 pos) |
|
46 |
+ (subseq string (+ (length subseq) pos))) |
|
47 |
+ (list string |
|
48 |
+ nil)))) |
|
49 |
+ |
|
50 |
+(serapeum:defalias ->sha-string |
|
51 |
+ (data-lens:<>1 (data-lens:over 'fwoar.bin-parser:byte-array-to-hex-string) |
|
52 |
+ 'batch-20)) |
|
53 |
+ |
|
54 |
+(defun read-bytes (count format stream) |
|
55 |
+ (let ((seq (make-array count :element-type 'serapeum:octet))) |
|
56 |
+ (read-sequence seq stream) |
|
57 |
+ (funcall format |
|
58 |
+ seq))) |
|
59 |
+ |
|
60 |
+(defun sp-ob (ob-string) |
|
61 |
+ (partition #\null |
|
62 |
+ ob-string)) |
|
63 |
+ |
|
64 |
+(defun split-object (object-data) |
|
65 |
+ (destructuring-bind (head tail) |
|
66 |
+ (partition 0 |
|
67 |
+ object-data) |
|
68 |
+ (destructuring-bind (type length) |
|
69 |
+ (partition #\space |
|
70 |
+ (babel:octets-to-string head :encoding :latin1)) |
|
71 |
+ (values tail |
|
72 |
+ (list type |
|
73 |
+ (parse-integer length)))))) |
|
74 |
+ |
|
75 |
+(defun parse-commit (commit) |
|
76 |
+ (destructuring-bind (metadata message) |
|
77 |
+ (partition-subseq #(#\newline #\newline) |
|
78 |
+ commit #+(or)(babel:octets-to-string commit :encoding :latin1)) |
|
79 |
+ (values message |
|
80 |
+ (map 'vector (serapeum:op (partition #\space _)) |
|
81 |
+ (fwoar.string-utils:split #\newline metadata))))) |
0 | 82 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,49 @@ |
1 |
+(in-package :fwoar.cl-git) |
|
2 |
+ |
|
3 |
+(fw.lu:defun-ct batch-4 (bytes) |
|
4 |
+ (mapcar 'fwoar.bin-parser:be->int |
|
5 |
+ (serapeum:batches bytes 4))) |
|
6 |
+ |
|
7 |
+(fw.lu:defun-ct batch-20 (bytes) |
|
8 |
+ (serapeum:batches bytes 20)) |
|
9 |
+ |
|
10 |
+(defmacro sym->plist (&rest syms) |
|
11 |
+ `(list ,@(loop for sym in syms |
|
12 |
+ append (list (alexandria:make-keyword sym) |
|
13 |
+ sym)))) |
|
14 |
+ |
|
15 |
+(defmacro inspect- (s form) |
|
16 |
+ `(let ((result ,form)) |
|
17 |
+ (format ,s "~&~s (~{~s~^ ~})~%~4t~s~%" |
|
18 |
+ ',form |
|
19 |
+ ,(typecase form |
|
20 |
+ (list `(list ',(car form) ,@(cdr form))) |
|
21 |
+ (t `(list ,form))) |
|
22 |
+ result) |
|
23 |
+ result)) |
|
24 |
+ |
|
25 |
+(defun inspect-* (fn) |
|
26 |
+ (lambda (&rest args) |
|
27 |
+ (declare (dynamic-extent args)) |
|
28 |
+ (inspect- *trace-output* |
|
29 |
+ (apply fn args)))) |
|
30 |
+ |
|
31 |
+(defun partition (char string &key from-end) |
|
32 |
+ (let ((pos (position char string :from-end from-end))) |
|
33 |
+ (if pos |
|
34 |
+ (list (subseq string 0 pos) |
|
35 |
+ (subseq string (1+ pos))) |
|
36 |
+ (list string |
|
37 |
+ nil)))) |
|
38 |
+ |
|
39 |
+(defun partition-subseq (subseq string &key from-end) |
|
40 |
+ (let ((pos (search subseq string :from-end from-end))) |
|
41 |
+ (if pos |
|
42 |
+ (list (subseq string 0 pos) |
|
43 |
+ (subseq string (+ (length subseq) pos))) |
|
44 |
+ (list string |
|
45 |
+ nil)))) |
|
46 |
+ |
|
47 |
+(serapeum:defalias ->sha-string |
|
48 |
+ (data-lens:<>1 (data-lens:over 'fwoar.bin-parser:byte-array-to-hex-string) |
|
49 |
+ 'batch-20)) |
0 | 50 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,163 @@ |
1 |
+(in-package :fwoar.cl-git) |
|
2 |
+ |
|
3 |
+(defun edges-in-fanout (toc s sha) |
|
4 |
+ (let* ((fanout-offset (getf toc :fanout))) |
|
5 |
+ (file-position s (+ fanout-offset (* 4 (1- (elt sha 0))))) |
|
6 |
+ (destructuring-bind ((_ . cur) (__ . next)) |
|
7 |
+ (fwoar.bin-parser:extract '((cur 4 fwoar.bin-parser:be->int) |
|
8 |
+ (next 4 fwoar.bin-parser:be->int)) |
|
9 |
+ s) |
|
10 |
+ (declare (ignore _ __)) |
|
11 |
+ (values cur next)))) |
|
12 |
+ |
|
13 |
+(defun find-sha-between-terms (toc s start end sha) |
|
14 |
+ (unless (>= start end) |
|
15 |
+ (let* ((sha-offset (getf toc :shas)) |
|
16 |
+ (mid (floor (+ start end) |
|
17 |
+ 2))) |
|
18 |
+ (file-position s (+ sha-offset (* 20 mid))) |
|
19 |
+ (let ((sha-at-mid (read-bytes 20 'fwoar.bin-parser:byte-array-to-hex-string s))) |
|
20 |
+ (cond ((string< sha sha-at-mid) |
|
21 |
+ (find-sha-between-terms toc s start mid sha)) |
|
22 |
+ ((string> sha sha-at-mid) |
|
23 |
+ (find-sha-between-terms toc s (1+ mid) end sha)) |
|
24 |
+ (t mid)))))) |
|
25 |
+ |
|
26 |
+(defun find-pack-containing (pack-file id) |
|
27 |
+ (with-open-file (s (index-file pack-file) |
|
28 |
+ :element-type '(unsigned-byte 8)) |
|
29 |
+ (let ((binary-sha (ironclad:hex-string-to-byte-array id)) |
|
30 |
+ (toc (idx-toc s))) |
|
31 |
+ (multiple-value-bind (_ end) (edges-in-fanout toc s binary-sha) |
|
32 |
+ (declare (ignore _)) |
|
33 |
+ (let ((midpoint (find-sha-between-terms toc s 0 end id))) |
|
34 |
+ (and midpoint |
|
35 |
+ (values pack-file |
|
36 |
+ midpoint))))))) |
|
37 |
+ |
|
38 |
+(defun find-object-in-pack-files (repo id) |
|
39 |
+ (dolist (pack-file (pack-files repo)) |
|
40 |
+ (multiple-value-bind (pack mid) (find-pack-containing pack-file id) |
|
41 |
+ (when pack |
|
42 |
+ (return-from find-object-in-pack-files |
|
43 |
+ (values pack mid)))))) |
|
44 |
+ |
|
45 |
+(defun behead (data) |
|
46 |
+ (elt (partition 0 data) |
|
47 |
+ 1)) |
|
48 |
+ |
|
49 |
+(defun tree-entry (data) |
|
50 |
+ (values-list (partition 0 data :with-offset 20))) |
|
51 |
+ |
|
52 |
+(defun format-tree-entry (entry) |
|
53 |
+ (destructuring-bind (info sha) (partition 0 entry) |
|
54 |
+ (concatenate 'vector |
|
55 |
+ (apply #'concatenate 'vector |
|
56 |
+ (serapeum:intersperse (vector (char-code #\tab)) |
|
57 |
+ (reverse |
|
58 |
+ (partition (char-code #\space) |
|
59 |
+ info)))) |
|
60 |
+ (list (char-code #\tab)) |
|
61 |
+ (babel:string-to-octets (elt (->sha-string sha) 0) :encoding *git-encoding*)))) |
|
62 |
+ |
|
63 |
+(defun tree-entries (data &optional accum) |
|
64 |
+ (if (<= (length data) 0) |
|
65 |
+ (apply #'concatenate 'vector |
|
66 |
+ (serapeum:intersperse (vector (char-code #\newline)) |
|
67 |
+ (nreverse accum))) |
|
68 |
+ (multiple-value-bind (next rest) (tree-entry data) |
|
69 |
+ (tree-entries rest |
|
70 |
+ (list* (format-tree-entry next) |
|
71 |
+ accum))))) |
|
72 |
+ |
|
73 |
+(defun extract-object-of-type (type s repository) |
|
74 |
+ (with-simple-restart (continue "Skip object of type ~s" type) |
|
75 |
+ (%extract-object-of-type type s repository))) |
|
76 |
+ |
|
77 |
+(defgeneric %extract-object-of-type (type s repository) |
|
78 |
+ (:method ((type integer) s repository) |
|
79 |
+ (extract-object-of-type (object-type->sym type) |
|
80 |
+ s |
|
81 |
+ repository)) |
|
82 |
+ |
|
83 |
+ (:method ((type (eql :commit)) s repository) |
|
84 |
+ s) |
|
85 |
+ |
|
86 |
+ (:method ((type (eql :blob)) s repository) |
|
87 |
+ s) |
|
88 |
+ |
|
89 |
+ (:method ((type (eql :tag)) s repository) |
|
90 |
+ s) |
|
91 |
+ |
|
92 |
+ (:method ((type (eql :tree)) s repository) |
|
93 |
+ (tree-entries s))) |
|
94 |
+ |
|
95 |
+(defun read-object-from-pack (s repository) |
|
96 |
+ (let* ((metadata (fwoar.bin-parser:extract-high s)) |
|
97 |
+ (type (object-type->sym (get-object-type metadata))) |
|
98 |
+ (size (get-object-size metadata)) |
|
99 |
+ (decompressed (if (member type '(:ofs-delta :ref-delta)) |
|
100 |
+ s |
|
101 |
+ (chipz:decompress nil (chipz:make-dstate 'chipz:zlib) s))) |
|
102 |
+ (object-data (extract-object-of-type type decompressed repository))) |
|
103 |
+ (list (cons :type (object-type->sym type)) |
|
104 |
+ (cons :decompressed-size size) |
|
105 |
+ (cons :object-data object-data) |
|
106 |
+ (cons :raw-data object-data)))) |
|
107 |
+ |
|
108 |
+(defun extract-object-from-pack (pack obj-number) |
|
109 |
+ (with-open-file (s (index-file pack) :element-type '(unsigned-byte 8)) |
|
110 |
+ (with-open-file (p (pack-file pack) :element-type '(unsigned-byte 8)) |
|
111 |
+ (let* ((toc (idx-toc s)) |
|
112 |
+ (offset-offset (getf toc :4-byte-offsets))) |
|
113 |
+ (file-position s (+ offset-offset (* 4 obj-number))) |
|
114 |
+ (let ((object-offset-in-pack (read-bytes 4 'fwoar.bin-parser:be->int s))) |
|
115 |
+ (file-position p object-offset-in-pack) |
|
116 |
+ (read-object-from-pack p (repository pack))))))) |
|
117 |
+ |
|
118 |
+(defclass git-object () |
|
119 |
+ ((%repo :initarg :repo :reader object-repo) |
|
120 |
+ (%hash :initarg :hash :reader object-hash))) |
|
121 |
+(defclass loose-object (git-object) |
|
122 |
+ ((%file :initarg :file :reader loose-object-file))) |
|
123 |
+(defclass packed-object (git-object) |
|
124 |
+ ((%pack :initarg :pack :reader packed-object-pack) |
|
125 |
+ (%offset :initarg :offset :reader packed-object-offset))) |
|
126 |
+ |
|
127 |
+(defun object (repo id) |
|
128 |
+ (let ((repo-root (typecase repo |
|
129 |
+ (repository (root repo)) |
|
130 |
+ (string (namestring |
|
131 |
+ (truename repo)))))) |
|
132 |
+ (or (alexandria:when-let ((object-file (loose-object repo id))) |
|
133 |
+ (make-instance 'loose-object :repo repo-root :hash id :file object-file)) |
|
134 |
+ (multiple-value-bind (pack offset) (find-object-in-pack-files repo id) |
|
135 |
+ (when pack |
|
136 |
+ (make-instance 'packed-object :repo repo-root :offset offset :pack pack)))))) |
|
137 |
+ |
|
138 |
+(defun extract-loose-object (repo file) |
|
139 |
+ (with-open-file (s file :element-type '(unsigned-byte 8)) |
|
140 |
+ (alexandria:when-let ((result (chipz:decompress nil (chipz:make-dstate 'chipz:zlib) |
|
141 |
+ s))) |
|
142 |
+ (destructuring-bind (type rest) |
|
143 |
+ (partition (char-code #\space) result) |
|
144 |
+ (extract-object-of-type (object-type->sym (babel:octets-to-string type)) |
|
145 |
+ (elt (partition 0 rest) |
|
146 |
+ 1) |
|
147 |
+ repo))))) |
|
148 |
+ |
|
149 |
+(defgeneric extract-object-next (object) |
|
150 |
+ (:method ((object loose-object)) |
|
151 |
+ (extract-loose-object (object-repo object) |
|
152 |
+ (loose-object-file object))) |
|
153 |
+ (:method ((object packed-object)) |
|
154 |
+ (data-lens.lenses:view *object-data-lens* |
|
155 |
+ (extract-object-from-pack (packed-object-pack object) |
|
156 |
+ (packed-object-offset object))))) |
|
157 |
+ |
|
158 |
+(defun extract-object (repo id) |
|
159 |
+ (if (loose-object-p repo id) |
|
160 |
+ (extract-loose-object repo (loose-object repo id)) |
|
161 |
+ (data-lens.lenses:view *object-data-lens* |
|
162 |
+ (multiple-value-call 'extract-object-from-pack |
|
163 |
+ (find-object-in-pack-files (root repo) id))))) |
0 | 164 |
new file mode 100644 |
... | ... |
@@ -0,0 +1,140 @@ |
1 |
+(in-package :fwoar.cl-git) |
|
2 |
+ |
|
3 |
+(defun seek-to-object-in-pack (idx-stream pack-stream obj-number) |
|
4 |
+ (let* ((toc (idx-toc idx-stream)) |
|
5 |
+ (offset-offset (getf toc :4-byte-offsets))) |
|
6 |
+ (file-position idx-stream (+ offset-offset (* 4 obj-number))) |
|
7 |
+ (let ((object-offset-in-pack (read-bytes 4 'fwoar.bin-parser:be->int idx-stream))) |
|
8 |
+ (file-position pack-stream object-offset-in-pack)))) |
|
9 |
+ |
|
10 |
+(deftype octet () |
|
11 |
+ '(unsigned-byte 8)) |
|
12 |
+ |
|
13 |
+(defmacro with-open-files* ((&rest bindings) &body body) |
|
14 |
+ `(uiop:nest ,@(mapcar (serapeum:op |
|
15 |
+ `(with-open-file ,_1)) |
|
16 |
+ bindings) |
|
17 |
+ (progn |
|
18 |
+ ,@body))) |
|
19 |
+ |
|
20 |
+(defun extract-object-metadata-from-pack (pack obj-number) |
|
21 |
+ (with-open-files* ((s (index-file pack) :element-type 'octet) |
|
22 |
+ (p (pack-file pack) :element-type 'octet)) |
|
23 |
+ (seek-to-object-in-pack s p obj-number) |
|
24 |
+ (read-object-metadata-from-pack p))) |
|
25 |
+ |
|
26 |
+(defun turn-read-object-to-string (object) |
|
27 |
+ (data-lens.lenses:over *object-data-lens* |
|
28 |
+ 'babel:octets-to-string object)) |
|
29 |
+ |
|
30 |
+(defun fanout-table (s) |
|
31 |
+ (coerce (alexandria:assoc-value |
|
32 |
+ (fwoar.bin-parser:extract '((head 4) |
|
33 |
+ (version 4) |
|
34 |
+ (fanout-table #.(* 4 256) batch-4)) |
|
35 |
+ s) |
|
36 |
+ 'fanout-table) |
|
37 |
+ 'vector)) |
|
38 |
+ |
|
39 |
+(defun get-object-size (bytes) |
|
40 |
+ (let ((first (elt bytes 0)) |
|
41 |
+ (rest (subseq bytes 1))) |
|
42 |
+ (logior (ash (fwoar.bin-parser:be->int rest) 4) |
|
43 |
+ (logand first 15)))) |
|
44 |
+ |
|
45 |
+(defun get-object-type (bytes) |
|
46 |
+ (let ((first (elt bytes 0))) |
|
47 |
+ (ldb (byte 3 4) |
|
48 |
+ first))) |
|
49 |
+ |
|
50 |
+(defun get-shas-before (fanout-table first-sha-byte s) |
|
51 |
+ (let ((num-before (elt fanout-table first-sha-byte)) |
|
52 |
+ (num-total (alexandria:last-elt fanout-table))) |
|
53 |
+ (values (fwoar.bin-parser:extract (list (list 'shas (* 20 num-before) '->sha-string)) |
|
54 |
+ s) |
|
55 |
+ (- num-total num-before)))) |
|
56 |
+ |
|
57 |
+(defun advance-past-crcs (obj-count s) |
|
58 |
+ (file-position s |
|
59 |
+ (+ (file-position s) |
|
60 |
+ (* 4 obj-count)))) |
|
61 |
+ |
|
62 |
+(defun object-offset (object-number s) |
|
63 |
+ (file-position s |
|
64 |
+ (+ (file-position s) |
|
65 |
+ (* (1- object-number) |
|
66 |
+ 4))) |
|
67 |
+ (fwoar.bin-parser:extract '((offset 4 fwoar.bin-parser:be->int)) |
|
68 |
+ s)) |
|
69 |
+ |
|
70 |
+(defun idx-toc (idx-stream) |
|
71 |
+ (let* ((object-count (progn (file-position idx-stream 1028) |
|
72 |
+ (let ((buf (make-array 4))) |
|
73 |
+ (read-sequence buf idx-stream) |
|
74 |
+ (fwoar.bin-parser:be->int buf)))) |
|
75 |
+ (signature 0) |
|
76 |
+ (version 4) |
|
77 |
+ (fanout 8) |
|
78 |
+ (shas (+ fanout |
|
79 |
+ (* 4 256))) |
|
80 |
+ (packed-crcs (+ shas |
|
81 |
+ (* 20 object-count))) |
|
82 |
+ (4-byte-offsets (+ packed-crcs |
|
83 |
+ (* 4 object-count))) |
|
84 |
+ (8-byte-offsets-pro (+ 4-byte-offsets |
|
85 |
+ (* object-count 4))) |
|
86 |
+ (pack-sha (- (file-length idx-stream) |
|
87 |
+ 40)) |
|
88 |
+ (8-byte-offsets (when (/= 8-byte-offsets-pro pack-sha) |
|
89 |
+ 8-byte-offsets-pro)) |
|
90 |
+ (idx-sha (- (file-length idx-stream) |
|
91 |
+ 20))) |
|
92 |
+ (values (sym->plist signature |
|
93 |
+ version |
|
94 |
+ fanout |
|
95 |
+ shas |
|
96 |
+ packed-crcs |
|
97 |
+ 4-byte-offsets |
|
98 |
+ 8-byte-offsets |
|
99 |
+ pack-sha |
|
100 |
+ idx-sha) |
|
101 |
+ object-count))) |
|
102 |
+ |
|
103 |
+(defun collect-data (idx-toc s num) |
|
104 |
+ (let ((sha-idx (getf idx-toc :shas)) |
|
105 |
+ (crc-idx (getf idx-toc :packed-crcs)) |
|
106 |
+ (4-byte-offsets-idx (getf idx-toc :4-byte-offsets)) |
|
107 |
+ (8-byte-offsets-idx (getf idx-toc :8-byte-offsets))) |
|
108 |
+ (declare (ignore 8-byte-offsets-idx)) |
|
109 |
+ (values num |
|
110 |
+ (progn |
|
111 |
+ (file-position s (+ sha-idx (* num 20))) |
|
112 |
+ (read-bytes 20 'fwoar.bin-parser:byte-array-to-hex-string s)) |
|
113 |
+ (progn |
|
114 |
+ (file-position s (+ crc-idx (* num 4))) |
|
115 |
+ (read-bytes 4 'identity s)) |
|
116 |
+ (progn |
|
117 |
+ (file-position s (+ 4-byte-offsets-idx (* num 4))) |
|
118 |
+ (read-bytes 4 'fwoar.bin-parser:be->int s))))) |
|
119 |
+ |
|
120 |
+(defun read-object-metadata-from-pack (s) |
|
121 |
+ (let* ((metadata (fwoar.bin-parser:extract-high s)) |
|
122 |
+ (type-raw (get-object-type metadata)) |
|
123 |
+ (size (get-object-size metadata)) |
|
124 |
+ (type (object-type->sym type-raw))) |
|
125 |
+ (values (cons :type type) |
|
126 |
+ (cons :decompressed-size size)))) |
|
127 |
+ |
|
128 |
+(defun get-first-commits-from-pack (idx pack n) |
|
129 |
+ (let ((toc (idx-toc idx)) |
|
130 |
+ (result ())) |
|
131 |
+ (dotimes (i n (reverse result)) |
|
132 |
+ (multiple-value-bind (_ sha __ offset) (collect-data toc idx i) |
|
133 |
+ (declare (ignore _ __)) |
|
134 |
+ (file-position pack offset) |
|
135 |
+ (push `((:sha . ,sha) |
|
136 |
+ ,@(multiple-value-list |
|
137 |
+ (read-object-metadata-from-pack pack)) |
|
138 |
+ (:offset . ,offset)) |
|
139 |
+ result))))) |
|
140 |
+ |