Browse code
adjustments for ext2 stuff
dancy authored on 31/05/2007 23:13:08
Showing 4 changed files
Showing 4 changed files
... | ... |
@@ -34,7 +34,7 @@ v2: make-mime-part: Default external-format is :utf8." |
34 | 34 |
;; merchantability or fitness for a particular purpose. See the GNU |
35 | 35 |
;; Lesser General Public License for more details. |
36 | 36 |
;; |
37 |
-;; $Id: mime-api.cl,v 1.6 2007/04/17 22:01:42 layer Exp $ |
|
37 |
+;; $Id: mime-api.cl,v 1.7 2007/05/31 23:13:08 dancy Exp $ |
|
38 | 38 |
|
39 | 39 |
(defpackage :net.post-office |
40 | 40 |
(:use #:lisp #:excl) |
... | ... |
@@ -45,6 +45,7 @@ v2: make-mime-part: Default external-format is :utf8." |
45 | 45 |
#:mime-part-p |
46 | 46 |
#:mime-part-constructed-p |
47 | 47 |
#:map-over-parts |
48 |
+ #:decode-header-text |
|
48 | 49 |
|
49 | 50 |
;; macros |
50 | 51 |
#:mime-get-header |
... | ... |
@@ -362,6 +363,77 @@ This is a multi-part message in MIME format.~%")) |
362 | 363 |
elseif (message-rfc822-p (mime-part-type part) (mime-part-subtype part)) |
363 | 364 |
then (map-over-parts (mime-part-message part) function))) |
364 | 365 |
|
366 |
+(defparameter *charset-to-ef* |
|
367 |
+ '(("shift-jis" . :shiftjis) |
|
368 |
+ ("us-ascii" . :latin1) |
|
369 |
+ ("gbk" . :936) |
|
370 |
+ #+ignore("euc-kr" :iso-2022-kr) |
|
371 |
+ )) |
|
372 |
+ |
|
373 |
+(defun charset-to-external-format (charset) |
|
374 |
+ (setf charset (string-downcase charset)) |
|
375 |
+ (block nil |
|
376 |
+ (let ((ef (find-external-format charset :errorp nil))) |
|
377 |
+ (if ef |
|
378 |
+ (return ef)) |
|
379 |
+ (if (setf ef (cdr (assoc charset *charset-to-ef* :test #'string=))) |
|
380 |
+ (return (find-external-format ef))) |
|
381 |
+ (multiple-value-bind (matched x inner) |
|
382 |
+ (match-re "^windows-(\\d+)$" charset) |
|
383 |
+ (declare (ignore x)) |
|
384 |
+ (if (and matched (setf ef (find-external-format inner :errorp nil))) |
|
385 |
+ (return ef))) |
|
386 |
+ (multiple-value-bind (matched x dig) |
|
387 |
+ (match-re "^iso-8859-(\\d+)(?:-[ie])?$" charset) |
|
388 |
+ (declare (ignore x)) |
|
389 |
+ (if (and matched (setf ef (find-external-format (format nil "iso8859-~a" dig) :errorp nil))) |
|
390 |
+ (return ef))) |
|
391 |
+ |
|
392 |
+ nil))) |
|
393 |
+ |
|
394 |
+(defun decode-header-text (text) |
|
395 |
+ (declare (optimize (speed 3)) |
|
396 |
+ (string text)) |
|
397 |
+ (let ((pos 0) |
|
398 |
+ (len (length text))) |
|
399 |
+ (declare (fixnum pos len)) |
|
400 |
+ (with-output-to-string (res) |
|
401 |
+ (while (< pos len) |
|
402 |
+ (multiple-value-bind (matched whole charset encoding encoded) |
|
403 |
+ (match-re "=\\?([^?]+)\\?(q|b)\\?(.*?)\\?=" text |
|
404 |
+ :start pos |
|
405 |
+ :case-fold t |
|
406 |
+ :return :index) |
|
407 |
+ |
|
408 |
+ (if (null matched) |
|
409 |
+ (return)) |
|
410 |
+ |
|
411 |
+ ;; Write out the "before" stuff. |
|
412 |
+ (write-string text res :start pos :end (car whole)) |
|
413 |
+ |
|
414 |
+ (let* ((charset (subseq text (car charset) (cdr charset))) |
|
415 |
+ (ef (charset-to-external-format charset))) |
|
416 |
+ (if (null ef) |
|
417 |
+ (error "No external format found for MIME charset ~s" charset)) |
|
418 |
+ (write-string |
|
419 |
+ (if* (char-equal (char text (car encoding)) #\q) |
|
420 |
+ then (qp-decode-string text |
|
421 |
+ :start (car encoded) |
|
422 |
+ :end (cdr encoded) |
|
423 |
+ :external-format ef) |
|
424 |
+ else ;; FIXME: Clean this up with/if rfe6174 is completed. |
|
425 |
+ (octets-to-string |
|
426 |
+ (base64-string-to-usb8-array |
|
427 |
+ (subseq text (car encoded) (cdr encoded))) |
|
428 |
+ :external-format ef)) |
|
429 |
+ res)) |
|
430 |
+ |
|
431 |
+ (setf pos (cdr whole)))) |
|
432 |
+ |
|
433 |
+ ;; Write out the remaining portion. |
|
434 |
+ (write-string text res :start pos)))) |
|
435 |
+ |
|
436 |
+ |
|
365 | 437 |
;; Stuff ripped off from aserve |
366 | 438 |
|
367 | 439 |
(defun split-namestring (file) |
... | ... |
@@ -14,7 +14,7 @@ |
14 | 14 |
;; merchantability or fitness for a particular purpose. See the GNU |
15 | 15 |
;; Lesser General Public License for more details. |
16 | 16 |
;; |
17 |
-;; $Id: mime-parse.cl,v 1.5 2007/05/29 18:25:50 layer Exp $ |
|
17 |
+;; $Id: mime-parse.cl,v 1.6 2007/05/31 23:13:08 dancy Exp $ |
|
18 | 18 |
|
19 | 19 |
(defpackage :net.post-office |
20 | 20 |
(:use #:lisp #:excl) |
... | ... |
@@ -66,22 +66,36 @@ |
66 | 66 |
(defmacro get-header (name headers) |
67 | 67 |
`(cdr (assoc ,name ,headers :test #'equalp))) |
68 | 68 |
|
69 |
+(defvar *mime-read-line-unread*) |
|
70 |
+ |
|
69 | 71 |
(defun parse-mime-structure (stream &key mbox) |
70 |
- (parse-mime-structure-1 stream nil nil 0 mbox)) |
|
72 |
+ (let ((*mime-read-line-unread* nil)) |
|
73 |
+ (multiple-value-bind (part stop-reason newpos) |
|
74 |
+ (parse-mime-structure-1 stream nil nil 0 mbox :outer t) |
|
75 |
+ (when (and part mbox (not (eq stop-reason :eof))) |
|
76 |
+ (format t "advancing to next mbox boundary~%") |
|
77 |
+ (multiple-value-bind (x y z newpos2) |
|
78 |
+ (read-until-boundary stream nil newpos t) |
|
79 |
+ (declare (ignore x y z)) |
|
80 |
+ (setf stop-reason :eof) |
|
81 |
+ (setf newpos newpos2))) |
|
82 |
+ (values part stop-reason newpos)))) |
|
71 | 83 |
|
72 | 84 |
;; Returns values: |
73 |
-;; 1) The part |
|
85 |
+;; 1) The part (or nil if EOF while reading readers) |
|
74 | 86 |
;; 2) The stop reason (:eof, :close-boundary, nil (meaning regular boundary)) |
75 | 87 |
;; 3) The new position |
76 | 88 |
|
77 | 89 |
;: mime-parse-message-rfc822, parse-mime-structure, mime-parse-multipart |
78 | 90 |
;: |
79 |
-(defun parse-mime-structure-1 (stream boundary digest pos mbox) |
|
91 |
+(defun parse-mime-structure-1 (stream boundary digest pos mbox &key outer) |
|
80 | 92 |
(let ((part (make-instance 'mime-part-parsed))) |
81 | 93 |
(setf (mime-part-position part) pos) |
82 | 94 |
(setf (mime-part-boundary part) boundary) |
83 | 95 |
(multiple-value-bind (headers bytes) |
84 | 96 |
(parse-headers stream mbox) |
97 |
+ (if (and (null headers) outer) |
|
98 |
+ (return-from parse-mime-structure-1)) |
|
85 | 99 |
(setf (mime-part-headers-size part) bytes) |
86 | 100 |
(incf pos bytes) |
87 | 101 |
(setf (mime-part-body-position part) pos) |
... | ... |
@@ -367,13 +381,6 @@ |
367 | 381 |
(defconstant *whitespace* '(#\space #\tab #\return #\newline)) |
368 | 382 |
|
369 | 383 |
|
370 |
-;: parse-headers |
|
371 |
-;: |
|
372 |
-(defun blank-line-p (line len) |
|
373 |
- (declare (optimize (speed 3) (safety 0)) |
|
374 |
- (fixnum len)) |
|
375 |
- (= len (skip-whitespace line 0 len))) |
|
376 |
- |
|
377 | 384 |
;: parse-headers |
378 | 385 |
;: |
379 | 386 |
(defun parse-header-line (line len) |
... | ... |
@@ -414,27 +421,47 @@ |
414 | 421 |
;: |
415 | 422 |
(defun mime-read-line (stream buffer) |
416 | 423 |
(declare (optimize (speed 3) (safety 0))) |
417 |
- (let ((pos 0) |
|
418 |
- (end (length buffer)) |
|
419 |
- (count 0) |
|
420 |
- char) |
|
421 |
- (declare (fixnum pos end count)) |
|
424 |
+ |
|
425 |
+ (if* *mime-read-line-unread* |
|
426 |
+ then (let* ((line (car *mime-read-line-unread*)) |
|
427 |
+ (bytes (cdr *mime-read-line-unread*)) |
|
428 |
+ (len (length line))) |
|
429 |
+ (declare (simple-string line)) |
|
430 |
+ (setf *mime-read-line-unread* nil) |
|
431 |
+ (dotimes (n len) |
|
432 |
+ (setf (schar buffer n) (schar line n))) |
|
433 |
+ (values len bytes)) |
|
434 |
+ else (let ((pos 0) |
|
435 |
+ (end (length buffer)) |
|
436 |
+ (count 0) |
|
437 |
+ char) |
|
438 |
+ (declare (fixnum pos end count)) |
|
422 | 439 |
|
423 |
- (while (and (< pos end) (setf char (read-char stream nil nil))) |
|
424 |
- (incf count) |
|
425 |
- (if (char= char #\newline) |
|
426 |
- (return)) |
|
427 |
- (setf (schar buffer pos) char) |
|
428 |
- (incf pos)) |
|
440 |
+ (while (and (< pos end) (setf char (read-char stream nil nil))) |
|
441 |
+ (incf count) |
|
442 |
+ (if (char= char #\newline) |
|
443 |
+ (return)) |
|
444 |
+ (setf (schar buffer pos) char) |
|
445 |
+ (incf pos)) |
|
429 | 446 |
|
430 |
- (if* (= count 0) |
|
431 |
- then nil ;; EOF |
|
432 |
- else ;; Check for CR/LF combo |
|
433 |
- (if (and (> pos 0) (char= (schar buffer (1- pos)) #\return)) |
|
434 |
- (decf pos)) |
|
435 |
- |
|
436 |
- (values pos count)))) |
|
447 |
+ (if* (= count 0) |
|
448 |
+ then nil ;; EOF |
|
449 |
+ else ;; Check for CR/LF combo |
|
450 |
+ (if (and (> pos 0) |
|
451 |
+ (char= (schar buffer (1- pos)) #\return)) |
|
452 |
+ (decf pos)) |
|
453 |
+ |
|
454 |
+ (values pos count))))) |
|
437 | 455 |
|
456 |
+(defun mime-unread-line (line end bytes) |
|
457 |
+ ;; This should never happen |
|
458 |
+ (if *mime-read-line-unread* |
|
459 |
+ (error "Unread buffer is full.")) |
|
460 |
+ (setf *mime-read-line-unread* |
|
461 |
+ (cons (subseq line 0 end) bytes))) |
|
462 |
+ |
|
463 |
+(eval-when (compile) |
|
464 |
+ (defconstant *parse-headers-line-len* 1024)) |
|
438 | 465 |
|
439 | 466 |
;; Returns: |
440 | 467 |
;; 1) headers alist |
... | ... |
@@ -445,10 +472,8 @@ |
445 | 472 |
(defun parse-headers (stream mbox) |
446 | 473 |
(declare (optimize (speed 3) (safety 0))) |
447 | 474 |
(let ((count 0) |
448 |
- (line (make-array 1024 :element-type 'character)) |
|
449 |
- headers |
|
450 |
- lastcons |
|
451 |
- current) |
|
475 |
+ (line (make-array #.*parse-headers-line-len* :element-type 'character)) |
|
476 |
+ headers lastcons current incomplete lastincomplete) |
|
452 | 477 |
(declare (fixnum count) |
453 | 478 |
(dynamic-extent line)) |
454 | 479 |
|
... | ... |
@@ -456,34 +481,48 @@ |
456 | 481 |
(multiple-value-bind (end bytes) |
457 | 482 |
(mime-read-line stream line) |
458 | 483 |
(declare (fixnum end)) |
459 |
- |
|
460 |
- (if (or (null end) |
|
461 |
- (and mbox (my-prefixp "From " line end))) |
|
462 |
- (return)) |
|
463 | 484 |
|
464 |
- (incf count bytes) |
|
485 |
+ (if (null end) ;; EOF |
|
486 |
+ (return)) |
|
465 | 487 |
|
466 |
- (if (blank-line-p line end) |
|
488 |
+ (setf incomplete (= end #.*parse-headers-line-len*)) |
|
489 |
+ |
|
490 |
+ (if (and mbox (not lastincomplete) (my-prefixp "From " line end)) |
|
467 | 491 |
(return)) |
468 | 492 |
|
469 |
- (if* (whitespace-char-p (schar line 0)) |
|
470 |
- then ;; Continuation line |
|
471 |
- (if (null current) |
|
472 |
- (return)) |
|
473 |
- |
|
474 |
- (let ((newcons (cons (subseq line 0 end) nil))) |
|
475 |
- (setf (cdr lastcons) newcons) |
|
476 |
- (setf lastcons newcons)) |
|
477 |
- |
|
478 |
- else ;; Fresh header line |
|
479 |
- (multiple-value-bind (name value) |
|
480 |
- (parse-header-line line end) |
|
481 |
- (if (null name) |
|
482 |
- (return)) |
|
483 |
- |
|
484 |
- (setf lastcons (cons value nil)) |
|
485 |
- (setf current (cons name lastcons)) |
|
486 |
- (push current headers))))) |
|
493 |
+ (incf count bytes) |
|
494 |
+ |
|
495 |
+ (cond |
|
496 |
+ (lastincomplete ;; rest of a long line |
|
497 |
+ (setf (car lastcons) |
|
498 |
+ (concatenate 'string (car lastcons) (subseq line 0 end)))) |
|
499 |
+ |
|
500 |
+ ((zerop end) ;; blank line |
|
501 |
+ (return)) |
|
502 |
+ |
|
503 |
+ ((whitespace-char-p (schar line 0)) ;; Continuation line |
|
504 |
+ (if (null current) ;; Malformed header line |
|
505 |
+ (return)) |
|
506 |
+ |
|
507 |
+ (let ((newcons (cons (subseq line 0 end) nil))) |
|
508 |
+ (setf (cdr lastcons) newcons) |
|
509 |
+ (setf lastcons newcons))) |
|
510 |
+ |
|
511 |
+ (t ;; Fresh header line |
|
512 |
+ (multiple-value-bind (name value) |
|
513 |
+ (parse-header-line line end) |
|
514 |
+ (when (null name) |
|
515 |
+ ;; Malformed header line. Unread it (so that it |
|
516 |
+ ;; will be treated as part of the body) and |
|
517 |
+ ;; consider the headers terminated. |
|
518 |
+ (mime-unread-line line end bytes) |
|
519 |
+ (return)) |
|
520 |
+ |
|
521 |
+ (setf lastcons (cons value nil)) |
|
522 |
+ (setf current (cons name lastcons)) |
|
523 |
+ (push current headers)))) |
|
524 |
+ |
|
525 |
+ (setf lastincomplete incomplete))) |
|
487 | 526 |
|
488 | 527 |
;; Finalize strings. |
489 | 528 |
(dolist (header headers) |
... | ... |
@@ -558,7 +597,7 @@ |
558 | 597 |
|
559 | 598 |
(incf pos bytes) |
560 | 599 |
|
561 |
- (when (my-prefixp delimiter line end) |
|
600 |
+ (when (and delimiter (my-prefixp delimiter line end)) |
|
562 | 601 |
(if* (my-prefixp close-delimiter line end) |
563 | 602 |
then (setf stop-reason :close-boundary) |
564 | 603 |
else (setf stop-reason nil)) |
... | ... |
@@ -14,7 +14,7 @@ |
14 | 14 |
;; merchantability or fitness for a particular purpose. See the GNU |
15 | 15 |
;; Lesser General Public License for more details. |
16 | 16 |
;; |
17 |
-;; $Id: mime-transfer-encoding.cl,v 1.9 2007/04/17 22:01:42 layer Exp $ |
|
17 |
+;; $Id: mime-transfer-encoding.cl,v 1.10 2007/05/31 23:13:08 dancy Exp $ |
|
18 | 18 |
|
19 | 19 |
(defpackage :net.post-office |
20 | 20 |
(:use #:lisp #:excl) |
... | ... |
@@ -28,6 +28,8 @@ |
28 | 28 |
#:base64-decode-stream |
29 | 29 |
#:qp-encode-stream |
30 | 30 |
#:qp-decode-stream |
31 |
+ #:qp-decode-usb8 |
|
32 |
+ #:qp-decode-string |
|
31 | 33 |
#:with-decoded-part-body-stream)) |
32 | 34 |
|
33 | 35 |
(in-package :net.post-office) |
... | ... |
@@ -126,7 +128,6 @@ |
126 | 128 |
(setf (aref arr 256) -2) |
127 | 129 |
arr)) |
128 | 130 |
|
129 |
- |
|
130 | 131 |
(defun qp-decode-stream (instream outstream &key count) |
131 | 132 |
(declare (optimize (speed 3))) |
132 | 133 |
|
... | ... |
@@ -184,6 +185,96 @@ |
184 | 185 |
|
185 | 186 |
t)))) |
186 | 187 |
|
188 |
+;; 'out' should be at least the size of 'in'. If it is nil, |
|
189 |
+;; a usb8 array will be allocated and used. It is okay if 'out' is the |
|
190 |
+;; same buffer as 'in'. |
|
191 |
+;; Returns: |
|
192 |
+;; 1) the supplied or allocated array |
|
193 |
+;; 2) the just past the last byte populated in the array. |
|
194 |
+(defun qp-decode-usb8 (in out &key (start1 0) (end1 (length in)) |
|
195 |
+ (start2 0) end2) |
|
196 |
+ (declare (optimize (speed 3)) |
|
197 |
+ ((simple-array (unsigned-byte 8) (*)) in out) |
|
198 |
+ (fixnum start1 end1 start2 end2)) |
|
199 |
+ |
|
200 |
+ (if (null out) |
|
201 |
+ (setf out (make-array (length in) :element-type '(unsigned-byte 8)))) |
|
202 |
+ |
|
203 |
+ (if (null end2) |
|
204 |
+ (setf end2 (length out))) |
|
205 |
+ |
|
206 |
+ (let ((count (- end1 start1))) |
|
207 |
+ (declare (fixnum count)) |
|
208 |
+ |
|
209 |
+ (if (< count 0) |
|
210 |
+ (error "start1 must be less than end1")) |
|
211 |
+ |
|
212 |
+ (if (> start2 end2) |
|
213 |
+ (error "start2 must be less than end2")) |
|
214 |
+ |
|
215 |
+ (if (< (the fixnum (- end2 start2)) count) |
|
216 |
+ (error "Not enough room in output array")) |
|
217 |
+ |
|
218 |
+ (macrolet ((unread (byte) |
|
219 |
+ (declare (ignore byte)) |
|
220 |
+ `(decf start1)) |
|
221 |
+ (get-byte (&key eof-value) |
|
222 |
+ `(if* (>= start1 end1) |
|
223 |
+ then ,eof-value |
|
224 |
+ else (prog1 (aref in start1) |
|
225 |
+ (incf start1)))) |
|
226 |
+ (out (byte) |
|
227 |
+ `(prog1 (setf (aref out start2) ,byte) |
|
228 |
+ (incf start2))) |
|
229 |
+ (eol-p (byte) |
|
230 |
+ `(or (eq ,byte 10) (eq ,byte 13)))) |
|
231 |
+ |
|
232 |
+ (let (byte) |
|
233 |
+ (while (setf byte (get-byte)) |
|
234 |
+ (if* (eq byte #.(char-code #\=)) |
|
235 |
+ then (let ((nextbyte (get-byte))) |
|
236 |
+ (if* (null nextbyte) |
|
237 |
+ then ;; stray equal sign. just dump and terminate. |
|
238 |
+ (out byte) |
|
239 |
+ (return)) |
|
240 |
+ (if* (eol-p nextbyte) |
|
241 |
+ then ;; soft line break. |
|
242 |
+ (if (eq nextbyte 13) ;; CR |
|
243 |
+ (setf nextbyte (get-byte))) |
|
244 |
+ (if (not (eq nextbyte 10)) ;; LF |
|
245 |
+ (unread nextbyte)) |
|
246 |
+ else ;; =XY encoding |
|
247 |
+ (let* ((byte3 (get-byte :eof-value 256)) |
|
248 |
+ (high (aref *qp-digit-values* nextbyte)) |
|
249 |
+ (low (aref *qp-digit-values* byte3)) |
|
250 |
+ (value (logior (the fixnum (ash high 4)) low))) |
|
251 |
+ (declare (fixnum byte3 high low value)) |
|
252 |
+ (if* (< value 0) |
|
253 |
+ then ;; Invalid or truncated encoding. just dump it. |
|
254 |
+ (out byte) |
|
255 |
+ (out nextbyte) |
|
256 |
+ (if* (eq low -2) ;; EOF |
|
257 |
+ then (return) |
|
258 |
+ else (out byte3)) |
|
259 |
+ else (out value))))) |
|
260 |
+ else (out byte))) |
|
261 |
+ |
|
262 |
+ (values out start2))))) |
|
263 |
+ |
|
264 |
+(defun qp-decode-string (string &key (start 0) (end (length string)) |
|
265 |
+ (return :string) |
|
266 |
+ (external-format :default)) |
|
267 |
+ (multiple-value-bind (vec len) |
|
268 |
+ (string-to-octets string :start start :end end :null-terminate nil |
|
269 |
+ :external-format :latin1) |
|
270 |
+ (multiple-value-setq (vec len) |
|
271 |
+ (qp-decode-usb8 vec vec :end1 len)) |
|
272 |
+ (ecase return |
|
273 |
+ (:string |
|
274 |
+ (octets-to-string vec :end len :external-format external-format)) |
|
275 |
+ (:usb8 |
|
276 |
+ (subseq vec 0 len))))) |
|
277 |
+ |
|
187 | 278 |
;; 'instream' must be positioned at the beginning of the part body |
188 | 279 |
;; by the caller beforehand. |
189 | 280 |
(defmacro with-decoded-part-body-stream ((sym part instream) &body body) |
... | ... |
@@ -14,7 +14,7 @@ |
14 | 14 |
;; merchantability or fitness for a particular purpose. See the GNU |
15 | 15 |
;; Lesser General Public License for more details. |
16 | 16 |
;; |
17 |
-;; $Id: rfc2822.cl,v 1.3 2007/04/17 22:01:42 layer Exp $ |
|
17 |
+;; $Id: rfc2822.cl,v 1.4 2007/05/31 23:13:08 dancy Exp $ |
|
18 | 18 |
|
19 | 19 |
#+(version= 8 0) |
20 | 20 |
(sys:defpatch "rfc2822" 0 |
... | ... |
@@ -98,33 +98,161 @@ domain. |
98 | 98 |
|# |
99 | 99 |
|
100 | 100 |
(eval-when (compile eval) |
101 |
- ;; dash at the end to avoid mistaking it for a character range |
|
102 |
- ;; indicator. |
|
103 |
- (defconstant *atext-chars* |
|
104 |
- "!#$%&'*+/0123456789=?ABCDEFGHIJKLMNOPQRSTUVWXYZ^_`abcdefghijklmnopqrstuvwxyz{|}~-") |
|
101 |
+ (defconstant *controls* "\\x0-\\x1f") |
|
105 | 102 |
|
106 |
- (defconstant *dot-atom* |
|
107 |
- (format nil "[~a]+(\\.[~a]+)*" *atext-chars* *atext-chars*)) |
|
103 |
+ (defconstant *specials* "()<>\\[\\]:;@\\,.\"") |
|
104 |
+ |
|
105 |
+ (defconstant *no-ws-ctl* "\\x1-\\x8\\xb-\\xc\\xe-\\x1f\\x7f") |
|
106 |
+ |
|
107 |
+ (defconstant *fws* "(?:(?:[ \\t]*\\r?\\n)?[ \\t]+)") |
|
108 | 108 |
|
109 |
- (defconstant *dotted-dot-atom* |
|
110 |
- (format nil "[~a]+(\\.[~a]+)+" *atext-chars* *atext-chars*)) |
|
109 |
+ (defconstant *text* "[^\\r\\n]") |
|
111 | 110 |
|
112 |
- (defvar *rfc822-dotted-domain-re* |
|
113 |
- (format nil "^(~a)(@(~a))?$" *dot-atom* *dotted-dot-atom*)) |
|
111 |
+ (defconstant *quoted-pair* (format nil "\\\\~a" *text*)) |
|
114 | 112 |
|
115 |
- (defvar *rfc822-re* (format nil "^(~a)(@(~a))?$" *dot-atom* *dot-atom*)) |
|
116 |
- ) |
|
113 |
+ (defconstant *ctext* "[^\\s()\\\\]") |
|
114 |
+ |
|
115 |
+ ;; 1 means (xx) |
|
116 |
+ ;; 2 means (xxx (yyy) zzz) |
|
117 |
+ (defconstant *max-comment-level* 2) |
|
118 |
+ |
|
119 |
+ (defparameter *ccontent nil) |
|
120 |
+ (defparameter *comment* nil) |
|
121 |
+ |
|
122 |
+ (dotimes (n *max-comment-level*) |
|
123 |
+ (if* (null *comment*) |
|
124 |
+ then (setf *ccontent* (format nil "(?:~a|~a)" *ctext* *quoted-pair*)) |
|
125 |
+ else (setf *ccontent* (format nil "(?:~a|~a|~a)" |
|
126 |
+ *ctext* *quoted-pair* *comment*))) |
|
127 |
+ |
|
128 |
+ (setf *comment* (format nil "\\((?:~a?~a)*~a?\\)" |
|
129 |
+ *fws* *ccontent* *fws*))) |
|
130 |
+ |
|
131 |
+ (defconstant *cfws* (format nil "(?:~a?~a)*(?:(?:~a?~a)|~a)" |
|
132 |
+ *fws* *comment* *fws* *comment* *fws*)) |
|
133 |
+ |
|
134 |
+ (defconstant *atext* |
|
135 |
+ (format nil "[^\\s~a~a]" *controls* *specials*)) |
|
136 |
+ |
|
137 |
+ (defconstant *atom* (format nil "~a?(~a+)~a?" *cfws* *atext* *cfws*)) |
|
138 |
+ |
|
139 |
+ (defconstant *dot-atom-text* (format nil "~a+(?:\\.~a+)*" *atext* *atext*)) |
|
140 |
+ |
|
141 |
+ (defconstant *dot-atom* (format nil "~a?(~a)~a?" |
|
142 |
+ *cfws* *dot-atom-text* *cfws*)) |
|
143 |
+ |
|
144 |
+ ;; no control chars, no backslash, no quote |
|
145 |
+ (defconstant *qtext* (format nil "[^~a\\\\\"]" *controls*)) |
|
146 |
+ |
|
147 |
+ (defconstant *qcontent* (format nil "~a|~a" *qtext* *quoted-pair*)) |
|
117 | 148 |
|
149 |
+ (defconstant *quoted-string* |
|
150 |
+ (format nil "~a?\"((?:~a?~a)*~a?)\"~a?" |
|
151 |
+ *cfws* *fws* *qcontent* *fws* *cfws*)) |
|
152 |
+ |
|
153 |
+ (defconstant *local-part* |
|
154 |
+ (format nil "(~a)|(~a)" *dot-atom* *quoted-string*)) |
|
155 |
+ |
|
156 |
+ ;; domain literals not supported. |
|
157 |
+ (defconstant *domain* *dot-atom*) |
|
158 |
+ |
|
159 |
+ (defconstant *addr-spec* (format nil "(~a)@(~a)" *local-part* *domain*)) |
|
160 |
+ |
|
161 |
+ (defconstant *angle-addr* (format nil "~a?<~a>~a?" |
|
162 |
+ *cfws* *addr-spec* *cfws*)) |
|
163 |
+ |
|
164 |
+ (defconstant *word* (format nil "(?:~a|~a)" *atom* *quoted-string*)) |
|
165 |
+ |
|
166 |
+ (defconstant *phrase* (format nil "~a+" *word*)) |
|
167 |
+ |
|
168 |
+ (defconstant *display-name* *phrase*) |
|
169 |
+ |
|
170 |
+ (defconstant *name-addr* (format nil "~a?~a" *display-name* *angle-addr*)) |
|
171 |
+ |
|
172 |
+ (defconstant *mailbox* (format nil "(?:~a|~a)" *name-addr* *addr-spec*)) |
|
173 |
+ |
|
174 |
+ (defconstant *mailbox-list* |
|
175 |
+ (format nil "(?:~a(?:,~a)*)" *mailbox* *mailbox*)) |
|
176 |
+ |
|
177 |
+ (defconstant *group* |
|
178 |
+ (format nil "~a:(?:~a|~a)?;~a?" *display-name* *mailbox-list* *cfws* |
|
179 |
+ *cfws*)) |
|
180 |
+ |
|
181 |
+ ;; More strict than the RFC. |
|
182 |
+ |
|
183 |
+ (defconstant *email-address-re* |
|
184 |
+ (format nil "^\\s*(~a)(?:@(~a))?\\s*$" *dot-atom-text* *dot-atom-text*)) |
|
185 |
+ |
|
186 |
+ ) |
|
187 |
+ |
|
118 | 188 |
(defun parse-email-address (string &key (require-domain t) |
119 | 189 |
(require-dotted-domain t)) |
120 |
- (multiple-value-bind (matched whole user dummy1 dummy2 domain) |
|
121 |
- (if* require-dotted-domain |
|
122 |
- then (match-re #.*rfc822-dotted-domain-re* string) |
|
123 |
- else (match-re #.*rfc822-re* string)) |
|
124 |
- (declare (ignore whole dummy1 dummy2)) |
|
125 |
- (if (or (not matched) (and require-domain (null domain))) |
|
126 |
- nil |
|
127 |
- (values user domain)))) |
|
190 |
+ (multiple-value-bind (matched x user domain) |
|
191 |
+ (match-re #.*email-address-re* string) |
|
192 |
+ (declare (ignore x)) |
|
193 |
+ (if* (or |
|
194 |
+ ;; Failure cases |
|
195 |
+ (not matched) |
|
196 |
+ (and require-domain (null domain)) |
|
197 |
+ (and require-dotted-domain domain (zerop (count #\. domain)))) |
|
198 |
+ then nil |
|
199 |
+ else (values user domain)))) |
|
200 |
+ |
|
201 |
+;; Returns a list of entries like so: |
|
202 |
+;; (:mailbox user domain display-name) |
|
203 |
+;; or |
|
204 |
+;; (:group display-name mailbox-list) |
|
205 |
+ |
|
206 |
+(defun extract-email-addresses (string &key (start 0) (end (length string)) |
|
207 |
+ (errorp t)) |
|
208 |
+ ) |
|
209 |
+ |
|
210 |
+(defmacro parse-common (re) |
|
211 |
+ (let ((matched (gensym)) |
|
212 |
+ (whole (gensym)) |
|
213 |
+ (inner (gensym))) |
|
214 |
+ (setf re (format nil "^~a" (symbol-value re))) |
|
215 |
+ `(multiple-value-bind (,matched ,whole, inner) |
|
216 |
+ (match-re ,re string :start start :end end :return :index) |
|
217 |
+ (when ,matched |
|
218 |
+ (values (subseq string (car ,inner) (cdr ,inner)) |
|
219 |
+ (cdr ,whole)))))) |
|
220 |
+ |
|
221 |
+;; Domain literals not supported |
|
222 |
+;; local-part @ domain ==> |
|
223 |
+;; dot-atom/quoted-string @ dot-atom |
|
224 |
+;; Optionally allows domain-less addrspecs. However, doing so |
|
225 |
+;; makes parsing ambiguous. |
|
226 |
+(defun parse-addr-spec (string start end require-domain) |
|
227 |
+ (declare (optimize (speed 3)) |
|
228 |
+ (fixnum start end)) |
|
229 |
+ (block nil |
|
230 |
+ (multiple-value-bind (local-part newpos) |
|
231 |
+ (parse-local-part string start end) |
|
232 |
+ (if (null local-part) |
|
233 |
+ (return)) |
|
234 |
+ (setf start newpos) |
|
235 |
+ (when (or (eq start end) |
|
236 |
+ (not (eq (char string start) #\@))) |
|
237 |
+ ;; no domain part. |
|
238 |
+ (if* require-domain |
|
239 |
+ then (return) |
|
240 |
+ else (return (values local-part nil start)))) |
|
241 |
+ (incf start) |
|
242 |
+ (multiple-value-bind (domain newpos) |
|
243 |
+ (parse-common *dot-atom*) |
|
244 |
+ (if domain |
|
245 |
+ (values local-part domain newpos)))))) |
|
246 |
+ |
|
247 |
+(defun parse-local-part (string &optional (start 0) (end (length string))) |
|
248 |
+ (multiple-value-bind (dot-atom newpos) |
|
249 |
+ (parse-common *dot-atom*) |
|
250 |
+ (if* dot-atom |
|
251 |
+ then (values dot-atom newpos) |
|
252 |
+ else (multiple-value-bind (quoted-string newpos) |
|
253 |
+ (parse-common *quoted-string*) |
|
254 |
+ (when quoted-string |
|
255 |
+ (values quoted-string newpos)))))) |
|
128 | 256 |
|
129 | 257 |
;; Ripped from maild:dns.cl and modified. |
130 | 258 |
|