Browse code
rfe7210
dancy authored on 05/06/2007 17:15:18
Showing 2 changed files
Showing 2 changed files
... | ... |
@@ -1,4 +1,4 @@ |
1 |
-;; -*- mode: common-lisp; package: net.post-office -*- |
|
1 |
+;; -*- mode: common-lisp; package: net.mail -*- |
|
2 | 2 |
;; |
3 | 3 |
;; copyright (c) 1999-2002 Franz Inc, Berkeley, CA - All rights reserved. |
4 | 4 |
;; copyright (c) 2002-2007 Franz Inc, Oakland, CA - All rights reserved. |
... | ... |
@@ -14,7 +14,7 @@ |
14 | 14 |
;; merchantability or fitness for a particular purpose. See the GNU |
15 | 15 |
;; Lesser General Public License for more details. |
16 | 16 |
;; |
17 |
-;; $Id: rfc2822.cl,v 1.5 2007/06/01 16:21:38 dancy Exp $ |
|
17 |
+;; $Id: rfc2822.cl,v 1.6 2007/06/05 17:15:18 dancy Exp $ |
|
18 | 18 |
|
19 | 19 |
#+(version= 8 0) |
20 | 20 |
(sys:defpatch "rfc2822" 0 |
... | ... |
@@ -22,9 +22,16 @@ |
22 | 22 |
:type :system |
23 | 23 |
:post-loadable t) |
24 | 24 |
|
25 |
+#+(version= 8 1 :beta) |
|
26 |
+(sys:defpatch "rfc2822" 0 |
|
27 |
+ "v0: New extract-email-addresses function." |
|
28 |
+ :type :system |
|
29 |
+ :post-loadable t) |
|
30 |
+ |
|
25 | 31 |
(defpackage :net.mail |
26 | 32 |
(:use #:lisp #:excl) |
27 | 33 |
(:export #:parse-email-address |
34 |
+ #:extract-email-addresses |
|
28 | 35 |
#:valid-email-domain-p)) |
29 | 36 |
|
30 | 37 |
(in-package :net.mail) |
... | ... |
@@ -102,86 +109,15 @@ domain. |
102 | 109 |
|
103 | 110 |
(defconstant *specials* "()<>\\[\\]:;@\\,.\"") |
104 | 111 |
|
105 |
- (defconstant *no-ws-ctl* "\\x1-\\x8\\xb-\\xc\\xe-\\x1f\\x7f") |
|
106 |
- |
|
107 |
- (defconstant *fws* "(?:(?:[ \\t]*\\r?\\n)?[ \\t]+)") |
|
108 |
- |
|
109 |
- (defconstant *text* "[^\\r\\n]") |
|
110 |
- |
|
111 |
- (defconstant *quoted-pair* (format nil "\\\\~a" *text*)) |
|
112 |
- |
|
113 |
- (defconstant *ctext* "[^\\s()\\\\]") |
|
114 |
- |
|
115 |
- ;; 1 means (xx) |
|
116 |
- ;; 2 means (xxx (yyy) zzz) |
|
117 |
- (defconstant *max-comment-level* 2) |
|
118 |
- |
|
119 |
- (defparameter *ccontent nil) |
|
120 |
- (defparameter *comment* nil) |
|
121 |
- |
|
122 |
- (dotimes (n *max-comment-level*) |
|
123 |
- (if* (null *comment*) |
|
124 |
- then (setf *ccontent* (format nil "(?:~a|~a)" *ctext* *quoted-pair*)) |
|
125 |
- else (setf *ccontent* (format nil "(?:~a|~a|~a)" |
|
126 |
- *ctext* *quoted-pair* *comment*))) |
|
127 |
- |
|
128 |
- (setf *comment* (format nil "\\((?:~a?~a)*~a?\\)" |
|
129 |
- *fws* *ccontent* *fws*))) |
|
130 |
- |
|
131 |
- (defconstant *cfws* (format nil "(?:~a?~a)*(?:(?:~a?~a)|~a)" |
|
132 |
- *fws* *comment* *fws* *comment* *fws*)) |
|
133 |
- |
|
134 | 112 |
(defconstant *atext* |
135 | 113 |
(format nil "[^\\s~a~a]" *controls* *specials*)) |
136 |
- |
|
137 |
- (defconstant *atom* (format nil "~a?(~a+)~a?" *cfws* *atext* *cfws*)) |
|
138 |
- |
|
139 |
- (defconstant *dot-atom-text* (format nil "~a+(?:\\.~a+)*" *atext* *atext*)) |
|
140 |
- |
|
141 |
- (defconstant *dot-atom* (format nil "~a?(~a)~a?" |
|
142 |
- *cfws* *dot-atom-text* *cfws*)) |
|
143 |
- |
|
144 |
- ;; no control chars, no backslash, no quote |
|
145 |
- (defconstant *qtext* (format nil "[^~a\\\\\"]" *controls*)) |
|
146 |
- |
|
147 |
- (defconstant *qcontent* (format nil "~a|~a" *qtext* *quoted-pair*)) |
|
148 | 114 |
|
149 |
- (defconstant *quoted-string* |
|
150 |
- (format nil "~a?\"((?:~a?~a)*~a?)\"~a?" |
|
151 |
- *cfws* *fws* *qcontent* *fws* *cfws*)) |
|
152 |
- |
|
153 |
- (defconstant *local-part* |
|
154 |
- (format nil "(~a)|(~a)" *dot-atom* *quoted-string*)) |
|
155 |
- |
|
156 |
- (defconstant *local-part-x* |
|
157 |
- (format nil "^(?:~a)|(?:~a)" *dot-atom* *quoted-string*)) |
|
158 |
- |
|
159 |
- ;; domain literals not supported. |
|
160 |
- (defconstant *domain* *dot-atom*) |
|
161 |
- |
|
162 |
- (defconstant *addr-spec* (format nil "(~a)@(~a)" *local-part* *domain*)) |
|
163 |
- |
|
164 |
- (defconstant *angle-addr* (format nil "~a?<~a>~a?" |
|
165 |
- *cfws* *addr-spec* *cfws*)) |
|
166 |
- |
|
167 |
- (defconstant *word* (format nil "(?:~a|~a)" *atom* *quoted-string*)) |
|
168 |
- |
|
169 |
- (defconstant *phrase* (format nil "~a+" *word*)) |
|
170 |
- |
|
171 |
- (defconstant *display-name* *phrase*) |
|
172 |
- |
|
173 |
- (defconstant *name-addr* (format nil "~a?~a" *display-name* *angle-addr*)) |
|
174 |
- |
|
175 |
- (defconstant *mailbox* (format nil "(?:~a|~a)" *name-addr* *addr-spec*)) |
|
176 |
- |
|
177 |
- (defconstant *mailbox-list* |
|
178 |
- (format nil "(?:~a(?:,~a)*)" *mailbox* *mailbox*)) |
|
115 |
+ (defconstant *atom* (format nil "^~a+" *atext*)) |
|
179 | 116 |
|
180 |
- (defconstant *group* |
|
181 |
- (format nil "~a:(?:~a|~a)?;~a?" *display-name* *mailbox-list* *cfws* |
|
182 |
- *cfws*)) |
|
117 |
+ (defconstant *dot-atom-text* (format nil "~a+(?:\\.~a+)*" *atext* *atext*)) |
|
183 | 118 |
|
184 |
- ;; More strict than the RFC. |
|
119 |
+ ;; More strict than the RFC, but good for verifying syntax of email |
|
120 |
+ ;; addresses that a user supplies. |
|
185 | 121 |
|
186 | 122 |
(defconstant *email-address-re* |
187 | 123 |
(format nil "^\\s*(~a)(?:@(~a))?\\s*$" *dot-atom-text* *dot-atom-text*)) |
... | ... |
@@ -201,6 +137,284 @@ domain. |
201 | 137 |
then nil |
202 | 138 |
else (values user domain)))) |
203 | 139 |
|
140 |
+;; Returns a list of entries like so: |
|
141 |
+;; (:mailbox display-name user domain) |
|
142 |
+;; or |
|
143 |
+;; (:group display-name mailbox-list) |
|
144 |
+ |
|
145 |
+(defun extract-email-addresses (string &key (start 0) (end (length string)) |
|
146 |
+ (require-domain t) (errorp t)) |
|
147 |
+ (declare (optimize (speed 3)) |
|
148 |
+ (fixnum start end)) |
|
149 |
+ (with-underlying-simple-vector (string string disp) |
|
150 |
+ (declare (simple-string string) |
|
151 |
+ (fixnum disp)) |
|
152 |
+ (incf start disp) |
|
153 |
+ (incf end disp) |
|
154 |
+ |
|
155 |
+ ;; Unfold. |
|
156 |
+ (when (match-re "\\r?\\n\\s" string :start start :end end) |
|
157 |
+ (setf string (replace-re string "\\r?\\n\\s" " " |
|
158 |
+ :start start :end end)) |
|
159 |
+ (setf start 0) |
|
160 |
+ (setf end (length string))) |
|
161 |
+ |
|
162 |
+ (let ((res |
|
163 |
+ (catch 'syntax-error |
|
164 |
+ (parse-address-list string start end require-domain)))) |
|
165 |
+ (if* (stringp res) |
|
166 |
+ then (if errorp (error res)) |
|
167 |
+ elseif (null res) |
|
168 |
+ then (if errorp |
|
169 |
+ (error "Failed to parse: ~s" (subseq string start end))) |
|
170 |
+ nil |
|
171 |
+ else res)))) |
|
172 |
+ |
|
173 |
+(macrolet ((parse-special (char skip-ws) |
|
174 |
+ `(multiple-value-bind (type value newpos) |
|
175 |
+ (rfc2822-lex string start end ,skip-ws) |
|
176 |
+ (declare (ignore type)) |
|
177 |
+ (when (eq value ,char) |
|
178 |
+ (setf start newpos))))) |
|
179 |
+ |
|
180 |
+ ;; Supports obsolete format which allows for null members in the list. |
|
181 |
+ (defun parse-address-list (string start end require-domain) |
|
182 |
+ (let (res) |
|
183 |
+ (loop |
|
184 |
+ (while (parse-special #\, t)) |
|
185 |
+ (multiple-value-bind (addr newpos) |
|
186 |
+ (parse-address string start end require-domain) |
|
187 |
+ (if (null addr) |
|
188 |
+ (return)) |
|
189 |
+ (setf start newpos) |
|
190 |
+ (push addr res))) |
|
191 |
+ (values (nreverse res) start))) |
|
192 |
+ |
|
193 |
+ (defun parse-address (string start end require-domain) |
|
194 |
+ (multiple-value-bind (mb newpos) |
|
195 |
+ (parse-mailbox string start end require-domain) |
|
196 |
+ (if* mb |
|
197 |
+ then (values mb newpos) |
|
198 |
+ else (parse-group string start end require-domain)))) |
|
199 |
+ |
|
200 |
+ |
|
201 |
+ (defun parse-mailbox (string start end require-domain) |
|
202 |
+ (multiple-value-bind (ok display-name localpart domain newpos) |
|
203 |
+ (parse-name-addr string start end require-domain) |
|
204 |
+ (if ok |
|
205 |
+ (return-from parse-mailbox |
|
206 |
+ (values |
|
207 |
+ (list :mailbox display-name localpart domain) |
|
208 |
+ newpos)))) |
|
209 |
+ (multiple-value-bind (localpart domain newpos) |
|
210 |
+ (parse-addr-spec string start end require-domain) |
|
211 |
+ (when localpart |
|
212 |
+ (setf start newpos) |
|
213 |
+ ;; Check for a trailing comment and use that as the display name |
|
214 |
+ (multiple-value-bind (display-name newpos) |
|
215 |
+ (grab-next-comment string start end) |
|
216 |
+ (if display-name |
|
217 |
+ (setf start newpos)) |
|
218 |
+ (values |
|
219 |
+ (list :mailbox display-name localpart domain) |
|
220 |
+ start))))) |
|
221 |
+ |
|
222 |
+ (defun grab-next-comment (string start end) |
|
223 |
+ (loop |
|
224 |
+ (multiple-value-bind (type value newpos) |
|
225 |
+ (rfc2822-lex string start end nil) |
|
226 |
+ (if (eq type :comment) |
|
227 |
+ (return (values (replace-re value "^\\((.*)\\)$" "\\1") newpos))) |
|
228 |
+ (if* (eq type :wsp) |
|
229 |
+ then (setf start newpos) |
|
230 |
+ else (return))))) |
|
231 |
+ |
|
232 |
+ (defun parse-group (string start end require-domain) |
|
233 |
+ (multiple-value-bind (display-name newpos) |
|
234 |
+ (parse-phrase string start end) |
|
235 |
+ (when display-name |
|
236 |
+ (setf start newpos) |
|
237 |
+ (when (parse-special #\: t) |
|
238 |
+ (multiple-value-bind (mailbox-list newpos) |
|
239 |
+ (parse-mailbox-list string start end require-domain) |
|
240 |
+ (setf start newpos) |
|
241 |
+ (when (parse-special #\; t) |
|
242 |
+ (values :group mailbox-list newpos))))))) |
|
243 |
+ |
|
244 |
+ (defun parse-mailbox-list (string start end require-domain) |
|
245 |
+ (let (res) |
|
246 |
+ (loop |
|
247 |
+ (multiple-value-bind (mailbox newpos) |
|
248 |
+ (parse-mailbox string start end require-domain) |
|
249 |
+ (if (null mailbox) |
|
250 |
+ (return)) |
|
251 |
+ (push mailbox res) |
|
252 |
+ (setf start newpos) |
|
253 |
+ (if (not (parse-special #\, t)) |
|
254 |
+ (return)))) |
|
255 |
+ (values (nreverse res) start))) |
|
256 |
+ |
|
257 |
+ (defun parse-name-addr (string start end require-domain) |
|
258 |
+ (multiple-value-bind (display-name newpos) |
|
259 |
+ (parse-phrase string start end) |
|
260 |
+ (if display-name |
|
261 |
+ (setf start newpos)) |
|
262 |
+ (multiple-value-bind (localpart domain newpos) |
|
263 |
+ (parse-angle-addr string start end require-domain) |
|
264 |
+ (when localpart |
|
265 |
+ (values t display-name localpart domain newpos))))) |
|
266 |
+ |
|
267 |
+ ;; This is obs-phrase, which is seen often. For example: |
|
268 |
+ ;; From: Mr. T <mr.t@pitythefool.com> |
|
269 |
+ (defun parse-phrase (string start end) |
|
270 |
+ (let ((first t) |
|
271 |
+ res type value newpos) |
|
272 |
+ (loop |
|
273 |
+ (multiple-value-setq (type value newpos) |
|
274 |
+ (rfc2822-lex string start end first)) |
|
275 |
+ (if (null type) |
|
276 |
+ nil) |
|
277 |
+ (if* (or (eq type :atom) |
|
278 |
+ (eq type :quoted-string) |
|
279 |
+ (and (not first) (or (eq value #\.) (eq type :wsp)))) |
|
280 |
+ then (push value res) |
|
281 |
+ (setf first nil) |
|
282 |
+ (setf start newpos) |
|
283 |
+ else (return))) |
|
284 |
+ (if (and res (match-re "^\\s" (first res))) |
|
285 |
+ (pop res)) |
|
286 |
+ (if res |
|
287 |
+ (values (list-to-delimited-string (nreverse res) "") start)))) |
|
288 |
+ |
|
289 |
+ (defun parse-angle-addr (string start end require-domain) |
|
290 |
+ (when (parse-special #\< t) |
|
291 |
+ (multiple-value-bind (localpart domain newpos) |
|
292 |
+ (parse-addr-spec string start end require-domain) |
|
293 |
+ (setf start newpos) |
|
294 |
+ (when (and localpart (parse-special #\> t)) |
|
295 |
+ (values localpart domain start))))) |
|
296 |
+ |
|
297 |
+ (defun parse-addr-spec (string start end require-domain) |
|
298 |
+ (multiple-value-bind (localpart newpos) |
|
299 |
+ (parse-local-part string start end) |
|
300 |
+ (when localpart |
|
301 |
+ (setf start newpos) |
|
302 |
+ (when (not (parse-special #\@ t)) |
|
303 |
+ (if* require-domain |
|
304 |
+ then (return-from parse-addr-spec) |
|
305 |
+ else (return-from parse-addr-spec |
|
306 |
+ (values localpart nil start)))) |
|
307 |
+ (multiple-value-bind (domain newpos) |
|
308 |
+ (parse-dot-atom string start end) |
|
309 |
+ (when domain |
|
310 |
+ (values localpart domain newpos))))))) |
|
311 |
+ |
|
312 |
+(defun parse-local-part (string start end) |
|
313 |
+ (multiple-value-bind (type value newpos) |
|
314 |
+ (rfc2822-lex string start end t) |
|
315 |
+ (if* (eq type :quoted-string) |
|
316 |
+ then (values value newpos) |
|
317 |
+ elseif (eq type :atom) |
|
318 |
+ then (parse-dot-atom string start end)))) |
|
319 |
+ |
|
320 |
+(defun parse-dot-atom (string start end) |
|
321 |
+ (let ((first t) |
|
322 |
+ res) |
|
323 |
+ (loop |
|
324 |
+ (multiple-value-bind (type value newpos) |
|
325 |
+ (rfc2822-lex string start end first) |
|
326 |
+ (setf first nil) |
|
327 |
+ (if (null type) |
|
328 |
+ (return)) |
|
329 |
+ (if* (eq type :atom) |
|
330 |
+ then (push value res) |
|
331 |
+ elseif (not (eq value #\.)) |
|
332 |
+ then (return)) |
|
333 |
+ (setf start newpos))) |
|
334 |
+ (if res |
|
335 |
+ (values (list-to-delimited-string (nreverse res) #\.) start)))) |
|
336 |
+ |
|
337 |
+(eval-when (compile) |
|
338 |
+ (defconstant *max-comment-nesting* 3) |
|
339 |
+ |
|
340 |
+ (defparameter *cchar* "(?:[^()\\\\]|\\\\.)") |
|
341 |
+ (defparameter *comment* nil) |
|
342 |
+ |
|
343 |
+ (dotimes (n *max-comment-nesting*) |
|
344 |
+ (if* *comment* |
|
345 |
+ then (setf *comment* (format nil "(?:\\((?:~a|~a)*\\))" |
|
346 |
+ *cchar* *comment*)) |
|
347 |
+ else (setf *comment* (format nil "(?:\\(~a*\\))" *cchar*)))) |
|
348 |
+ |
|
349 |
+ (setf *comment* (format nil "^~a" *comment*))) |
|
350 |
+ |
|
351 |
+(defun rfc2822-lex (string start end skip-ws) |
|
352 |
+ (declare (optimize (speed 3)) |
|
353 |
+ (simple-string string) |
|
354 |
+ (fixnum start end)) |
|
355 |
+ (when (< start end) |
|
356 |
+ (let ((char (schar string start))) |
|
357 |
+ (if* (eq char #\") |
|
358 |
+ then ;; quoted string. |
|
359 |
+ (multiple-value-bind (matched whole) |
|
360 |
+ (match-re "^\"((?:[^\\\\\"]|\\\\.)*)\"" string |
|
361 |
+ :start start :end end |
|
362 |
+ :return :index) |
|
363 |
+ (if (not matched) |
|
364 |
+ (throw 'syntax-error "Unterminated quoted string")) |
|
365 |
+ (values :quoted-string |
|
366 |
+ (subseq string (car whole) (cdr whole)) |
|
367 |
+ (cdr whole))) |
|
368 |
+ elseif (or (eq char #\space) (eq char #\tab)) |
|
369 |
+ then ;; whitespace |
|
370 |
+ (multiple-value-bind (x match) |
|
371 |
+ (match-re "^\\s+" string |
|
372 |
+ :start start :end end :return :index) |
|
373 |
+ (declare (ignore x)) |
|
374 |
+ (if* skip-ws |
|
375 |
+ then (rfc2822-lex string (cdr match) end t) |
|
376 |
+ else (values :wsp |
|
377 |
+ (subseq string (car match) (cdr match)) |
|
378 |
+ (cdr match)))) |
|
379 |
+ elseif (eq char #\() |
|
380 |
+ then ;; comment |
|
381 |
+ (multiple-value-bind (matched whole) |
|
382 |
+ (match-re #.*comment* string |
|
383 |
+ :start start :end end :return :index) |
|
384 |
+ (if (not matched) |
|
385 |
+ (throw 'syntax-error |
|
386 |
+ "Unterminated comment or nesting too deep")) |
|
387 |
+ (if* skip-ws |
|
388 |
+ then (rfc2822-lex string (cdr whole) end t) |
|
389 |
+ else (values :comment |
|
390 |
+ (subseq string (car whole) (cdr whole)) |
|
391 |
+ (cdr whole)))) |
|
392 |
+ else (multiple-value-bind (matched whole) |
|
393 |
+ (match-re *atom* string :start start :end end |
|
394 |
+ :return :index) |
|
395 |
+ (if* (not matched) |
|
396 |
+ then ;; must be a special |
|
397 |
+ (values :special |
|
398 |
+ char |
|
399 |
+ (1+ start)) |
|
400 |
+ else ;; atom |
|
401 |
+ (values :atom |
|
402 |
+ (subseq string (car whole) (cdr whole)) |
|
403 |
+ (cdr whole)))))))) |
|
404 |
+ |
|
405 |
+#+ignore |
|
406 |
+(defun test () |
|
407 |
+ (dolist (file (command-output "find ~/mail/ -name \"[0-9][0-9]*\"")) |
|
408 |
+ (with-open-file (f file) |
|
409 |
+ (let* ((part (parse-mime-structure f)) |
|
410 |
+ (hdrs (mime-part-headers part))) |
|
411 |
+ (dolist (type '("From" "To" "Cc")) |
|
412 |
+ (let ((hdr (cdr (assoc type hdrs :test #'equalp)))) |
|
413 |
+ (when hdr |
|
414 |
+ (if (null (extract-email-addresses hdr :require-domain nil |
|
415 |
+ :errorp nil)) |
|
416 |
+ (format t "Failed to parse: ~s~%" hdr))))))))) |
|
417 |
+ |
|
204 | 418 |
;; Ripped from maild:dns.cl and modified. |
205 | 419 |
|
206 | 420 |
(eval-when (compile load eval) |