texternal-format.lisp - clic - Clic is an command line interactive client for gopher written in Common LISP
(HTM) git clone git://bitreich.org/clic/ git://hg6vgqziawt5s4dj.onion/clic/
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) Tags
(DIR) LICENSE
---
texternal-format.lisp (17187B)
---
1 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
2 ;;; $Header: /usr/local/cvsrep/flexi-streams/external-format.lisp,v 1.24 2008/05/26 10:55:08 edi Exp $
3
4 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
5
6 ;;; Redistribution and use in source and binary forms, with or without
7 ;;; modification, are permitted provided that the following conditions
8 ;;; are met:
9
10 ;;; * Redistributions of source code must retain the above copyright
11 ;;; notice, this list of conditions and the following disclaimer.
12
13 ;;; * Redistributions in binary form must reproduce the above
14 ;;; copyright notice, this list of conditions and the following
15 ;;; disclaimer in the documentation and/or other materials
16 ;;; provided with the distribution.
17
18 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
19 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
22 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
24 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 (in-package :flexi-streams)
31
32 (defclass external-format ()
33 ((name :initarg :name
34 :reader external-format-name
35 :documentation "The name of the external format - a
36 keyword.")
37 (id :initarg :id
38 :initform nil
39 :reader external-format-id
40 :documentation "If the external format denotes a Windows
41 code page this ID specifies which one to use. Otherwise the
42 value is ignored \(and usually NIL).")
43 (little-endian :initarg :little-endian
44 :initform *default-little-endian*
45 :reader external-format-little-endian
46 :documentation "Whether multi-octet values are
47 read and written with the least significant octet first. For
48 8-bit encodings like :ISO-8859-1 this value is ignored.")
49 (eol-style :initarg :eol-style
50 :reader external-format-eol-style
51 :documentation "The character\(s) to or from which
52 a #\Newline will be translated - one of the keywords :CR, :LF,
53 or :CRLF."))
54 (:documentation "EXTERNAL-FORMAT objects are used to denote
55 encodings for flexi streams or for the string functions defined in
56 strings.lisp."))
57
58 (defmethod make-load-form ((thing external-format) &optional environment)
59 "Defines a way to reconstruct external formats. Needed for OpenMCL."
60 (make-load-form-saving-slots thing :environment environment))
61
62 (defclass flexi-cr-mixin ()
63 ()
64 (:documentation "A mixin for external-formats where the end-of-line
65 designator is #\Return."))
66
67 (defclass flexi-crlf-mixin ()
68 ()
69 (:documentation "A mixin for external-formats where the end-of-line
70 designator is the sequence #\Return #\Linefeed."))
71
72 (defclass flexi-8-bit-format (external-format)
73 ((encoding-hash :accessor external-format-encoding-hash)
74 (decoding-table :accessor external-format-decoding-table))
75 (:documentation "The class for all flexi streams which use an 8-bit
76 encoding and thus need additional slots for the encoding/decoding
77 tables."))
78
79 (defclass flexi-cr-8-bit-format (flexi-cr-mixin flexi-8-bit-format)
80 ()
81 (:documentation "Special class for external formats which use an
82 8-bit encoding /and/ have #\Return as the line-end character."))
83
84 (defclass flexi-crlf-8-bit-format (flexi-crlf-mixin flexi-8-bit-format)
85 ()
86 (:documentation "Special class for external formats which use an
87 8-bit encoding /and/ have the sequence #\Return #\Linefeed as the
88 line-end character."))
89
90 (defclass flexi-ascii-format (flexi-8-bit-format)
91 ()
92 (:documentation "Special class for external formats which use the
93 US-ASCII encoding."))
94
95 (defclass flexi-cr-ascii-format (flexi-cr-mixin flexi-ascii-format)
96 ()
97 (:documentation "Special class for external formats which use the
98 US-ASCII encoding /and/ have #\Return as the line-end character."))
99
100 (defclass flexi-crlf-ascii-format (flexi-crlf-mixin flexi-ascii-format)
101 ()
102 (:documentation "Special class for external formats which use the
103 US-ASCII encoding /and/ have the sequence #\Return #\Linefeed as the
104 line-end character."))
105
106 (defclass flexi-latin-1-format (flexi-8-bit-format)
107 ()
108 (:documentation "Special class for external formats which use the
109 ISO-8859-1 encoding."))
110
111 (defclass flexi-cr-latin-1-format (flexi-cr-mixin flexi-latin-1-format)
112 ()
113 (:documentation "Special class for external formats which use the
114 ISO-8859-1 encoding /and/ have #\Return as the line-end character."))
115
116 (defclass flexi-crlf-latin-1-format (flexi-crlf-mixin flexi-latin-1-format)
117 ()
118 (:documentation "Special class for external formats which use the
119 ISO-8859-1 encoding /and/ have the sequence #\Return #\Linefeed as the
120 line-end character."))
121
122 (defclass flexi-utf-32-format (external-format)
123 ()
124 (:documentation "Abstract class for external formats which use the
125 UTF-32 encoding."))
126
127 (defclass flexi-utf-32-le-format (flexi-utf-32-format)
128 ()
129 (:documentation "Special class for external formats which use the
130 UTF-32 encoding with little-endian byte ordering."))
131
132 (defclass flexi-cr-utf-32-le-format (flexi-cr-mixin flexi-utf-32-le-format)
133 ()
134 (:documentation "Special class for external formats which use the
135 UTF-32 encoding with little-endian byte ordering /and/ have #\Return
136 as the line-end character."))
137
138 (defclass flexi-crlf-utf-32-le-format (flexi-crlf-mixin flexi-utf-32-le-format)
139 ()
140 (:documentation "Special class for external formats which use the
141 UTF-32 encoding with little-endian byte ordering /and/ have the
142 sequence #\Return #\Linefeed as the line-end character."))
143
144 (defclass flexi-utf-32-be-format (flexi-utf-32-format)
145 ()
146 (:documentation "Special class for external formats which use the
147 UTF-32 encoding with big-endian byte ordering."))
148
149 (defclass flexi-cr-utf-32-be-format (flexi-cr-mixin flexi-utf-32-be-format)
150 ()
151 (:documentation "Special class for external formats which use the
152 UTF-32 encoding with big-endian byte ordering /and/ have #\Return as
153 the line-end character."))
154
155 (defclass flexi-crlf-utf-32-be-format (flexi-crlf-mixin flexi-utf-32-be-format)
156 ()
157 (:documentation "Special class for external formats which use the
158 the UTF-32 encoding with big-endian byte ordering /and/ have the
159 sequence #\Return #\Linefeed as the line-end character."))
160
161 (defclass flexi-utf-16-format (external-format)
162 ()
163 (:documentation "Abstract class for external formats which use the
164 UTF-16 encoding."))
165
166 (defclass flexi-utf-16-le-format (flexi-utf-16-format)
167 ()
168 (:documentation "Special class for external formats which use the
169 UTF-16 encoding with little-endian byte ordering."))
170
171 (defclass flexi-cr-utf-16-le-format (flexi-cr-mixin flexi-utf-16-le-format)
172 ()
173 (:documentation "Special class for external formats which use the
174 UTF-16 encoding with little-endian byte ordering /and/ have #\Return
175 as the line-end character."))
176
177 (defclass flexi-crlf-utf-16-le-format (flexi-crlf-mixin flexi-utf-16-le-format)
178 ()
179 (:documentation "Special class for external formats which use the
180 UTF-16 encoding with little-endian byte ordering /and/ have the
181 sequence #\Return #\Linefeed as the line-end character."))
182
183 (defclass flexi-utf-16-be-format (flexi-utf-16-format)
184 ()
185 (:documentation "Special class for external formats which use the
186 UTF-16 encoding with big-endian byte ordering."))
187
188 (defclass flexi-cr-utf-16-be-format (flexi-cr-mixin flexi-utf-16-be-format)
189 ()
190 (:documentation "Special class for external formats which use the
191 UTF-16 encoding with big-endian byte ordering /and/ have #\Return as
192 the line-end character."))
193
194 (defclass flexi-crlf-utf-16-be-format (flexi-crlf-mixin flexi-utf-16-be-format)
195 ()
196 (:documentation "Special class for external formats which use the
197 UTF-16 encoding with big-endian byte ordering /and/ have the sequence
198 #\Return #\Linefeed as the line-end character."))
199
200 (defclass flexi-utf-8-format (external-format)
201 ()
202 (:documentation "Special class for external formats which use the
203 UTF-8 encoding."))
204
205 (defclass flexi-cr-utf-8-format (flexi-cr-mixin flexi-utf-8-format)
206 ()
207 (:documentation "Special class for external formats which use the
208 UTF-8 encoding /and/ have #\Return as the line-end character."))
209
210 (defclass flexi-crlf-utf-8-format (flexi-crlf-mixin flexi-utf-8-format)
211 ()
212 (:documentation "Special class for external formats which use the
213 UTF-8 encoding /and/ have the sequence #\Return #\Linefeed as the
214 line-end character."))
215
216 (defmethod initialize-instance :after ((external-format flexi-8-bit-format) &rest initargs)
217 "Sets the fixed encoding/decoding tables for this particular
218 external format."
219 (declare #.*standard-optimize-settings*)
220 (declare (ignore initargs))
221 (with-accessors ((encoding-hash external-format-encoding-hash)
222 (decoding-table external-format-decoding-table)
223 (name external-format-name)
224 (id external-format-id))
225 external-format
226 (multiple-value-setq (encoding-hash decoding-table)
227 (cond ((ascii-name-p name)
228 (values +ascii-hash+ +ascii-table+))
229 ((koi8-r-name-p name)
230 (values +koi8-r-hash+ +koi8-r-table+))
231 ((iso-8859-name-p name)
232 (values (cdr (assoc name +iso-8859-hashes+ :test #'eq))
233 (cdr (assoc name +iso-8859-tables+ :test #'eq))))
234 ((code-page-name-p name)
235 (values (cdr (assoc id +code-page-hashes+))
236 (cdr (assoc id +code-page-tables+))))))))
237
238 (defun external-format-class-name (real-name &key eol-style little-endian id)
239 "Given the initargs for a general external format returns the name
240 \(a symbol) of the most specific subclass matching these arguments."
241 (declare #.*standard-optimize-settings*)
242 (declare (ignore id))
243 (cond ((ascii-name-p real-name)
244 (ecase eol-style
245 (:lf 'flexi-ascii-format)
246 (:cr 'flexi-cr-ascii-format)
247 (:crlf 'flexi-crlf-ascii-format)))
248 ((eq real-name :iso-8859-1)
249 (ecase eol-style
250 (:lf 'flexi-latin-1-format)
251 (:cr 'flexi-cr-latin-1-format)
252 (:crlf 'flexi-crlf-latin-1-format)))
253 ((or (koi8-r-name-p real-name)
254 (iso-8859-name-p real-name)
255 (code-page-name-p real-name))
256 (ecase eol-style
257 (:lf 'flexi-8-bit-format)
258 (:cr 'flexi-cr-8-bit-format)
259 (:crlf 'flexi-crlf-8-bit-format)))
260 (t (ecase real-name
261 (:utf-8 (ecase eol-style
262 (:lf 'flexi-utf-8-format)
263 (:cr 'flexi-cr-utf-8-format)
264 (:crlf 'flexi-crlf-utf-8-format)))
265 (:utf-16 (ecase eol-style
266 (:lf (if little-endian
267 'flexi-utf-16-le-format
268 'flexi-utf-16-be-format))
269 (:cr (if little-endian
270 'flexi-cr-utf-16-le-format
271 'flexi-cr-utf-16-be-format))
272 (:crlf (if little-endian
273 'flexi-crlf-utf-16-le-format
274 'flexi-crlf-utf-16-be-format))))
275 (:utf-32 (ecase eol-style
276 (:lf (if little-endian
277 'flexi-utf-32-le-format
278 'flexi-utf-32-be-format))
279 (:cr (if little-endian
280 'flexi-cr-utf-32-le-format
281 'flexi-cr-utf-32-be-format))
282 (:crlf (if little-endian
283 'flexi-crlf-utf-32-le-format
284 'flexi-crlf-utf-32-be-format))))))))
285
286 (defun make-external-format% (name &key (little-endian *default-little-endian*)
287 id eol-style)
288 "Used internally by MAKE-EXTERNAL-FORMAT to default some of the
289 keywords arguments and to determine the right subclass of
290 EXTERNAL-FORMAT."
291 (declare #.*standard-optimize-settings*)
292 (let* ((real-name (normalize-external-format-name name))
293 (initargs
294 (cond ((or (iso-8859-name-p real-name)
295 (koi8-r-name-p real-name)
296 (ascii-name-p real-name))
297 (list :eol-style (or eol-style *default-eol-style*)))
298 ((code-page-name-p real-name)
299 (list :id (or (known-code-page-id-p id)
300 (error 'external-format-error
301 :format-control "Unknown code page ID ~S"
302 :format-arguments (list id)))
303 ;; default EOL style for Windows code pages is :CRLF
304 :eol-style (or eol-style :crlf)))
305 (t (list :eol-style (or eol-style *default-eol-style*)
306 :little-endian little-endian)))))
307 (apply #'make-instance (apply #'external-format-class-name real-name initargs)
308 :name real-name
309 initargs)))
310
311 (defun make-external-format (name &rest args
312 &key (little-endian *default-little-endian*)
313 id eol-style)
314 "Creates and returns an external format object as specified.
315 NAME is a keyword like :LATIN1 or :UTF-8, LITTLE-ENDIAN specifies
316 the `endianess' of the external format and is ignored for 8-bit
317 encodings, EOL-STYLE is one of the keywords :CR, :LF, or :CRLF
318 which denote the end-of-line character \(sequence), ID is the ID
319 of a Windows code page \(and ignored for other encodings)."
320 (declare #.*standard-optimize-settings*)
321 ;; the keyword arguments are only there for arglist display in the IDE
322 (declare (ignore id little-endian))
323 (let ((shortcut-args (cdr (assoc name +shortcut-map+ :test #'string-equal))))
324 (cond (shortcut-args
325 (apply #'make-external-format%
326 (append shortcut-args
327 `(:eol-style ,eol-style))))
328 (t (apply #'make-external-format% name args)))))
329
330 (defun maybe-convert-external-format (external-format)
331 "Given an external format designator \(a keyword, a list, or an
332 EXTERNAL-FORMAT object) returns the corresponding EXTERNAL-FORMAT
333 object."
334 (declare #.*standard-optimize-settings*)
335 (typecase external-format
336 (symbol (make-external-format external-format))
337 (list (apply #'make-external-format external-format))
338 (otherwise external-format)))
339
340 (defun external-format-equal (ef1 ef2)
341 "Checks whether two EXTERNAL-FORMAT objects denote the same encoding."
342 (declare #.*standard-optimize-settings*)
343 (let* ((name1 (external-format-name ef1))
344 (code-page-name-p (code-page-name-p name1)))
345 ;; they must habe the same canonical name
346 (and (eq name1
347 (external-format-name ef2))
348 ;; if both are code pages the IDs must be the same
349 (or (not code-page-name-p)
350 (eql (external-format-id ef1)
351 (external-format-id ef2)))
352 ;; for non-8-bit encodings the endianess must be the same
353 (or code-page-name-p
354 (ascii-name-p name1)
355 (koi8-r-name-p name1)
356 (iso-8859-name-p name1)
357 (eq name1 :utf-8)
358 (eq (not (external-format-little-endian ef1))
359 (not (external-format-little-endian ef2))))
360 ;; the EOL style must also be the same
361 (eq (external-format-eol-style ef1)
362 (external-format-eol-style ef2)))))
363
364 (defun normalize-external-format (external-format)
365 "Returns a list which is a `normalized' representation of the
366 external format EXTERNAL-FORMAT. Used internally by PRINT-OBJECT, for
367 example. Basically, the result is an argument list that can be fed
368 back to MAKE-EXTERNAL-FORMAT to create an equivalent object."
369 (declare #.*standard-optimize-settings*)
370 (let ((name (external-format-name external-format))
371 (eol-style (external-format-eol-style external-format)))
372 (cond ((or (ascii-name-p name)
373 (koi8-r-name-p name)
374 (iso-8859-name-p name)
375 (eq name :utf-8))
376 (list name :eol-style eol-style))
377 ((code-page-name-p name)
378 (list name
379 :id (external-format-id external-format)
380 :eol-style eol-style))
381 (t (list name
382 :eol-style eol-style
383 :little-endian (external-format-little-endian external-format))))))
384
385 (defmethod print-object ((object external-format) stream)
386 "How an EXTERNAL-FORMAT object is rendered. Uses
387 NORMALIZE-EXTERNAL-FORMAT."
388 (print-unreadable-object (object stream :type t :identity t)
389 (prin1 (normalize-external-format object) stream)))