encode.lisp - clic - Clic is an command line interactive client for gopher written in Common LISP
(HTM) git clone git://bitreich.org/clic/ git://enlrupgkhuxnvlhsf6lc3fziv5h2hhfrinws65d7roiv6bfj7d652fid.onion/clic/
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) Tags
(DIR) README
(DIR) LICENSE
---
encode.lisp (14245B)
---
1 ;;; -*- Mode: LISP; Syntax: COMMON-LISP; Package: FLEXI-STREAMS; Base: 10 -*-
2 ;;; $Header: /usr/local/cvsrep/flexi-streams/encode.lisp,v 1.26 2008/05/26 10:55:08 edi Exp $
3
4 ;;; Copyright (c) 2005-2008, Dr. Edmund Weitz. All rights reserved.
5
6 ;;; Redistribution and use in source and binary forms, with or without
7 ;;; modification, are permitted provided that the following conditions
8 ;;; are met:
9
10 ;;; * Redistributions of source code must retain the above copyright
11 ;;; notice, this list of conditions and the following disclaimer.
12
13 ;;; * Redistributions in binary form must reproduce the above
14 ;;; copyright notice, this list of conditions and the following
15 ;;; disclaimer in the documentation and/or other materials
16 ;;; provided with the distribution.
17
18 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
19 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
22 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
24 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
27 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 (in-package :flexi-streams)
31
32 (defgeneric char-to-octets (format char writer)
33 (declare #.*standard-optimize-settings*)
34 (:documentation "Converts the character CHAR to a sequence of octets
35 using the external format FORMAT. The conversion is performed by
36 calling the unary function \(which must be a functional object) WRITER
37 repeatedly each octet. The return value of this function is
38 unspecified."))
39
40 (defgeneric write-sequence* (format stream sequence start end)
41 (declare #.*standard-optimize-settings*)
42 (:documentation "A generic function which dispatches on the external
43 format and does the real work for STREAM-WRITE-SEQUENCE."))
44
45 (defgeneric string-to-octets* (format string start end)
46 (declare #.*standard-optimize-settings*)
47 (:documentation "A generic function which dispatches on the external
48 format and does the real work for STRING-TO-OCTETS."))
49
50 (defmethod string-to-octets* :around (format (list list) start end)
51 (declare #.*standard-optimize-settings*)
52 (string-to-octets* format (coerce list 'string*) start end))
53
54 (defmacro define-sequence-writers ((format-class) &body body)
55 "Non-hygienic utility macro which defines methods for
56 WRITE-SEQUENCE* and STRING-TO-OCTETS* for the class FORMAT-CLASS. For
57 BODY see the docstring of DEFINE-CHAR-ENCODERS."
58 (let ((body `((locally
59 (declare #.*fixnum-optimize-settings*)
60 ,@body))))
61 `(progn
62 (defmethod string-to-octets* ((format ,format-class) string start end)
63 (declare #.*standard-optimize-settings*)
64 (declare (fixnum start end) (string string))
65 (let ((octets (make-array (compute-number-of-octets format string start end)
66 :element-type 'octet))
67 (j 0))
68 (declare (fixnum j))
69 (loop for i of-type fixnum from start below end do
70 (macrolet ((octet-writer (form)
71 `(progn
72 (setf (aref (the (array octet *) octets) j) ,form)
73 (incf j))))
74 (symbol-macrolet ((char-getter (char string i)))
75 (progn ,@body))))
76 octets))
77 (defmethod write-sequence* ((format ,format-class) stream sequence start end)
78 (declare #.*standard-optimize-settings*)
79 (declare (fixnum start end))
80 (with-accessors ((column flexi-stream-column))
81 stream
82 (let* ((octet-seen-p nil)
83 (buffer-pos 0)
84 ;; estimate should be good enough...
85 (factor (encoding-factor format))
86 ;; we don't want arbitrarily large buffer, do we?
87 (buffer-size (min +buffer-size+ (ceiling (* factor (- end start)))))
88 (buffer (make-octet-buffer buffer-size))
89 (underlying-stream (flexi-stream-stream stream)))
90 (declare (fixnum buffer-pos buffer-size)
91 (boolean octet-seen-p)
92 (type (array octet *) buffer))
93 (macrolet ((octet-writer (form)
94 `(write-octet ,form)))
95 (labels ((flush-buffer ()
96 "Sends all octets in BUFFER to the underlying stream."
97 (write-sequence buffer underlying-stream :end buffer-pos)
98 (setq buffer-pos 0))
99 (write-octet (octet)
100 "Adds one octet to the buffer and flushes it if necessary."
101 (declare (type octet octet))
102 (when (>= buffer-pos buffer-size)
103 (flush-buffer))
104 (setf (aref buffer buffer-pos) octet)
105 (incf buffer-pos))
106 (write-object (object)
107 "Dispatches to WRITE-OCTET or WRITE-CHARACTER
108 depending on the type of OBJECT."
109 (etypecase object
110 (octet (setq octet-seen-p t)
111 (write-octet object))
112 (character (symbol-macrolet ((char-getter object))
113 ,@body)))))
114 (macrolet ((iterate (&body output-forms)
115 "An unhygienic macro to implement the actual
116 iteration through SEQUENCE. OUTPUT-FORM is the form to retrieve one
117 sequence element and put its octet representation into the buffer."
118 `(loop for index of-type fixnum from start below end
119 do (progn ,@output-forms)
120 finally (when (plusp buffer-pos)
121 (flush-buffer)))))
122 (etypecase sequence
123 (string (iterate
124 (symbol-macrolet ((char-getter (char sequence index)))
125 ,@body)))
126 (array (iterate
127 (symbol-macrolet ((char-getter (aref sequence index)))
128 ,@body)))
129 (list (iterate (write-object (nth index sequence))))))
130 ;; update the column slot, setting it to NIL if we sent
131 ;; octets
132 (setq column
133 (cond (octet-seen-p nil)
134 (t (let ((last-newline-pos (position #\Newline sequence
135 :test #'char=
136 :start start
137 :end end
138 :from-end t)))
139 (cond (last-newline-pos (- end last-newline-pos 1))
140 (column (+ column (- end start))))))))))))))))
141
142 (defmacro define-char-encoders ((lf-format-class cr-format-class crlf-format-class) &body body)
143 "Non-hygienic utility macro which defines several encoding-related
144 methods for the classes LF-FORMAT-CLASS, CR-FORMAT-CLASS, and
145 CRLF-FORMAT-CLASS where it is assumed that CR-FORMAT-CLASS is the same
146 encoding as LF-FORMAT-CLASS but with CR instead of LF line endings and
147 similar for CRLF-FORMAT-CLASS, i.e. LF-FORMAT-CLASS is the base class.
148 BODY is a code template for the code to convert one character to
149 octets. BODY must contain a symbol CHAR-GETTER representing the form
150 which is used to obtain the character and a forms like \(OCTET-WRITE
151 <thing>) to write the octet <thing>. The CHAR-GETTER form might be
152 called more than once."
153 `(progn
154 (defmethod char-to-octets ((format ,lf-format-class) char writer)
155 (declare #.*fixnum-optimize-settings*)
156 (declare (character char) (function writer))
157 (symbol-macrolet ((char-getter char))
158 (macrolet ((octet-writer (form)
159 `(funcall writer ,form)))
160 ,@body)))
161 (define-sequence-writers (,lf-format-class) ,@body)
162 (define-sequence-writers (,cr-format-class)
163 ;; modify the body so that the getter replaces a #\Newline
164 ;; with a #\Return
165 ,@(sublis `((char-getter . ,(with-unique-names (char)
166 `(let ((,char char-getter))
167 (declare (character ,char))
168 (if (char= ,char #\Newline)
169 #\Return
170 ,char)))))
171 body))
172 (define-sequence-writers (,crlf-format-class)
173 ;; modify the body so that we potentially write octets for
174 ;; two characters (#\Return and #\Linefeed) - the original
175 ;; body is wrapped with the WRITE-CHAR local function
176 ,(with-unique-names (char write-char)
177 `(flet ((,write-char (,char)
178 ,@(sublis `((char-getter . ,char)) body)))
179 (let ((,char char-getter))
180 (declare (character ,char))
181 (cond ((char= ,char #\Newline)
182 (,write-char #\Return)
183 (,write-char #\Linefeed))
184 (t (,write-char ,char)))))))))
185
186 (define-char-encoders (flexi-latin-1-format flexi-cr-latin-1-format flexi-crlf-latin-1-format)
187 (let ((octet (char-code char-getter)))
188 (when (> octet 255)
189 (signal-encoding-error format "~S (code ~A) is not a LATIN-1 character." char-getter octet))
190 (octet-writer octet)))
191
192 (define-char-encoders (flexi-ascii-format flexi-cr-ascii-format flexi-crlf-ascii-format)
193 (let ((octet (char-code char-getter)))
194 (when (> octet 127)
195 (signal-encoding-error format "~S (code ~A) is not an ASCII character." char-getter octet))
196 (octet-writer octet)))
197
198 (define-char-encoders (flexi-8-bit-format flexi-cr-8-bit-format flexi-crlf-8-bit-format)
199 (with-accessors ((encoding-hash external-format-encoding-hash))
200 format
201 (let ((octet (gethash (char-code char-getter) encoding-hash)))
202 (unless octet
203 (signal-encoding-error format "~S (code ~A) is not in this encoding." char-getter octet))
204 (octet-writer octet))))
205
206 (define-char-encoders (flexi-utf-8-format flexi-cr-utf-8-format flexi-crlf-utf-8-format)
207 ;; the old version using LDB was more elegant, but some Lisps had
208 ;; trouble optimizing it
209 (let ((char-code (char-code char-getter)))
210 (tagbody
211 (cond ((< char-code #x80)
212 (octet-writer char-code)
213 (go zero))
214 ((< char-code #x800)
215 (octet-writer (logior* #b11000000 (ash* char-code -6)))
216 (go one))
217 ((< char-code #x10000)
218 (octet-writer (logior* #b11100000 (ash* char-code -12)))
219 (go two))
220 (t
221 (octet-writer (logior* #b11110000 (ash* char-code -18)))))
222 (octet-writer (logior* #b10000000 (logand* #b00111111 (ash* char-code -12))))
223 two
224 (octet-writer (logior* #b10000000 (logand* #b00111111 (ash* char-code -6))))
225 one
226 (octet-writer (logior* #b10000000 (logand* #b00111111 char-code)))
227 zero)))
228
229 (define-char-encoders (flexi-utf-16-le-format flexi-cr-utf-16-le-format flexi-crlf-utf-16-le-format)
230 (flet ((write-word (word)
231 (octet-writer (logand* #x00ff word))
232 (octet-writer (ash* (logand* #xff00 word) -8))))
233 (declare (inline write-word))
234 (let ((char-code (char-code char-getter)))
235 (declare (type char-code-integer char-code))
236 (cond ((< char-code #x10000)
237 (write-word char-code))
238 (t (decf char-code #x10000)
239 (write-word (logior* #xd800 (ash* char-code -10)))
240 (write-word (logior* #xdc00 (logand* #x03ff char-code))))))))
241
242 (define-char-encoders (flexi-utf-16-be-format flexi-cr-utf-16-be-format flexi-crlf-utf-16-be-format)
243 (flet ((write-word (word)
244 (octet-writer (ash* (logand* #xff00 word) -8))
245 (octet-writer (logand* #x00ff word))))
246 (declare (inline write-word))
247 (let ((char-code (char-code char-getter)))
248 (declare (type char-code-integer char-code))
249 (cond ((< char-code #x10000)
250 (write-word char-code))
251 (t (decf char-code #x10000)
252 (write-word (logior* #xd800 (ash* char-code -10)))
253 (write-word (logior* #xdc00 (logand* #x03ff char-code))))))))
254
255 (define-char-encoders (flexi-utf-32-le-format flexi-cr-utf-32-le-format flexi-crlf-utf-32-le-format)
256 (let ((char-code (char-code char-getter)))
257 (octet-writer (logand* #x00ff char-code))
258 (octet-writer (logand* #x00ff (ash* char-code -8)))
259 (octet-writer (logand* #x00ff (ash* char-code -16)))
260 (octet-writer (logand* #x00ff (ash* char-code -24)))))
261
262 (define-char-encoders (flexi-utf-32-be-format flexi-cr-utf-32-be-format flexi-crlf-utf-32-be-format)
263 (let ((char-code (char-code char-getter)))
264 (octet-writer (logand* #x00ff (ash* char-code -24)))
265 (octet-writer (logand* #x00ff (ash* char-code -16)))
266 (octet-writer (logand* #x00ff (ash* char-code -8)))
267 (octet-writer (logand* #x00ff char-code))))
268
269 (defmethod char-to-octets ((format flexi-cr-mixin) char writer)
270 (declare #.*fixnum-optimize-settings*)
271 (declare (character char))
272 (if (char= char #\Newline)
273 (call-next-method format #\Return writer)
274 (call-next-method)))
275
276 (defmethod char-to-octets ((format flexi-crlf-mixin) char writer)
277 (declare #.*fixnum-optimize-settings*)
278 (declare (character char))
279 (cond ((char= char #\Newline)
280 (call-next-method format #\Return writer)
281 (call-next-method format #\Linefeed writer))
282 (t (call-next-method))))