Raw File
1 /*============================================================================
2 epub2txt v2
3 wstring.c
4 Copyright (c)2020 Kevin Boone, GPL v3.0
5 ============================================================================*/
6
7 #define _GNU_SOURCE
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <stdint.h>
11 #include <unistd.h>
12 #include <fcntl.h>
13 #include <sys/types.h>
14 #include <sys/stat.h>
15 #include <errno.h>
16 #include <string.h>
17 #include "wstring.h"
18 #include "custom_string.h"
19 #include "convertutf.h"
20 #include "log.h"
21
/* Backing storage for a wide string: a heap buffer of 32-bit code units
   followed by a zero terminator, plus the cached count of units that
   precede the terminator. */
struct _WString
{
  uint32_t *str;  /* zero-terminated array of 32-bit code units */
  int len;        /* number of units in str, terminator excluded */
};
27
28
29 /*============================================================================
30 wstring_convert_utf8_to_utf32
31 ===========================================================================*/
32 uint32_t *wstring_convert_utf8_to_utf32 (const char *_in)
33 {
34 IN
35 const char* in = (const char *)_in;
36 int max_out = strlen (_in);
37 uint32_t *out = malloc ((max_out + 1) * sizeof (uint32_t));
38 memset (out, 0, (max_out + 1) * sizeof (uint32_t));
39 uint32_t *out_temp = out;
40
41 ConvertUTF8toUTF32 ((const UTF8 **)&in, (const UTF8 *)in+strlen((char *)in),
42 (UTF32**)&out_temp, (UTF32*)out + max_out, 0);
43
44 int len = out_temp - out;
45 out [len] = 0;
46 OUT
47 return out;
48 }
49
50
51 /*============================================================================
52 wstring_create_empty
53 ============================================================================*/
54 WString *wstring_create_empty (void)
55 {
56 WString *self = malloc (sizeof (WString));
57 self->str = malloc (sizeof (uint32_t));
58 self->str[0] = 0;
59 self->len = 0;
60 return self;
61 }
62
63
64
65 /*============================================================================
66 wstring_create_from_utf8
67 ============================================================================*/
68 WString *wstring_create_from_utf8 (const char *s)
69 {
70 WString *self = malloc (sizeof (WString));
71 self->str = wstring_convert_utf8_to_utf32 (s);
72 self->len = wstring_length_calc(self);
73 return self;
74 }
75
76
77 /*============================================================================
78 wstring_create_from_utf8_file
79 ============================================================================*/
80 BOOL wstring_create_from_utf8_file (const char *filename,
81 WString **result, char **error)
82 {
83 IN
84 WString *self = NULL;
85 BOOL ok = FALSE;
86 int f = open (filename, O_RDONLY);
87 if (f > 0)
88 {
89 self = malloc (sizeof (WString));
90 struct stat sb;
91 fstat (f, &sb);
92 int64_t size = sb.st_size;
93 char *buff = malloc (size + 2);
94 int n = read (f, buff, size);
95 close (f);
96 buff[n] = 0;
97
98 // Might need to skip a UTF-8 BOM when reading file
99 if (buff[0] == (char)0xEF && buff[1] == (char)0xBB && buff[2] == (char)0xBF) {
100 self->str = wstring_convert_utf8_to_utf32 (buff + 3);
101 self->len = wstring_length_calc(self);
102 } else {
103 self->str = wstring_convert_utf8_to_utf32 (buff);
104 self->len = wstring_length_calc(self);
105 }
106
107 free (buff);
108
109 *result = self;
110 ok = TRUE;
111 }
112 else
113 {
114 asprintf (error, "Can't open file '%s' for reading: %s",
115 filename, strerror (errno));
116 ok = FALSE;
117 }
118
119 OUT
120 return ok;
121 }
122
123
124 /*============================================================================
125 wstring_length_calc
126 ============================================================================*/
127 const int wstring_length_calc (const WString *self)
128 {
129 IN
130 if (!self)
131 {
132 OUT
133 return 0;
134 }
135 uint32_t *s = self->str;
136 int i = 0;
137 uint32_t c = 0;
138 do
139 {
140 c = s[i];
141 i++;
142 } while (c != 0);
143 int ret = i - 1;
144 return ret;
145 OUT
146 }
147
148 /*============================================================================
149 wstring_length
150 ============================================================================*/
151 inline const int wstring_length (const WString *self)
152 {
153 if (!self)
154 return 0;
155 return self->len;
156 }
157
158
159 /*============================================================================
160 wstring_destroy
161 ============================================================================*/
162 void wstring_destroy (WString *self)
163 {
164 IN
165 if (self)
166 {
167 if (self->str) free (self->str);
168 free (self);
169 }
170 OUT
171 }
172
173
174 /*============================================================================
175 wstring_wstr
176 ============================================================================*/
177 const uint32_t *wstring_wstr (const WString *self)
178 {
179 return self->str;
180 }
181
182
183 /*============================================================================
184 wstring_to_utf8
185 ============================================================================*/
186 char *wstring_to_utf8 (const WString *self)
187 {
188 const uint32_t *s = self->str;
189 String *temp = string_create_empty();
190 int i, l = wstring_length (self);
191 for (i = 0; i < l; i++)
192 string_append_c (temp, s[i]);
193
194 char *ret = strdup (string_cstr (temp));
195 string_destroy (temp);
196 return ret;
197 }
198
199
200 /*============================================================================
201 wstring_append_c
202 ============================================================================*/
203 void wstring_append_c (WString *self, const uint32_t c)
204 {
205 int l = wstring_length (self);
206 self->str = realloc (self->str, (l + 2) * sizeof (uint32_t));
207 self->str[l] = c;
208 self->str[l+1] = 0;
209 self->len = l + 1;
210 }
211
212
213 /*============================================================================
214 wstring_append
215 ============================================================================*/
216 void wstring_append (WString *self, const WString *other)
217 {
218 int mylen = wstring_length (self);
219 int otherlen = wstring_length (other);
220 self->str = realloc (self->str, (mylen + otherlen + 1) * sizeof (uint32_t));
221 int i;
222 for (i = 0; i < otherlen; i++)
223 self->str[mylen+i] = other->str[i];
224 self->str[mylen+i] = 0;
225 self->len = mylen + otherlen;
226 }
227
228
229 /*============================================================================
230 wstring_clear
231 ============================================================================*/
232 void wstring_clear (WString *self)
233 {
234 free (self->str);
235 self->str = malloc (sizeof (uint32_t));
236 self->str[0] = 0;
237 self->len = 0;
238 }
239
240
241 /*============================================================================
242 wstring_is_whitespace
243 ============================================================================*/
244 BOOL wstring_is_whitespace (const WString *self)
245 {
246 int l = wstring_length (self);
247 uint32_t *s = self->str;
248 int i;
249 for (i = 0; i < l; i++)
250 {
251 uint32_t c = s[i];
252 if (c != ' ' && c != '\n' && c != '\t') return FALSE;
253 }
254
255 return TRUE;
256 }
257
258
259
260
261
Generated by GNU Enscript 1.6.6, and GophHub 1.3.