1 /*============================================================================ 2 epub2txt v2 3 wstring.c 4 Copyright (c)2020 Kevin Boone, GPL v3.0 5 ============================================================================*/ 6 7 #define _GNU_SOURCE 8 #include 9 #include 10 #include 11 #include 12 #include 13 #include 14 #include 15 #include 16 #include 17 #include "wstring.h" 18 #include "custom_string.h" 19 #include "convertutf.h" 20 #include "log.h" 21 22 struct _WString 23 { 24 uint32_t *str; 25 int len; 26 }; 27 28 29 /*============================================================================ 30 wstring_convert_utf8_to_utf32 31 ===========================================================================*/ 32 uint32_t *wstring_convert_utf8_to_utf32 (const char *_in) 33 { 34 IN 35 const char* in = (const char *)_in; 36 int max_out = strlen (_in); 37 uint32_t *out = malloc ((max_out + 1) * sizeof (uint32_t)); 38 memset (out, 0, (max_out + 1) * sizeof (uint32_t)); 39 uint32_t *out_temp = out; 40 41 ConvertUTF8toUTF32 ((const UTF8 **)&in, (const UTF8 *)in+strlen((char *)in), 42 (UTF32**)&out_temp, (UTF32*)out + max_out, 0); 43 44 int len = out_temp - out; 45 out [len] = 0; 46 OUT 47 return out; 48 } 49 50 51 /*============================================================================ 52 wstring_create_empty 53 ============================================================================*/ 54 WString *wstring_create_empty (void) 55 { 56 WString *self = malloc (sizeof (WString)); 57 self->str = malloc (sizeof (uint32_t)); 58 self->str[0] = 0; 59 self->len = 0; 60 return self; 61 } 62 63 64 65 /*============================================================================ 66 wstring_create_from_utf8 67 ============================================================================*/ 68 WString *wstring_create_from_utf8 (const char *s) 69 { 70 WString *self = malloc (sizeof (WString)); 71 self->str = wstring_convert_utf8_to_utf32 (s); 72 self->len = wstring_length_calc(self); 73 return self; 74 } 75 76 77 /*============================================================================ 78 wstring_create_from_utf8_file 79 ============================================================================*/ 80 BOOL wstring_create_from_utf8_file (const char *filename, 81 WString **result, char **error) 82 { 83 IN 84 WString *self = NULL; 85 BOOL ok = FALSE; 86 int f = open (filename, O_RDONLY); 87 if (f > 0) 88 { 89 self = malloc (sizeof (WString)); 90 struct stat sb; 91 fstat (f, &sb); 92 int64_t size = sb.st_size; 93 char *buff = malloc (size + 2); 94 int n = read (f, buff, size); 95 close (f); 96 buff[n] = 0; 97 98 // Might need to skip a UTF-8 BOM when reading file 99 if (buff[0] == (char)0xEF && buff[1] == (char)0xBB && buff[2] == (char)0xBF) { 100 self->str = wstring_convert_utf8_to_utf32 (buff + 3); 101 self->len = wstring_length_calc(self); 102 } else { 103 self->str = wstring_convert_utf8_to_utf32 (buff); 104 self->len = wstring_length_calc(self); 105 } 106 107 free (buff); 108 109 *result = self; 110 ok = TRUE; 111 } 112 else 113 { 114 asprintf (error, "Can't open file '%s' for reading: %s", 115 filename, strerror (errno)); 116 ok = FALSE; 117 } 118 119 OUT 120 return ok; 121 } 122 123 124 /*============================================================================ 125 wstring_length_calc 126 ============================================================================*/ 127 const int wstring_length_calc (const WString *self) 128 { 129 IN 130 if (!self) 131 { 132 OUT 133 return 0; 134 } 135 uint32_t *s = self->str; 136 int i = 0; 137 uint32_t c = 0; 138 do 139 { 140 c = s[i]; 141 i++; 142 } while (c != 0); 143 int ret = i - 1; 144 return ret; 145 OUT 146 } 147 148 /*============================================================================ 149 wstring_length 150 ============================================================================*/ 151 inline const int wstring_length (const WString *self) 152 { 153 if (!self) 154 return 0; 155 return self->len; 156 } 157 158 159 /*============================================================================ 160 wstring_destroy 161 ============================================================================*/ 162 void wstring_destroy (WString *self) 163 { 164 IN 165 if (self) 166 { 167 if (self->str) free (self->str); 168 free (self); 169 } 170 OUT 171 } 172 173 174 /*============================================================================ 175 wstring_wstr 176 ============================================================================*/ 177 const uint32_t *wstring_wstr (const WString *self) 178 { 179 return self->str; 180 } 181 182 183 /*============================================================================ 184 wstring_to_utf8 185 ============================================================================*/ 186 char *wstring_to_utf8 (const WString *self) 187 { 188 const uint32_t *s = self->str; 189 String *temp = string_create_empty(); 190 int i, l = wstring_length (self); 191 for (i = 0; i < l; i++) 192 string_append_c (temp, s[i]); 193 194 char *ret = strdup (string_cstr (temp)); 195 string_destroy (temp); 196 return ret; 197 } 198 199 200 /*============================================================================ 201 wstring_append_c 202 ============================================================================*/ 203 void wstring_append_c (WString *self, const uint32_t c) 204 { 205 int l = wstring_length (self); 206 self->str = realloc (self->str, (l + 2) * sizeof (uint32_t)); 207 self->str[l] = c; 208 self->str[l+1] = 0; 209 self->len = l + 1; 210 } 211 212 213 /*============================================================================ 214 wstring_append 215 ============================================================================*/ 216 void wstring_append (WString *self, const WString *other) 217 { 218 int mylen = wstring_length (self); 219 int otherlen = wstring_length (other); 220 self->str = realloc (self->str, (mylen + otherlen + 1) * sizeof (uint32_t)); 221 int i; 222 for (i = 0; i < otherlen; i++) 223 self->str[mylen+i] = other->str[i]; 224 self->str[mylen+i] = 0; 225 self->len = mylen + otherlen; 226 } 227 228 229 /*============================================================================ 230 wstring_clear 231 ============================================================================*/ 232 void wstring_clear (WString *self) 233 { 234 free (self->str); 235 self->str = malloc (sizeof (uint32_t)); 236 self->str[0] = 0; 237 self->len = 0; 238 } 239 240 241 /*============================================================================ 242 wstring_is_whitespace 243 ============================================================================*/ 244 BOOL wstring_is_whitespace (const WString *self) 245 { 246 int l = wstring_length (self); 247 uint32_t *s = self->str; 248 int i; 249 for (i = 0; i < l; i++) 250 { 251 uint32_t c = s[i]; 252 if (c != ' ' && c != '\n' && c != '\t') return FALSE; 253 } 254 255 return TRUE; 256 } 257 258 259 260