GophHub - kevinboone/epub2txt2/src/wstring.c


Raw File

    1	/*============================================================================
    2	  epub2txt v2 
    3	  wstring.c
    4	  Copyright (c)2020 Kevin Boone, GPL v3.0
    5	============================================================================*/
    6	
    7	#define _GNU_SOURCE
    8	#include <stdio.h> 
    9	#include <stdlib.h> 
   10	#include <stdint.h> 
   11	#include <unistd.h> 
   12	#include <fcntl.h> 
   13	#include <sys/types.h> 
   14	#include <sys/stat.h> 
   15	#include <errno.h> 
   16	#include <string.h> 
   17	#include "wstring.h"
   18	#include "custom_string.h"
   19	#include "convertutf.h"
   20	#include "log.h"
   21	
   22	struct _WString
   23	  {
   24	  uint32_t *str;
   25	  int len;
   26	  }; 
   27	
   28	
   29	/*============================================================================
   30	  wstring_convert_utf8_to_utf32
   31	===========================================================================*/
   32	uint32_t *wstring_convert_utf8_to_utf32 (const char *_in)
   33	  {
   34	  IN
   35	  const char* in = (const char *)_in;
   36	  int max_out = strlen (_in);
   37	  uint32_t *out = malloc ((max_out + 1) * sizeof (uint32_t));
   38	  memset (out, 0, (max_out + 1) * sizeof (uint32_t));
   39	  uint32_t *out_temp = out;
   40	  
   41	  ConvertUTF8toUTF32 ((const UTF8 **)&in, (const UTF8 *)in+strlen((char *)in),
   42	      (UTF32**)&out_temp, (UTF32*)out + max_out, 0);
   43	  
   44	  int len = out_temp - out;
   45	  out [len] = 0;
   46	  OUT
   47	  return out;
   48	  }
   49	
   50	
   51	/*============================================================================
   52	  wstring_create_empty
   53	============================================================================*/
   54	WString *wstring_create_empty (void)
   55	  {
   56	  WString *self = malloc (sizeof (WString));
   57	  self->str = malloc (sizeof (uint32_t));
   58	  self->str[0] = 0;
   59	  self->len = 0;
   60	  return self;
   61	  }
   62	
   63	
   64	
   65	/*============================================================================
   66	  wstring_create_from_utf8
   67	============================================================================*/
   68	WString *wstring_create_from_utf8 (const char *s)
   69	  {
   70	  WString *self = malloc (sizeof (WString));
   71	  self->str = wstring_convert_utf8_to_utf32 (s);
   72	  self->len = wstring_length_calc(self);
   73	  return self;
   74	  }
   75	
   76	
   77	/*============================================================================
   78	  wstring_create_from_utf8_file
   79	============================================================================*/
   80	BOOL wstring_create_from_utf8_file (const char *filename, 
   81	    WString **result, char **error)
   82	  {
   83	  IN
   84	  WString *self = NULL;
   85	  BOOL ok = FALSE; 
   86	  int f = open (filename, O_RDONLY);
   87	  if (f > 0)
   88	    {
   89	    self = malloc (sizeof (WString));
   90	    struct stat sb;
   91	    fstat (f, &sb);
   92	    int64_t size = sb.st_size;
   93	    char *buff = malloc (size + 2);
   94	    int n = read (f, buff, size);
   95	    close (f);
   96	    buff[n] = 0;
   97	
   98	    // Might need to skip a UTF-8 BOM when reading file
   99	    if (buff[0] == (char)0xEF && buff[1] == (char)0xBB && buff[2] == (char)0xBF) {
  100	      self->str = wstring_convert_utf8_to_utf32 (buff + 3);
  101	      self->len = wstring_length_calc(self);
  102	    } else {
  103	      self->str = wstring_convert_utf8_to_utf32 (buff);
  104	      self->len = wstring_length_calc(self);
  105	    }
  106	
  107	    free (buff);
  108	
  109	    *result = self;
  110	    ok = TRUE;
  111	    }
  112	  else
  113	    {
  114	    asprintf (error, "Can't open file '%s' for reading: %s", 
  115	      filename, strerror (errno));
  116	    ok = FALSE;
  117	    }
  118	
  119	  OUT
  120	  return ok;
  121	  }
  122	
  123	
  124	/*============================================================================
  125	  wstring_length_calc
  126	============================================================================*/
  127	const int wstring_length_calc (const WString *self)
  128	  {
  129	  IN
  130	  if (!self) 
  131	    {
  132	    OUT
  133	    return 0;
  134	    }
  135	  uint32_t *s = self->str;
  136	  int i = 0;
  137	  uint32_t c = 0;
  138	  do
  139	    {
  140	    c = s[i];
  141	    i++;
  142	    } while (c != 0);
  143	  int ret = i - 1;
  144	  return ret;
  145	  OUT
  146	  }
  147	
  148	/*============================================================================
  149	  wstring_length
  150	============================================================================*/
  151	inline const int wstring_length (const WString *self)
  152	{
  153	    if (!self)
  154	        return 0;
  155	    return self->len;
  156	}
  157	
  158	
  159	/*============================================================================
  160	  wstring_destroy
  161	============================================================================*/
  162	void wstring_destroy (WString *self)
  163	  {
  164	  IN
  165	  if (self)
  166	    {
  167	    if (self->str) free (self->str);
  168	    free (self);
  169	    }
  170	  OUT
  171	  }
  172	
  173	
  174	/*============================================================================
  175	  wstring_wstr
  176	============================================================================*/
  177	const uint32_t *wstring_wstr (const WString *self)
  178	  {
  179	  return self->str;
  180	  }
  181	
  182	
  183	/*============================================================================
  184	  wstring_to_utf8
  185	============================================================================*/
  186	char *wstring_to_utf8 (const WString *self)
  187	  {
  188	  const uint32_t *s = self->str;
  189	  String *temp = string_create_empty();
  190	  int i, l = wstring_length (self);
  191	  for (i = 0; i < l; i++)
  192	     string_append_c (temp, s[i]);
  193	
  194	  char *ret = strdup (string_cstr (temp));
  195	  string_destroy (temp);
  196	  return ret;
  197	  }
  198	
  199	
  200	/*============================================================================
  201	  wstring_append_c
  202	============================================================================*/
  203	void wstring_append_c (WString *self, const uint32_t c)
  204	  {
  205	  int l = wstring_length (self);
  206	  self->str = realloc (self->str, (l + 2) * sizeof (uint32_t));
  207	  self->str[l] = c;
  208	  self->str[l+1] = 0; 
  209	  self->len = l + 1;
  210	  }
  211	
  212	
  213	/*============================================================================
  214	  wstring_append
  215	============================================================================*/
  216	void wstring_append (WString *self, const WString *other)
  217	  {
  218	  int mylen = wstring_length (self);
  219	  int otherlen = wstring_length (other);
  220	  self->str = realloc (self->str, (mylen + otherlen + 1) * sizeof (uint32_t));
  221	  int i;
  222	  for (i = 0; i < otherlen; i++)
  223	    self->str[mylen+i] = other->str[i];
  224	  self->str[mylen+i] = 0; 
  225	  self->len = mylen + otherlen;
  226	  }
  227	
  228	
  229	/*============================================================================
  230	  wstring_clear
  231	============================================================================*/
  232	void  wstring_clear (WString *self)
  233	  {
  234	  free (self->str);
  235	  self->str = malloc (sizeof (uint32_t));
  236	  self->str[0] = 0;
  237	  self->len = 0;
  238	  }
  239	
  240	
  241	/*============================================================================
  242	  wstring_is_whitespace
  243	============================================================================*/
  244	BOOL wstring_is_whitespace (const WString *self)
  245	  {
  246	  int l = wstring_length (self);
  247	  uint32_t *s = self->str;
  248	  int i;
  249	  for (i = 0; i < l; i++)
  250	    {
  251	    uint32_t c = s[i];
  252	    if (c != ' ' && c != '\n' && c != '\t') return FALSE;
  253	    }
  254	
  255	  return TRUE;
  256	  }
  257	
  258	
  259	
  260	
  261	

Generated by GNU Enscript 1.6.6, and GophHub 1.3.