GophHub - kevinboone/epub2txt2/src/wstring.c


Raw File

/*============================================================================
  epub2txt v2 
  wstring.c
  Copyright (c)2020 Kevin Boone, GPL v3.0
============================================================================*/

#define _GNU_SOURCE
#include <stdio.h> 
#include <stdlib.h> 
#include <stdint.h> 
#include <unistd.h> 
#include <fcntl.h> 
#include <sys/types.h> 
#include <sys/stat.h> 
#include <errno.h> 
#include <string.h> 
#include "wstring.h"
#include "custom_string.h"
#include "convertutf.h"
#include "log.h"

// A growable string of 32-bit values. The functions in this file keep 'str'
//  terminated by a zero element, and keep 'len' equal to the number of
//  elements that precede that terminator.
struct _WString
  {
  // Heap buffer holding the characters, followed by a zero terminator
  uint32_t *str;
  // Cached element count, not including the terminator
  int len;
  }; 


/*============================================================================
  wstring_convert_utf8_to_utf32
===========================================================================*/
uint32_t *wstring_convert_utf8_to_utf32 (const char *_in)
  {
  IN
  const char* in = (const char *)_in;
  int max_out = strlen (_in);
  uint32_t *out = malloc ((max_out + 1) * sizeof (uint32_t));
  memset (out, 0, (max_out + 1) * sizeof (uint32_t));
  uint32_t *out_temp = out;
  
  ConvertUTF8toUTF32 ((const UTF8 **)&in, (const UTF8 *)in+strlen((char *)in),
      (UTF32**)&out_temp, (UTF32*)out + max_out, 0);
  
  int len = out_temp - out;
  out [len] = 0;
  OUT
  return out;
  }


/*============================================================================
  wstring_create_empty

  Creates a WString of length zero, whose buffer holds only the zero 
  terminator. Release the result with wstring_destroy(). 

  Returns NULL if memory can't be allocated.
============================================================================*/
WString *wstring_create_empty (void)
  {
  WString *self = malloc (sizeof *self);
  if (!self) return NULL;

  // calloc() supplies the zero terminator
  self->str = calloc (1, sizeof *self->str);
  if (!self->str)
    {
    free (self);
    return NULL;
    }

  self->len = 0;
  return self;
  }



/*============================================================================
  wstring_create_from_utf8

  Creates a WString from the NUL-terminated UTF-8 string s, using 
  wstring_convert_utf8_to_utf32() for the conversion. Release the result
  with wstring_destroy(). 

  Returns NULL if memory can't be allocated, or if the conversion does not
  produce a buffer.
============================================================================*/
WString *wstring_create_from_utf8 (const char *s)
  {
  WString *self = malloc (sizeof *self);
  if (!self) return NULL;

  self->str = wstring_convert_utf8_to_utf32 (s);
  if (!self->str)
    {
    // Don't hand back a half-built object that would fail later
    free (self);
    return NULL;
    }

  self->len = wstring_length_calc (self);
  return self;
  }


/*============================================================================
  wstring_create_from_utf8_file

  Reads the whole of 'filename', skips a leading UTF-8 byte-order mark if
  there is one, and converts the remaining bytes to a new WString. The 
  bytes are handled as a C string, so the text ends at the first zero byte.

  On success, stores the new string in *result (release it with 
  wstring_destroy()) and returns TRUE. 

  On failure, returns FALSE and leaves *result untouched. If 'error' is 
  not NULL, *error receives a message allocated by asprintf(), which the
  caller must free(); *error is set to NULL if that allocation fails too.
============================================================================*/

// Reads descriptor f into a new NUL-terminated buffer, and stores the number
//  of bytes read in *length. The amount requested is the size reported by
//  fstat(). Returns NULL, with errno set, on failure. The caller frees the
//  buffer.
static char *wstring_read_whole_fd (int f, size_t *length)
  {
  struct stat sb;
  if (fstat (f, &sb) != 0) return NULL;

  size_t size = sb.st_size > 0 ? (size_t) sb.st_size : 0;
  char *buff = malloc (size + 1);
  if (!buff)
    {
    errno = ENOMEM;
    return NULL;
    }

  // read() may deliver fewer bytes than requested, or be interrupted by a
  //  signal, so keep going until the expected size or end-of-file
  size_t got = 0;
  while (got < size)
    {
    ssize_t n = read (f, buff + got, size - got);
    if (n > 0)
      got += (size_t) n;
    else if (n == 0)
      break; // The file is shorter than fstat() reported
    else if (errno != EINTR)
      {
      int saved = errno; // free() must not disturb the reported error
      free (buff);
      errno = saved;
      return NULL;
      }
    }

  buff[got] = 0;
  *length = got;
  return buff;
  }

BOOL wstring_create_from_utf8_file (const char *filename, 
    WString **result, char **error)
  {
  IN
  BOOL ok = FALSE; 

  // Zero is a valid descriptor; only a negative value means failure
  int f = open (filename, O_RDONLY);
  if (f < 0)
    {
    if (error && asprintf (error, "Can't open file '%s' for reading: %s", 
          filename, strerror (errno)) < 0)
      *error = NULL;
    OUT
    return FALSE;
    }

  size_t n = 0;
  char *buff = wstring_read_whole_fd (f, &n);
  int problem = buff ? 0 : errno; // Capture before close() can alter errno
  close (f);

  if (buff)
    {
    WString *self = malloc (sizeof *self);
    if (self)
      {
      // Might need to skip a UTF-8 BOM when reading file
      const char *text = buff;
      if (n >= 3 && memcmp (buff, "\xEF\xBB\xBF", 3) == 0)
        text += 3;
      self->str = wstring_convert_utf8_to_utf32 (text);
      }

    if (self && self->str)
      {
      self->len = wstring_length_calc (self);
      *result = self;
      ok = TRUE;
      }
    else
      {
      free (self);
      problem = ENOMEM;
      }

    free (buff);
    }

  if (!ok && error && asprintf (error, "Can't read file '%s': %s", 
        filename, strerror (problem)) < 0)
    *error = NULL;

  OUT
  return ok;
  }


/*============================================================================
  wstring_length_calc
============================================================================*/
const int wstring_length_calc (const WString *self)
  {
  IN
  if (!self) 
    {
    OUT
    return 0;
    }
  uint32_t *s = self->str;
  int i = 0;
  uint32_t c = 0;
  do
    {
    c = s[i];
    i++;
    } while (c != 0);
  int ret = i - 1;
  return ret;
  OUT
  }

/*============================================================================
  wstring_length

  Returns the cached length of self, or 0 if self is NULL. No scan of the
  buffer takes place.
============================================================================*/
inline const int wstring_length (const WString *self)
  {
  return self ? self->len : 0;
  }


/*============================================================================
  wstring_destroy

  Frees self and the buffer it owns. A NULL argument is accepted, and 
  nothing is freed in that case.
============================================================================*/
void wstring_destroy (WString *self)
  {
  IN
  if (!self)
    {
    OUT
    return;
    }

  // free() ignores a NULL pointer, so the buffer needs no separate test
  free (self->str);
  free (self);
  OUT
  }


/*============================================================================
  wstring_wstr

  Returns a read-only pointer to self's internal buffer. The buffer still
  belongs to self; the caller must not free it. self must not be NULL.
============================================================================*/
const uint32_t *wstring_wstr (const WString *self)
  {
  const uint32_t *buffer = self->str;
  return buffer;
  }


/*============================================================================
  wstring_to_utf8

  Builds a char string from self by passing each element in turn to 
  string_append_c(), and returns a strdup()'d copy that the caller must
  free(). The result is NULL if strdup() fails. self must not be NULL.

  NOTE(review): string_append_c() presumably encodes each value as UTF-8;
  confirm in custom_string.h
============================================================================*/
char *wstring_to_utf8 (const WString *self)
  {
  const uint32_t *cursor = self->str;
  String *utf8 = string_create_empty();

  int remaining = wstring_length (self);
  while (remaining-- > 0)
    string_append_c (utf8, *cursor++);

  // Copy the text out, so the temporary String can be released here
  char *copy = strdup (string_cstr (utf8));
  string_destroy (utf8);
  return copy;
  }


/*============================================================================
  wstring_append_c

  Appends the single value c to self, growing the buffer by one element
  and keeping it zero-terminated. 

  If self is NULL, or the buffer can't be enlarged, self is left exactly
  as it was. As this function returns nothing, the value is dropped 
  without any indication to the caller in that case.
============================================================================*/
void wstring_append_c (WString *self, const uint32_t c)
  {
  if (!self) return;

  int l = wstring_length (self);

  // Keep the old pointer until realloc() is known to have succeeded; on
  //  failure the original buffer is still valid, and still owned by self
  uint32_t *grown = realloc (self->str, ((size_t) l + 2) * sizeof *grown);
  if (!grown) return;

  grown[l] = c;
  grown[l+1] = 0; 
  self->str = grown;
  self->len = l + 1;
  }


/*============================================================================
  wstring_append

  Appends the contents of other to self. other is not modified, and may be
  NULL, in which case it is treated as empty. Appending a string to itself 
  is supported.

  If self is NULL, or the buffer can't be enlarged, self is left exactly
  as it was.
============================================================================*/
void wstring_append (WString *self, const WString *other)
  {
  if (!self) return;

  int mylen = wstring_length (self);
  int otherlen = wstring_length (other);
  size_t total = (size_t) mylen + (size_t) otherlen;

  // Keep the old pointer until realloc() is known to have succeeded
  uint32_t *grown = realloc (self->str, (total + 1) * sizeof *grown);
  if (!grown) return;

  // Store the new buffer before reading other->str: if other is the same
  //  object as self, its old buffer may just have been released
  self->str = grown;

  // otherlen is zero when other is NULL, so other is not dereferenced then
  if (otherlen > 0)
    memcpy (grown + mylen, other->str, (size_t) otherlen * sizeof *grown);

  grown[total] = 0; 
  self->len = (int) total;
  }


/*============================================================================
  wstring_clear

  Makes self an empty string. Normally the buffer is replaced by a fresh
  one holding only the zero terminator. If that allocation fails, the 
  existing buffer is kept and truncated in place instead. A NULL self is
  ignored.
============================================================================*/
void  wstring_clear (WString *self)
  {
  if (!self) return;

  // Allocate the replacement before giving up the old buffer, so self 
  //  never ends up without a usable buffer because of this call
  uint32_t *fresh = calloc (1, sizeof *fresh);
  if (fresh)
    {
    free (self->str);
    self->str = fresh;
    }
  else if (self->str)
    self->str[0] = 0;

  self->len = 0;
  }


/*============================================================================
  wstring_is_whitespace

  Returns TRUE if every element of self is a space, a newline or a tab, 
  and FALSE as soon as any other value is found. An empty string gives 
  TRUE. self must not be NULL.
============================================================================*/
BOOL wstring_is_whitespace (const WString *self)
  {
  int remaining = wstring_length (self);
  const uint32_t *p = self->str;

  while (remaining-- > 0)
    {
    switch (*p++)
      {
      case ' ':
      case '\n':
      case '\t':
        break; // Still whitespace; look at the next element

      default:
        return FALSE;
      }
    }

  return TRUE;
  }





Generated by GNU Enscript 1.6.6, and GophHub 1.3.