/* fixedstr.c - fixed string search using Boyer-Moore algorithm
 * Copyright (C) 1995-99 Andrew Pipkin (minitrue@pagesz.net)
 * MiniTrue is free software released with no warranty. See COPYING for details
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "minitrue.h"
#include "fixedstr.h"

/* Upper bound applied to every entry written into a Boyer-Moore skip table by
 * FixedStr_init(); it starts at 255 and FixedStr_Set_max_skip() can only
 * lower it */
static size_t Max_skip = 255;
/* Skip-table search used by FixedStr_find() when str->table is non-NULL; the
 * definition further down is selected by the __DOS16__ conditional */
static char *boyer_moore(FixedStr *str, const char *scan_start,
                               const char *scan_end);


/* Prepare str for searching for the len bytes that start at src.
 *
 *   str - search descriptor to fill in
 *   src - the pattern. Only the pointer is stored, so the bytes must stay
 *         valid for as long as str is used. It is assumed that escape
 *         characters have already been processed and that the text has been
 *         converted to lower case if case insensitivity is on
 *   len - number of bytes in the pattern
 *
 * One of two search strategies is selected:
 *   memchr mode      - str->table is NULL and str->penult points at the
 *                      pattern character that will be handed to memchr.
 *                      Chosen when len <= MEMCHR_MAX_LEN and the pattern
 *                      holds a character whose upper and lower case forms
 *                      are identical; the candidate with the lowest
 *                      char_freq() value wins
 *   Boyer-Moore mode - str->table is a freshly allocated NCHAR-entry skip
 *                      table (released by FixedStr_kill) and str->penult
 *                      points at the next-to-last pattern character. No skip
 *                      stored in the table exceeds Max_skip */
void FixedStr_init(FixedStr *str, const char *src, size_t len)
{
    size_t ch_i, default_skip;
    int search_ch_i = -1; /* Index of char to search for if memchr used */

    str->start  = src;
    str->len    = len;

 /* Short pattern: look for the rarest character that has no distinct
  *   upper/lower case form. search_ch_i stays -1 if there is none */
    if(len <= MEMCHR_MAX_LEN)
    {   int min_freq = FREQ_TOTAL;
        for(ch_i = 0; ch_i < len; ++ch_i)
        {   /* Convert through uchar, exactly as the skip-table loop below
             *   does, so that characters above 0x7f are not sign-extended
             *   into negative values before being handed to char_freq and
             *   the case macros */
            int ch = (uchar)src[ch_i], freq = char_freq(ch);
            if(up_casE(ch) == low_casE(ch) && freq < min_freq)
            {   min_freq    = freq;
                search_ch_i = (int)ch_i;
            }
        }
    }

    if(search_ch_i != -1)
    {   str->penult = &src[search_ch_i];
        str->table  = NULL;
    }

 /* Set up table for Boyer-Moore searching if memchr will not be used */
    else
    {   /* For a one character pattern this is src - 1; boyer_moore detects
         *   that case by testing for penult < start */
        str->penult = src + (len - 2);
        str->table  = x_farmalloc(NCHAR * sizeof(skip_t));

     /* Characters absent from the pattern skip the whole pattern length,
      *   limited to Max_skip */
        default_skip = miN(len, Max_skip);
        for(ch_i = 0; ch_i < NCHAR; ++ch_i)
            str->table[ch_i] = default_skip;

     /* Skip for a pattern character is its distance from the pattern end, so
      *   the last character gets 0 and later occurrences overwrite earlier
      *   ones. Distances of Max_skip or more keep the default.
      * Upper and lower case will have same skip if case insens. desired */
        for(ch_i = 0; ch_i < len; ++ch_i)
        {   int ch      = (uchar)src[ch_i], up_ch = (uchar)up_casE(ch);
            size_t skip = len - ch_i - 1;
            if(skip < Max_skip)
                str->table[ up_ch ] = str->table[ ch ] = skip;
        }
    }
}

/* Destructor: releases the skip table held by str, if it has one.
 * Nothing else reachable from str is freed here. */
void FixedStr_kill(FixedStr *str)
{
    /* memchr-mode strings carry no table, so there is nothing to release */
    if(str->table == NULL)
        return;

    farfree(str->table);
}

/* Tighten the limit on Boyer-Moore skip lengths to new_max.
 * The limit can only shrink: a request that is not below the current value
 *   is ignored. Max_skip is read when FixedStr_init builds a table, so
 *   tables that already exist are not touched.
 * NOTE(review): a new_max of 0 would leave every entry of later tables at 0,
 *   including those of characters that are not in the pattern - confirm
 *   that callers never pass 0 */
void FixedStr_Set_max_skip(size_t new_max)
{
    if(Max_skip <= new_max)
        return;

    Max_skip = new_max;
}

/* Search the buffer [scan_start, scan_end) for the string described by str.
 *
 *   str        - descriptor prepared by FixedStr_init
 *   scan_start - first buffer character
 *   scan_end   - one past the last buffer character to be searched
 *
 * Returns a pointer to the first character of the match, or scan_end + 1
 *   when nothing is found.
 * Buffer characters are passed through low_casE before being compared with
 *   the pattern. */
char *FixedStr_find(FixedStr *str, const char *scan_start,
                          const char *scan_end)
{
    if(str->table)
        return boyer_moore(str, scan_start, scan_end);

 /* memchr mode: look for rarest character in string, then see if surrounding
  * chars match remainder of string */
    else
    {   int ch = *(str->penult), ch_i = (int)(str->penult - str->start);
        int second_off;
        const char *ch_ptr = scan_start + ch_i;
        char second_ch;

     /* Pick a neighbour of the search character for a cheap pre-check: the
      *   character before it when there is one, otherwise the one after it,
      *   or the search character itself for a one character string */
        if(str->len == 1)
            second_off = 0;
        else if(str->penult != str->start)
            second_off = -1;
        else
            second_off = 1;
        second_ch = str->penult[second_off];

     /* The ch_ptr < scan_end test keeps the length given to memchr from
      *   going negative (and so turning into a huge size_t) when the buffer
      *   is shorter than the offset of the search character.
      * The (char) casts mirror the comparisons in boyer_moore so that both
      *   sides of each comparison have the same type.
      * NOTE(review): only the search character is required to lie before
      *   scan_end; pattern characters after it may be compared against data
      *   at or past scan_end, whereas boyer_moore requires the last
      *   character to lie before scan_end - confirm this is intended */
        while(ch_ptr < scan_end
           && (ch_ptr = memchr(ch_ptr, ch, (size_t)(scan_end - ch_ptr))) != NULL)
        {   if(second_ch == (char)low_casE(ch_ptr[second_off]))
            {   const char *buf_str = ch_ptr - ch_i;
                size_t str_i = 0;
                while(str->start[str_i] == (char)low_casE(buf_str[str_i]))
                {   if(++str_i == str->len)
                        return (char *)buf_str;
                }
            }
            ++ch_ptr; /* no match here, resume after this occurrence */
        }
     /* return pointer outside buffer to indicate no match */
        return (char *)scan_end + 1;
    }
}

#ifndef __DOS16__
/* Skip-table search for the first occurrence of str's pattern whose last
 *   character lies before scan_end. Probing starts at scan_start plus the
 *   offset of the pattern's last character.
 * The buffer limit is only examined after every fourth skip, so characters
 *   beyond scan_end are read: it is assumed that at least 2048 characters
 *   after scan_end can be accessed without causing a segmentation fault.
 * Returns a pointer to the start of the match, or scan_end + 1 (a pointer
 *   outside the buffer) when there is no match. */
static char *boyer_moore(FixedStr *str, const char *scan_start,
                         const char *scan_end)
{
    const char *pat_first  = str->start;
    const char *pat_penult = str->penult;
    const char *cursor     = scan_start + (pat_penult - pat_first) + 1;
    const char *pat, *buf;

    for( ; ; )
    {   skip_t final_skip;

     /* The last pattern character has a skip of 0, so once the cursor sits
      *   on it the remaining advances leave it there; testing after every
      *   single advance is therefore unnecessary */
        cursor += str->table[(uchar)*cursor];
        cursor += str->table[(uchar)*cursor];
        cursor += str->table[(uchar)*cursor];
        final_skip = str->table[(uchar)*cursor];
        cursor += final_skip;

        if(cursor >= scan_end)
            break;

     /* A non-zero final skip means the cursor is not yet on the last char */
        if(final_skip != 0)
            continue;

     /* One character pattern: penult lies before start, nothing to verify */
        if(pat_penult < pat_first)
            return (char *)cursor;

     /* Walk backwards from the penultimate character, comparing the pattern
      *   with the lower-cased buffer; reaching the first pattern character
      *   with everything equal is a match */
        pat = pat_penult;
        buf = cursor - 1;
        while(*pat == (char)low_casE(*buf))
        {   if(pat == pat_first)
                return (char *)buf;
            --pat;
            --buf;
        }

        ++cursor; /* mismatch: step past this occurrence and go on skipping */
    }

    return (char *)scan_end + 1;
}
#else  /* Boyer Moore find for 16-bit assembly */
/* This macro moves to the next possible location in which the find string
 *  may be found using the Boyer-Moore algorithm:
 *    AL = byte at [SI];  AL = ES:[BX + AL] (the skip for that byte);
 *    SI += AX
 * Due to a COMPILER ERROR, __emit__ must be used to override the segment for
 *  xlat: 0x26, 0xd7 are the opcodes for xlat es:
 * NOTE(review): the add uses all of AX, so AH is presumably expected to be
 *  zero whenever the macro runs - confirm */
#define next_loc    asm     mov al, byte ptr [si]; \
                    __emit__(0x26, 0xd7);\
                    asm     add si, ax;

/* 16-bit inline assembly version of the skip-table search.
 * Searches the buffer beginning at scan_start for the string described by
 *  str and returns whatever SI holds on exit: on a match SI has been stepped
 *  back to the buffer character matching the first string character,
 *  otherwise it is the scan position at which one of the exits to the
 *  `done` label was taken. */
char *boyer_moore(FixedStr *str, const char *scan_start,
                  const char *scan_end)
{
/* reg usage -
 *    SI points to buffer DI points to string
 *    CX has the end of the search
 *    BX & AX are used for indexing w/ XLAT
 *    DX contains start of string
 *    ES contains segment where skip table is
 *    BP points to data structure  */

/* Load the registers. AX becomes penult - start + 1, the offset of the last
 * string character; if that is negative the function leaves at once with SI
 * still equal to scan_start. Otherwise both the scan pointer and the search
 * limit are advanced by that offset */
asm mov si, scan_start
asm mov cx, scan_end
asm mov bp, str
asm les bx, dword ptr [bp].table
asm mov di, [bp].penult
asm mov dx, [bp].start
asm mov ax, di
asm sub ax, dx
asm inc ax
asm jl  done
asm add si, ax
asm add cx, ax

/* BX is borrowed for Lower_table during comparisons, so every path back into
 * the skip loop comes through here to point it at the skip table again */
reload_skip_table:
asm mov bx, [bp].table

/* Advance in the buffer using Boyer-Moore. The limit is tested once per pass
 * and AL (the most recent skip) after each group of four advances; a skip of
 * zero means the last string character has been reached */
find_last_char:
asm cmp si, cx  /* make sure we have not gone past buffer end */
asm jae done
    next_loc;
    next_loc;
    next_loc;
    next_loc;
asm and al, al
asm jz  found_last_char
    next_loc;
    next_loc;
    next_loc;
    next_loc;

/* Check if found */
asm and al, al
asm jnz find_last_char

/* If last character found and string contains one character (penult lies
 * below start), search is done */
found_last_char:
asm cmp di, dx
asm jb done

/* Check preceding character before setting up loop: the buffer byte at
 * [si - 1] is translated through Lower_table and compared with the
 * penultimate string character. On a mismatch move one byte forward and
 * resume skipping */
    (uchar *)_BX = Lower_table;
asm mov al, [si - 1]
asm xlat
asm cmp al, [di]
asm je compare_string
asm inc si
asm jmp reload_skip_table

/* Now that last two characters match, set up loop to compare preceding chars.
 * SI is saved on the stack so that the scan can resume from this spot if the
 * comparison fails */
compare_string:
asm push si
/* DI reaching the start of the string with every comparison equal means the
 * whole string matched */
compare_prev_char:
asm cmp di, dx
asm je  string_found
asm dec si
asm dec di
asm mov al, [si - 1]
asm xlat
asm cmp al, [di]
asm je  compare_prev_char

/* If character does not match, reset and find next instance of last char */
asm pop si
asm inc si
asm mov di, [bp].penult
asm jmp reload_skip_table

/* Discard the saved scan position, then step SI back once more so that it
 * addresses the buffer byte that matched the first string character */
string_found:
asm pop ax /* clear stack */
asm dec si

done:
    return (char *)_SI;
}
#endif  /*  __DOS16__ */

#ifdef FIXEDSTR_TEST

/* Debugging aid for FIXEDSTR_TEST builds: print the skip table of str on
 *   stdout, one "char -> skip" line for every character whose skip is less
 *   than the pattern length.
 * A string set up for memchr searching has no table (str->table is NULL);
 *   for it a single line naming the offset of the search character is
 *   printed instead of dereferencing the missing table.
 * The value reported as "Default Skip" is the pattern length derived from
 *   penult; FixedStr_init additionally limits the real default to Max_skip. */
void FixedStr_dump(FixedStr *str)
{
    unsigned len, ch_i;
    char buf[10];

    if(str->table == NULL)
    {   printf("No skip table: memchr search on pattern offset %u\n",
               (unsigned)(str->penult - str->start));
        return;
    }

 /* In Boyer-Moore mode penult is start + len - 2, so this recovers len */
    len = (unsigned)(str->penult - str->start + 2);
    printf("Default Skip = %u\n", len);
    for(ch_i = 0; ch_i < NCHAR; ++ch_i)
    {   if(str->table[ch_i] < len)
        {   ch_to_esc(buf, ch_i);
            printf("%s -> %u\n", buf, (unsigned)str->table[ch_i]);
        }
    }
}

/* Test driver: each command line argument is treated as a pattern. It is run
 *   through str_preproc(), whose return value is used as the pattern length,
 *   then a FixedStr is built from it, dumped and destroyed. */
int main(int argc, char *argv[])
{
    int arg_i = 1;

    while(arg_i < argc)
    {
        char *pattern   = argv[arg_i++];
        int pattern_len = str_preproc(pattern);
        FixedStr fixed_str;

        FixedStr_init(&fixed_str, pattern, pattern_len);
        FixedStr_dump(&fixed_str);
        FixedStr_kill(&fixed_str);
    }

    return EXIT_SUCCESS;
}
#endif /* FIXEDSTR_TEST */
