/*
    OpenGUI - Drawing & Windowing library

    Copyright (C) 1996,2000  Marian Krivos

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

    nezmar@internet.alcatel.sk

    base.cc - base graphics support routines   
*/
/*
				PORTING TO OTHER PLATFORMS
				
1) First of all, you must adapt the graph_set_mode() routine so that it
   correctly switches the graphics hardware into the requested video mode.

2) In the second phase, adapt or rewrite the engine.asm file (for processors
   other than x86 you will probably have to write this engine in C first).
   Below these lines is a description of the variables commonly used by the
   graphics routines.

Some variables in visual:

Screen: 'videobase' = offset of first byte in video segment
	|-------------------------------|----
	|                               |	Y
	|                               |	_     Y_max = Y_width-1
	|     SCREEN                    |	w	  X_max = X_width-1
	|                               |	i
	|                               |	d
	|                               |	t
	|                               |   h
	|-------------------------------|----
	|                               |
	|<------- X_width ------------->|

Buffer (i.e. window image, ...) 'Image' = address of first byte in RAM
	|---------------------------|----
	| [cx_work,cy_work]         |
	|  *--------------------|   |	c     Y_max = Y_width-1
	|  |                    |   |	y	  X_max = X_width-1
	|  |                    |   |	_
	|  |                    |   |	m
	|  |--------------------*   |	a
	|    [cx_maxwork,cy_maxwork]|   x
	|                           |
	|---------------------------|----
	|                           |
	|<------- cx_max ---------->|


void L1VideoToRam8(char	*from, char	*to, int w, int h, int off1, int off2);

	move rectangle block from VRAM to RAM
	'from' - address of the first byte in VRAM to move
	'to'   - address of the first byte in RAM to move
	'off1','off2'
		   - off1 is the difference between the buffer width and the width of
		     the moved block (it is added to step to the next line in the
		     buffer); off2 is the difference between the screen width and the
		     width of the moved block (it is added to step to the next line
		     on the screen)
	'w','h'- width and height of moved block

	
void L1RamToVideo2(char	*from, char	*to, int w,	int h, int off1, int off2);
void L1RamToVideo8(char	*from, char	*to, int w,	int h, int off1, int off2);

	same as the previous routine, but they move data from RAM to VRAM; the
	difference is that L1RamToVideo2 honours the current draw mode (_GAND ..)
	
void L1RamToRamd(char *from, char *to, int w, int h, int off1, int off2);
void L1RamToRam(char *from, char *to, int w, int h, int off1, int off2);
void L1RamToRamPpop(char *from, char *to, int w, int h, int off1, int off2);

	same as the previous routines, but they move data from RAM to RAM; the
	difference is that L1RamToRamPpop honours the current draw mode (_GAND ..),
	and L1RamToRamd copies in the opposite direction, from the bottom line
	up, for scrolling

void L1Box(char	*ptr, int w, int h, int xoffset);
void L1BoxX(char *ptr, int w, int h, int xoffset);

	fill a block in RAM with the current color; the block starts at address
	'ptr', has size 'w' by 'h', and 'xoffset' is the difference between the
	buffer width and the block width
	
void CharOutClip(char *buf, char *font, int xsize);

	draws a character at address 'buf', using 'font' (pointer to the character
	image); 'xsize' is the width of the buffer being drawn into.
	Font data live in a buffer of fonth*fontw*256 bytes (one byte per pixel);
	the image data of each character are stored contiguously, i.e. char 'a' is
	at addresses from (font+'a'*fonth*fontw) to (font+'a'*fonth*fontw+fonth*fontw)

int set_ppop(int mode);

	set the draw mode (assigns the variable 'ppop' and points some internal
	jump vectors at the appropriate routines)

void draw_line(int x1,int y1,int x2,int y2);

	draw line to screen at position with current color and drawmode
	
void Line(int x1,int y1,int x2,int y2);

	draw line to RAM buffer at position with current color and drawmode
*/

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <stdarg.h>
#include <ctype.h>
#include <time.h>

#ifdef __DJGPP__
#include <dpmi.h>
#include <go32.h>
#include <sys/movedata.h>
#include <sys/farptr.h>
#include <sys/nearptr.h>
#include <sys/segments.h>
#endif

#include "config.h"
#include "base.h"
#include "_fastgl.h"

// Descriptor of one registered bitmap font (filled in by register_font()).
typedef struct
{
	int 	width;		// glyph cell width in pixels
	int 	height;		// glyph cell height in pixels
	int 	variable;	// the 'type' argument given to register_font()
	FGPixel *font;		// glyph base pointer, biased by the first character code
	FGPixel *mem;		// start of the malloc'ed glyph storage (the pointer to free)
} FGFont;

// current system colors (_CurrColor is the color used by the drawing routines)
int _CurrColor,_CurrBkColor;
// clipping co-ordinates for direct-to-screen drawing (see set_clip_rect)
int	clip_x_min,clip_y_min,clip_x_max,clip_y_max;
// address of the in-memory buffer used by the drawing routines;
// normally it holds the address of the current window image buffer
FGPixel *Image;
// size of the in-RAM drawing buffer
int	cx_max,cy_max;
// width of screen-1, height of screen-1
int	X_max, Y_max;
// non-zero if lfb is enabled
int	lfb;
// from VESA info, for paging routines
int granularity;
// offset of VRAM in VideoSegment (non-zero in LINUX only)
FGPixel *videobase;
// default 0xA000 for the DOS 64kb window
unsigned videobasesegment=0xa000;
// selector for VRAM (0xFFFFFFFF = not allocated, see map_linear)
unsigned VideoSelector=0xFFFFFFFF;
// linear address of the mapped FRAMEBUFFER (dos only)
unsigned VideoLinear=0;
// mmx variables
int	mmx_state=0, mmx=0;
// index into the driver table (for paging); also indexes drv_name[]
int	driver=0;
// size and number of the current font
int	fonth,fontw,fontn;
// ptr to font data (one FGPixel per font pixel)
FGPixel *font;
// current drawing mode (_GSET, _GAND ...)
int	ppop;
// size of the screen
int	X_width, Y_width;
// TRUE if graph mode is invoked
int	inGraph	= 0;
// variables for clipping a window to its workspace
int	cx_work, cy_work, cx_maxwork, cy_maxwork;
// internal for polygons
static PolygonStruct ps;
// this is a temp buffer for text drawing (one text line, largest font 20x34)
FGPixel imagebuffer[20 * 34 * MAX_TEXT_LENGTH];
// TRUE if linear framebuffer is used
static int linear=1;
// current palette [ABGR]
unsigned int _fgl_palette[256];
Palette ColorsArray[256];
// current bytes per pixel - for future
int bpp=sizeof(FGPixel);
// NOTE(review): 0x3D4 is presumably the VGA CRT controller index port - confirm
int CRTC = 0x3D4;
// color stored by set_transpcolor(); used as text background in _GTRANSP mode
FGPixel	transp_color;

// table of registered fonts, indexed by font number, and count of registrations
static FGFont FGFonts[FONTLAST];
static int font_counter;
void ReservedColors(void);

#ifdef __MSDOS__
// NOTE(review): presumably scratch register block for DPMI real-mode calls - confirm
static __dpmi_regs dpmir;
#endif

// Conversion table for video modes: maps the library's mode index (0..6) to
// the platform mode number. Exactly one variant is compiled, selected by the
// color-depth configuration macro. On linux the entries are the TEXT/G...
// mode constants; otherwise they are numeric BIOS/VESA mode numbers
// (entry 0 = 3, presumably the text mode - confirm).
#ifdef INDEX_COLORS
#ifdef __linux__
static int mode_tab[7]={TEXT, G320x200x256, G640x480x256, G800x600x256,	G1024x768x256, G1280x1024x256, G1600x1200x256};
#else
static int mode_tab[7]={3, 19, 0x101, 0x103, 0x105, 0x107, 0x120};
#endif
#endif

// 15 bits per pixel
// NOTE(review): the last two non-linux entries are identical (0x119), so the
// highest mode index maps to the same mode as the one below it - confirm intended
#if (DIRECT_COLORS==15)
#ifdef __linux__
static int mode_tab[7]={TEXT, G320x200x32K, G640x480x32K, G800x600x32K, G1024x768x32K, G1280x1024x32K, G1600x1200x32K};
#else
static int mode_tab[7]={3, 0x10d, 0x110, 0x113, 0x116, 0x119, 0x119};
#endif
#endif

// 16 bits per pixel
// NOTE(review): the last two non-linux entries are identical (0x11a) - confirm intended
#if	(DIRECT_COLORS==16)
#ifdef __linux__
static int mode_tab[7]={TEXT, G320x200x64K, G640x480x64K, G800x600x64K, G1024x768x64K, G1280x1024x64K, G1600x1200x64K};
#else
static int mode_tab[7]={3, 0x10e, 0x111, 0x114, 0x117, 0x11a, 0x11a};
#endif
#endif

// true color
#ifdef TRUE_COLORS
#ifdef __linux__
static int mode_tab[7]={TEXT, G320x200x16M32, G640x480x16M32, G800x600x16M32, G1024x768x16M32, G1280x1024x16M32, G1600x1200x16M32};
#else
static int mode_tab[7]={3, 0x10f, 0x112, 0x115, 0x118, 0x11b, 0x11e};
#endif
#endif

// values taken from the VESA info block
int	memorysize,vesaversion,framebuffer;

// Human-readable driver names, indexed by the 'driver' variable.
// The extra entry at index FG_LAST ("NO DETECTED") is the fallback that
// GetDriverName() returns for an out-of-range driver index.
static const char *drv_name[FG_LAST+1]=
{"VESA", "CIRRUS LOGIC", "TRIDENT",
 "S3 OLD", "S3 NEW", "MX", "TSENG3000",	"ATI", "TSENG4000",
 "CHIPS & TECHNOLOGIES", "WESTERN DIGITAL", "3DFX ACCELERATOR", "SGS-THOMPSON RIVA",
 "MATROX","PERMEDIA 2", "INTEL 740", "INTEL 810 (VESA)", "RENDITION VERITE", "NO DETECTED"
};

// Report the color depth the library was compiled for (FASTGL_BPP).
int		get_colordepth(void)
{
	const int depth = FASTGL_BPP;

	return depth;
}

// Remember 'c' as the transparent color; the text routines use it as the
// background color while the draw mode is _GTRANSP.
void	set_transpcolor(FGPixel c)
{
	FGPixel *const slot = &transp_color;

	*slot = c;
}

#ifdef __WATCOMC__				// dpmi reduction to DJGPP interface ...

int	__dpmi_free_dos_memory(int sel)							/* DPMI 0.9 AX=0101 */
{
    union REGS  r;

    r.w.ax = 0x101;                 /* DPMI free DOS memory             */
    r.w.dx = sel;                   /* DX := selector from 0x100        */
    int386(0x31, &r, &r);
	if (r.w.cflag	& 1)
		return -1;				// error
	return 0;
}

/****************************************************************************
*
* Function:     __dpmi_allocate_dos_memory
* Parameters:   _paragraphs           - Size of the block in 16-byte paragraphs
*               _ret_selector_or_max  - On success receives the protected
*                                       mode selector of the block; on
*                                       failure receives the size (in
*                                       paragraphs) of the largest block
*                                       available
* Returns:      Real mode segment of the block, or -1 on error
*
* Description:  Allocates a block of real mode memory using DPMI services
*               (function 0x0100). This routine returns both a protected
*               mode selector and real mode segment for accessing the
*               memory block.
*
****************************************************************************/
int	__dpmi_allocate_dos_memory(int _paragraphs,	int	*_ret_selector_or_max)
{
	union REGS reg;

	reg.w.ax = 0x100;
	reg.w.bx = _paragraphs;
	int386(0x31, &reg, &reg);
	if (reg.w.cflag	& 1)
	{
		// On failure the host returns the largest available block size in BX;
		// hand it back as the DJGPP interface does instead of leaving the
		// caller's variable untouched.
		*_ret_selector_or_max = reg.w.bx;
		return -1;				// error
	}

	*_ret_selector_or_max =	reg.w.dx;
	return reg.w.ax;
}

int	__dpmi_free_ldt_descriptor(unsigned sel)
{
	union REGS reg;

	reg.w.ax = 0x1;
	reg.w.bx = sel;
	int386(0x31, &reg, &reg);
	if (reg.w.cflag	& 1)
		return -1;				// error
	return 0;
}

/****************************************************************************
*
* Function:     DPMI_allocSelector
* Returns:      Newly allocated protected mode selector
*
* Description:  Allocates a new protected mode selector using DPMI
*               services. This selector has a base address and limit of 0.
*
****************************************************************************/
int	__dpmi_allocate_ldt_descriptors(int _count)						/* DPMI 0.9 AX=0000 */
{
    int         sel;
    union REGS  r;

    r.w.ax = 0;                     /* DPMI allocate selector           */
    r.w.cx = _count;                     /* Allocate a single selector       */
    int386(0x31, &r, &r);
    if (r.x.cflag)
		return -1;
    sel = r.w.ax;

    r.w.ax = 9;                     /* DPMI set access rights           */
    r.w.bx = sel;
    r.w.cx = 0x8092;                /* 32 bit page granular             */
    int386(0x31, &r, &r);
    return sel;
}

/****************************************************************************
*
* Function:     __dpmi_physical_address_mapping
* Parameters:   info    - info->address is the physical memory address to
*                         map and info->size the size of the region; on
*                         success info->address is replaced by the linear
*                         address of the mapping
* Returns:      0 on success, -1 on error
*
* Description:  Maps a section of physical memory into the linear address
*               space of a process using DPMI function 0x0800. Note that
*               this linear address cannot be used directly, but must be
*               used as the base address for a selector.
*
*               This shim follows the DJGPP calling convention: the linear
*               address is written back into info->address, which is what
*               map_linear() reads after the call.
*
****************************************************************************/
int	__dpmi_physical_address_mapping(__dpmi_meminfo *info)					/* DPMI 0.9 AX=0800 */
{
    union REGS  r;

    r.w.ax = 0x800;                 /* DPMI map physical to linear      */
    r.w.bx = info->address >> 16;
    r.w.cx = info->address & 0xFFFF;
    r.w.si = info->size >> 16;
    r.w.di = info->size & 0xFFFF;
    int386(0x31, &r, &r);
    if (r.x.cflag)
        return -1;
    /* BX:CX holds the linear address; hand it back through the structure */
    info->address = ((unsigned long)r.w.bx << 16) + r.w.cx;
    return 0;
}

// Stub: this shim does not implement DPMI function 0x0801, so the call
// always reports failure (-1) and the argument is ignored.
int	__dpmi_free_physical_address_mapping(__dpmi_meminfo *)				/* DPMI 0.9 AX=0801 */
{
	const int not_implemented = -1;

	return not_implemented;
}

/****************************************************************************
*
* Function:     DPMI_setSelectorBase
* Parameters:   sel     - Selector to change base address for
*               linAddr - Linear address used for new base address
*
* Description:  Sets the base address for the specified selector.
*
****************************************************************************/
int	__dpmi_set_segment_base_address(int sel, unsigned long linAddr)			/* DPMI 0.9 AX=0007 */
{
    union REGS  r;

    r.w.ax = 7;                     /* DPMI set selector base address   */
    r.w.bx = sel;
    r.w.cx = linAddr >> 16;
    r.w.dx = linAddr & 0xFFFF;
    int386(0x31, &r, &r);
    if (r.x.cflag)
		return -1;
	return 0;
}

/****************************************************************************
*
* Function:     DPMI_setSelectorLimit
* Parameters:   sel     - Selector to change limit for
*               limit   - Limit-1 for the selector
*
* Description:  Sets the memory limit for the specified selector.
*
****************************************************************************/
int	__dpmi_set_segment_limit(int sel, unsigned long limit)				/* DPMI 0.9 AX=0008 */
{
    union REGS  r;

    r.w.ax = 8;                     /* DPMI set selector limit          */
    r.w.bx = sel;
    r.w.cx = limit >> 16;
    r.w.dx = limit & 0xFFFF;
    int386(0x31, &r, &r);
    if (r.x.cflag)
		return -1;
	return 0;
}

#ifdef __MSDOS__
// seg_overlay() is defined outside this file; qnx_segment_overlay_flags() is
// a thin forwarder to it.
unsigned seg_overlay(void);
unsigned qnx_segment_overlay_flags(void);
// NOTE(review): the "#pragma aux" lines presumably set the external symbol
// names Watcom generates ("_*" = leading underscore, "*_" = trailing
// underscore) so that both names resolve at link time - confirm against the
// Watcom documentation.
#pragma aux seg_overlay "_*";
#pragma aux qnx_segment_overlay_flags "*_";

// Returns whatever seg_overlay() returns (passed through an int temporary).
unsigned qnx_segment_overlay_flags(void)
{
	int val;
	val = (int)seg_overlay();
	return val;
}
#endif
#endif

// Return a printable name for the active video driver.
// With lfb enabled the fixed string "Linear FrameBuffer" is returned;
// otherwise the drv_name[] entry for 'driver', or the last table entry
// when 'driver' is out of range.
const	char *GetDriverName(void)
{
	int idx = FG_LAST;		// fallback entry

	if (lfb)
		return "Linear FrameBuffer";
	if (driver >= 0 && driver < FG_LAST)
		idx = driver;
	return drv_name[idx];
}

// Combine the current color into the pixel at 'ptr' according to the draw
// mode 'ppop':
//   1 xor, 2 and, 3 or, 4 add, 5 subtract, 6 flip the low 8 bits,
//   7 overwrite only pixels whose value is >= 16,
//   0 and every other value: plain overwrite.
void modify_point(FGPixel *ptr)
{
	switch (ppop)
	{
		case 1:					// xor
			*ptr = *ptr ^ _CurrColor;
			break;
		case 2:					// and
			*ptr = *ptr & _CurrColor;
			break;
		case 3:					// or
			*ptr = *ptr | _CurrColor;
			break;
		case 4:					// plus
			*ptr = *ptr + _CurrColor;
			break;
		case 5:					// minus
			*ptr = *ptr - _CurrColor;
			break;
		case 6:					// not
			*ptr = *ptr ^ 255;
			break;
		case 7:					// replace, keeping pixel values below 16
			if (*ptr >= 16)
				*ptr = _CurrColor;
			break;
		case 0:					// set
		default:
			*ptr = _CurrColor;
			break;
	}
}

// Draw one pixel into the RAM image with the current color and draw mode.
// Points outside the window workspace [cx_work,cx_maxwork) x
// [cy_work,cy_maxwork) are ignored.
void Pixel(int x, int y)
{
	const int in_workspace = (y >= cy_work && x >= cx_work &&
	                          y < cy_maxwork && x < cx_maxwork);

	if (in_workspace)
		modify_point(Image + cx_max * y + x);
}

// Fill a box in the RAM image, clipped to the whole buffer (cx_max x cy_max)
// but NOT to the window workspace.
// Nothing is drawn when the origin lies outside the buffer or the size is
// not positive; the right and bottom edges are trimmed to the buffer.
void Boxw(int x, int y,	int	w, int h)
{
	if (w < 1 || h < 1)
		return;
	if (x < 0 || y < 0 || x >= cx_max || y >= cy_max)
		return;

	// trim the part hanging over the right / bottom edge
	if (x + w > cx_max)
		w = cx_max - x;
	if (y + h > cy_max)
		h = cy_max - y;

	L1Box(Image + cx_max * y + x, w, h, cx_max - w);
}

// Fill a box in the RAM image, clipped to the window workspace
// [cx_work,cx_maxwork) x [cy_work,cy_maxwork).
void Box(int x,	int y, int w, int h)
{
	int right, bottom;

	// trivial rejects: empty box, or box entirely outside the workspace
	if (w < 1 || h < 1)
		return;
	if (x >= cx_maxwork || y >= cy_maxwork)
		return;
	if ((x + w) <= cx_work || (y + h) <= cy_work)
		return;

	// clip right and bottom edges
	right = x + w;
	if (right > cx_maxwork)
	{
		w -= right - cx_maxwork;
		right = x + w;
	}
	bottom = y + h;
	if (bottom > cy_maxwork)
	{
		h -= bottom - cy_maxwork;
		bottom = y + h;
	}

	// clip left and top edges
	if (x < cx_work && right > cx_work)
	{
		w = right - cx_work;
		x = cx_work;
	}
	if (y < cy_work && bottom > cy_work)
	{
		h = bottom - cy_work;
		y = cy_work;
	}

	L1BoxX(Image + cx_max * y + x, w, h, cx_max - w);
}

// Internal helper for DrawCircle(): plot the eight symmetric points of the
// circle centred at (xs,ys) for the octant offset (x,y) into the RAM image.
static void	_symetry(int xs, int ys, int x,	int	y)
{
	int pass;

	// pass 0 uses the offset (x,y), pass 1 the mirrored offset (y,x)
	for (pass = 0; pass < 2; pass++)
	{
		const int dx = pass ? y : x;
		const int dy = pass ? x : y;

		Pixel(xs + dx, ys + dy);
		Pixel(xs - dx, ys + dy);
		Pixel(xs + dx, ys - dy);
		Pixel(xs - dx, ys - dy);
	}
}

// Internal helper for FillCircle(): draw the four horizontal spans of the
// circle centred at (xs,ys) for the octant offset (x,y) into the RAM image.
static void	_symetry2(int xs, int ys, int x, int y)
{
	int pass;

	// pass 0 uses the offset (x,y), pass 1 the mirrored offset (y,x)
	for (pass = 0; pass < 2; pass++)
	{
		const int dx = pass ? y : x;
		const int dy = pass ? x : y;

		Line(xs + dx, ys + dy, xs - dx, ys + dy);
		Line(xs + dx, ys - dy, xs - dx, ys - dy);
	}
}

// Fill a circle of radius r centred at (xs,ys) in the RAM image.
// Nothing is drawn for r < 1 or when the bounding box lies completely
// outside the window workspace.
void FillCircle(int	xs,	int	ys,	int	r)
{
	int	x, y, p;

	if (r < 1)
		return;
	if ((ys + r) < cy_work || (xs + r) < cx_work)
		return;
	if ((ys - r) >= cy_maxwork || (xs - r) >= cx_maxwork)
		return;

	y = r;
	p = 3 - 2 * r;				// initial decision variable
	for (x = 0; x < y; x++)
	{
		_symetry2(xs, ys, x, y);
		if (p < 0)
			p += 4 * x + 6;
		else
		{
			p += 4 * (x - y) + 10;
			y--;
		}
	}
	if (x == y)
		_symetry2(xs, ys, x, y);
}

// Draw the outline of a circle of radius r centred at (xs,ys) in the RAM
// image. Nothing is drawn for r < 1 or when the bounding box lies completely
// outside the window workspace.
void DrawCircle(int	xs,	int	ys,	int	r)
{
	int	x, y, p;

	if (r < 1)
		return;
	if ((ys + r) < cy_work || (xs + r) < cx_work)
		return;
	if ((ys - r) >= cy_maxwork || (xs - r) >= cx_maxwork)
		return;

	y = r;
	p = 3 - 2 * r;				// initial decision variable
	for (x = 0; x < y; x++)
	{
		_symetry(xs, ys, x, y);
		if (p < 0)
			p += 4 * x + 6;
		else
		{
			p += 4 * (x - y) + 10;
			y--;
		}
	}
	if (x == y)
		_symetry(xs, ys, x, y);
}

// Internal helper for fill_circle(): draw the four horizontal spans of the
// circle centred at (xs,ys) for the octant offset (x,y) on the screen.
static void	_vsymetry2(int xs, int ys, int x, int y)
{
	int pass;

	// pass 0 uses the offset (x,y), pass 1 the mirrored offset (y,x)
	for (pass = 0; pass < 2; pass++)
	{
		const int dx = pass ? y : x;
		const int dy = pass ? x : y;

		draw_line(xs + dx, ys + dy, xs - dx, ys + dy);
		draw_line(xs + dx, ys - dy, xs - dx, ys - dy);
	}
}

// Fill a circle of radius r centred at (xs,ys) directly on the screen.
// Nothing is drawn for r < 1 or when the bounding box lies completely
// outside the screen clip rectangle.
void fill_circle(int xs, int ys, int r)
{
	int	x, y, p;

	if (r < 1)
		return;
	if ((ys + r) < clip_y_min || (xs + r) < clip_x_min)
		return;
	if ((ys - r) >= clip_y_max || (xs - r) >= clip_x_max)
		return;

	y = r;
	p = 3 - 2 * r;				// initial decision variable
	for (x = 0; x < y; x++)
	{
		_vsymetry2(xs, ys, x, y);
		if (p < 0)
			p += 4 * x + 6;
		else
		{
			p += 4 * (x - y) + 10;
			y--;
		}
	}
	if (x == y)
		_vsymetry2(xs, ys, x, y);
}

// Internal helper for draw_circle(): plot the eight symmetric points of the
// circle centred at (xs,ys) for the octant offset (x,y) on the screen.
static void	_vsymetry(int xs, int ys, int x, int y)
{
	int pass;

	// pass 0 uses the offset (x,y), pass 1 the mirrored offset (y,x)
	for (pass = 0; pass < 2; pass++)
	{
		const int dx = pass ? y : x;
		const int dy = pass ? x : y;

		draw_point(xs + dx, ys + dy);
		draw_point(xs - dx, ys + dy);
		draw_point(xs + dx, ys - dy);
		draw_point(xs - dx, ys - dy);
	}
}

// Draw the outline of a circle of radius r centred at (xs,ys) directly on
// the screen. Nothing is drawn for r < 1 or when the bounding box lies
// completely outside the screen clip rectangle.
void draw_circle(int xs, int ys, int r)
{
	int	x, y, p;

	if (r < 1)
		return;
	if ((ys + r) < clip_y_min || (xs + r) < clip_x_min)
		return;
	if ((ys - r) >= clip_y_max || (xs - r) >= clip_x_max)
		return;

	y = r;
	p = 3 - 2 * r;				// initial decision variable
	for (x = 0; x < y; x++)
	{
		_vsymetry(xs, ys, x, y);
		if (p < 0)
			p += 4 * x + 6;
		else
		{
			p += 4 * (x - y) + 10;
			y--;
		}
	}
	if (x == y)
		_vsymetry(xs, ys, x, y);
}

// draw vertical line to RAM
void Vline(int x, int y, int c)
{
	FGPixel *ptr;
	int	i;

	if (y <	0 || x < 0 || y	>= cy_max || x >= cx_max ||	c<1)
		return;
	if ((y + c)	> cy_max)
		c -= ((y + c) -	cy_max);
	ptr	= Image	+ cx_max * y + x;

	for	(i = c;	i; i--)
	{
		*ptr = _CurrColor;
		ptr	+= cx_max;
	}
}

// draw horiz. line to RAM
static void do_hline(int x, int y, int c)
{
	FGPixel *ptr;
	int	j;

	ptr	= Image	+ cx_max * y + x;

	switch(ppop)
	{
		default:
		case 0:	 // set
			FGmemset(ptr, _CurrColor, c);
			break;
		case 1:	 // xor
			for	(j = c;	j; j--)
			{
				*ptr++ ^= _CurrColor;
			}
			break;
		case 2:	 // and
			for	(j = c;	j; j--)
			{
				*ptr++ &= _CurrColor;
			}
			break;
		case 3:	 // or
			for	(j = c;	j; j--)
			{
				*ptr++ |= _CurrColor;
			}
			break;
		case 4:	 // plus
			for	(j = c;	j; j--)
			{
				*ptr++ += _CurrColor;
			}
			break;
		case 5:	 // minus
			for	(j = c;	j; j--)
			{
				*ptr++ -= _CurrColor;
			}
			break;
		case 6:	 // not
			for	(j = c;	j; j--)
			{
				*ptr++ ^= 255;
			}
			break;
		case 7:	 // replace
			for	(j = c;	j; j--)
			{
				if (*(ptr) >= 16) *ptr = _CurrColor;
				ptr++;
			}
			break;
	}
}

// Draw a horizontal line of c pixels to the right of (x,y) in the RAM image,
// using the current draw mode. Clipped to the buffer only (the start point
// must be inside it; the right end is trimmed).
void Hline(int x, int y, int c)
{
	if (c < 1 || x < 0 || y < 0 || x >= cx_max || y >= cy_max)
		return;
	if (x + c > cx_max)
		c = cx_max - x;
	do_hline(x, y, c);
}

// Draw a horizontal line of c pixels to the right of (x,y) in the RAM image,
// using the current draw mode and clipping both ends to the window workspace.
void Hlinew(int x, int y, int c)
{
	int end = x + c;			// one past the last pixel

	if (y < cy_work || y >= cy_maxwork)
		return;
	if (end < cx_work || x >= cx_maxwork)
		return;

	if (end > cx_maxwork)
		end = cx_maxwork;		// trim the right end
	if (x < cx_work)
		x = cx_work;			// trim the left end

	c = end - x;
 	if (c < 1)
		return;
	do_hline(x, y, c);
}

// Draw a rectangle outline in the RAM image using Hline()/Vline(), i.e.
// clipped to the buffer but not to the window workspace.
void Rectw(int x, int y, int w,	int	h)
{
	const int bottom = y + h - 1;
	const int right  = x + w - 1;

	Hline(x, y, w);				// top edge
	Hline(x, bottom, w);		// bottom edge
	Vline(x, y, h);				// left edge
	Vline(right, y, h);			// right edge
}

// Draw a rectangle outline in the RAM image as four Line() calls.
void Rect(int x, int y,	int	w, int h)
{
	const int right  = x + w - 1;
	const int bottom = y + h - 1;

	Line(x, y, right, y);				// top edge
	Line(x, y, x, bottom);				// left edge
	Line(x, bottom, right, bottom);		// bottom edge
	Line(right, y, right, bottom);		// right edge
}

// This is a very important procedure - it must be fast.
// Copy a w x h rectangle from the RAM image (source corner x,y) to the
// screen (destination corner xm,ym), overwriting the destination.
// The rectangle is clipped to the RAM buffer and to the screen clip
// rectangle; nothing is copied when the result is empty.
void RamToVideo(int	x, int y, int xm, int ym, int w, int h)
{
	FGPixel *src, *dst;

	if (y <	0 || x < 0 || y	>= cy_max || x >= cx_max ||	w<1	|| h<1)
		return;
	if ((ym	+ h) < clip_y_min || (xm + w) <	clip_x_min || ym >=	clip_y_max || xm >=	clip_x_max)
		return;

	// clip to the source buffer
	if ((x + w)	> cx_max)
		w -= ((x + w) -	cx_max);
	if ((y + h)	> cy_max)
		h -= ((y + h) -	cy_max);

	// Clip to the screen clip rectangle. Both edges are tested
	// independently: a block that overlaps the clip rectangle on both
	// sides must be trimmed on both sides.
	if ((xm	+ w) > clip_x_max)
		w -= ((xm +	w) - clip_x_max);
	if (xm < clip_x_min)
	{
		w += (xm - clip_x_min);
		x += clip_x_min	- xm;
		xm = clip_x_min;
	}

	if ((ym	+ h) > clip_y_max)
		h -= ((ym +	h) - clip_y_max);
	if (ym < clip_y_min)
	{
		h += (ym - clip_y_min);
		y += clip_y_min	- ym;
		ym = clip_y_min;
	}

	if (w<1 || h<1) return;

	// last-resort guard kept from the original code
	if (xm<0) xm=0;
	if (ym<0) ym=0;

	src	= Image	+ y	* cx_max + x;
	dst	= (FGPixel *) (ym * X_width + xm +	videobase);
	L1RamToVideo8(src, dst, w, h,	cx_max - w,	X_width	- w);
}

// This is a very important procedure - it must be fast.
// Copy a w x h rectangle from the RAM image (source corner x,y) to the
// screen (destination corner xm,ym) through L1RamToVideo2, the variant
// that works with the current draw mode (_GAND ..).
// The rectangle is clipped to the RAM buffer and to the screen clip
// rectangle; nothing is copied when the result is empty.
void RamToVideo2(int x,	int	y, int xm, int ym, int w, int h)
{
	FGPixel *src, *dst;

	if (y <	0 || x < 0 || y	>= cy_max || x >= cx_max ||	w<1	|| h<1)
		return;
	if ((ym	+ h) < clip_y_min || (xm + w) <	clip_x_min || ym >=	clip_y_max || xm >=	clip_x_max)
		return;

	// clip to the source buffer
	if ((x + w)	> cx_max)
		w -= ((x + w) -	cx_max);
	if ((y + h)	> cy_max)
		h -= ((y + h) -	cy_max);

	// Clip to the screen clip rectangle. Both edges are tested
	// independently: a block that overlaps the clip rectangle on both
	// sides must be trimmed on both sides, otherwise the destination
	// pointer would start outside the clip rectangle.
	if ((xm	+ w) > clip_x_max)
		w -= ((xm +	w) - clip_x_max);
	if (xm < clip_x_min)
	{
		w += (xm - clip_x_min);
		x += clip_x_min	- xm;
		xm = clip_x_min;
	}
	if ((ym	+ h) > clip_y_max)
		h -= ((ym +	h) - clip_y_max);
	if (ym < clip_y_min)
	{
		h += (ym - clip_y_min);
		y += clip_y_min	- ym;
		ym = clip_y_min;
	}
	if (w<1 || h<1) return;

	src	= Image	+ y	* cx_max + x;
	dst	= (FGPixel *) (ym * X_width + xm +	videobase);
	L1RamToVideo2(src, dst, w, h,	cx_max - w,	X_width	- w);
}

// This is a very important procedure - it must be fast.
// Copy a w x h rectangle from the screen (source corner x,y) into the RAM
// image (destination corner xm,ym), overwriting the destination.
// The rectangle is clipped to the screen clip rectangle and to the RAM
// buffer; nothing is copied when the result is empty.
void VideoToRam(int	x, int y, int xm, int ym, int w, int h)
{
	FGPixel *src, *dst;

	if ((y + h)	< clip_y_min ||	(x + w)	< clip_x_min ||	y >= clip_y_max	|| x >=	clip_x_max || w<1 || h<1)
		return;
	if (ym < 0 || xm < 0 ||	ym >= cy_max ||	xm >= cx_max)
		return;

	// Clip to the screen clip rectangle. Both edges are tested
	// independently: a block that overlaps the clip rectangle on both
	// sides must be trimmed on both sides.
	if ((x + w)	> clip_x_max)
		w -= ((x + w) -	clip_x_max);
	if (x < clip_x_min)
	{
		w += (x	- clip_x_min);
		xm += clip_x_min - x;
		x =	clip_x_min;
	}
	if ((y + h)	> clip_y_max)
		h -= ((y + h) -	clip_y_max);
	if (y < clip_y_min)
	{
		h += (y	- clip_y_min);
		ym += clip_y_min - y;
		y =	clip_y_min;
	}

	// clip to the destination buffer
	if ((xm	+ w) > cx_max)
		w -= ((xm +	w) - cx_max);
	if ((ym	+ h) > cy_max)
		h -= ((ym +	h) - cy_max);

	if (w<1 || h<1) return;
	dst	= Image	+ ym * cx_max +	xm;
	src	= (FGPixel *) (y *	X_width	+ x	+ videobase);
	L1VideoToRam8(src, dst, w, h,	cx_max - w,	X_width	- w);
}

// This is a very important procedure - it must be fast.
// Copy a w x h rectangle between two RAM buffers.
//   x, y, xmax, ymax             - source corner and source buffer size
//   xdst, ydst, xmaxdst, ymaxdst - destination corner and buffer size
//   from, to                     - source and destination buffers
// Both corners must lie inside their buffers; the size is trimmed so the
// rectangle fits both. In _GTRANSP draw mode the copy goes through
// L1RamToRamPpop, otherwise through L1RamToRam.
void RamToRam(int x, int y,	int	xmax, int ymax,	int	xdst, int ydst,	int	xmaxdst, int ymaxdst, int w, int h,	FGPixel *from,	FGPixel *to)
{
	FGPixel *src, *dst;

	if (w < 1 || h < 1)
		return;
	if (x < 0 || y < 0 || x >= xmax || y >= ymax)
		return;
	if (xdst < 0 || ydst < 0 || xdst >= xmaxdst || ydst >= ymaxdst)
		return;

	// fit into the source buffer
	if (x + w > xmax)
		w = xmax - x;
	if (y + h > ymax)
		h = ymax - y;
	// fit into the destination buffer
	if (xdst + w > xmaxdst)
		w = xmaxdst - xdst;
	if (ydst + h > ymaxdst)
		h = ymaxdst - ydst;

	if (w < 1 || h < 1)
		return;

	src = from + y * xmax + x;
	dst = to + ydst * xmaxdst + xdst;
	if (ppop == _GTRANSP)
		L1RamToRamPpop(src, dst, w, h, xmaxdst - w, xmax - w);
	else
		L1RamToRam(src,	dst, w,	h, xmaxdst - w,	xmax - w);
}

// This is a very important procedure - it must be fast.
// Same as RamToRam(), but the rows are copied from the bottom one up:
// the pointers start on the last row of the rectangle and the line steps
// handed to L1RamToRam are negative. The draw mode is not consulted.
void RamToRamd(int x, int y, int xmax, int ymax, int xdst, int ydst, int xmaxdst, int ymaxdst, int w, int h, FGPixel *from, FGPixel *to)
{
	FGPixel *src, *dst;

	if (w < 1 || h < 1)
		return;
	if (x < 0 || y < 0 || x >= xmax || y >= ymax)
		return;
	if (xdst < 0 || ydst < 0 || xdst >= xmaxdst || ydst >= ymaxdst)
		return;

	// fit into the source buffer
	if (x + w > xmax)
		w = xmax - x;
	if (y + h > ymax)
		h = ymax - y;
	// fit into the destination buffer
	if (xdst + w > xmaxdst)
		w = xmaxdst - xdst;
	if (ydst + h > ymaxdst)
		h = ymaxdst - ydst;

	if (w < 1 || h < 1)
		return;

	// start on the last row of each rectangle
	src = from + (y + h - 1) * xmax + x;
	dst = to + (ydst + h - 1) * xmaxdst + xdst;
	L1RamToRam(src,	dst, w,	h, -(xmaxdst + w), -(xmax +	w));
}

// Draw text into an arbitrary RAM buffer with the current font.
//   x, y         - position of the text inside the buffer
//   s            - zero terminated string; at most MAX_TEXT_LENGTH characters
//                  are drawn. The string itself is NOT modified.
//   xsize, ysize - buffer width and height in pixels
//   buf          - the buffer
// Nothing is drawn when the text lies completely outside the buffer.
void text_to_buffer(int	x, int y, char *s, int xsize, int ysize, FGPixel *buf)
{
	int	len = strlen(s);
	int	width =	len * fontw;
	int	i;

	if ((y + fonth)	< 0	|| (x +	width) < 0 || y	>= ysize ||	x >= xsize)
		return;
	// limit the length by counting instead of writing a terminator into the
	// caller's string (which may be a read-only literal)
	if (len > MAX_TEXT_LENGTH)
		len = MAX_TEXT_LENGTH;
#ifndef	NO_MMX
	set_mmx();
#endif
	buf	+= y*xsize+x;
	for (i = 0; i < len; i++)
	{
		// index glyphs by unsigned character code: a plain (signed) char
		// above 127 would give a negative glyph offset
		CharOutClip(buf, (FGPixel *) ((unsigned char)s[i] * (fontw * fonth) + font), xsize);
		buf	+= fontw;
	}
#ifndef	NO_MMX
	reset_mmx();
#endif	
}

// Draw text to the window image, clipped to the image buffer only (not to
// the window workspace).
//   x, y - position in the image
//   s    - zero terminated string; at most MAX_TEXT_LENGTH characters are
//          drawn. The string itself is NOT modified.
// The text is rendered into 'imagebuffer' and then copied with RamToRam().
void Textw(int x, int y, char *s)
{
	FGPixel *dst;
	int	len = strlen(s);
	int	width =	len * fontw;
	int	i;

	if ((y + fonth)	< 0	|| (x +	width) < 0 || y	>= cy_max || x >= cx_max)
		return;
	// limit the length by counting instead of writing a terminator into the
	// caller's string (which may be a read-only literal)
	if (len > MAX_TEXT_LENGTH)
	{
		len = MAX_TEXT_LENGTH;
		width =	MAX_TEXT_LENGTH	* fontw;
	}
	dst	= imagebuffer;
#ifndef NO_MMX
	set_mmx();
#endif
	for (i = 0; i < len; i++)
	{
		// index glyphs by unsigned character code: a plain (signed) char
		// above 127 would give a negative glyph offset
		CharOutClip(dst, (FGPixel *) ((unsigned char)s[i] * (fontw * fonth) + font), MAX_TEXT_LENGTH*fontw);
		dst	+= fontw;
	}
#ifndef	NO_MMX
	reset_mmx();
#endif
	RamToRam(0,	0, MAX_TEXT_LENGTH * fontw,	fonth, x, y, cx_max, cy_max, width,	fonth, imagebuffer,	Image);
}

// Draw text to the window image, clipped to the window workspace.
//   x, y - position in the image
//   s    - zero terminated string; at most MAX_TEXT_LENGTH characters are
//          drawn. The string itself is NOT modified.
// The text is rendered into 'imagebuffer' (with transp_color as background
// while the draw mode is _GTRANSP) and the visible part is then copied into
// the image with L1RamToRamPpop.
void Text(int x, int y,	char *s)
{
	FGPixel *dst;
	int tc, i;
	int	len = strlen(s);
	int	w =	len * fontw, h = fonth, hh, ww;

	if ((y + fonth)	< 0	|| (x +	w) < 0 || y	>= cy_max || x >= cx_max)
		return;
	// limit the length by counting instead of writing a terminator into the
	// caller's string (which may be a read-only literal)
	if (len > MAX_TEXT_LENGTH)
	{
		len = MAX_TEXT_LENGTH;
		w =	MAX_TEXT_LENGTH	* fontw;
	}
	if (w <= 0)
		return;
	// ww/hh remember the size before left/top clipping; they must start from
	// the truncated width, otherwise the source offset below is wrong for
	// over-long strings
	ww = w;
	hh = h;
#ifndef	NO_MMX
	set_mmx();
#endif
	dst	= imagebuffer;
	tc = get_bcolor();
	if (ppop == _GTRANSP)
	{
		set_bcolor(transp_color);
	}
	for (i = 0; i < len; i++)
	{
		// index glyphs by unsigned character code: a plain (signed) char
		// above 127 would give a negative glyph offset
		CharOutClip(dst, (FGPixel *) ((unsigned char)s[i] * (fontw * fonth) + font), MAX_TEXT_LENGTH*fontw);
		dst	+= fontw;
	}
	set_bcolor(tc);
#ifndef	NO_MMX
	reset_mmx();
#endif
	if (y >= cy_maxwork	|| x >=	cx_maxwork)
		return;
	if ((y + h)	<= cy_work || (x + w) <= cx_work)
		return;

	// clip right and bottom edges
	if ((x + w) > cx_maxwork)
		ww = (w -= ((x + w) - cx_maxwork));
	if ((y + h) > cy_maxwork)
		hh = (h -= ((y + h) - cy_maxwork));

	// clip left and top edges
	if ((x + w) > cx_work && x < cx_work)
	{
		w = ((x + w) -	cx_work);
		x = cx_work;
	}
	if ((y + h) > cy_work && y < cy_work)
	{
		h = ((y + h) -	cy_work);
		y = cy_work;
	}

	if (w<1 || h<1) return;
	dst	= Image	+ cx_max * y + x;
	// (hh-h) rows and (ww-w) columns were clipped off at the top / left
	L1RamToRamPpop(imagebuffer+(MAX_TEXT_LENGTH * fontw * (hh-h))+(ww-w), dst, w, h, cx_max - w, MAX_TEXT_LENGTH * fontw - w);
}

// Draw text directly to the screen with the current font.
//   x, y - screen position
//   s    - zero terminated string; at most MAX_TEXT_LENGTH characters are
//          drawn. The string itself is NOT modified.
// Returns the x position just behind the drawn text, or x unchanged when the
// text lies completely outside the clip rectangle.
// The text is rendered into 'imagebuffer' (with transp_color as background
// while the draw mode is _GTRANSP); Image/cx_max/cy_max are temporarily
// pointed at that buffer so RamToVideo2() can copy it to the screen.
int	text_out(int x,	int	y, char	*s)
{
	FGPixel *dst, *pom;
	int tc, i;
	int	a =	cx_max,	b =	cy_max;
	int	len = strlen(s);
	int	width =	len * fontw;

	if ((y + fonth)	< clip_y_min ||	(x + width)	< clip_x_min ||	y >= clip_y_max	|| x >=	clip_x_max)
		return x;
	// limit the length by counting instead of writing a terminator into the
	// caller's string (which may be a read-only literal)
	if (len > MAX_TEXT_LENGTH)
	{
		len = MAX_TEXT_LENGTH;
		width =	MAX_TEXT_LENGTH	* fontw;
	}
#ifndef	NO_MMX
	set_mmx();
#endif	
	dst	= imagebuffer;
	pom	= Image;
	cx_max = MAX_TEXT_LENGTH * fontw;
	cy_max = fonth;
	Image =	imagebuffer;

	tc = get_bcolor();
	if (ppop == _GTRANSP)
	{
		set_bcolor(transp_color);
	}

	for (i = 0; i < len; i++)
	{
		// index glyphs by unsigned character code: a plain (signed) char
		// above 127 would give a negative glyph offset
		CharOutClip(dst, (FGPixel *) ((unsigned char)s[i] * (fontw * fonth) + font), MAX_TEXT_LENGTH*fontw);
		dst	+= fontw;
	}
	set_bcolor(tc);
#ifndef	NO_MMX
	reset_mmx();
#endif	
	RamToVideo2(0, 0, x, y,	width, fonth);
	// restore the real image buffer
	Image =	pom;
	cx_max = a;
	cy_max = b;
	return x + width;
}

// Fill a box directly on the screen, clipped to the screen clip rectangle.
void fill_box(int x, int y, int w, int h)
{
	int right, bottom;

	if (w < 1 || h < 1)
		return;
	if (x >= clip_x_max || y >= clip_y_max)
		return;
	if ((x + w) < clip_x_min || (y + h) < clip_y_min)
		return;

	// work with edges: clamp each one to the clip rectangle
	right = x + w;
	bottom = y + h;
	if (right > clip_x_max)
		right = clip_x_max;
	if (bottom > clip_y_max)
		bottom = clip_y_max;
	if (x < clip_x_min)
		x = clip_x_min;
	if (y < clip_y_min)
		y = clip_y_min;

	w = right - x;
	h = bottom - y;
	if (w < 1 || h < 1)
		return;
	_fill_rect(x, y, w,	h);
}

// Draw a rectangle outline directly on the screen as four draw_line() calls.
void draw_box(int x, int y, int w, int h)
{
	const int right  = x + w - 1;
	const int bottom = y + h - 1;

	draw_line(x, y, right, y);				// top edge
	draw_line(x, bottom, right, bottom);	// bottom edge
	draw_line(x, y, x, bottom);				// left edge
	draw_line(right, y, right, bottom);		// right edge
}

// Set the clip rectangle for direct-to-screen drawing (no window!).
//   w, h - size of the rectangle
//   x, y - its upper left corner
// The rectangle is limited to the screen: a negative origin is moved to 0
// and the far edges are limited to X_width / Y_width. clip_x_max and
// clip_y_max are exclusive. The polygon clip state in 'ps' is updated to
// match.
void set_clip_rect(int w, int h, int x,	int	y)
{
	clip_x_min = x;
	clip_y_min = y;
	// clamp the new origin itself (the stale clip_*_max values from the
	// previous call say nothing about it)
	if (clip_x_min < 0)
		clip_x_min = 0;
	if (clip_y_min < 0)
		clip_y_min = 0;
	clip_x_max = x + w;
	clip_y_max = y + h;
	if (clip_x_max > X_width)
		clip_x_max = X_width;
	if (clip_y_max > Y_width)
		clip_y_max = Y_width;

// for polygon routines
	ps.gc_xoffset =	0;
	ps.gc_yoffset =	0;
	ps.gc_xcliplo =	clip_x_min;
	ps.gc_ycliplo =	clip_y_min;
	ps.gc_xcliphi =	clip_x_max-1;
	ps.gc_ycliphi =	clip_y_max-1;

	ps.line	= draw_line;
	ps.scan	= draw_hline;
}

// put block from RAM to VRAM with current draw mode
void put_block(int x, int y, int w,	int	h, FGPixel	*p)
{
	int	a =	cx_max,	b =	cy_max;
	FGPixel *ptr =	Image;

	cx_max = w;
	cy_max = h;
	Image =	(FGPixel *)p;
	if (ppop==_GSET) RamToVideo(0, 0, x, y,	w, h);
	else RamToVideo2(0,	0, x, y, w,	h);
	cx_max = a;
	cy_max = b;
	Image =	ptr;
}

// gut block from VRAM to RAM
void get_block(int x, int y, int w,	int	h, FGPixel *p)
{
	int	a =	cx_max,	b =	cy_max;
	FGPixel *ptr =	Image;

	cx_max = w;
	cy_max = h;
	Image =	(FGPixel *)p;
	VideoToRam(x, y, 0,	0, w, h);
	cx_max = a;
	cy_max = b;
	Image =	ptr;
}

// Return the number of bytes needed to hold a w x h pixel block
// (w * h * bpp, where bpp is the size of one FGPixel).
unsigned int areasize(int w, int h)
{
	const int pixels = w * h;

	return pixels * bpp;
}

// Clear the screen: fill the whole X_width x Y_width area with 'color'.
// Side effects: the current foreground color is changed to 'color' and the
// draw mode is switched to _GSET; neither is restored afterwards.
// The fill goes through fill_box(), so it is limited to the current screen
// clip rectangle.
void clear_frame_buffer(FGPixel color)
{
	set_fcolor(color);
	set_ppop(_GSET);
	fill_box(0, 0, X_width, Y_width);
}

// Expand a packed 1-bit-per-pixel font into one FGPixel per pixel and record
// it in FGFonts[index].
//   source - packed glyph data; every glyph row occupies (width+7)/8 bytes,
//            most significant bit = leftmost pixel
//   width  - glyph width in pixels
//   height - glyph height in pixels
//   count  - number of glyphs in 'source'
//   offset - character code of the first glyph; the stored 'font' pointer is
//            moved back by 'offset' glyphs so it can be indexed directly by
//            character code
//   type   - stored in the 'variable' field
//   index  - slot in FGFonts[] (0 .. FONTLAST-1)
// A set bit becomes the pixel value -1 (all bits set), a clear bit becomes 0.
// The glyph storage is owned by FGFonts[index].mem.
void register_font(unsigned char *source, int width, int height, int count, int offset, int type, int index)
{
	FGPixel *dst;
	int row, col;
	int row_bytes;

	assert(index >= 0 && index < FONTLAST);

	// The allocation and the counter update must not live inside assert():
	// with NDEBUG defined the whole assert expression is removed and the
	// font would never be allocated.
	dst = (FGPixel *)malloc(width*height*count*bpp);
	assert(dst != 0);
	if (dst == 0)
		abort();				// out of memory - same outcome as the assert

	FGFonts[index].font   = dst-(offset*width*height);
	FGFonts[index].mem    = dst;
	FGFonts[index].height = height;
	FGFonts[index].width  = width;
	FGFonts[index].variable = type;

	++font_counter;
	assert(font_counter<=FONTLAST);

	row_bytes = (width + 7) / 8;
	for (; count > 0; count--)
	{
		for (row = 0; row < height; row++)
		{
			for (col = 0; col < width; col++)
				*dst++ = (source[col >> 3] & (0x80 >> (col & 7))) ? -1 : 0;
			source += row_bytes;
		}
	}
}

// Release the expanded pixel data of the registered fonts.
//
// As before, only the slots below font_counter are visited.  In addition to
// freeing FGFonts[i].mem, the stale 'mem' and 'font' pointers are cleared so
// that set_font() sees font==0 and re-expands an on-demand font instead of
// handing out freed memory, and font_counter is reset so the fonts can be
// registered again the next time graphics mode is entered.
void deregister_fonts(void)
{
	int i;

	for (i = 0; i < font_counter; i++)
	{
		free(FGFonts[i].mem);	// free() of a null pointer is a no-op
		FGFonts[i].mem  = 0;
		FGFonts[i].font = 0;
	}
	font_counter = 0;
}

// Make font 'f' the current font and return the previously selected one.
//
// FONT0406/FONT0808/FONT0816 are read straight from the FGFonts table.
// FONT1222/FONT1625/FONT2034 are expanded with register_font() the first
// time they are requested (their FGFonts[].font is still 0).
// For any other value only 'fontn' is updated.
int	set_font(int f)
{
	const int previous = fontn;
	const int is_small = (f == FONT0406 || f == FONT0808 || f == FONT0816);
	const int is_large = (f == FONT1222 || f == FONT1625 || f == FONT2034);

	fontn = f;
	if (!is_small && !is_large)
		return previous;

	// load dynamically - save memory usage
	if (is_large && FGFonts[f].font == 0)
	{
		if (f == FONT1222)
			register_font(miro1220,12,20,  95, ' ', 0, f);
		else if (f == FONT1625)
			register_font(miro1625,16,25,  95, ' ', 0, f);
		else
			register_font(miro2034,20,34,  95, ' ', 0, f);
	}

	font  = FGFonts[f].font;
	fontw = FGFonts[f].width;
	fonth = FGFonts[f].height;
	return previous;
}

// Load some fonts at startup: expand the 4x6, 8x8 and 8x16 bitmaps into the
// FGFonts table via register_font(), then select a default font.
// The 4x6 font covers 95 glyphs starting at ' '; the other two cover 256
// glyphs starting at code 0.
void _expand_font(void)
{
	register_font(miro0406, 4, 6,  95, ' ', 0, FONT0406);
	register_font(miro0808, 8, 8, 256, 0, 0, FONT0808);
	register_font(miro0816, 8,16, 256, 0, 0, FONT0816);
	set_font(1);	// NOTE(review): literal index, presumably FONT0808 - confirm
}

// internal: store 'flag' in the global 'linear' setting
void set_linear(int	flag)
{
	linear = flag;
}

#ifdef __MSDOS__
/* memory access helper functions */
static int map_linear(unsigned long	adr, unsigned long len,
			  unsigned *sel, unsigned *la	)
{
  if(*sel==0xFFFFFFFF) {
	*sel = __dpmi_allocate_ldt_descriptors(1);
	if (*sel==0xFFFFFFFF)	return 0;
  }
  if(!*la) {
	__dpmi_meminfo meminfo;
	meminfo.address	= adr;
	meminfo.size	= len;
	if(__dpmi_physical_address_mapping(&meminfo)==-1) {
	   return 0;
	}
	if(__dpmi_set_segment_base_address(*sel,meminfo.address)==-1) {
	   return 0;
	}
	if(__dpmi_set_segment_limit(*sel, len-1)==-1) {
	   return 0;
	}
	*la=meminfo.address;
  }
  return 1;
}

static void	free_linear(unsigned *sel, unsigned	*la)
{
  if(*la) {
	  __dpmi_meminfo meminfo;
	  meminfo.address =	*(unsigned *)la;
	  __dpmi_free_physical_address_mapping(&meminfo);
	  *la =	0;
  }
  if(*sel != 0xFFFFFFFF)	{
	  __dpmi_free_ldt_descriptor(*sel);
	  *sel = 0xFFFFFFFF;
  }
}
#endif

// set while cleanup() is running, so that a nested call returns immediately
static int exit_flag=0;
// non-zero until graph_set_mode() has done its one-time initialisation;
// set back to 1 by cleanup()
static int firsttime = 1;

//
// Shut the library down and go back to the previous console mode.
// Registered with atexit() by graph_set_mode(); exit_flag guards against
// re-entry while the shutdown is in progress.  Afterwards the library is
// back in its "first time" state.
//
void cleanup(void)
{
	if (!exit_flag)
	{
		exit_flag = 1;
		CloseUserInput();
		if (inGraph)
			graph_set_mode(0);
#ifdef __linux__
		if (svgalib)
			UnlinkSvgalib();
		svgalib = 0;
#endif
		exit_flag = 0;
		firsttime = 1;
	}
}

// internal for mode switching
//
// Translate the library mode constant 'mode' into the hardware mode number
// (taken from mode_tab[]) stored in *m, and set the global screen size
// X_width/Y_width to match.
//
// For an unknown mode *m is set to 0 and the screen size is left untouched.
// On MS-DOS the 0x4000 flag is OR-ed in when 'lfb' is set - but only for a
// recognised mode, so that an unknown mode still reports 0 and the caller's
// "bad mode" test keeps working.
void GetModeSize(int mode, int * m)
{
	int w = 0, h = 0;

	switch (mode)
	{
		case G320x200:   w =  320; h =  200; break;
		case G640x480:   w =  640; h =  480; break;
		case G800x600:   w =  800; h =  600; break;
		case G1024x768:  w = 1024; h =  768; break;
		case G1280x1024: w = 1280; h = 1024; break;
		case G1600x1200: w = 1600; h = 1200; break;
		default:
			break;
	}

	if (w == 0)
	{
		*m = 0;		// unknown mode
		return;
	}

	*m = mode_tab[mode];
	X_width	= w;
	Y_width	= h;
#ifdef __MSDOS__
	if (lfb) *m |= 0x4000;
#endif
}

// Set graphics mode 'mode', or return to text mode when 'mode' is 0.
// Returns 1 if all is OK, 0 if the mode could not be set.
//
// The first switch to a graphics mode also expands the startup fonts,
// detects the video driver (or starts the Linux backend), probes for MMX
// and registers cleanup() with atexit().  On MS-DOS, modes >= 2 are set up
// through the VESA BIOS (int 0x10, functions 0x4f00/0x4f01/0x4f05); the
// loop there walks 'mode' downwards until the card accepts one.
int	graph_set_mode(int mode)
{
	int	m=1;
	int up_to16=0;		// set when a 15 bit mode had to be replaced by a 16 bit one
	
	assert(bpp == (FASTGL_BPP+1)/8);  // bad colors config
#ifndef __MSDOS__
	if (geteuid() && mode)
	{
	    printf("this program must be suid root!\a\n");
	    exit(-1);
	}
#endif
	if (mode == 0) // text mode?
	{
#ifdef INDEX_COLORS
		_set_default_palette();
#endif
		deregister_fonts();
#ifndef	__linux__
#ifdef __MSDOS__
#ifndef __QNX__
		if (VideoLinear)  free_linear(&VideoSelector, &VideoLinear);
#endif
#endif
		// NOTE! this routine also convert font data from bit
		// per pixel to byte per pixel !!!
		_setvideomode(3);
#ifdef __MSDOS__
		if (VideoSelector!=0xFFFFFFFF)
			__dpmi_free_ldt_descriptor(VideoSelector);
		VideoSelector = 0xFFFFFFFF;
#endif
#else
		LinuxGraphClose();
#endif
		inGraph	= 0;
		return 1;
	}

	if (firsttime) // for first run
	{
		_expand_font();
		VideoSelector = 0xFFFFFFFF;
		VideoLinear = 0;
#ifdef __linux__
//		set_linear(1); // force linear mode for no asm kernel
		if ((mode=LinuxStartup(mode)) == 0) return 0;
#else  
		if (driver < 0)
			driver = 0;			// no detect, force VESA
		else if	(driver	== 0)
			driver = detect_video(verbose);	// detect card
#endif	// linux		
#ifndef	NO_MMX
		if (mmx < 0) mmx = 0;
		else test_mmx();
      if (verbose) if (mmx>0) printf("MMX extension detected!\n");
#endif		
	}
	m = mode_tab[mode];
#ifdef __MSDOS__
	if (mode >=	2)				// VESA paging ?
	{
		int	tmp;
		dpmir.x.ax = 0x4f00;	// VBE: get controller information
		dpmir.x.di = 0;
		dpmir.x.es = __dpmi_allocate_dos_memory(512 / 16, &tmp);
		// Pre-set the info block signature to "VBE2" (stored low byte first).
		// The top byte has to be scaled by 0x1000000; the former factor
		// 0xffffff produced 0x32454224, i.e. "$BE2".
		// NOTE(review): assumes poke() stores 32 bits here - confirm.
		poke(tmp, 0, '2' * 0x1000000 + 'E' * 65536 + 'B' * 256 + 'V');
		__dpmi_int(16, &dpmir);
		if (dpmir.x.ax != 0x004F)
			IErrorText("Sorry, your card not support VESA standard!", 1);
		memorysize = (peek(tmp,	0x12) &	0xffff)	* 64;
		for(;;mode--) // find best mode
		{
			dpmir.x.cx = m = mode_tab[mode];
			dpmir.x.ax = 0x4f01;	// VBE: get mode information
			dpmir.x.di = 0;
			__dpmi_int(16, &dpmir);
			if (dpmir.x.ax != 0x004F && mode<=0)
				IErrorText("Sorry, your card not support this VESA mode !",	1);
			if (dpmir.x.ax == 0x004F) break;
#if (FASTGL_BPP==15)
			dpmir.x.cx = m+1;	// replace 15 bit mode with 16 bitmode
			dpmir.x.ax = 0x4f01;
			dpmir.x.di = 0;
			__dpmi_int(16, &dpmir);
			if (dpmir.x.ax != 0x004F)
				IErrorText("Sorry, your card not support this VESA mode !",	0);
			if (dpmir.x.ax == 0x004F) { up_to16=1; break; }
#endif
			printf("Trying other mode than %d\n", mode);
		}
		// the same DOS buffer now holds the mode information block
		if ((peek(tmp, 6) &	0xffff)	< 64)
			IErrorText("Sorry, your card not support 64 KB Window !", 1);
		videobasesegment = peek(tmp, 8)	& 0xffff;
		lfb	= (peek(tmp, 0)	& 0xffff)&128;
		if (linear==0) lfb = 0;
		granularity	= peek(tmp,	4) & 255;
		framebuffer	= peek(tmp,	0x28);
#ifndef __WATCOMC__
		if (lfb	&& framebuffer)
		{
			map_linear(framebuffer,	memorysize*1024, &VideoSelector, &VideoLinear);
			// fall back to banked access when the mapping failed
			if (!(VideoSelector != 0xFFFFFFFF && VideoLinear)) linear = lfb = 0;
			videobase = 0;
		}
#else
		// not supported for WATCOM C++ ;-(
		lfb = linear = 0;
#endif
		dpmir.x.ax = 0x4f05;	// VBE: window control, select window position 0
		dpmir.x.bx = 0;
		dpmir.x.dx = 0;
		__dpmi_int(16, &dpmir);
	}
#endif // msdos	
#ifdef __QNX__
	videobasesegment = 0xa000U;
	memorysize = 2048;
	lfb	= linear = 0;
	granularity	= 64;
#endif
#ifdef __linux__
	memorysize = LinuxMemSize(m);
#endif
	// limit the mode to what the reported video memory allows
	if (memorysize <= 512 && mode >	2)
		mode = 2;
	if (memorysize <= 1024 && mode > 4)
		mode = 4;
	GetModeSize(mode, &m);
	if (!m) return 0; // bad mode
	if (firsttime)
	{
		atexit(cleanup);
		firsttime =	0;
	}
#ifndef	__linux__
	if (lfb==0 && bpp>1)
	{
		IErrorText("Linear mode is required for this BPP!", 1);
	}
	if (lfb || mode<2) driver = 0; // no banking
	if (_setvideomode(m+up_to16) ==	0) return 0; //set properly mode
#else	
	if (LinuxSetMode(m,mode,linear)==0) return 0;
	lfb = linear;
	if (lfb==0 && bpp>1)
	{
		IErrorText("Linear mode is required for this BPP!", 1);
	}
#endif
	// reset the drawing state for the new mode
	inGraph	= m;
	X_max =	X_width	- 1;
	Y_max =	Y_width	- 1;
	set_clip_rect(X_width, Y_width,	0, 0);
	set_font(FONT0816);
	set_colors(0, CWHITE);
	set_ppop(_GSET);
#ifdef INDEX_COLORS
	_set_default_palette();
	ReservedColors();
#endif
	FGLInitEvents();
	linear = (lfb!=0);
	if (verbose) printf("linear mode enabled succesfull for %d BPP\n", FASTGL_BPP);
	if (up_to16) printf("this card don't support any 15bit mode, you can use 16bit modes!\n");
	return 1;	// all is OK
}

#ifndef __linux__
// Sound a tone for a while: starts sound(a), waits delay(b), then calls
// nosound().  'a' is handed to sound() and 'b' to delay() unchanged
// (per the original note: a tone in Hz for b msec).
void Snd(int a,	int	b)
{
	sound(a);
	delay(b);
	nosound();
}
#endif

// sound effect: a short click, Snd(100,10)
void Puk(void)
{
	Snd(100,10);
}

// For drawing sync: busy-wait on bit 3 (0x08) of port 0x3da.
// First waits until the bit is clear, then until it is set again, so the
// function returns right after the bit goes from 0 to 1.
void XWaitRetrace(void)
{
/* some prob here I'll fix it later */
	while (inp(0x3da) & 0x08)
	{
		// bit still set - keep polling
	}
	while (!(inp(0x3da) & 0x08))
	{
		// bit still clear - keep polling
	}
}

// Counterpart of XWaitRetrace(): busy-waits until bit 3 (0x08) of port
// 0x3da is set and then until it is clear again, i.e. returns right after
// the bit goes from 1 to 0.
void XWaitPeriod(void)
{
	while (!(inp(0x3da) & 0x08))
	{
		// bit still clear - keep polling
	}
	while (inp(0x3da) & 0x08)
	{
		// bit still set - keep polling
	}
}

// Warning sound.  Only one beep is implemented so far, so every value of
// 'which' produces the same Snd(1000, 100) tone.
void WarningBeep(int which)
{
	(void)which;	// all selectors map to the default beep
	Snd(1000, 100);
}

// Internal: print an error message.
// Beeps, prints 's' with printf() and waits for getchar().  With flag == 0
// the message is a warning and the function returns; with any other flag
// the message is fatal and the program ends with exit(flag).
void IErrorText(char *s, int flag)
{
	WarningBeep(0);

	if (!flag)
	{
		printf("WARNING: %s\n",	s);
		getchar();
		return;
	}

	printf("FATAL ERROR: %s\n",	s);
	getchar();
	exit(flag);
}

//
// A traditional printf to the screen + position.
//
// Formats the arguments into a local buffer and draws the result with
// text_out(x, y, ...).  The formatted text is limited to 255 characters;
// longer output is truncated instead of overrunning the buffer.
// Returns the number of characters drawn.
//
int	gprintf(int	x, int y, const	char *format,...)
{
	char s[256];
	va_list	arglist;

	s[0] = 0;	// stays an empty string if formatting fails
	va_start(arglist, format);
	vsnprintf(s, sizeof(s), format, arglist);	// bounded, unlike vsprintf
	va_end(arglist);
	s[sizeof(s) - 1] = 0;
	text_out(x,y,s);
	return (int)strlen(s);
}

/**
 ** SCANCNVX.C ---- scan fill a convex polygon
 **
 ** Copyright (c) 1995 Csaba Biegl, 820 Stirrup Dr, Nashville, TN 37221
 ** [e-mail: csaba@vuse.vanderbilt.edu] See "doc/copying.cb" for details.
 **/
// Scan fill a convex polygon.
//
//   n        number of vertices in pt
//   pt       vertices as {x, y} pairs; a last vertex equal to the first one
//            is dropped
//   current  supplies the clip rectangle (gc_xcliplo/hi, gc_ycliplo/hi), the
//            drawing offset (gc_xoffset/gc_yoffset) and the 'scan' callback,
//            which is called as scan(x, y, width) for every filled span
//
// Up to 16 vertices are handled with the stack arrays below; larger
// polygons use heap buffers.  If those cannot be allocated nothing is drawn.
void _FillPolygon(int n,	int	pt[][2], PolygonStruct * current)
{
	edge edge_array[16], *edges=edge_array, *ep;
	scan scan_array[16], *scans=scan_array, *sp, *points, *segments;
	int	xmin, xmax,	ymin, ymax;
	int	ypos, nedges;

	if ((n > 1)	&&
		(pt[0][0] == pt[n -	1][0]) &&
		(pt[0][1] == pt[n -	1][1]))
	{
		n--;
	}
	if (n <	1)
	{
		return;
	}
	if (n>16)
	{
		edges =	(edge *) malloc(sizeof(edge) * (n +	2));
		scans =	(scan *) malloc(sizeof(scan) * (n +	8));
	}
	if (edges && scans)
	{
		int	prevx =	xmin = xmax	= pt[0][0];
		int	prevy =	ymin = ymax	= pt[0][1];

		// build the edge list; every edge is stored running downwards
		// (y <= ylast) and edges completely outside the vertical clip
		// range are skipped
		nedges = 0;
		ep = edges;
		while (--n >= 0)
		{
			if (pt[n][1] >=	prevy)
			{
				ep->e.x	= prevx;
				ep->e.y	= prevy;
				ep->e.xlast	= prevx	= pt[n][0];
				ep->e.ylast	= prevy	= pt[n][1];
			}
			else
			{
				ep->e.xlast	= prevx;
				ep->e.ylast	= prevy;
				ep->e.x	= prevx	= pt[n][0];
				ep->e.y	= prevy	= pt[n][1];
			}
			if ((ep->e.y > (current->gc_ycliphi)) || (ep->e.ylast <	(current->gc_ycliplo)))
				continue;
			{
				// move the start of the edge down onto the top clip line
				if (ep->e.y	< current->gc_ycliplo)
				{
					ep->e.x	+= (((int) (((long)	(((int)	((ep->e.xlast -	ep->e.x)) << 1)) * (long) ((current->gc_ycliplo	- ep->e.y))) / (long) ((ep->e.ylast	- ep->e.y))) + (((int) ((ep->e.xlast - ep->e.x)) ^ (int) ((current->gc_ycliplo - ep->e.y)) ^ (int) ((ep->e.ylast - ep->e.y))) >> ((sizeof(int) * 8)	- 1)) +	1) >> 1);

					ep->e.y	= current->gc_ycliplo;;
				}
			};
			if (ymin > ep->e.y)
				ymin = ep->e.y;
			if (ymax < ep->e.ylast)
				ymax = ep->e.ylast;
			if (xmin > ep->e.x)
				xmin = ep->e.x;
			if (xmax < ep->e.xlast)
				xmax = ep->e.xlast;
			{
				// set up the incremental line stepper for this edge
				(&ep->e)->dy = (&ep->e)->ylast - (&ep->e)->y;
				(&ep->e)->dx = (&ep->e)->xlast - (&ep->e)->x;
				if ((&ep->e)->dx < 0)
				{
					(&ep->e)->xstep	= (-1);
					(&ep->e)->dx = (-(&ep->e)->dx);
				}
				else
				{
					(&ep->e)->xstep	= 1;
				}
				if ((&ep->e)->dx > (&ep->e)->dy)
				{
					(&ep->e)->xmajor = 1;
					(&ep->e)->error	= (&ep->e)->dx >> 1;
				}
				else
				{
					(&ep->e)->xmajor = 0;
					(&ep->e)->error	= ((&ep->e)->dy	- ((1 -	(&ep->e)->xstep) >>	1))	>> 1;
				}
			};
			ep->status = inactive;
			nedges++;
			ep++;
		}
		if ((nedges	> 0) &&	(xmin <= (current->gc_xcliphi))	&& (xmax >=	(current->gc_xcliplo)))
		{
			if (xmin < (current->gc_xcliplo))
				xmin = (current->gc_xcliplo);
			if (ymin < (current->gc_ycliplo))
				ymin = (current->gc_ycliplo);
			if (xmax > (current->gc_xcliphi))
				xmax = (current->gc_xcliphi);
			if (ymax > (current->gc_ycliphi))
				ymax = (current->gc_ycliphi);

			// one pass per scan line; from here on xmin/xmax are reused as
			// the bounds of the span currently being built
			for	(ypos =	ymin; ypos <= ymax;	ypos++)
			{
				sp = scans;
				points = 0;
				segments = 0;
				for	(n = nedges, ep	= edges; --n >=	0; ep++)
				{
					switch (ep->status)
					{
						case inactive:
							if (ep->e.y	!= ypos)
								break;
							if (ep->e.dy ==	0)
							{
								// horizontal edge: goes directly into the
								// sorted, merged 'segments' list
								ep->status = passed;
								xmin = ep->e.x;
								xmax = ep->e.xlast;
								{
									if ((int) (xmin) > (int) (xmax))
									{
										int	_swap_tmpval_ =	(xmin);

										(xmin) = (xmax);
										(xmax) = _swap_tmpval_;
									}
								};
								{
									scan *prev = 0;
									scan *work = segments;
									int	overlap	= 0;

									while (work	!= 0)
									{
										if ((work->x1 <= xmax) && (xmin	<= work->x2))
										{
											overlap	= 1;
											if (xmin < work->x1)
												work->x1 = xmin;
											if (xmax > work->x2)
											{
												prev = work;
												while ((work = work->next) != 0)
												{
													if (work->x1 > xmax)
														break;
													if (work->x2 > xmax)
														xmax = work->x2;
												}
												prev->x2 = xmax;
												prev->next = work;
											}
											break;
										}
										if (work->x1 > xmax)
											break;
										prev = work;
										work = work->next;
									}
									if (!overlap)
									{
										sp->x1 = xmin;
										sp->x2 = xmax;
										sp->next = work;
										if (prev)
											prev->next = sp;
										else
											segments = sp;
									}
								};
								sp++;
								break;
							}
							ep->status = active;
							// fall through: the edge starts on this scan line
						case active:
							xmin = xmax	= ep->e.x;
							if (ep->e.ylast	== ypos)
							{
								// last scan line of this edge
								ep->status = passed;
								xmax = ep->e.xlast;
								{
									if ((int) (xmin) > (int) (xmax))
									{
										int	_swap_tmpval_ =	(xmin);

										(xmin) = (xmax);
										(xmax) = _swap_tmpval_;
									}
								};
								{
									scan *prev = 0;
									scan *work = points;

									while (work	!= 0)
									{
										if (work->x1 > xmin)
											break;
										prev = work;
										work = work->next;
									}
									sp->x1 = xmin;
									sp->x2 = xmax;
									sp->next = work;
									if (prev)
										prev->next = sp;
									else
										points = sp;
								};
								sp++;
							}
							else if	(ep->e.xmajor)
							{
								// x-major edge: step x until y advances
								for	(;;)
								{
									(&ep->e)->x	+= (&ep->e)->xstep;
									if (((&ep->e)->error -=	(&ep->e)->dy) <	0)
									{
										(&ep->e)->error	+= (&ep->e)->dx;
										break;
									}
								};
								xmax = ep->e.x - ep->e.xstep;
								{
									if ((int) (xmin) > (int) (xmax))
									{
										int	_swap_tmpval_ =	(xmin);

										(xmin) = (xmax);
										(xmax) = _swap_tmpval_;
									}
								};
							}
							else
							{
								// y-major edge: at most one x step per scan line
								{
									if (((&ep->e)->error -=	(&ep->e)->dx) <	0)
									{
										(&ep->e)->x	+= (&ep->e)->xstep;
										(&ep->e)->error	+= (&ep->e)->dy;
									}
								};
							}
							{
								// insert the crossing into 'points', sorted by x1
								scan *prev = 0;
								scan *work = points;

								while (work	!= 0)
								{
									if (work->x1 > xmin)
										break;
									prev = work;
									work = work->next;
								}
								sp->x1 = xmin;
								sp->x2 = xmax;
								sp->next = work;
								if (prev)
									prev->next = sp;
								else
									points = sp;
							};
							sp++;
							break;
						default:
							break;
					}
				}
				// pair up the edge crossings and merge each pair into 'segments'
				while (points != 0)
				{
					scan *nextpt = points->next;

					if (!nextpt)
						break;
					xmin = points->x1;
					xmax = nextpt->x2;
					points = nextpt->next;
					{
						scan *prev = 0;
						scan *work = segments;
						int	overlap	= 0;

						while (work	!= 0)
						{
							if ((work->x1 <= xmax) && (xmin	<= work->x2))
							{
								overlap	= 1;
								if (xmin < work->x1)
									work->x1 = xmin;
								if (xmax > work->x2)
								{
									prev = work;
									while ((work = work->next) != 0)
									{
										if (work->x1 > xmax)
											break;
										if (work->x2 > xmax)
											xmax = work->x2;
									}
									prev->x2 = xmax;
									prev->next = work;
								}
								break;
							}
							if (work->x1 > xmax)
								break;
							prev = work;
							work = work->next;
						}
						if (!overlap)
						{
							nextpt->x1 = xmin;
							nextpt->x2 = xmax;
							nextpt->next = work;
							if (prev)
								prev->next = nextpt;
							else
								segments = nextpt;
						}
					};
				}
				// clip every span horizontally and hand it to the callback
				while (segments	!= 0)
				{
					xmin = segments->x1;
					xmax = segments->x2;
					segments = segments->next;
					{
						if (xmin > current->gc_xcliphi)
						{
							continue;
						}
						if (xmax < current->gc_xcliplo)
						{
							continue;
						}
						if (xmin < current->gc_xcliplo)
						{
							xmin = current->gc_xcliplo;;
						}
						if (xmax > current->gc_xcliphi)
						{
							xmax = current->gc_xcliphi;;
						}
					};
					(*current->scan) (
										 (xmin + current->gc_xoffset),
										 (ypos + current->gc_yoffset),
										 (xmax - xmin +	1));
				}
			}
		}
	}
	// 'n' has been counted down by the loops above, so it can no longer tell
	// whether heap buffers were used; compare against the stack arrays
	// instead.  free() of a null pointer (failed malloc) is a no-op.
	if (edges != edge_array)
		free(edges);
	if (scans != scan_array)
		free(scans);
}

// Draw the outline of a polygon.
//
//   n        number of vertices in pt (nothing is drawn for n <= 0)
//   pt       vertices as {x, y} pairs
//   current  supplies the clip rectangle (gc_xcliplo/hi, gc_ycliplo/hi), the
//            drawing offset (gc_xoffset/gc_yoffset) and the 'line' callback,
//            which is called as line(x1, y1, x2, y2) for every visible edge
//
// Each edge runs from the previous vertex to the current one, starting with
// the edge from the last vertex to the first.  Because doClose is always 1
// that closing edge is drawn as well.  Edges completely outside the clip
// rectangle are skipped, the others are shortened to fit into it.
void _DrawPolygon(int n,	int	pt[][2], PolygonStruct * current)
{
	int	i, px, py, x1, y1, x2, y2, doClose=1;

	if (n <= 0)
		return;
	if (n == 1)
		doClose	= 1;
	// bounding box of all vertices; x1..y2 are overwritten at the top of the
	// edge loop below before they are read, so this result is not used
	x1 = x2	= pt[0][0];
	y1 = y2	= pt[0][1];
	for	(i = 1;	i <	n; i++)
	{
		int	*ppt = pt[i];

		if (x1 > ppt[0])
			x1 = ppt[0];
		if (x2 < ppt[0])
			x2 = ppt[0];
		if (y1 > ppt[1])
			y1 = ppt[1];
		if (y2 < ppt[1])
			y2 = ppt[1];
	}
	px = pt[n -	1][0];
	py = pt[n -	1][1];
	for	(i = 0;	i <	n; i++)
	{
		// current edge: (x1,y1) = previous vertex, (x2,y2) = vertex i
		x1 = px;
		y1 = py;
		x2 = px	= pt[i][0];
		y2 = py	= pt[i][1];
		if (i |	doClose)
		{
			// order the end points so that y1 <= y2
			if (y1 > y2)
			{
				{
					int	_swap_tmpval_ =	(x1);

					(x1) = (x2);
					(x2) = _swap_tmpval_;
				};
				{
					int	_swap_tmpval_ =	(y1);

					(y1) = (y2);
					(y2) = _swap_tmpval_;
				};
			}
			{
				// horizontal clipping: skip the edge when it is entirely left
				// or right of the clip range, otherwise move the offending
				// end point onto the clip border (rounded interpolation)
				if (x1 < x2)
				{
					if (x2 < current->gc_xcliplo)
					{
						continue;
					}
					if (x1 > current->gc_xcliphi)
					{
						continue;
					}
					{
						if (x1 < current->gc_xcliplo)
						{
							y1 += (((int) (((long) (((int) ((y2	- y1)) << 1)) *	(long) (current->gc_xcliplo	- x1)) / (long)	((x2 - x1))) + (((int) ((y2	- y1)) ^ (int) (current->gc_xcliplo	- x1) ^	(int) ((x2 - x1))) >> ((sizeof(int)	* 8) - 1)) + 1)	>> 1);

							x1 = current->gc_xcliplo;
						}
					};
					{
						if (x2 > current->gc_xcliphi)
						{
							y2 -= (((int) (((long) (((int) ((y2	- y1)) << 1)) *	(long) (x2 - current->gc_xcliphi)) / (long)	((x2 - x1))) + (((int) ((y2	- y1)) ^ (int) (x2 - current->gc_xcliphi) ^	(int) ((x2 - x1))) >> ((sizeof(int)	* 8) - 1)) + 1)	>> 1);

							x2 = current->gc_xcliphi;
						}
					};
				}
				else
				{
					if (x1 < current->gc_xcliplo)
					{
						continue;
					}
					if (x2 > current->gc_xcliphi)
					{
						continue;
					}
					{
						if (x2 < current->gc_xcliplo)
						{
							y2 += (((int) (((long) (((int) ((y1	- y2)) << 1)) *	(long) (current->gc_xcliplo	- x2)) / (long)	((x1 - x2))) + (((int) ((y1	- y2)) ^ (int) (current->gc_xcliplo	- x2) ^	(int) ((x1 - x2))) >> ((sizeof(int)	* 8) - 1)) + 1)	>> 1);

							x2 = current->gc_xcliplo;
						}
					};
					{
						if (x1 > current->gc_xcliphi)
						{
							y1 -= (((int) (((long) (((int) ((y1	- y2)) << 1)) *	(long) (x1 - current->gc_xcliphi)) / (long)	((x1 - x2))) + (((int) ((y1	- y2)) ^ (int) (x1 - current->gc_xcliphi) ^	(int) ((x1 - x2))) >> ((sizeof(int)	* 8) - 1)) + 1)	>> 1);

							x1 = current->gc_xcliphi;
						}
					};
				}
				// vertical clipping, same scheme with the roles of x and y swapped
				if (y1 < y2)
				{
					if (y2 < current->gc_ycliplo)
					{
						continue;
					}
					if (y1 > current->gc_ycliphi)
					{
						continue;
					}
					{
						if (y1 < current->gc_ycliplo)
						{
							x1 += (((int) (((long) (((int) ((x2	- x1)) << 1)) *	(long) (current->gc_ycliplo	- y1)) / (long)	((y2 - y1))) + (((int) ((x2	- x1)) ^ (int) (current->gc_ycliplo	- y1) ^	(int) ((y2 - y1))) >> ((sizeof(int)	* 8) - 1)) + 1)	>> 1);

							y1 = current->gc_ycliplo;
						}
					};
					{
						if (y2 > current->gc_ycliphi)
						{
							x2 -= (((int) (((long) (((int) ((x2	- x1)) << 1)) *	(long) (y2 - current->gc_ycliphi)) / (long)	((y2 - y1))) + (((int) ((x2	- x1)) ^ (int) (y2 - current->gc_ycliphi) ^	(int) ((y2 - y1))) >> ((sizeof(int)	* 8) - 1)) + 1)	>> 1);

							y2 = current->gc_ycliphi;
						}
					};
				}
				else
				{
					if (y1 < current->gc_ycliplo)
					{
						continue;
					}
					if (y2 > current->gc_ycliphi)
					{
						continue;
					}
					{
						if (y2 < current->gc_ycliplo)
						{
							x2 += (((int) (((long) (((int) ((x1	- x2)) << 1)) *	(long) (current->gc_ycliplo	- y2)) / (long)	((y1 - y2))) + (((int) ((x1	- x2)) ^ (int) (current->gc_ycliplo	- y2) ^	(int) ((y1 - y2))) >> ((sizeof(int)	* 8) - 1)) + 1)	>> 1);

							y2 = current->gc_ycliplo;
						}
					};
					{
						if (y1 > current->gc_ycliphi)
						{
							x1 -= (((int) (((long) (((int) ((x1	- x2)) << 1)) *	(long) (y1 - current->gc_ycliphi)) / (long)	((y1 - y2))) + (((int) ((x1	- x2)) ^ (int) (y1 - current->gc_ycliphi) ^	(int) ((y1 - y2))) >> ((sizeof(int)	* 8) - 1)) + 1)	>> 1);

							y1 = current->gc_ycliphi;
						}
					};
				}
			};
			// draw the clipped edge, shifted by the drawing offset
			(*current->line) (
								 (x1 + current->gc_xoffset),
								 (y1 + current->gc_yoffset),
								 (x2 + current->gc_xoffset),
								 (y2 + current->gc_yoffset));
		}
	}
}

// Fill polygon: passes the 'num' vertices in 'pole' ({x, y} pairs) to
// _FillPolygon() together with the file-level polygon context 'ps'.
void fill_convex(int num, int pole[][2])
{
	_FillPolygon(num, pole, &ps);
}

// Draw polygon: passes the 'num' vertices in 'pole' ({x, y} pairs) to
// _DrawPolygon() together with the file-level polygon context 'ps'.
void draw_convex(int num, int pole[][2])
{
	_DrawPolygon(num, pole, &ps);
}

//
// this code is from allegro lib
//

/* do_ellipse:
 *  Helper function for the ellipse drawing routines. Calculates the points
 *  in an ellipse of radius rx and ry around point x, y, and calls the 
 *  specified routine for each one. The output proc will be passed first a 
 *  copy of the bmp parameter, then the x, y point, then a copy of the d 
 *  parameter (so putpixel() can be used as the callback).
 */
static void do_ellipse(int x, int y, int rx, int ry, void (*proc)(int, int))
{
   int ix, iy;
   int h, i, j, k;
   int oh, oi, oj, ok;

   if (rx < 1) 
      rx = 1; 

   if (ry < 1) 
      ry = 1;

   h = i = j = k = 0xFFFF;

   if (rx > ry) {
      ix = 0; 
      iy = rx * 64;

      do {
	 oh = h;
	 oi = i;
	 oj = j;
	 ok = k;

	 h = (ix + 32) >> 6; 
	 i = (iy + 32) >> 6;
	 j = (h * ry) / rx; 
	 k = (i * ry) / rx;

	 if (((h != oh) || (k != ok)) && (h < oi)) {
	    proc(x+h, y+k);
	    if (h) 
	       proc(x-h, y+k);
	    if (k) {
	       proc(x+h, y-k);
	       if (h)
		  proc(x-h, y-k);
	    }
	 }

	 if (((i != oi) || (j != oj)) && (h < i)) {
	    proc(x+i, y+j);
	    if (i)
	       proc(x-i, y+j);
	    if (j) {
	       proc(x+i, y-j);
	       if (i)
		  proc(x-i, y-j);
	    }
	 }

	 ix = ix + iy / rx; 
	 iy = iy - ix / rx;

      } while (i > h);
   } 
   else {
      ix = 0; 
      iy = ry * 64;

      do {
	 oh = h;
	 oi = i;
	 oj = j;
	 ok = k;

	 h = (ix + 32) >> 6; 
	 i = (iy + 32) >> 6;
	 j = (h * rx) / ry; 
	 k = (i * rx) / ry;

	 if (((j != oj) || (i != oi)) && (h < i)) {
	    proc(x+j, y+i);
	    if (j)
	       proc(x-j, y+i);
	    if (i) {
	       proc(x+j, y-i);
	       if (j)
		  proc(x-j, y-i);
	    }
	 }

	 if (((k != ok) || (h != oh)) && (h < oi)) {
	    proc(x+k, y+h);
	    if (k)
	       proc(x-k, y+h);
	    if (h) {
	       proc(x+k, y-h);
	       if (k)
		  proc(x-k, y-h);
	    }
	 }

	 ix = ix + iy / ry; 
	 iy = iy - ix / ry;

      } while(i > h);
   }
}

/* do_fill_ellipse:
 *  Draws a filled ellipse of radius rx and ry around point x, y as a stack
 *  of horizontal lines. Every line is emitted through the draw_hline
 *  callback, which is called as draw_hline(x-half, row, half*2)
 *  (presumably start x, y and length - confirm against the callbacks
 *  passed in). Rows are produced in pairs above and below the centre, and
 *  the dc/dd (resp. da/db) bounds make sure no row is drawn twice.
 *
 *  NOTE(review): unlike do_ellipse, rx and ry are not raised to 1 here;
 *  the callers visible in this file reject radii below 1 themselves.
 */
static void do_fill_ellipse(int x, int y, int rx, int ry, void (*draw_hline)(int,int,int))
{
   int ix, iy;
   int a, b, c, d;
   int da, db, dc, dd;
   int na, nb, nc, nd;

   if (rx > ry) {
      /* dc/dd: rows already drawn, growing from the centre and from the top */
      dc = -1;
      dd = 0xFFFF;
      ix = 0; 
      iy = rx * 64;
      na = 0; 
      nb = (iy + 32) >> 6;
      nc = 0; 
      nd = (nb * ry) / rx;

      do {
	 /* use the values computed in the previous step */
	 a = na; 
	 b = nb; 
	 c = nc; 
	 d = nd;

	 /* advance the 6 bit fixed point iteration */
	 ix = ix + (iy / rx);
	 iy = iy - (ix / rx);
	 na = (ix + 32) >> 6; 
	 nb = (iy + 32) >> 6;
	 nc = (na * ry) / rx; 
	 nd = (nb * ry) / rx;

	 if ((c > dc) && (c < dd)) {
	    draw_hline(x-b, y+c, b*2);
	    if (c)
	       draw_hline(x-b, y-c, b*2);
	    dc = c;
	 }

	 if ((d < dd) && (d > dc)) { 
	    draw_hline(x-a, y+d, a*2);
	    draw_hline(x-a, y-d, a*2);
	    dd = d;
	 }

      } while(b > a);
   } 
   else {
      /* same scheme with the roles of the two axes exchanged */
      da = -1;
      db = 0xFFFF;
      ix = 0; 
      iy = ry * 64; 
      na = 0; 
      nb = (iy + 32) >> 6;
      nc = 0; 
      nd = (nb * rx) / ry;

      do {
	 a = na; 
	 b = nb; 
	 c = nc; 
	 d = nd; 

	 ix = ix + (iy / ry); 
	 iy = iy - (ix / ry);
	 na = (ix + 32) >> 6; 
	 nb = (iy + 32) >> 6;
	 nc = (na * rx) / ry; 
	 nd = (nb * rx) / ry;

	 if ((a > da) && (a < db)) {
	    draw_hline(x-d, y+a, d*2);
	    if (a)
	       draw_hline(x-d, y-a, d*2);
	    da = a;
	 }

	 if ((b < db) && (b > da)) { 
	    draw_hline(x-c, y+b, c*2);
	    draw_hline(x-c, y-b, c*2);
	    db = b;
	 }

      } while(b > a);
   }
}

// Fill an ellipse of radius rx, ry around x, y using draw_hline.
// Nothing is drawn when the bounding box lies entirely outside the clip
// rectangle (clip_x_min..clip_x_max, clip_y_min..clip_y_max) or when a
// radius is smaller than 1.
void fill_ellipse(int x, int y, int rx, int ry)
{
	if ((y + ry) < clip_y_min)	return;		// above
	if ((x + rx) < clip_x_min)	return;		// left of
	if ((y - ry) >= clip_y_max)	return;		// below
	if ((x - rx) >= clip_x_max)	return;		// right of
	if (rx < 1 || ry < 1)		return;		// degenerate

	do_fill_ellipse(x,y,rx,ry,draw_hline);
}

/* draw_ellipse:
 *  Draws the outline of an ellipse of radius rx, ry around xs, ys using
 *  draw_point. Nothing is drawn when the bounding box lies entirely outside
 *  the clip rectangle or when a radius is smaller than 1.
 */
void draw_ellipse(int xs, int ys, int rx, int ry)
{
	if ((ys + ry) < clip_y_min)		return;		// above
	if ((xs + rx) < clip_x_min)		return;		// left of
	if ((ys - ry) >= clip_y_max)	return;		// below
	if ((xs - rx) >= clip_x_max)	return;		// right of
	if (rx < 1 || ry < 1)			return;		// degenerate

	do_ellipse(xs, ys, rx, ry, draw_point);
}

// Fill ellipse to RAM: same as fill_ellipse(), but drawn with Hlinew and
// checked against the work area (cx_work..cx_maxwork, cy_work..cy_maxwork).
void FillEllipse(int x, int y, int rx, int ry)
{
	if ((y + ry) < cy_work)		return;		// above
	if ((x + rx) < cx_work)		return;		// left of
	if ((y - ry) >= cy_maxwork)	return;		// below
	if ((x - rx) >= cx_maxwork)	return;		// right of
	if (rx < 1 || ry < 1)		return;		// degenerate

	do_fill_ellipse(x,y,rx,ry,Hlinew);
}

// Draw ellipse to RAM: same as draw_ellipse(), but drawn with Pixel and
// checked against the work area (cx_work..cx_maxwork, cy_work..cy_maxwork).
void DrawEllipse(int x, int y, int rx, int ry)
{
	if ((y + ry) < cy_work)		return;		// above
	if ((x + rx) < cx_work)		return;		// left of
	if ((y - ry) >= cy_maxwork)	return;		// below
	if ((x - rx) >= cx_maxwork)	return;		// right of
	if (rx < 1 || ry < 1)		return;		// degenerate

	do_ellipse(x, y, rx, ry, Pixel);
}

// ***********************************************************************
/* bez_split:
 *  Linear interpolation between x1 and x2: returns x1 for mu == 0,
 *  x2 for mu == 1 and the weighted average (1-mu)*x1 + mu*x2 in between.
 */
static float bez_split(float mu, float x1, float x2)
{
   const double keep = 1.0 - mu;	/* weight of x1 */

   return keep * x1 + mu * x2;
}


/* bezval:
 *  Calculates one coordinate of a point on a cubic bezier curve.
 *  coor points at the first of four control values which are two ints
 *  apart (coor[0], coor[2], coor[4], coor[6]); mu is the curve parameter.
 *  The four values are reduced to one by repeated bez_split() interpolation
 *  of neighbours.
 */
static float bezval(float mu, int *coor)
{
   float level[4];
   int remaining;
   int k;

   for (k = 0; k < 4; k++)
	  level[k] = (float)coor[k * 2];

   /* every pass replaces 'remaining'+1 values by 'remaining' interpolated ones */
   for (remaining = 3; remaining > 0; remaining--) {
	  for (k = 0; k < remaining; k++)
		 level[k] = bez_split(mu, level[k], level[k + 1]);
   }

   return level[0];
}

/* calc_spline:
 *  Calculates a set of pixels for the bezier spline defined by the four
 *  points specified in the points array (x0,y0, x1,y1, x2,y2, x3,y3).
 *  The required resolution is specified by the npts parameter, which
 *  controls how many output pixels will be stored in the x and y arrays.
 *  The curve parameter runs from 0 for the first output point to 1 for the
 *  last one. With npts == 1 the single output point is the start of the
 *  curve; with npts <= 0 nothing is stored.
 */
void calc_spline(int points[8],	int	npts, int *x, int *y)
{
   int i;
   float mu;

   for (i=0; i<npts; i++) {
	  /* npts == 1 would give 0/0 here, so the parameter is pinned to 0 */
	  if (npts > 1)
		 mu = (float)i/((float)npts-1.0);
	  else
		 mu = 0.0f;
	  x[i] = (int) bezval(mu, points);
	  y[i] = (int) bezval(mu, points+1);
   }
}


/* draw_spline:
 *  Draws the bezier spline given by the four control points in 'points'.
 *  The curve is sampled at NPTS positions with calc_spline() and drawn as
 *  NPTS-1 line segments with draw_line(). When the current raster
 *  operation (ppop) is _GXOR, the end point of every segment is drawn once
 *  more with draw_point().
 */
void draw_spline(int points[8])
{
   #define NPTS	  64

   int xpts[NPTS], ypts[NPTS];
   int seg;

   calc_spline(points, NPTS, xpts, ypts);

   /* segment 'seg' joins sample seg with sample seg+1 */
   for (seg = 0; seg < NPTS - 1; seg++) {
	  const int to = seg + 1;

	  draw_line(xpts[seg], ypts[seg], xpts[to], ypts[to]);
	  if (ppop == _GXOR)
		 draw_point(xpts[to], ypts[to]);
   }
}

// for compatibility
// Case-insensitive string comparison.
// Returns 0 when both strings are equal ignoring case, otherwise the
// difference of the first pair of lower-cased characters that differ
// (negative when s1 sorts first, positive when s2 does).
// Characters are read as unsigned char: the <ctype.h> functions are only
// defined for such values (and EOF), and characters above 127 then sort
// after the ASCII range instead of before it.
int stricmp(const char	*s1, const char	*s2)
{
  const unsigned char *a = (const unsigned char *)s1;
  const unsigned char *b = (const unsigned char *)s2;

  while	(tolower(*a) == tolower(*b))
  {
	if (*a == 0)
	  return 0;
	a++;
	b++;
  }
  return tolower(*a) - tolower(*b);
}

// Case-insensitive comparison of at most n characters.
// Returns 0 when the first n characters (or both complete strings, if they
// are shorter) are equal ignoring case, otherwise the difference of the
// first pair of lower-cased characters that differ.
// Characters are read as unsigned char, because the <ctype.h> functions are
// only defined for such values (and EOF).
int strnicmp(const	char *s1, const	char *s2, size_t n)
{
  const unsigned char *a = (const unsigned char *)s1;
  const unsigned char *b = (const unsigned char *)s2;

  for (; n != 0; n--, a++, b++)
  {
	const int ca = tolower(*a);
	const int cb = tolower(*b);

	if (ca != cb)
	  return ca - cb;
	if (*a == 0)		// both strings ended
	  break;
  }
  return 0;
}

// Convert the string s to upper case in place and return s.
// Each character is converted to unsigned char before it is handed to
// islower()/toupper(), which are only defined for such values (and EOF).
char * strupr(char	*s)
{
	char *p;

	for (p = s; *p != 0; p++)
	{
		const int c = (unsigned char)*p;

		if (islower(c))	*p = (char)toupper(c);
	}
	return s;
}

// Convert the string _s to lower case in place and return _s.
// Each character is converted to unsigned char before it is handed to
// tolower(), which is only defined for such values (and EOF).
char * strlwr(char *_s)
{
  char *p;

  for (p = _s; *p != 0; p++)
    *p = (char)tolower((unsigned char)*p);
  return _s;
}

#if (defined(__GNUC__) && (defined(__QNX__)) && !defined(NO_MMX))

// no MMX for QNX: stub used when building with GNU C for QNX,
// always reports 0
int test_mmx(void)
{
	return 0;
}

// no MMX for QNX: empty stub
void set_mmx(void)
{
}

// no MMX for QNX: empty stub
void reset_mmx(void)
{
}
#endif

//
// NOTE! this runs with pixels not bytes !!!
//
// Fill 'c' pixels starting at 'to' with the pixel value 'data'.
// With Watcom C this is a plain loop; otherwise x86 string store
// instructions are used, with the store width chosen by the colour
// configuration (INDEX_COLORS: stosb/stosl, DIRECT_COLORS: stosw,
// TRUE_COLORS: stosl).
//
// NOTE(review): none of the asm statements declares outputs or clobbers
// although "rep stos" changes ECX and EDI and writes memory - confirm that
// this is safe with the compiler in use.
//
void FGmemset(FGPixel *to, FGPixel data, unsigned c)
{
#if !defined(__WATCOMC__)
	__asm__ ("cld");	// clear the direction flag for the string stores below
#ifdef INDEX_COLORS
	int cc;
	// store single pixels until 'to' is aligned to 4 bytes
	while(int(to)&3)
 	{
		if (--c & 0x80000000) return;	// count dropped below zero: nothing left to fill
		*to++=data;
	}
	cc = c>>2;	// number of complete groups of four pixels
	if (cc<4) // if less than 16 bytes
		asm volatile ("rep\n\t stosb":
		: "c" (c) , "D" (to), "a" (data) );
	else
	{
		// copy AL into all four bytes of EAX, then store whole dwords
		asm volatile ("mov %%al,%%ah\n pushw %%ax\n pushw %%ax\n popl %%eax\n rep\n\t stosl" : : "c" (cc) , "D" (to), "a" (data));
		// remaining 1..3 pixels
		// NOTE(review): no "D" operand here - presumably relies on EDI still
		// pointing behind the dwords stored above; confirm
		if (c &= 3) asm volatile ("rep\n\t stosb" : : "c" (c), "a" (data));
	}
#endif
#ifdef DIRECT_COLORS
	asm volatile ("rep\n\t stosw" : : "c" (c) , "D" (to), "a" (data));
#endif
#ifdef TRUE_COLORS
	asm volatile ("rep\n\t stosl" : : "c" (c) , "D" (to), "a" (data));
#endif
#else
	while (c--) *to++ = data;
#endif
}

// *******************************************************************

// set foreground color: stores c in _CurrColor
void set_fcolor(FGPixel c)
{
	_CurrColor = c;
}

// set background color: stores c in _CurrBkColor
void set_bcolor(FGPixel c)
{
	_CurrBkColor = c;
}

// get foreground color: returns the value last stored in _CurrColor
FGPixel	get_fcolor(void)
{
	return _CurrColor;
}

// get background color: returns the value last stored in _CurrBkColor
FGPixel	get_bcolor(void)
{
	return _CurrBkColor;
}

// Set both drawing colors at once: f becomes the foreground color
// (_CurrColor) and b the background color (_CurrBkColor).
void set_colors(FGPixel f,	FGPixel b)
{
	_CurrColor   = f;
	_CurrBkColor = b;
}

#ifdef INDEX_COLORS

// colorsFree: number of palette entries not handed out yet (see
// CreateColor2/CreateColor/DeleteColor); startFree: first index that
// CreateColor2 searches for a free entry
static int	 colorsFree=256, startFree;
// tolerance per color component for the fuzzy match in CreateColor2,
// set with SetColorFuzzy()
static int	 range;

// Set CLUT palette register: writes the index 'col' to port 0x3c8 and then
// three values to port 0x3c9, taken from bits 16.., 8.. and 0.. of 'rgb'
// in that order.
// NOTE(review): relies on outp() using only the low byte of the values
// passed to port 0x3c9 - confirm.
void __palette(unsigned col, unsigned rgb)
{
	outp((unsigned short)0x3c8,	(unsigned char)col);
	outp(0x3c9,	rgb	>> 16);
	outp(0x3c9,	rgb	>> 8);
	outp(0x3c9,	rgb);
}

// Set one color in the palette: programs the hardware register through
// __palette() and records the value in the _fgl_palette shadow table.
// The index is reduced to 0..255; the stored copy keeps only the low
// 6 bits of each component (mask 0x3F3F3F).
void _palette(unsigned col,	unsigned rgb)
{
	const unsigned slot = col & 255;

	__palette(slot, rgb);
	_fgl_palette[slot] = rgb & 0x3F3F3F;
}

// obsolete
// Set 'cnt' consecutive palette entries starting at index 'ind' from the
// values in 'pal' (one packed rgb value per entry, passed on to _palette()).
// Nothing happens for cnt <= 0; previously a negative count made the loop
// run on far beyond the end of 'pal'.
void set_palet(int cnt,	int	ind, int *pal)
{
	int k;

	for	(k = 0; k < cnt; k++)
	{
		_palette(ind + k, (unsigned int) pal[k]);
	}
}

// Get color from palette: returns the recorded value of palette entry i
// with each component limited to 6 bits (mask 0x3F3F3F).
// The index is reduced to 0..255 in the same way as in _palette(), so that
// an out-of-range index cannot read outside the _fgl_palette table.
unsigned int get_palette(unsigned int i)
{
	return _fgl_palette[i & 255]&0x3F3F3F;
}

// Refresh current palette: feeds all 256 entries of the _fgl_palette
// shadow table back through _palette(), which reprograms the hardware.
void _set_fgl_palette(void)
{
	int slot = 0;

	while (slot < 256)
	{
		_palette(slot, _fgl_palette[slot]);
		slot++;
	}
}

// Set startup palette: copies _default_palette over _fgl_palette and then
// programs the result with _set_fgl_palette().
void _set_default_palette(void)
{
	memmove(_fgl_palette, _default_palette, sizeof(_fgl_palette));
	_set_fgl_palette();
}

// Read the components of color i from ColorsArray into *rc, *gc and *bc.
// For an index outside 0..255 the outputs are left untouched.
void GetPaletteEntry(char *rc, char	*gc, char *bc, int i)
{
	if (i >= 0 && i <= 255)
	{
		*rc = ColorsArray[i].r;
		*gc = ColorsArray[i].g;
		*bc = ColorsArray[i].b;
	}
}

// Drop one reference to palette entry idx.
// ColorsArray[idx].alfa is the use counter of the entry: a counter above 1
// is just decremented; when the last reference goes (counter == 1) the
// entry is reset to a gray value derived from its index, marked unused and
// counted as free again.  Unused entries and indexes outside 0..255 are
// ignored.
void DeleteColor(int idx)
{
	if (idx < 0 || idx > 255)
		return;

	if (ColorsArray[idx].alfa > 1)
	{
		ColorsArray[idx].alfa--;	// still used by somebody else
		return;
	}
	if (ColorsArray[idx].alfa != 1)
		return;						// not in use

	// last user gone: create a gray color and release the entry
	ColorsArray[idx].r = (char)idx;
	ColorsArray[idx].g = (char)idx;
	ColorsArray[idx].b = (char)idx;
	ColorsArray[idx].alfa = 0;
	colorsFree++;
}

// Set the tolerance used by CreateColor2() when it looks for an existing
// similar color; only the low 4 bits of 'a' are used (0..15).
void SetColorFuzzy(int a)
{
	range =	a&15;
}

// Return the current value of the free palette entry counter.
int	 GetFreeColors(void)
{
	return colorsFree;
}

// Store the color (rc, gc, bc) in palette entry idx: records the
// components in ColorsArray, increments the entry's use counter and
// programs the palette with the packed value bc + gc*256 + rc*65536.
// Indexes outside 0..255 are ignored.
void CreatePaletteEntry(int	rc,	int	gc,	int	bc,	int	idx)
{
	if (idx >= 0 && idx <= 255)
	{
		const int packed = bc+gc*256+rc*65536;

		ColorsArray[idx].r = (char)rc;
		ColorsArray[idx].g = (char)gc;
		ColorsArray[idx].b = (char)bc;
		ColorsArray[idx].alfa++; // incr used counter
		_palette(idx, packed);
	}
}

//  WARNING colors are in range 0..63 !!!
//
// Return a palette index for the color (r, g, b), sharing an existing entry
// where possible:
//   1. an entry in use with exactly these components is reused,
//   2. otherwise the last entry in use whose components all lie within
//      +-range (see SetColorFuzzy) is reused,
//   3. otherwise the first unused entry at or after startFree is set up.
// Reusing an entry increments its use counter.
// Returns (FGPixel)-1 when no free entry is left and (FGPixel)-2 when the
// free counter says there is one but none can be found.
FGPixel CreateColor2(int r, int g, int b)
{
	// 'fuzzy' is -1 while no similar color has been seen; index 0 is a
	// valid match and must not double as the "nothing found" marker
	int i, r0, g0, b0, fuzzy = -1;

	r &= 0x3F;
	g &= 0x3F;
	b &= 0x3F;
	for (i = 0; i < 256; i++)
	{
		if (!ColorsArray[i].alfa)	// skip entries that are not in use
			continue;
		r0 = ColorsArray[i].r;
		g0 = ColorsArray[i].g;
		b0 = ColorsArray[i].b;
		if ((r0 - range <= r && r0 + range >= r)
			&& (g0 - range <= g && g0 + range >= g)
			&& (b0 - range <= b && b0 + range >= b))
		{
			// close enough - remember it, but prefer an exact match
			fuzzy = i;
			if (r0 == r && g0 == g && b0 == b)	// must be equivalent
			{
				ColorsArray[i].alfa++;
				return (FGPixel)i;	// return palette index
			}
		}
	}

	if (fuzzy >= 0)
	{
		ColorsArray[fuzzy].alfa++;
		return (FGPixel)fuzzy;			// return palette index
	}

	// we must create new palette entry for your color

	if (colorsFree == 0)
		return (FGPixel)-1;				// ERROR I haven't any palette entry for one

	for (i = startFree; i < 256; i++)
	{
		if (ColorsArray[i].alfa == 0)	// is free color ?
		{
			colorsFree--;				// count the entry only once it is really taken
			CreatePaletteEntry(r, g, b, i);
			return (FGPixel)i;
		}
	}
	return (FGPixel)-2;					// ERROR - data mismatch !!!
}

//  WARNING colors are in range 0..63 !!!
//
// Set palette entry 'ind' to the color (r, g, b) and return the index.
// With 'ind' outside 0..255 the entry is chosen by CreateColor2() instead.
// If the entry was not in use before, it is taken from the pool of free
// entries; (FGPixel)-1 is returned when that pool is already empty.

FGPixel CreateColor(int r, int g, int b, int ind)
{
	if (ind<0 || ind>255)
	{
		return CreateColor2(r,g,b);
	}
	r &= 0x3F;
	g &= 0x3F;
	b &= 0x3F;
	CreatePaletteEntry(r, g, b, ind);	// also increments the use counter
	if (ColorsArray[ind].alfa>1) return (FGPixel)ind;	// entry was already in use

	// NOTE(review): at this point the entry has already been written by
	// CreatePaletteEntry() even if -1 is returned - confirm this is intended
	if (colorsFree == 0)
		return (FGPixel)-1;				// ERROR I haven't any palette entry for one

	colorsFree--;
	return (FGPixel)ind;					// newly used entry - return palette index
}

// Same as CreateColor2(), but takes 8 bit components: each one is shifted
// right by two bits to get into the 0..63 range first.
FGPixel CreateColor8(int r, int g, int b)
{
	return CreateColor2(r>>2,g>>2,b>>2);
}
//
// Warning: do not reorder colors !!! (due to base.h colors definition)
//
/*
 * Installs the 16 predefined colours in palette slots 0..15, in that order,
 * via CreateColor(), then writes white into slot 255 via CreatePaletteEntry().
 */
void ReservedColors(void)
{
	// One {r, g, b} row per slot; the row number is the palette index.
	static const unsigned char reserved[16][3] =
	{
		{ 0,  0,  0},	//  0 cblack
		{17, 17, 17},	//  1 CDARK
		{22, 22, 22},	//  2 CGRAYED
		{31, 32, 29},	//  3 CGRAY1
		{26, 30, 33},	//  4 CGRAY2
		{42, 46, 43},	//  5 CGRAY3
		{ 0,  0, 32},	//  6 CBLUE
		{ 0,  0, 62},	//  7 CBLUELIGHT
		{ 0, 30,  0},	//  8 CGREEN
		{ 0, 62,  0},	//  9 CGREENLIGHT
		{53, 17, 18},	// 10 CRED
		{62,  0,  0},	// 11 CREDLIGHT
		{26, 16, 10},	// 12 CBROWN
		{63, 56,  0},	// 13 CYELLOW
		{54, 52, 46},	// 14 CWHITED
		{63, 63, 63}	// 15 CWHITE
	};
	int slot;

	for (slot = 0; slot < 16; slot++)
	{
		CreateColor(reserved[slot][0], reserved[slot][1], reserved[slot][2], slot);
	}

	// for CWHITE - mouse rect when dragging ...
	CreatePaletteEntry(63, 63, 63, 255);
}
#else
/*
 * Variant for the #else build, where CreateColor() maps colours straight
 * through DirectColor(): always reports the constant 0xFFFFFF.
 */
int	 GetFreeColors(void)
{
	return 0xFFFFFF;
}

/*
 * Variant for the #else build: does nothing, the argument is ignored.
 */
void SetColorFuzzy(int a)
{
}

/*
 * Packs (r, g, b) into one integer - red shifted left by 2, green by 10,
 * blue by 18 - and returns it converted through DirectColor().
 * The fourth (index) argument is unused in this variant.
 */
FGPixel CreateColor(int r, int g, int b, int)
{
	const int packed = (r<<2)+(g<<10)+(b<<18);

	return FGPixel(DirectColor(packed));
}

/*
 * Packs (r, g, b) into one integer - red in bits 0..7, green shifted left
 * by 8, blue by 16 - and returns it converted through DirectColor().
 */
FGPixel CreateColor8(int r, int g, int b)
{
	const int packed = r+(g<<8)+(b<<16);

	return FGPixel(DirectColor(packed));
}
#endif

#if 0
// swap(a,b): XOR swap. Integer lvalues only, and a and b must be distinct
// objects (swapping an object with itself would zero it).
// Wrapped in do { } while (0) so that "if (c) swap(a,b); else ..." parses.
#define swap(a,b)           do { (a) ^= (b); (b) ^= (a); (a) ^= (b); } while (0)
// MIN/MAX evaluate one of their arguments twice - avoid side effects.
#define	MIN(a,b)	( (a) < (b) ? (a) : (b) )
#define MAX(a,b)	( (a) > (b) ? (a) : (b) )


/*
 * Integer (Bresenham-style) line from (a1,b1) to (a2,b2).
 * draw(x, y) is called once for every point, both end points included.
 * The walk always runs in the increasing direction of the longer axis, so
 * points may be produced from (a2,b2) towards (a1,b1).
 * Nothing is drawn when the extent along either axis exceeds 10000.
 */
static void line(void (*draw)(int,int),int a1, int b1, int a2, int b2)
{
	const int dx = abs(a2 - a1);
	const int dy = abs(b2 - b1);
	int major, minor, last, dir, err;

	if (dx > 10000 || dy > 10000)
		return;

	if (dx > dy)
	{
		/* flatter than 45 degrees: x advances on every step */
		const int straight = 2 * dy;
		const int diagonal = 2 * (dy - dx);

		if (a1 <= a2)
		{
			major = a1;
			last = a2;
			minor = b1;
			dir = (b2 - b1) < 0 ? -1 : 1;
		}
		else
		{
			major = a2;
			last = a1;
			minor = b2;
			dir = (b1 - b2) < 0 ? -1 : 1;
		}

		err = 2 * dy - dx;
		draw(major, minor);
		while (major < last)
		{
			major++;
			if (err >= 0)
			{
				minor += dir;
				err += diagonal;
			}
			else
			{
				err += straight;
			}
			draw(major, minor);
		}
	}
	else
	{
		/* 45 degrees or steeper: y advances on every step */
		const int straight = 2 * dx;
		const int diagonal = 2 * (dx - dy);

		if (b1 <= b2)
		{
			major = b1;
			last = b2;
			minor = a1;
			dir = (a2 - a1) < 0 ? -1 : 1;
		}
		else
		{
			major = b2;
			last = b1;
			minor = a2;
			dir = (a1 - a2) < 0 ? -1 : 1;
		}

		err = 2 * dx - dy;
		draw(minor, major);
		while (major < last)
		{
			major++;
			if (err >= 0)
			{
				minor += dir;
				err += diagonal;
			}
			else
			{
				err += straight;
			}
			draw(minor, major);
		}
	}
}

// End point of the most recent draw_line()/drawto_line() call.
static int xold, yold;

/*
 * Draws a line from (x1,y1) to (x2,y2) with draw_point and records
 * (x2,y2) in xold/yold.
 */
void draw_line(int x1, int y1, int x2, int y2)
{
	xold = x2;
	yold = y2;
	line(draw_point, x1, y1, x2, y2);
}

/*
 * Draws a line with draw_point from the point stored in xold/yold to
 * (x2,y2), then stores (x2,y2) in xold/yold.
 */
void drawto_line(int x2, int y2)
{
	line(draw_point, xold,yold,x2,y2);
	// update the stored point only after the line has been drawn
	xold=x2;
	yold=y2;
}

/*
 * Same as line() from (x1,y1) to (x2,y2), but each point is passed to
 * Pixel instead of draw_point. xold/yold are not touched.
 */
void Line(int x1, int y1, int x2, int y2)
{
	line(Pixel, x1,y1,x2,y2);
}
#endif

#ifdef __WATCOMC__
#define	random rand
#define	srandom srand
#endif

extern int rnd(int);
// rnd(a): random() reduced modulo (a + 1), i.e. a value in 0..a for a >= 0.
// The macro shadows the function declared above. The argument is
// parenthesised so that expressions such as rnd(w | 1) or rnd(c ? p : q)
// are incremented as a whole.
#define rnd(a) (random() % ((a) + 1))

/*
 * Returns the difference between the current clock() value and the one seen
 * on the previous call. The first call returns the clock() value itself,
 * since the remembered value starts at 0.
 */
static int cas()
{
	static int last = 0;
	const int previous = last;

	last = clock();
	return last - previous;
}

static void rondo()
{
	volatile int i = X_width / 8 / 6, j = Y_width / 16;
	char *s1 = "FastGL ", *s2 = " FastGL";

	for (; j >= 0; j--)
		for (int a = i; a >= 0; a--)
			text_out(a * 8 * 6, j * 16, j & 1 ? s1 : s2);
}

static int _bench(void)
{
	int tt;
	int pp[][2] =
	{{10, 10}, {100, 60}, {50, 500}, {100, 400}, {500, 490}, {800, 100}, {10, 10}};
	int i, f;
	FGPixel *p;

	p = (FGPixel *)malloc(areasize(X_width, Y_width));
	f = clock();
	tt = ppop;
	for (i = 0; i < 10; i++)
	{
		clear_frame_buffer(i);
	}
	set_ppop(tt);
	for (i = 0; i < 10; i++)
	{
		set_fcolor(i);
		fill_convex(7, pp);
	}
	tt = clock();
	for (i = 0; i < 700; i++)
	{
		draw_line(0, 0, X_width, i);
		draw_line(0, i, X_width, 0);
	}
	for (i = 0; i < 1600; i++)
	{
		set_fcolor(i);
		fill_box(rnd(X_width), rnd(Y_width),100, 65);
	}
	get_block(0, 0, X_width, Y_width, p);
	for (i = 0; i < 600; i += 5)
	{
		put_block(i, i, X_width, Y_width, p);
	}
	put_block(0, 0, X_width, Y_width, p);
	for (i = 0; i < 600; i += 5)
	{
		put_block(i, 600 - i, X_width, Y_width, p);
	}
	put_block(0, 0, X_width, Y_width, p);
	for (i = 20; i < 130; i++)
	{
		set_fcolor(i);
		draw_circle(X_width / 2, Y_width / 2 - 20, i);
		fill_circle(X_width / 2, Y_width / 2 + 20, i);
	}
	for (i = 45; i < 60; i++)
	{
		set_fcolor(i);
		set_bcolor(i + 16);
		rondo();
	}
	free(p);
	return clock() - f;
}

/*
 * Runs _bench() once with ppop set to _GSET and once with _GXOR, restores
 * _GSET, then reports the resolution, the memory model and both results on
 * screen (gprintf) and on stdout (printf). Unless __QNX__ is defined the
 * MMX flag is reported as well.
 */
void bench(void)
{
	int setTicks, xorTicks;
	const char *memModel;

	cas();

	set_ppop(_GSET);
	setTicks = _bench();
	set_ppop(_GXOR);
	xorTicks = _bench();
	set_ppop(_GSET);

	// black panel behind the on-screen report
	set_fcolor(CBLACK);
	fill_box(100,90,290,140);
	set_colors(CWHITE, CBLACK);

	memModel = linear ? "linear" : "banked";

	gprintf(120,110,"%dx%dx%d : %s", X_width,Y_width,FASTGL_BPP,memModel);
	gprintf(120,140,"Graph mark - %d (redraw mode)", setTicks);
	gprintf(120,170,"Graph mark - %d (xor mode)", xorTicks);
	printf("%dx%dx%d : %s\n", X_width,Y_width,FASTGL_BPP,memModel);
	printf("Graph mark - %d (redraw mode)\n", setTicks);
	printf("Graph mark - %d (xor mode)\n", xorTicks);
#ifndef __QNX__
	{
		const char *mmxState = mmx ? "Yes" : "No";

		printf("MMX Extension : %s\n", mmxState);
		gprintf(120, 200,"MMX Extension : %s", mmxState);
	}
#endif
}

/* do_arc:
 *  Helper function for the arc functions. Starting at the point of angle
 *  ang1 on the circle of radius r around (x, y), it steps one pixel at a
 *  time through the quadrants in the order 1 -> 2 -> 3 -> 4 -> 1
 *  (anticlockwise) until the point of angle ang2 is reached, and calls
 *  proc(px, py) for every point visited, including the last one.
 *
 *  ang1 and ang2 are passed straight to cos() and sin().
 *  The start and end points are computed as (x + r*cos(a), y - r*sin(a)),
 *  so a positive sine moves towards smaller y.
 */
static void do_arc(int x, int y, double ang1, double ang2, int r, void (*proc)(int, int))
{
   int px, py;                /* current point */
   int ex, ey;                /* end point */
   int px1, px2, px3;         /* the three candidate next points */
   int py1, py2, py3;
   int d1, d2, d3;            /* |distance^2 - r^2| for each candidate */
   int ax, ay;
   int q, qe;                 /* current quadrant and end quadrant, 1..4 */
   double tg_cur, tg_end;
   int done = 0;
   /* rr1..rr3 are reused: first radius / centre x / centre y as doubles,
    * later the squared distances of the three candidates from the centre.
    */
   double rr1, rr2, rr3;
   int rr = (r*r);            /* squared radius, computed in int arithmetic */

   rr1 = r;
   rr2 = x;
   rr3 = y;

   /* evaluate the start point and the end point */
   px = (int)(rr2 + rr1 * cos(ang1));
   py = (int)(rr3 - rr1 * sin(ang1));
   ex = (int)(rr2 + rr1 * cos(ang2));
   ey = (int)(rr3 - rr1 * sin(ang2));

   /* start quadrant */
   if (px >= x) {
      if (py <= y)
	 q = 1;                           /* quadrant 1 */
      else
	 q = 4;                           /* quadrant 4 */
   }
   else {
      if (py < y)
	 q = 2;                           /* quadrant 2 */
      else
	 q = 3;                           /* quadrant 3 */
   }

   /* end quadrant */
   if (ex >= x) {
      if (ey <= y)
	 qe = 1;                          /* quadrant 1 */
      else
	 qe = 4;                          /* quadrant 4 */
   }
   else {
      if (ey < y)
	 qe = 2;                          /* quadrant 2 */
      else
	 qe = 3;                          /* quadrant 3 */
   }

   /* loc_tg: slope of the line from the centre (x, y) to (_x, _y), or just
    * the y offset when the x offset is zero (avoids dividing by zero).
    * NOTE: the expansion is an unparenthesised ?: expression, so it is only
    * safe as the complete right-hand side of an assignment, which is how it
    * is used below. The macro is not #undef'd inside this function.
    */
   #define loc_tg(_y, _x)  (_x-x) ? (double)(_y-y)/(_x-x) : (double)(_y-y)

   tg_end = loc_tg(ey, ex);

   while (!done) {
      proc(px, py);

      /* from here, we have only 3 possible direction of movement, eg.
       * for the first quadrant:
       *
       *    OOOOOOOOO
       *    OOOOOOOOO
       *    OOOOOO21O
       *    OOOOOO3*O
       */

      /* evaluate the 3 possible points. Each case deliberately falls
       * through into the next one when the current point sits on the
       * boundary between the two quadrants.
       */
      switch (q) {

	 case 1:
	    px1 = px;
	    py1 = py-1;
	    px2 = px-1;
	    py2 = py-1;
	    px3 = px-1;
	    py3 = py;

	    /* 2nd quadrant check */
	    if (px != x) {
	       break;
	    }
	    else {
	       /* we were in the end quadrant, changing is illegal. Exit. */
	       if (qe == q)
		  done = 1;
	       q++;
	    }
	    /* fall through */

	 case 2:
	    px1 = px-1;
	    py1 = py;
	    px2 = px-1;
	    py2 = py+1;
	    px3 = px;
	    py3 = py+1;

	    /* 3rd quadrant check */
	    if (py != y) {
	       break;
	    }
	    else {
	       /* we were in the end quadrant, changing is illegal. Exit. */
	       if (qe == q)
		  done = 1;
	       q++;
	    }
	    /* fall through */

	 case 3:
	    px1 = px;
	    py1 = py+1;
	    px2 = px+1;
	    py2 = py+1;
	    px3 = px+1;
	    py3 = py;

	    /* 4th quadrant check */
	    if (px != x) {
	       break;
	    }
	    else {
	       /* we were in the end quadrant, changing is illegal. Exit. */
	       if (qe == q)
		  done = 1;
	       q++;
	    }
	    /* fall through */

	 case 4:
	    px1 = px+1;
	    py1 = py;
	    px2 = px+1;
	    py2 = py-1;
	    px3 = px;
	    py3 = py-1;

	    /* 1st quadrant check */
	    if (py == y) {
	       /* we were in the end quadrant, changing is illegal. Exit. */
	       if (qe == q)
		  done = 1;

	       /* wrap around to quadrant 1 and use its candidate set */
	       q = 1;
	       px1 = px;
	       py1 = py-1;
	       px2 = px-1;
	       py2 = py-1;
	       px3 = px-1;
	       py3 = py;
	    }
	    break;

	 default:
	    /* q is only ever set to 1..4 above, so this is a safety net */
	    return;
      }

      /* now, we must decide which of the 3 points is the right point.
       * We evaluate the distance from center and, then, choose the
       * nearest point.
       */
      ax = x-px1;
      ay = y-py1;
      rr1 = ax*ax + ay*ay;

      ax = x-px2;
      ay = y-py2;
      rr2 = ax*ax + ay*ay;

      ax = x-px3;
      ay = y-py3;
      rr3 = ax*ax + ay*ay;

      /* difference from the main radius */
      if (rr1 > rr)
	 d1 = (int)(rr1-rr);
      else
	 d1 = (int)(rr-rr1);
      if (rr2 > rr)
	 d2 = (int)(rr2-rr);
      else
	 d2 = (int)(rr-rr2);
      if (rr3 > rr)
	 d3 = (int)(rr3-rr);
      else
	 d3 = (int)(rr-rr3);

      /* what is the minimum?
       * NOTE(review): candidate 1 wins whenever d1 <= d2; d1 is never
       * compared with d3. Left as is - confirm whether that is intended.
       */
      if (d1 <= d2) {
	 px = px1;
	 py = py1;
      }
      else if (d2 <= d3) {
	 px = px2;
	 py = py2;
      }
      else {
	 px = px3;
	 py = py3;
      }

      /* are we in the final quadrant? */
      if (qe == q) {
	 tg_cur = loc_tg(py, px);

	 /* is the arc finished? All four cases apply the same test:
	  * stop once the current slope is no greater than the end slope.
	  */
	 switch (q) {

	    case 1:
	       /* end quadrant = 1? */
	       if (tg_cur <= tg_end)
		  done = 1;
	       break;

	    case 2:
	       /* end quadrant = 2? */
	       if (tg_cur <= tg_end)
		  done = 1;
	       break;

	    case 3:
	       /* end quadrant = 3? */
	       if (tg_cur <= tg_end)
		  done = 1;
	       break;

	    case 4:
	       /* end quadrant = 4? */
	       if (tg_cur <= tg_end)
		  done = 1;
	       break;
	 }
      }
   }

   /* draw the last evaluated point */
   proc(px, py);
}

/* draw_arc:
 *  Draws the arc of radius r around (x, y) from angle ang1 to angle ang2 by
 *  running do_arc() with draw_point as the per-point callback.
 */
void draw_arc(int x, int y, double ang1, double ang2, int r)
{
   do_arc(x, y, ang1, ang2, r, draw_point);
}

/* DrawArc:
 *  Same as draw_arc(), but do_arc() is run with Pixel as the per-point
 *  callback instead of draw_point.
 */
void DrawArc(int x, int y, double ang1, double ang2, int r)
{
   do_arc(x, y, ang1, ang2, r, Pixel);
}


