#include <stdio.h>
#include <StopWatch.h>

#include "efbitmap.h"

/*
	Y2DMIN / Y2DMAX

	Yield the smaller / larger of two values.  Every use of an
	argument is parenthesized so that an argument containing a
	lower-precedence operator (|, &, ?:, ...) is compared as a whole.
	Each argument is still evaluated twice, so do not pass
	expressions that have side effects.
*/
#define Y2DMIN(x,y) (((x) < (y)) ? (x) : (y))
#define Y2DMAX(x,y) (((x) > (y)) ? (x) : (y))

/*
	Function: Y2DCopyPixelHLine32
	
	Copy one horizontal line of 32-bit pixels from the source to the
	same line of the destination.

	dst, src     - destination and source pixel buffers
	line         - zero-based row to copy; a row that lies outside
	               either buffer is ignored
	transferMode - TM_OPAQUE copies every pixel; any other value
	               skips source pixels whose fourth byte (alpha)
	               is zero, which gives transparency

	The number of pixels copied is the smaller of the two buffers'
	(width + 1).  Pixels are moved byte for byte and are never loaded
	as floating point values, so every bit pattern arrives unchanged.
*/

void
Y2DCopyPixelHLine32(pixel_buffer *dst, pixel_buffer *src, 
	const long line, const int transferMode)
{
	const int kPixelBytes = 4;

	// NOTE(review): the +1 treats width/height as the last valid
	// index rather than a count - confirm against pixel_buffer.
	const long srcheight = src->height+1;
	const long srcwidth = src->width+1;
	const long dstheight = dst->height+1;
	const long dstwidth = dst->width+1;
	const long height = Y2DMIN(srcheight, dstheight);
	const long width = Y2DMIN(srcwidth, dstwidth);

#ifdef DEBUG_TIMING
	// All three values are long, so they need %ld.
	printf("Y2DCopyPixelHLine32 - Line: %ld  Width: %ld  Height: %ld\n",
		line, width, height);
	BStopWatch watch("Copy Pixel Line");
#endif

	// Never touch memory for a row that neither buffer can hold.
	if (line < 0 || line >= height)
		return;

	// Row starts are located in whole pixels, then scaled to bytes.
	const unsigned long dstoffset = (line * dst->bytes_per_row/4) * kPixelBytes;
	const unsigned long srcoffset = (line * src->bytes_per_row/4) * kPixelBytes;
	unsigned char *dstbytes = (unsigned char *)dst->pixels;
	const unsigned char *srcbytes = (const unsigned char *)src->pixels;

	const int skipTransparent = (TM_OPAQUE != transferMode);

	for (long x = 0; x < width; x++)
	{
		const unsigned long from = srcoffset + x*kPixelBytes;
		const unsigned long to = dstoffset + x*kPixelBytes;

		// The alpha byte is read from the very pixel being copied.
		if (skipTransparent && !srcbytes[from+3])
			continue;

		dstbytes[to]   = srcbytes[from];
		dstbytes[to+1] = srcbytes[from+1];
		dstbytes[to+2] = srcbytes[from+2];
		dstbytes[to+3] = srcbytes[from+3];
	}
}



/*
	Function:  Y2DCopyRect32
	
	Copies a rectangular area of 32-bit pixels from the source to a
	specified offset in the destination.

	dst, src           - destination and source pixel buffers
	srcleft, srctop,
	srcright, srcbottom - source rectangle; all four edges are
	                     inclusive
	x1, y1             - where the rectangle's top-left pixel lands
	                     in the destination
	transferMode       - TM_OPAQUE performs a plain copy; any other
	                     value performs an "over" operation that
	                     skips source pixels whose fourth byte
	                     (alpha) is zero

	An inverted rectangle (right < left or bottom < top) copies
	nothing.  The rectangle is NOT clipped against either buffer;
	the caller must make sure it fits.  Pixels are moved byte for
	byte and are never loaded as floating point values.
*/

void
Y2DCopyRect32(pixel_buffer *dst, 
	const pixel_buffer *src, 
	const ushort srcleft, const ushort srctop,
	const ushort srcright, const ushort srcbottom,
	const ushort x1, const ushort y1,
	const int transferMode)
{
	// Without this check an inverted rectangle would wrap the width
	// to an enormous unsigned value and run far past the buffers.
	if (srcright < srcleft || srcbottom < srctop)
		return;

	const int kPixelBytes = 4;

	// Row strides in whole pixels.
	const long dststride = dst->bytes_per_row/4;
	const long srcstride = src->bytes_per_row/4;
	const long width = (long)srcright - (long)srcleft + 1;

	unsigned char *dstbytes = (unsigned char *)dst->pixels;
	const unsigned char *srcbytes = (const unsigned char *)src->pixels;

	// Byte offsets of the first pixel of the current row.
	unsigned long dstoffset = (unsigned long)(y1*dststride + x1) * kPixelBytes;
	unsigned long srcoffset = (unsigned long)(srctop*srcstride + srcleft) * kPixelBytes;

	const int skipTransparent = (TM_OPAQUE != transferMode);

	for (long y = srctop; y <= srcbottom; y++)
	{
		for (long x = 0; x < width; x++)
		{
			const unsigned long from = srcoffset + x*kPixelBytes;
			const unsigned long to = dstoffset + x*kPixelBytes;

			// If the pixel is opaque, then copy it, otherwise
			// don't.  This allows for transparency.
			if (skipTransparent && !srcbytes[from+3])
				continue;

			dstbytes[to]   = srcbytes[from];
			dstbytes[to+1] = srcbytes[from+1];
			dstbytes[to+2] = srcbytes[from+2];
			dstbytes[to+3] = srcbytes[from+3];
		}

		dstoffset += dststride * kPixelBytes;
		srcoffset += srcstride * kPixelBytes;
	}
}



/*
	Function: Y2DCopyPixels32
	
	Copy all of the source's pixels to the top-left corner of the
	destination by handing the whole source area to Y2DCopyRect32.
	The transfer mode is passed through untouched: TM_OPAQUE gives
	opaque transfers, any other value honors transparency.

	NOTE(review): the source's width and height are passed as the
	right and bottom edges, which Y2DCopyRect32 treats as inclusive.
	Confirm that pixel_buffer stores the last index in these fields
	rather than a pixel count.
*/

void
Y2DCopyPixels32(pixel_buffer *dst, const pixel_buffer *src, const int transferMode)
{
	Y2DCopyRect32(
		dst,
		src,
		0,				// srcleft
		0,				// srctop
		src->width,		// srcright
		src->height,	// srcbottom
		0,				// x1: destination column
		0,				// y1: destination row
		transferMode);
}



/*
	Function: Y2DBlendPixels32
	
	This function will take two images, and using scaling
	factors for each, blend them and display them into a
	third.  Used in steps, this would be the equivalent of
	a cross fade from one image to another.

	fScreen        - buffer that receives the blended pixels
	fDestination   - image weighted by destBlend
	destBlend      - weight of fDestination; fSource is weighted by
	                 (1 - destBlend).  Presumably within 0..1 - the
	                 lookup tables are not clamped.
	fSource        - image weighted by (1 - destBlend)
	dstleft..dstbottom - rectangle read from fDestination; all four
	                 edges are inclusive
	x1, y1         - top-left position on fScreen.  fSource is read
	                 at this same position, not at dstleft/dsttop.
	startRow       - first row of the rectangle to process
	rowStep        - distance between processed rows; values below
	                 1 process nothing
	transferMode   - TM_OPAQUE blends every pixel; any other value
	                 skips pixels whose fDestination alpha byte is
	                 zero
	
	Methodology:
	Only rows startRow, startRow+rowStep, ... that still lie inside
	the rectangle are written.  With a rowStep of 2 the routine must
	therefore be called twice (startRow 0 and 1) to cover every
	line.  Written pixels always get an alpha byte of 255.
*/

void
Y2DBlendPixels32(pixel_buffer *fScreen, 
			pixel_buffer *fDestination, const float destBlend, 
			pixel_buffer *fSource, 
			const ushort dstleft, const ushort dsttop,
			const ushort dstright, const ushort dstbottom,
			const ushort x1, const ushort y1,
			const long startRow, const long rowStep,
			const int transferMode)
{
	// A step that does not advance would loop forever.
	if (rowStep <= 0)
		return;

	const int kPixelBytes = 4;

	// We're most interested in the height and
	// width of the destination.
	const long dstheight = dstbottom - dsttop+1;
	const long dstwidth= dstright - dstleft+1;
	
	const float sourceBlend = 1.0 - destBlend;

	// Create a couple of Look Up Tables so that the multiplications
	// only have to happen once.
	unsigned char dstBlendLUT[256];
	unsigned char srcBlendLUT[256];
	for (int ctr = 0; ctr < 256; ctr++)
	{
		dstBlendLUT[ctr] = ctr* destBlend;
		srcBlendLUT[ctr] = ctr* sourceBlend;
	}
	
	// Byte offsets of the first pixel of the current row in each of
	// screen, destination, and source.
	long screenoffset = ((y1+startRow)*(fScreen->bytes_per_row/4)+x1) * kPixelBytes;
	long dstoffset = (startRow+dsttop)*fDestination->bytes_per_row+(dstleft*4);
	long srcoffset = (startRow+y1)*fSource->bytes_per_row+(x1*4);
	
	// All three buffers are addressed as bytes.  The result is
	// stored channel by channel and never passes through a float.
	unsigned char * screenbits = (unsigned char *)fScreen->pixels;
	const unsigned char * dstbits = (const unsigned char *)fDestination->pixels;
	const unsigned char * srcbits = (const unsigned char *)fSource->pixels;

	const int skipTransparent = (TM_OPAQUE != transferMode);
	
	// Start counting at startRow: the offsets above already point at
	// that row, so counting from zero would run past the rectangle.
	for (long y = startRow; y < dstheight; y += rowStep)
	{
		for (long x = 0; x < dstwidth; x++)
		{
			const long x4 = x*kPixelBytes;
			const unsigned char *d = dstbits + dstoffset + x4;

			if (skipTransparent && !d[3])
				continue;

			const unsigned char *s = srcbits + srcoffset + x4;
			unsigned char *out = screenbits + screenoffset + x4;

			out[0] = srcBlendLUT[s[0]] + dstBlendLUT[d[0]];
			out[1] = srcBlendLUT[s[1]] + dstBlendLUT[d[1]];
			out[2] = srcBlendLUT[s[2]] + dstBlendLUT[d[2]];
			out[3] = 255;
		}
		
		// Increment each of the offsets the appropriate amounts
		screenoffset += (fScreen->bytes_per_row/4)*rowStep*kPixelBytes;
		dstoffset += fDestination->bytes_per_row*rowStep;
		srcoffset += fSource->bytes_per_row*rowStep;
	}
}


/*
**	Function: Y2DTransferPixelsWhite32
**
**	Transfer pixels doing a source copy, with a factor.
**	The factor doesn't represent a blend of pixels, rather
**	it is the brightness of the source pixels measured from
**	white: each channel value c becomes
**	    (255 - trunc(255*sourceBlend)) + sourceBlend*c
**	so a factor of 1 leaves the channel unchanged and a factor
**	of 0 turns it into 255.
**
**	fScreen      - buffer that receives the pixels
**	fSource      - buffer the pixels are read from
**	sourceBlend  - the factor described above; presumably within
**	               0..1, the lookup table is not clamped
**	srcleft..srcbottom - source rectangle; all four edges are
**	               inclusive
**	x1, y1       - top-left position on fScreen
**	transferMode - TM_OPAQUE writes every pixel; any other value
**	               skips source pixels whose alpha byte is zero
**
**	Written pixels always get an alpha byte of 255.  The result is
**	stored channel by channel and never passes through a float.
*/

void
Y2DTransferPixelsWhite32(pixel_buffer *fScreen, 
			pixel_buffer *fSource, const float sourceBlend,
			const ulong srcleft, const ulong srctop,
			const ulong srcright, const ulong srcbottom,
			const ulong x1, const ulong y1,
			const int transferMode)
{
	const int kPixelBytes = 4;

	// We're most interested in the height and
	// width of the source rectangle.
	const long srcheight = srcbottom - srctop+1;
	const long srcwidth= srcright - srcleft+1;
	
	// Create a Look Up Table so that the multiplications
	// only have to happen once.
	unsigned char srcBlendLUT[256];
	for (int ctr = 0; ctr < 256; ctr++)
	{
		srcBlendLUT[ctr] = (unsigned char)255 - (unsigned char)(sourceBlend*(float)255)+(sourceBlend*ctr);
	}
	
	// Byte offsets of the first pixel of the current row.
	long screenoffset = (y1*(fScreen->bytes_per_row/4)+x1) * kPixelBytes;
	long srcoffset = (srctop)*fSource->bytes_per_row+(srcleft*4);
	
	unsigned char * screenbits = (unsigned char *)fScreen->pixels;
	const unsigned char * srcbits = (const unsigned char *)fSource->pixels;

	const int skipTransparent = (TM_OPAQUE != transferMode);
	
	for (long y = 0; y < srcheight; y++)
	{
		for (long x = 0; x < srcwidth; x++)
		{
			const long x4 = x*kPixelBytes;
			const unsigned char *s = srcbits + srcoffset + x4;

			if (skipTransparent && !s[3])
				continue;

			unsigned char *out = screenbits + screenoffset + x4;
			out[0] = srcBlendLUT[s[0]];
			out[1] = srcBlendLUT[s[1]];
			out[2] = srcBlendLUT[s[2]];
			out[3] = 255;
		}
		
		// Step both offsets down one row.
		screenoffset += (fScreen->bytes_per_row/4)*kPixelBytes;
		srcoffset += fSource->bytes_per_row;
	}
}
/*
**	Function: Y2DTransferPixels32
**
**	Transfer pixels doing a source copy, with a factor.
**	The factor doesn't represent a blend of pixels, rather
**	it is the brightness of the source pixels: each channel
**	value c becomes c*sourceBlend, rounded to the nearest
**	integer.
**
**	fScreen      - buffer that receives the pixels
**	fSource      - buffer the pixels are read from
**	sourceBlend  - brightness factor; presumably within 0..1,
**	               the lookup table is not clamped
**	srcleft..srcbottom - source rectangle; all four edges are
**	               inclusive
**	x1, y1       - top-left position on fScreen
**	transferMode - TM_OPAQUE writes every pixel; any other value
**	               skips source pixels whose alpha byte is zero
**
**	Written pixels always get an alpha byte of 255.  The result is
**	stored channel by channel and never passes through a float.
*/

void
Y2DTransferPixels32(pixel_buffer *fScreen, 
			pixel_buffer *fSource, const float sourceBlend,
			const ulong srcleft, const ulong srctop,
			const ulong srcright, const ulong srcbottom,
			const ulong x1, const ulong y1,
			const int transferMode)
{
	const int kPixelBytes = 4;

	// We're most interested in the height and
	// width of the source rectangle.
	const long srcheight = srcbottom - srctop+1;
	const long srcwidth= srcright - srcleft+1;
	
	// Create a Look Up Table so that the multiplications
	// only have to happen once.
	unsigned char srcBlendLUT[256];
	for (int ctr = 0; ctr < 256; ctr++)
	{
		srcBlendLUT[ctr] = (unsigned char)((float)ctr* sourceBlend + .5);
	}
	
	// Byte offsets of the first pixel of the current row.
	long screenoffset = (y1*(fScreen->bytes_per_row/4)+x1) * kPixelBytes;
	long srcoffset = (srctop)*fSource->bytes_per_row+(srcleft*4);
	
	unsigned char * screenbits = (unsigned char *)fScreen->pixels;
	const unsigned char * srcbits = (const unsigned char *)fSource->pixels;

	const int skipTransparent = (TM_OPAQUE != transferMode);
	
	for (long y = 0; y < srcheight; y++)
	{
		for (long x = 0; x < srcwidth; x++)
		{
			const long x4 = x*kPixelBytes;
			const unsigned char *s = srcbits + srcoffset + x4;

			if (skipTransparent && !s[3])
				continue;

			unsigned char *out = screenbits + screenoffset + x4;
			out[0] = srcBlendLUT[s[0]];
			out[1] = srcBlendLUT[s[1]];
			out[2] = srcBlendLUT[s[2]];
			out[3] = 255;
		}
		
		// Step both offsets down one row.
		screenoffset += (fScreen->bytes_per_row/4)*kPixelBytes;
		srcoffset += fSource->bytes_per_row;
	}
}




/*
	Function: Y2DRectFill32
	
	Fills a rectangle of a 32-bit pixel buffer with a specified
	color.  Clearing a buffer is simply a rectangle fill that
	covers all of it.

	dstleft, dsttop,
	dstright, dstbottom - rectangle to fill; all four edges are
	                      inclusive
	aColor              - fill color
	screenInfo          - buffer to fill

	Each pixel is stored as four bytes in the order blue, green,
	red, alpha.  The bytes are written directly; the color is never
	packed into a float, so every color arrives bit for bit.
	The rectangle is NOT clipped against the buffer.
*/

void 
Y2DRectFill32(const ulong dstleft, const ulong dsttop,
			const ulong dstright, const ulong dstbottom,
			const rgb_color *aColor, pixel_buffer *screenInfo)
{
	const int kPixelBytes = 4;

	const long dstheight = dstbottom - dsttop+1;
	const long dstwidth= dstright - dstleft+1;

	// Row stride in whole pixels.
	const long rowStride = screenInfo->bytes_per_row / 4;

	// Read the color once, before the loops.
	const unsigned char blue = aColor->blue;
	const unsigned char green = aColor->green;
	const unsigned char red = aColor->red;
	const unsigned char alpha = aColor->alpha;

	unsigned char *dstbytes = (unsigned char *)screenInfo->pixels;

	// Byte offset of the first pixel of the current row.
	long dstoffset = (long)(dsttop * rowStride + dstleft) * kPixelBytes;

	for (long y = 0; y < dstheight; y++)
	{
		for (long x = 0; x < dstwidth; x++)
		{
			unsigned char *out = dstbytes + dstoffset + x*kPixelBytes;
			out[0] = blue;
			out[1] = green;
			out[2] = red;
			out[3] = alpha;
		}
		dstoffset += rowStride * kPixelBytes;
	}
}


/*
	Function: Y2DClear32

	Fill a pixel buffer with a single color by handing the
	rectangle (0, 0) .. (width-1, height-1) to Y2DRectFill32.

	NOTE(review): this treats the buffer's width and height as
	pixel counts; confirm that matches pixel_buffer's definition.
*/
void 
Y2DClear32(pixel_buffer *scrInfo, rgb_color *aColor)
{
	Y2DRectFill32(
		0,						// dstleft
		0,						// dsttop
		scrInfo->width-1,		// dstright
		scrInfo->height-1,		// dstbottom
		aColor,
		scrInfo);
}


