// Copyright (C) 1996 Keith Whitwell.
// This file may only be copied under the terms of the GNU Library General
// Public License - see the file COPYING in the lib3d distribution.

#include <Lib3d/Vector.H>
#include "View8.H"

// Change the `0` in the #if below to `1` if you want to use the assembly
// inner loops (__i386 and NO_DEBUG must also be defined for it to take
// effect).  This will also boost the speed rating of this viewport, making
// it the default, instead of DitherViewport.
//
// Note that because the assembly uses %ebp without saving it, you must
// have compiled with -fomit-frame-pointer or equivalent.
//
// If you look at the final assembly generated, you will see that there is
// a fair amount of register shuffling at the boundaries of the inline
// region which negates a good deal of the gains of using assembly, especially
// for small triangles.  To avoid this, the region implemented in assembly
// should be expanded to include the vertical loops as well.

// USE_ASM selects the inline-assembly span loops in flatTriangleZb().
// The leading `0 &&` keeps it switched off unconditionally; without it the
// assembly would still require an i386 target and NO_DEBUG to be defined.
#if 0 && defined(__i386) && defined(NO_DEBUG)
#define USE_ASM
#endif

// SPEED is the rating passed to the DitherViewport exemplar constructor by
// Viewport8Ci; the assembly build advertises a higher value than the C++ one.
#ifdef USE_ASM
#define SPEED 82
#else
#define SPEED 78
#endif


// Viewport for 8 bit colour-index devices.
//
// Derives from DitherViewport and replaces the flat, z-buffered polygon and
// triangle fillers with versions that write a single palette index per
// pixel.  Colours are allocated from the device on demand by getColour().
//
// Instances are only created through clone() or the static exemplar, which
// is why the constructors and the destructor are protected.
class Viewport8Ci : public DitherViewport
{
public:
    // Fill an nr-sided polygon in one colour, testing against the z buffer.
    void flatPolygonZb(uint nr,  PipelineData * const[], Colour colour );

    // Fill a triangle in one colour, testing against the z buffer.
    void flatTriangleZb( PipelineData * const[], Colour colour );

    // Name of this viewport class.
    const char *getName() const { return "Viewport8Ci"; }

protected:
    // Returns a new Viewport8Ci for an 8 bit device, 0 for any other depth.
    Viewport *clone( Device *device );

    ~Viewport8Ci();

    // Exemplar constructor: passes the SPEED rating on to DitherViewport.
    // revertToDither is initialised here as well, so that it never holds an
    // indeterminate value (the Device constructor already cleared it).
    Viewport8Ci( Exemplar e )
	: DitherViewport( e, SPEED ),
	  revertToDither( false )
    {}

    // Constructor for a live viewport attached to a device.
    Viewport8Ci( Device * );

    // Maps an RGB triple to a pixel value, allocating it on first use.
    Colour getColour( uint r, uint g, uint b );

    // Allocates and clears the colour lookup table.
    void initializeColours();

protected:
    // Fallback switch for the DitherViewport fillers.  The checks that read
    // it in this file are currently disabled with `0 &&`.
    bool revertToDither;

    // The exemplar instance, created during static initialisation.
    static Viewport8Ci *advertisment;
};

// Exemplar instance, built during static initialisation through the
// protected Exemplar constructor.  Presumably this is how the class makes
// itself known to the base viewport machinery so that clone() can be called
// later -- confirm against Viewport/DitherViewport.
Viewport8Ci *Viewport8Ci::advertisment = new Viewport8Ci( Exemplar() ); 

// Builds a live viewport on top of `device`.  The dither fallback flag
// starts out cleared.
Viewport8Ci::Viewport8Ci( Device *device )
    : DitherViewport( device ), revertToDither( false ) {}

// Nothing is released at this level.
// NOTE(review): ditherTable is allocated with new[] in initializeColours();
// presumably DitherViewport frees it -- confirm.
Viewport8Ci::~Viewport8Ci() {}

// Factory hook.  Returns a freshly allocated Viewport8Ci bound to `device`
// when the device is 8 bits deep; for any other depth a warning is logged
// and 0 is returned.
Viewport *
Viewport8Ci::clone( Device *device )
{
    const uint bits = device->getDepth();

    // Reject unsuitable devices first.
    if (bits != 8) {
	debug() << "Warning: Viewport8Ci not suitable for " << bits
	        << " bit device." << endlog;
	return 0;
    }

    debug() << "Creating new viewport for 8 bit device." << endlog;
    return new Viewport8Ci( device );
}

void
Viewport8Ci::initializeColours()
{
    ditherTable = new uchar[16384];
    for(uint i = 0 ; i < 16384/sizeof(int) ; i++) {
	*((int*)ditherTable+i) = 0xff00ff00; // some impossible value.
    }
    getColour(0,0,0);		// pre-alloc the background...
}

// Returns the pixel value for an RGB triple, allocating it from the device
// the first time that particular colour is requested.
//
// Only the high nibble of each component is significant.  The three
// nibbles are combined into a byte offset (always a multiple of four) into
// ditherTable, where the result is cached.
Colour
Viewport8Ci::getColour( uint r, uint g, uint b )
{
    if (!ditherTable) initializeColours();

    const uint rHi = r & 0xf0;
    const uint gHi = g & 0xf0;
    const uint bHi = b & 0xf0;
    const uint byteOffset = (rHi << 6) | (gHi << 2) | (bHi >> 2);

    Colour &cell = *((Colour *)(ditherTable + byteOffset));

    if (cell == 0xff00ff00) {
	// Cell still holds the marker written by initializeColours():
	// ask the device for the colour and cache it.
	const uint pixel = device->allocateColour(rHi << 8, gHi << 8, bHi << 8);

	// The same index is replicated into all four bytes.
	cell = pixel | (pixel << 8) | (pixel << 16) | (pixel << 24);	// fool dither
    }

    return cell;
}

// Edge buffers used by flatPolygonZb(); they are only declared here and
// are defined in another translation unit.  Both are indexed by scanline
// and sized for EDGEMAX scanlines.
#define EDGEMAX 1024
// ex[2*y] is the left x and ex[2*y+1] the right x of the span on scanline y.
extern    int ex[2*EDGEMAX];
// ez[y] is the z value at the left end of the span on scanline y.
extern    uint ez[EDGEMAX];

// Fills a polygon of `nr` vertices with a single colour, testing and
// updating the z buffer for every pixel.
//
//   nr      number of entries in vertex[]
//   vertex  polygon vertices; only device.v[X], device.v[Y] and device.v[Z]
//           are read, each truncated to an integer
//   colour  only the low 8 bits are written to the frame buffer
//
// Triangles are handed to flatTriangleZb().  For other polygons every edge
// is walked once: edges with decreasing y store their x values in the odd
// (right) slots of ex[], edges with increasing y store x in the even (left)
// slots of ex[] and the interpolated z in ez[].  A single horizontal z
// gradient is derived on the middle scanline and then used for all spans
// (presumably relying on the polygon being planar -- confirm).
//
// NOTE(review): y values index ex[] and ez[] without a range check in this
// function; presumably the caller has clipped to fewer than EDGEMAX
// scanlines -- confirm.
void 
Viewport8Ci::flatPolygonZb(uint nr, 
			   PipelineData * const vertex[], 
			   Colour colour )
{
    // Disabled fallback: the constant 0 means this branch is never taken.
    if (0 && revertToDither) {
	DitherViewport::flatPolygonZb( nr, vertex, colour);
	return;
    }

    if (nr == 3) {
	Viewport8Ci::flatTriangleZb( vertex, colour );
	return;
    }
    uint i;

    // Start from the last vertex so that the first edge processed below
    // runs from vertex[nr-1] to vertex[0].
    int x1 = int(vertex[nr-1]->device.v[X]);
    int y1 = int(vertex[nr-1]->device.v[Y]);
    uint z1 = uint(vertex[nr-1]->device.v[Z]);

    int ymin = y1;
    int ymax = y1;

    // Need to find ymid in advance 
    for ( int k = nr-2 ; k >= 0; k-- ) {
	int y = int(vertex[k]->device.v[Y]);
	if (y < ymin) ymin = y; 
	if (y > ymax) ymax = y;
    }

    // Zero height: nothing to draw.
    if (ymin == ymax) return;
    int ymid = (ymin + ymax) >> 1;
    int zmid = 0;			// z value of rhs of polygon at ymid.

    for ( i = 0 ; i < nr; i++ ) {
	
	// Current edge runs from (x0,y0,z0) to (x1,y1,z1).
	int x0 = x1;
	int y0 = y1;
	uint z0 = z1;

	y1 = int(vertex[i]->device.v[Y]);
	x1 = int(vertex[i]->device.v[X]);
	z1 = uint(vertex[i]->device.v[Z]);

	int dy = y1 - y0;
	// Horizontal edges contribute nothing to the edge buffers.
	if (dy == 0) continue;

	int dx;
	int x;			// Start point for edge rasterization.
	int *ptr;		// Into edge rasterization buffer.

	if (dy < 0) {
	    // Right edge (y decreases from y0 to y1).  Pick up the z value
	    // of this side at the middle scanline while we are here.
	    if ( y0 == ymid ) {
		zmid = z0;
	    } else if ( y0 >= ymid && y1 <= ymid ) {
		// Includes the middle scanline of the polygon.
		zmid = int(z0) + ((ymid - y0)*(int(z1) - int(z0))) / dy;
	    }

	    // Walk from the upper end (y1) downwards, x in 24.8 fixed point,
	    // writing the odd slots of ex[].
	    dy = -dy;
	    ptr = ex + y1*2 + 1;
	    x = x1 * 256;
	    dx = (x0 * 256) - x;
	    int xSlope = dx / dy;
	    // dy+1 steps, so both end scanlines are written.
	    int   k = dy+1;
	    
	    do {
		*ptr = x>>8;
		ptr += 2;
		x += xSlope;
	    } while(--k);

	} else {

	    // Left edge.  We also want to interpolate the z coordinates.

	    uint *zptr = ez + y0;
	    // The generation mask is added to every z that gets compared
	    // with the z buffer (presumably so that values left over from
	    // an earlier generation lose the comparison -- confirm).
	    uint  z = z0 + zBuffer->getGenerationMask();
	    int   dz = z1 - z0;
	    int   zSlope = dz / dy;
	    int   k = dy+1;
	    ptr = ex + y0*2 ;
	    x = x0 * 256;
	    dx = (x1 * 256) - x;
	    int xSlope = dx / dy;

	    do {
		*ptr = x>>8;
		ptr += 2;
		x += xSlope;
		*zptr = z;
		zptr++;
		z += zSlope;
	    } while (--k);
	}
    }

    // Horizontal z gradient, measured across the middle scanline from the
    // left edge z (ez) to the right edge z (zmid).
    zmid += zBuffer->getGenerationMask();
    int xspan = (ex[(ymid<<1)+1] - ex[ymid<<1]);
    // An empty or inverted middle span: draw nothing.
    if (xspan <= 0) return;
    int zSlope = (zmid - int(ez[ymid])) / xspan;

    // Row pointers for the first scanline; advanced by one row per pass.
    uchar *cptr  = device->getBuffer() + ymin * device->getRowWidth();
    uint *zptr   = zBuffer->getBuffer() + ymin * zBuffer->getWidth();
    int  *exptr  = ex                  + ymin * 2;
    
    const uchar col = (colour & 0xff);
    
    // Scanlines ymin .. ymax-1 are filled; ymax itself is not.
    for ( int y = ymin ; y < ymax ; y++ ) {
	int xmin = *exptr++;
	// Span width minus one.  Note that this `i` shadows the uint `i`
	// declared at the top of the function.
	int i = (*exptr++ - xmin) - 1;
	uint z;

	if (i >= 0) {
	    uchar *cp = cptr + xmin;
	    uint *zp = zptr + xmin;
	    z = ez[y];
	    
	    // Two pixels per iteration; a pixel is written only when its z
	    // is smaller than the value in the z buffer.
	    while ( i > 0 ) {
		if (z < zp[0]) {	
		    zp[0] = uint(z);
		    cp[0] = col;
		}
		z += zSlope;
		
		if (z < zp[1]) {	
		    zp[1] = uint(z);
		    cp[1] = col;
		}
		z += zSlope;
		zp+=2;
		cp+=2;
		i -= 2;
	    }

	    // One pixel is left over when the span width is odd.
	    if (i == 0) {
		if (z < *zp) {	
		    *zp = uint(z);
		    cp[0] = col;
		}
	    }
	} 

	cptr += device->getRowWidth();
	zptr += zBuffer->getWidth();
    }
}


// Fills a triangle with a single colour, testing and updating the z buffer
// for every pixel.
//
//   vertex  the three corners; only device.v[X], device.v[Y] and
//           device.v[Z] are read
//   colour  only the low 8 bits are written to the frame buffer
//
// The corners are ordered by y into tx[0] (top), tx[1] (elbow) and tx[2]
// (bottom).  The long edge tx[0]->tx[2] is walked with lx/lz, the two short
// edges through the elbow are walked with sx, in two sections (top to elbow,
// then elbow to bottom).  x values are stepped in 24.8 fixed point.  The
// horizontal z gradient is measured once, at the elbow's scanline.
//
// Nothing is drawn when the triangle has zero height, or when the elbow is
// not strictly on the side that `type` predicts.
void 
Viewport8Ci::flatTriangleZb(PipelineData * const vertex[], 
			  Colour colour )
{
    // Both of the following branches are disabled by a constant 0.
    if (0) {
	Viewport8Ci::flatPolygonZb(3, vertex, colour);
	return;
    }
    if (0 && revertToDither) {
	DitherViewport::flatTriangleZb(vertex, colour);
	return;
    }

    int tx[3];

    // type = 0 -- elbow is on lhs.
    //      = 1 -- elbow is on rhs.
    
    // tx[0] = topmost vertex
    // tx[1] = elbow
    // tx[2] = bottommost vertex


    int y0 = vertex[0]->device.v[Y];
    int y1 = vertex[1]->device.v[Y];
    int y2 = vertex[2]->device.v[Y];
    int type;


    // Sort the three vertex indices by y.  `type` follows from the sorted
    // order together with the order in which the caller supplied the
    // vertices.
    if (y0 > y1) {
	if (y1 > y2) {
	    type = 1;
	    tx[0] = 2;
	    tx[1] = 1;
	    tx[2] = 0;
	} else {
	    if (y0 > y2) {
		type = 0;
		tx[0] = 1;
		tx[1] = 2;
		tx[2] = 0;
	    } else {
		type = 1;
		tx[0] = 1;
		tx[1] = 0;
		tx[2] = 2;
	    }
	} 
    } else {
	if (y2 > y1) {
	    type = 0;
	    tx[0] = 0;
	    tx[1] = 1;
	    tx[2] = 2;
	} else {
	    if (y0 > y2) {
		type = 0;
		tx[0] = 2;
		tx[1] = 0;
		tx[2] = 1;
	    } else {
		type = 1;
		tx[0] = 0;
		tx[1] = 2;
		tx[2] = 1;
	    }
	} 
    }

    // Height of the long edge.
    // NOTE(review): this subtracts the stored coordinates before the
    // conversion to int, whereas ymin/ymid/ymax below are converted first;
    // the two agree only if the coordinates hold whole numbers -- confirm.
    int  dy = vertex[tx[2]]->device.v[Y] - vertex[tx[0]]->device.v[Y];
    if (dy == 0) {
	return;
    }

    int ymin = vertex[tx[0]]->device.v[Y];
    int ymid = vertex[tx[1]]->device.v[Y];
    int ymax = vertex[tx[2]]->device.v[Y];

    int xmin = vertex[tx[0]]->device.v[X];
    int xmid = vertex[tx[1]]->device.v[X];
    int xmax = vertex[tx[2]]->device.v[X];

    int zmin = vertex[tx[0]]->device.v[Z];
    int zmid = vertex[tx[1]]->device.v[Z];
    int zmax = vertex[tx[2]]->device.v[Z];

    uchar *cptr  = device->getBuffer()  + ymin * device->getRowWidth();
    const uchar col = (colour & 0xff);

    // (xi, zi) is the point on the long edge at the elbow's scanline; the
    // span from there to the elbow gives the horizontal z gradient.
    int zi = int(zmin) + ((ymid - ymin)*(int(zmax) - int(zmin))) / dy;
    int xi = int(xmin) + ((ymid - ymin)*(int(xmax) - int(xmin))) / dy;
    int xspan = (xmid - xi);
    if (xspan == 0) return;
    int zSlope = (zmid - zi) / xspan;

    // Long edge stepping state: lx in 24.8 fixed point, lz carrying the
    // z buffer generation mask.
    uint lz      = vertex[tx[0]]->device.v[Z] + zBuffer->getGenerationMask();
    int  dz      = vertex[tx[2]]->device.v[Z] - vertex[tx[0]]->device.v[Z];
    int  lzSlope = dz / dy;
    int  lx      = vertex[tx[0]]->device.v[X] * 256;
    int  dlx     = vertex[tx[2]]->device.v[X] - vertex[tx[0]]->device.v[X];
    int  lxSlope = (dlx * 256) / dy;
    uint *zptr   = (uint *)(zBuffer->getBuffer()) + ymin * zBuffer->getWidth();
    
    // Short edge stepping state, reloaded for the second section.
    int sx, dsx, sxSlope;

    if (type == 1) {
	// Elbow on the right: the long edge is the left end of each span
	// and pixels are filled left to right.
	if (xspan <= 0) return;

	dy = ymid - ymin;
	sx      = xmin * 256;
	dsx     = xmid - xmin;
	
	// section 0: top to elbow, section 1: elbow to bottom.
	for (int section = 0 ; ;) {
	    // A section of zero height is skipped.
	    if (dy != 0) {
		sxSlope = (dsx * 256) / dy;
		
		do {
		    int xn = lx>>8;
		    // Number of pixels in this span.
		    int i = (sx>>8) - xn;

		    if (i > 0) {
			uchar *cp = cptr + xn;
			uint *zp = zptr + xn;
			uint z = lz;

#ifdef USE_ASM
			// Assembly version of the C++ loop in the #else
			// branch below.
			// NOTE(review): the clobber list names eax, ebx and
			// ecx, which are also input operands; current GCC
			// releases may reject that -- confirm before
			// enabling USE_ASM.
			asm("\n\t"
			    "movl %5, %%edi\n\t"        // kludge
			    "movl (%%ecx), %%ebp\n"	// z1 = *zp;
			    ".align 4\n"
			    ".top:\n\t"		        // do {
			    "cmpl %%eax,%%ebp\n\t"	//   if (z0 < z1)
			    "jbe .cont\n\t"		//   {
			    "movl %%eax,(%%ecx)\n\t"	//      *zp = z0
			    "movb %%dl,(%%ebx)\n"	//      *cp = c  
			    "\t.align 4\n"
			    ".cont:\n\t"		//   }
			    "movl 4(%%ecx), %%ebp\n\t"	//   z1 = *zp;
			    "addl %%esi,%%eax\n\t"	//   z += zSlope
			    "addl $4,%%ecx\n\t"		//   zp++
			    "incl %%ebx\n\t"		//   cp++
			    "decl %%edi\n\t"	        //   --i
			    "jne .top\n\t"		// } while .. != 0
			    : /* no output registers */
			    : 
			    "a" (z),      // eax = z
			    "b" (cp),     // ebx = cp
			    "c" (zp),     // ecx = zp
			    "d" (col),     // dl  = colour
			    "S" (zSlope), // esi = zSlope
			    "g" (i)
			    : "%eax", "%ebx", "%ecx", "%edi", "%ebp" );
#else
			// Write each pixel whose z is smaller than the
			// value already in the z buffer.
			do {
			    if (z < zp[0]) {	
				zp[0] = uint(z);
				cp[0] = col;
			    }
			    z += zSlope;
			    zp++;
			    cp++;
			} while (--i);
#endif
		    }

		    // Next scanline: advance the row pointers and both edges.
		    cptr += device->getRowWidth();
		    zptr += zBuffer->getWidth();

		    sx += sxSlope;
		    lx += lxSlope;
		    lz += lzSlope;
		    
		} while (--dy);


	    }
	    if (section == 1) break;
	    section++;
	    // Switch the short edge to elbow -> bottom.
	    dy = vertex[tx[2]]->device.v[Y] - vertex[tx[1]]->device.v[Y];
	    sx      = xmid * 256;
	    dsx     = xmax - xmid;
	}
    } else {
	// Elbow on the left: the long edge is the right end of each span
	// and pixels are filled right to left.
	if (xspan >= 0) return;

	// Shift both row pointers one pixel left, so that a span starts at
	// the pixel just left of the long edge.
	cptr--;
	zptr--;
	
	dy = ymid - ymin;
	sx = vertex[tx[0]]->device.v[X] * 256;
	dsx = vertex[tx[1]]->device.v[X] - vertex[tx[0]]->device.v[X];

	// section 0: top to elbow, section 1: elbow to bottom.
	for (int section = 0 ; ; ) {

	    // A section of zero height is skipped.
	    if (dy != 0) {
		sxSlope = (dsx * 256) / dy;
	    
		do {
		    int xx = lx>>8;
		    // Number of pixels in this span.
		    int i = xx - (sx>>8);

 		    if (i > 0) {
			uchar *cp = cptr + xx;
			uint *zp = zptr + xx;
			uint z = lz;

#ifdef USE_ASM		    
			// Assembly version of the C++ loop in the #else
			// branch below (walks towards lower addresses).
			// NOTE(review): same clobber-list concern as the
			// assembly block in the other branch -- confirm
			// before enabling USE_ASM.
			asm("\n\t"
			    "movl %5, %%edi\n\t"        // kludge
			    "movl (%%ecx), %%ebp\n"	// z1 = *zp;
			    ".align 2\n"
			    ".top3:\n\t"		// do {
			    "cmpl %%eax,%%ebp\n\t"	//   if (z0 < z1)
			    "jbe .cont3\n\t"		//   {
			    "movl %%eax,(%%ecx)\n\t"	//      *zp = z0
			    "movb %%dl,(%%ebx)\n"	//      *cp = c  
			    "\t.align 2\n"
			    ".cont3:\n\t"		//   }
			    "movl -4(%%ecx),%%ebp\n\t"	//   z1 = zp[-1]
			    "subl %%esi,%%eax\n\t"	//   z -= zSlope
			    "subl $4,%%ecx\n\t"		//   zp--
			    "decl %%ebx\n\t"		//   cp--
			    "decl %%edi\n\t"	        //   i--
			    "jne .top3\n\t"		// } while .. != 0
			    : /* no output registers */
			    : 
			    "a" (z),     // eax = z
			    "b" (cp),    // ebx = cp
			    "c" (zp),    // ecx = zp
			    "d" (col),    // dl  = colour
			    "S" (zSlope), // esi = zSlope
			    "g" (i)
			    : "%eax", "%ebx", "%ecx", "%edi", "%ebp" );
#else
			// Moving left, so z steps by -zSlope per pixel.
			do {
			    if (z < zp[0]) {	
				zp[0] = uint(z);
				cp[0] = col;
			    }
			    z -= zSlope;
			    zp--;
			    cp--;
			} while (--i);
#endif
		    } 
		    // Next scanline: advance the row pointers and both edges.
		    cptr += device->getRowWidth();
		    zptr += zBuffer->getWidth();

		    sx += sxSlope;
		    lx += lxSlope;
		    lz += lzSlope;

		} while (--dy);
		
	    }
	    if (section) break;
	    section++;
	    // Switch the short edge to elbow -> bottom.
	    dy = ymax - ymid;
	    sx = vertex[tx[1]]->device.v[X] * 256;
	    dsx = vertex[tx[2]]->device.v[X] - vertex[tx[1]]->device.v[X];
	}
    }

    return;    
}



















