#include "Conv_GC.h"


// TO 32 RGB

// Straight copy: moves `count` 32 bit pixels from `source` to `dest` without
// modifying them. Each pixel is fetched with READ32 and stored with WRITE32,
// and both byte pointers advance by 4 per pixel.
void Hermes_GC_32rgb888_32rgb888(char8 *source,char8 *dest,unsigned int count)
{
  unsigned int left;
  int32 pixel;

  for(left=count;left!=0;left--)
  {
    pixel=READ32(source);
    WRITE32(dest,pixel);

    source+=4;
    dest+=4;
  }
}


// TO 32 BGR

// Converts `count` 32 bit pixels from `source` to `dest`, exchanging two of
// the four bytes of every pixel. Which pair is exchanged depends on the
// endianness macro that is defined: bytes 0 and 2 for __LITTLE_ENDIAN__,
// bytes 1 and 3 for __BIG_ENDIAN__. If neither macro is defined no bytes are
// exchanged and the pixel is stored as read.
void Hermes_GC_32rgb888_32bgr888(char8 *source,char8 *dest,unsigned int count)
{
  int32 pixel;
  char8 *bytes=(char8 *)&pixel;   // byte-wise view of the pixel being edited
  char8 swap;

  for(;count!=0;count--)
  {
    pixel=READ32(source);

    #if defined __LITTLE_ENDIAN__
    swap=bytes[0];
    bytes[0]=bytes[2];
    bytes[2]=swap;
    #elif defined __BIG_ENDIAN__
    swap=bytes[1];
    bytes[1]=bytes[3];
    bytes[3]=swap;
    #endif

    WRITE32(dest,pixel);

    source+=4;
    dest+=4;
  }
}



// TO 24 RGB

// Converts `count` 32 bit pixels at `source` into 3 byte pixels at `dest`.
// Each source pixel is loaded into a local dword and its three colour bytes
// are copied one by one; R_32/G_32/B_32 select the byte inside the dword and
// R_24/G_24/B_24 the byte inside the destination pixel (all six offsets are
// defined outside this file).
void Hermes_GC_32rgb888_24rgb888(char8 *source,char8 *dest,unsigned int count)
{
  int32 pixel;
  char8 *bytes=(char8 *)&pixel;
  unsigned int left;

  for(left=count;left!=0;left--)
  {
    pixel=READ32(source);

    dest[R_24]=bytes[R_32];
    dest[G_24]=bytes[G_32];
    dest[B_24]=bytes[B_32];

    dest+=3;
    source+=4;
  }
}


// TO 24 BGR

// Converts `count` 32 bit pixels at `source` into 3 byte pixels at `dest`
// with the red and blue bytes exchanged: the source byte at R_32 lands at
// B_24 and the source byte at B_32 lands at R_24, green keeps its slot.
// The offset macros are defined outside this file.
void Hermes_GC_32rgb888_24bgr888(char8 *source,char8 *dest,unsigned int count)
{
  int32 pixel;
  char8 *bytes=(char8 *)&pixel;
  unsigned int left;

  for(left=count;left!=0;left--)
  {
    pixel=READ32(source);

    // Red and blue deliberately cross over here
    dest[B_24]=bytes[R_32];
    dest[G_24]=bytes[G_32];
    dest[R_24]=bytes[B_32];

    dest+=3;
    source+=4;
  }
}


// TO 16 RGB 565

// Converts `count` 32 bit pixels at `source` to 16 bit 5-6-5 pixels at
// `dest`. Per pixel, source bits 23-19 go to bits 15-11, bits 15-10 to
// bits 10-5 and bits 7-3 to bits 4-0.
//
// Layout of the work:
//   1. if `dest` is not dword aligned, one pixel is written on its own,
//   2. pixels are then written two at a time as one dword, the first pixel
//      in the low 16 bits and the second in the high 16 bits,
//   3. a final odd pixel, if any, is written on its own.
//
// A `count` of 0 writes nothing.
void Hermes_GC_32rgb888_16rgb565(char8 *source,char8 *dest,unsigned int count)
{ unsigned int i;
  int32 s_pixel;
  int32 *dest_32=(int32 *)dest;
  int32 *source_32=(int32 *)source;

  // If the destination isn't dword aligned, write one pixel first.
  // `count` is checked as well: without it an empty run would still write a
  // pixel and `count--` would wrap the unsigned counter around.

  if(count && ((int)dest_32&0x3))
  { s_pixel=((*source_32>>8)&0xf800)|
            ((*source_32>>5)&0x7e0)|
            ((*source_32>>3)&0x1f);

    *((short16 *)dest_32)=(short16)s_pixel;

    dest_32=(int32 *)(dest+2);
    source_32++;

    count--;
  }

  // Write blocks of two pixels

  for(i=count>>1;i;i--)
  {  // This horrible construct is actually faster than loading into a variable
    *dest_32++=((*source_32>>8)&0xf800)|
               ((*source_32>>5)&0x7e0)|
               ((*source_32>>3)&0x1f)|
               ((*(source_32+1)<<8)&0xf8000000)|
               ((*(source_32+1)<<11)&0x7e00000)|
               ((*(source_32+1)<<13)&0x1f0000);
    source_32+=2;
  }

  // Finally, write a single odd pixel that might be left
  if(count&1)
  { s_pixel=*source_32;

    WRITE16(dest_32,((s_pixel>>8)&0xf800)|
                    ((s_pixel>>5)&0x7e0)|
                    ((s_pixel>>3)&0x1f));
  }
}


// TO 16 BGR 565

// Converts `count` 32 bit pixels at `source` to 16 bit 5-6-5 pixels at
// `dest` with the outer channels exchanged: source bits 23-19 go to bits
// 4-0, bits 15-10 to bits 10-5 and bits 7-3 to bits 15-11.
//
// One leading pixel is written on its own when `dest` is not dword aligned,
// then pixels are written in pairs as one dword (first pixel in the low 16
// bits), then a trailing odd pixel is written on its own.
//
// A `count` of 0 writes nothing.
void Hermes_GC_32rgb888_16bgr565(char8 *source,char8 *dest,unsigned int count)
{ unsigned int i;
  int32 r,g,b;
  int32 s_pixel,d_pixelblock;
  short16 d_pixel;

  // If the destination isn't dword aligned, write one pixel first.
  // `count` is checked as well: without it an empty run would still write a
  // pixel and `count--` would wrap the unsigned counter around.

  if(count && ((int)dest&0x3)!=0)
  { s_pixel=READ32(source);

    r=(s_pixel>>19)&0x1f;
    g=(s_pixel>>5)&0x7e0;
    b=(s_pixel<<8)&0xf800;

    d_pixel=(short16)(r|g|b);

    WRITE16(dest,d_pixel);

    source+=4;
    dest+=2;
    count--;
  }

  // Write blocks of two pixels. `source` is a byte pointer, so it is moved
  // on by a whole pixel (4 bytes) before each read; READ32 is then always
  // handed a plain pointer and the result does not depend on how the macro
  // parenthesises an expression such as `source+1`.

  for(i=count>>1;i;i--)
  { s_pixel=READ32(source);
    source+=4;

    d_pixelblock=((s_pixel>>19)&0x1f)|
                 ((s_pixel>>5)&0x7e0)|
                 ((s_pixel<<8)&0xf800);

    s_pixel=READ32(source);
    source+=4;

    d_pixelblock|=(((s_pixel>>19)&0x1f)|
                  ((s_pixel>>5)&0x7e0)|
                  ((s_pixel<<8)&0xf800))<<16;

    WRITE32(dest,d_pixelblock);
    dest+=4;
  }


  // Finally, write a single odd pixel that might be left
  if(count&1)
  { s_pixel=READ32(source);

    r=(s_pixel>>19)&0x1f;
    g=(s_pixel>>5)&0x7e0;
    b=(s_pixel<<8)&0xf800;

    d_pixel=(short16)(r|g|b);

    WRITE16(dest,d_pixel);
  }
}


// TO 16 RGB 555


// Converts `count` 32 bit pixels at `source` to 16 bit 5-5-5 pixels at
// `dest`: source bits 23-19 go to bits 14-10, bits 15-11 to bits 9-5 and
// bits 7-3 to bits 4-0.
//
// One leading pixel is written on its own when `dest` is not dword aligned,
// then pixels are written in pairs as one dword (first pixel in the low 16
// bits), then a trailing odd pixel is written on its own.
//
// A `count` of 0 writes nothing.
void Hermes_GC_32rgb888_16rgb555(char8 *source,char8 *dest,unsigned int count)
{ int32 r,g,b;
  int32 s_pixel,d_pixelblock;
  short16 d_pixel;
  unsigned int i;

  // If the destination isn't dword aligned, write one pixel first.
  // `count` is checked as well: without it an empty run would still write a
  // pixel and `count--` would wrap the unsigned counter around.

  if(count && ((int)dest&0x3)!=0)
  { s_pixel=READ32(source);

    r=(s_pixel>>9)&0x7c00;
    g=(s_pixel>>6)&0x3e0;
    b=(s_pixel>>3)&0x1f;

    d_pixel=(short16)(r|g|b);

    WRITE16(dest,d_pixel);

    source+=4;
    dest+=2;
    count--;
  }

  // Write blocks of two pixels. `source` is a byte pointer, so it is moved
  // on by a whole pixel (4 bytes) before each read; READ32 is then always
  // handed a plain pointer and the result does not depend on how the macro
  // parenthesises an expression such as `source+1`.

  for(i=count>>1;i;i--)
  { s_pixel=READ32(source);
    source+=4;

    d_pixelblock=((s_pixel>>9)&0x7c00)|
                 ((s_pixel>>6)&0x3e0)|
                 ((s_pixel>>3)&0x1f);

    s_pixel=READ32(source);
    source+=4;

    d_pixelblock|=(((s_pixel>>9)&0x7c00)|
                  ((s_pixel>>6)&0x3e0)|
                  ((s_pixel>>3)&0x1f))<<16;

    WRITE32(dest,d_pixelblock);
    dest+=4;
  }

  // Finally, write a single odd pixel that might be left
  if(count&1)
  { s_pixel=READ32(source);

    r=(s_pixel>>9)&0x7c00;
    g=(s_pixel>>6)&0x3e0;
    b=(s_pixel>>3)&0x1f;

    d_pixel=(short16)(r|g|b);

    WRITE16(dest,d_pixel);
  }
}



// TO 16 BGR 555

// Converts `count` 32 bit pixels at `source` to 16 bit 5-5-5 pixels at
// `dest` with the outer channels exchanged: source bits 23-19 go to bits
// 4-0, bits 15-11 to bits 9-5 and bits 7-3 to bits 14-10.
//
// One leading pixel is written on its own when `dest` is not dword aligned,
// then pixels are written in pairs as one dword (first pixel in the low 16
// bits), then a trailing odd pixel is written on its own.
//
// A `count` of 0 writes nothing.
void Hermes_GC_32rgb888_16bgr555(char8 *source,char8 *dest,unsigned int count)
{ int32 r,g,b;
  int32 s_pixel,d_pixelblock;
  short16 d_pixel;
  unsigned int i;

  // If the destination isn't dword aligned, write one pixel first.
  // `count` is checked as well: without it an empty run would still write a
  // pixel and `count--` would wrap the unsigned counter around.

  if(count && ((int)dest&0x3)!=0)
  { s_pixel=READ32(source);

    r=(s_pixel>>19)&0x1f;
    g=(s_pixel>>6)&0x3e0;
    b=(s_pixel<<7)&0x7c00;

    d_pixel=(short16)(r|g|b);

    WRITE16(dest,d_pixel);

    source+=4;
    dest+=2;
    count--;
  }

  // Write blocks of two pixels. `source` is a byte pointer, so it is moved
  // on by a whole pixel (4 bytes) before each read; READ32 is then always
  // handed a plain pointer and the result does not depend on how the macro
  // parenthesises an expression such as `source+1`.

  for(i=count>>1;i;i--)
  { s_pixel=READ32(source);
    source+=4;

    d_pixelblock=((s_pixel>>19)&0x1f)|
                 ((s_pixel>>6)&0x3e0)|
                 ((s_pixel<<7)&0x7c00);

    s_pixel=READ32(source);
    source+=4;

    d_pixelblock|=(((s_pixel>>19)&0x1f)|
                  ((s_pixel>>6)&0x3e0)|
                  ((s_pixel<<7)&0x7c00))<<16;

    WRITE32(dest,d_pixelblock);
    dest+=4;
  }

  // Finally, write a single odd pixel that might be left
  if(count&1)
  { s_pixel=READ32(source);

    r=(s_pixel>>19)&0x1f;
    g=(s_pixel>>6)&0x3e0;
    b=(s_pixel<<7)&0x7c00;

    d_pixel=(short16)(r|g|b);

    WRITE16(dest,d_pixel);
  }
}



// TO 8 RGB 332

// Converts `count` 32 bit pixels at `source` to 8 bit 3-3-2 pixels at
// `dest`: source bits 23-21 go to bits 7-5, bits 15-13 to bits 4-2 and
// bits 7-6 to bits 1-0.
//
// Single pixels are written until `dest` is dword aligned, then pixels are
// written four at a time as one dword (first pixel in the lowest byte), and
// finally the zero to three pixels that do not fill a block are written
// one by one.
void Hermes_GC_32rgb888_8rgb332(char8 *source,char8 *dest,unsigned int count)
{ unsigned int i;
  int32 s_pixel,d_block;
  char8 d_pixel;

  // Process single pixels until we are dword aligned

  while(count && ((int)dest&0x3)!=0)
  { s_pixel=READ32(source);

    d_pixel=((s_pixel>>16)&0xe0)|
            ((s_pixel>>11)&0x1c)|
            ((s_pixel>>6)&0x3);

    *(dest)=d_pixel;

    count--;
    dest++;
    source+=4;
  }


  // Now process blocks of four pixels. `source` is a byte pointer, so it is
  // moved on by a whole pixel (4 bytes) after each read; READ32 is then
  // always handed a plain pointer and the result does not depend on how the
  // macro parenthesises an expression such as `source+1`.

  for(i=count>>2;i;i--)
  { s_pixel=READ32(source);
    source+=4;
    d_block=((s_pixel>>16)&0xe0)|((s_pixel>>11)&0x1c)|((s_pixel>>6)&0x3);

    s_pixel=READ32(source);
    source+=4;
    d_block|=(((s_pixel>>16)&0xe0)|((s_pixel>>11)&0x1c)|((s_pixel>>6)&0x3))<<8;

    s_pixel=READ32(source);
    source+=4;
    d_block|=(((s_pixel>>16)&0xe0)|((s_pixel>>11)&0x1c)|((s_pixel>>6)&0x3))<<16;

    s_pixel=READ32(source);
    source+=4;
    d_block|=(((s_pixel>>16)&0xe0)|((s_pixel>>11)&0x1c)|((s_pixel>>6)&0x3))<<24;

    WRITE32(dest,d_block);
    dest+=4;
  }

  // Write the remaining pixels. A block holds four pixels, so up to three
  // can be left over; all of them must be converted, not just one.

  for(i=count&3;i;i--)
  { s_pixel=READ32(source);

    d_pixel=((s_pixel>>16)&0xe0)|
            ((s_pixel>>11)&0x1c)|
            ((s_pixel>>6)&0x3);

    *(dest)=d_pixel;

    dest++;
    source+=4;
  }
}



// GENERIC (slow) routines from 32 bit rgb to *. Note that these routines will
// hopefully NEVER be called as all cases should be catered for by special
// routines

// Generic 32 bit to 32 bit conversion of `count` pixels. Every channel of a
// source pixel is shifted right by the matching *_right amount and left by
// the matching *_left amount from Hermes_GC_GI, then masked with the
// matching channel mask of Hermes_Generic_C.m_dest; the three results are
// OR-ed together and stored. The shift amounts and masks are set up outside
// this file.
void Hermes_GC_32rgb888_Generic32(char8 *source,char8 *dest,unsigned int count)
{
  int32 in,red,green,blue;
  unsigned int left;

  left=count;
  while(left!=0)
  {
    in=READ32(source);

    red=((in>>Hermes_GC_GI.r_right)<<Hermes_GC_GI.r_left)&
        Hermes_Generic_C.m_dest->mask_r;
    green=((in>>Hermes_GC_GI.g_right)<<Hermes_GC_GI.g_left)&
          Hermes_Generic_C.m_dest->mask_g;
    blue=((in>>Hermes_GC_GI.b_right)<<Hermes_GC_GI.b_left)&
         Hermes_Generic_C.m_dest->mask_b;

    WRITE32(dest,(red|green|blue));

    source+=4;
    dest+=4;
    left--;
  }
}


// Generic 32 bit to 16 bit conversion of `count` pixels. Every channel of a
// source pixel is shifted right by the matching *_right amount and left by
// the matching *_left amount from Hermes_GC_GI, then masked with the
// matching channel mask of Hermes_Generic_C.m_dest (all set up outside this
// file).
//
// Pixels are written in pairs as one dword, the first pixel in the low 16
// bits and the second in the high 16 bits; a trailing odd pixel is written
// as a single 16 bit value. No alignment step is performed on `dest`.
void Hermes_GC_32rgb888_Generic16(char8 *source,char8 *dest,unsigned int count)
{ unsigned int i;
  int32 s_pixel,r,g,b;
  int32 d_block;

  // This is a slower routine, but we will still write dwords, not words.
  // `source` is a byte pointer, so it is moved on by a whole pixel (4 bytes)
  // after each read; READ32 is then always handed a plain pointer and the
  // result does not depend on how the macro parenthesises an expression
  // such as `source+1`.

  for(i=count>>1;i;i--)
  { s_pixel=READ32(source);
    source+=4;

    r=((s_pixel>>Hermes_GC_GI.r_right)<<Hermes_GC_GI.r_left)&
      Hermes_Generic_C.m_dest->mask_r;
    g=((s_pixel>>Hermes_GC_GI.g_right)<<Hermes_GC_GI.g_left)&
      Hermes_Generic_C.m_dest->mask_g;
    b=((s_pixel>>Hermes_GC_GI.b_right)<<Hermes_GC_GI.b_left)&
      Hermes_Generic_C.m_dest->mask_b;
    d_block=(int32)(r|g|b);

    s_pixel=READ32(source);
    source+=4;

    r=((s_pixel>>Hermes_GC_GI.r_right)<<Hermes_GC_GI.r_left)&
      Hermes_Generic_C.m_dest->mask_r;
    g=((s_pixel>>Hermes_GC_GI.g_right)<<Hermes_GC_GI.g_left)&
      Hermes_Generic_C.m_dest->mask_g;
    b=((s_pixel>>Hermes_GC_GI.b_right)<<Hermes_GC_GI.b_left)&
      Hermes_Generic_C.m_dest->mask_b;
    d_block=d_block|((int32)(r|g|b)<<16);

    WRITE32(dest,d_block);
    dest+=4;
  }

  // A single odd pixel may be left
  if(count&1)
  { s_pixel=READ32(source);

    r=((s_pixel>>Hermes_GC_GI.r_right)<<Hermes_GC_GI.r_left)&
      Hermes_Generic_C.m_dest->mask_r;
    g=((s_pixel>>Hermes_GC_GI.g_right)<<Hermes_GC_GI.g_left)&
      Hermes_Generic_C.m_dest->mask_g;
    b=((s_pixel>>Hermes_GC_GI.b_right)<<Hermes_GC_GI.b_left)&
      Hermes_Generic_C.m_dest->mask_b;

    WRITE16(dest,(short16)(r|g|b));
  }
}


// Even slower, most generic routines from any 32 bit format

// Most generic 32 bit to 32 bit conversion of `count` pixels. Each channel
// is extracted by shifting the source pixel right by the *_right amount and
// left by the *_left amount from Hermes_GC_GI and masking with the channel
// mask of Hermes_Generic_C.m_dest; the OR of the three channels is stored
// as the destination pixel. Shift amounts and masks come from outside this
// file.
void Hermes_GC_Generic32_Generic32(char8 *source,char8 *dest,unsigned int count)
{
  int32 in,red,green,blue;
  unsigned int n;

  for(n=0;n<count;n++,source+=4,dest+=4)
  {
    in=READ32(source);

    red=((in>>Hermes_GC_GI.r_right)<<Hermes_GC_GI.r_left)&
        Hermes_Generic_C.m_dest->mask_r;
    green=((in>>Hermes_GC_GI.g_right)<<Hermes_GC_GI.g_left)&
          Hermes_Generic_C.m_dest->mask_g;
    blue=((in>>Hermes_GC_GI.b_right)<<Hermes_GC_GI.b_left)&
         Hermes_Generic_C.m_dest->mask_b;

    WRITE32(dest,(red|green|blue));
  }
}


// Most generic 32 bit to 24 bit conversion of `count` pixels. Each channel
// is extracted by shifting the source pixel right by the *_right amount and
// left by the *_left amount from Hermes_GC_GI and masking with the channel
// mask of Hermes_Generic_C.m_dest (all set up outside this file). The
// combined value is placed in a local dword and its bytes at R_32/G_32/B_32
// are copied to the destination bytes at R_24/G_24/B_24.
void Hermes_GC_Generic32_Generic24(char8 *source,char8 *dest,unsigned int count)
{ int32 s_pixel,r,g,b;
  // Byte-wise view of s_pixel. The address is cast directly to a byte
  // pointer; routing it through `int` would cut off the upper address bits
  // wherever pointers are wider than int.
  char8 *d_ptr=(char8 *)&s_pixel;

  while(count--)
  { s_pixel=READ32(source);

    r=((s_pixel>>Hermes_GC_GI.r_right)<<Hermes_GC_GI.r_left)&
      Hermes_Generic_C.m_dest->mask_r;
    g=((s_pixel>>Hermes_GC_GI.g_right)<<Hermes_GC_GI.g_left)&
      Hermes_Generic_C.m_dest->mask_g;
    b=((s_pixel>>Hermes_GC_GI.b_right)<<Hermes_GC_GI.b_left)&
      Hermes_Generic_C.m_dest->mask_b;

    // Reuse s_pixel as the staging dword that d_ptr looks into
    s_pixel=(r|g|b);

    *(dest+R_24)=*(d_ptr+R_32);
    *(dest+G_24)=*(d_ptr+G_32);
    *(dest+B_24)=*(d_ptr+B_32);

    source+=4;
    dest+=3;
  }
}


// Most generic 32 bit to 16 bit conversion of `count` pixels, one pixel at
// a time. Each channel is extracted by shifting the source pixel right by
// the *_right amount and left by the *_left amount from Hermes_GC_GI and
// masking with the channel mask of Hermes_Generic_C.m_dest; the OR of the
// three channels is narrowed to short16 and stored. Shift amounts and masks
// come from outside this file.
void Hermes_GC_Generic32_Generic16(char8 *source,char8 *dest,unsigned int count)
{
  int32 in,red,green,blue;
  unsigned int n;

  for(n=0;n<count;n++,source+=4,dest+=2)
  {
    in=READ32(source);

    red=((in>>Hermes_GC_GI.r_right)<<Hermes_GC_GI.r_left)&
        Hermes_Generic_C.m_dest->mask_r;
    green=((in>>Hermes_GC_GI.g_right)<<Hermes_GC_GI.g_left)&
          Hermes_Generic_C.m_dest->mask_g;
    blue=((in>>Hermes_GC_GI.b_right)<<Hermes_GC_GI.b_left)&
         Hermes_Generic_C.m_dest->mask_b;

    WRITE16(dest,(short16)(red|green|blue));
  }
}


// Most generic 32 bit to 8 bit conversion of `count` pixels, one pixel at a
// time. Each channel is extracted by shifting the source pixel right by the
// *_right amount and left by the *_left amount from Hermes_GC_GI and
// masking with the channel mask of Hermes_Generic_C.m_dest; the OR of the
// three channels is narrowed to char8 and stored as one destination byte.
// Shift amounts and masks come from outside this file.
void Hermes_GC_Generic32_Generic8(char8 *source,char8 *dest,unsigned int count)
{
  int32 in,red,green,blue;
  unsigned int n;

  for(n=0;n<count;n++,source+=4,dest++)
  {
    in=READ32(source);

    red=((in>>Hermes_GC_GI.r_right)<<Hermes_GC_GI.r_left)&
        Hermes_Generic_C.m_dest->mask_r;
    green=((in>>Hermes_GC_GI.g_right)<<Hermes_GC_GI.g_left)&
          Hermes_Generic_C.m_dest->mask_g;
    blue=((in>>Hermes_GC_GI.b_right)<<Hermes_GC_GI.b_left)&
         Hermes_Generic_C.m_dest->mask_b;

    dest[0]=(char8)(red|green|blue);
  }
}


