// fl_draw_image.C

// A (hopefully) simple and portable method of drawing color and monochrome
// images.  To keep this simple, only a single storage type is
// supported: 8 bit unsigned data, byte order RGB, and pixels are
// stored packed into rows with the origin at the top-left.  It is
// possible to alter the size of pixels with the "delta" argument, to
// add alpha or other information per pixel.  It is also possible to
// change the origin and direction of the image data by messing with
// the "delta" and "linedelta", making them negative, though this may
// defeat some of the shortcuts in translating the image for X.

// A list of assumptions made about the X display:

// Only bits_per_pixel of 8, 16, 32.

// Only scanline_pad of 8, 16, 32, 64, 128, etc.

// PseudoColor visuals must have 8 bits_per_pixel, although the depth
// may be any number <= 8.  (This is the biggest limitation because it
// prevents 12 or 16 bit colormaps.)

// The mask bits in TrueColor visuals for each color are
// contiguous and have at least one bit of each color.  This
// is not checked for.

////////////////////////////////////////////////////////////////

#include <config.h>
#include <FL/Fl.H>
#include <FL/fl_draw.H>
#include <FL/x.H>

// File-scope state describing the current visual.  It is filled in lazily
// by figure_out_visual() the first time visual_id is found to be zero.
static int visual_id;	// which visual we calculated for (so it can change)
static XImage i;	// template used to pass info to X
static int bytes_per_pixel;	// i.bits_per_pixel/8
// Row sizes in bytes are rounded up with (size+scanline_add)&scanline_mask:
static int scanline_add;	// padding unit in bytes, minus one
static int scanline_mask;	// negated padding unit in bytes

static U32 *buffer;	// our storage, always word aligned
static long buffer_size;	// number of U32 elements allocated in buffer

// Row converters selected by figure_out_visual().  Each one translates w
// source pixels, spaced delta bytes apart starting at from, into the
// display's pixel format at to.  fl_draw_image() calls converter once per
// row; fl_draw_image_mono() temporarily installs mono_converter in its place.
static void (*converter)(const uchar *from, uchar *to, int w, int delta);
static void (*mono_converter)(const uchar *from, uchar *to, int w, int delta);

#if HAVE_XCOLORMAP
////////////////////////////////////////////////////////////////
// PseudoColor converter with error diffusion

// we make a 16x16x16 cube to "guess" the closest color available
// to an actual color:

// Lookup table filled by fill_color_cube().  It is indexed by the top four
// bits of each channel packed as (r4<<8)+(g4<<4)+b4, and each entry holds
// the fl_xmap index chosen for that cell.
static uchar cube[16*16*16];

// Sum-of-squares distance between a 4-bit-per-channel cell (r,g,b) and the
// color stored in fl_xmap[i].  Each 4-bit value is scaled to 8 bits and
// offset by 8 (the middle of the cell) before the comparison.
static int calc_error(int r, int g, int b, int i) {
  const int dr = ((r<<4)+8) - fl_xmap[i].r;
  const int dg = ((g<<4)+8) - fl_xmap[i].g;
  const int db = ((b<<4)+8) - fl_xmap[i].b;
  return dr*dr + dg*dg + db*db;
}

// Try colormap entry i as a replacement for the entry currently stored in
// *p.  If i lies in [FL_GRAY_RAMP, 255] and its error against the cell
// (r,g,b) is strictly smaller than e, store i in *p and update e.
// Entries outside that range are ignored.
static void improve(uchar *p, int& e, int r, int g, int b, int i) {
  if (i >= FL_GRAY_RAMP && i <= 255) {
    const int candidate = calc_error(r,g,b,i);
    if (candidate < e) {
      e = candidate;
      *p = i;
    }
  }
}

static int filled_color_cube;
static void fill_color_cube() {
  filled_color_cube = 1;
  int i;
  for (i=0;;) {
    fl_xpixel(i+FL_COLOR_CUBE);
    i = (i+109)%(FL_NUM_RED*FL_NUM_GREEN*FL_NUM_BLUE); if (!i) break;
  }
  for (i=0;;) {
    fl_xpixel(i+FL_GRAY_RAMP);
    i = (i+7)%FL_NUM_GRAY; if (!i) break;
  }
  // fill in the 16x16x16 cube:
  uchar *p = cube;
  for (int r = 0; r<16; r++) {
    int rr = (r*(FL_NUM_RED-1)+8)/16;
    for (int g = 0; g<16; g++) {
      int gg = (g*(FL_NUM_GREEN-1)+8)/16;
      for (int b = 0; b<16; b++, p++) {
	int bb = (b*(FL_NUM_BLUE-1)+8)/16;
	// initial try is value from color cube:
	int i = (bb*FL_NUM_RED+rr)*FL_NUM_GREEN+gg+FL_COLOR_CUBE;
	int e = calc_error(r,g,b,i);
	*p = uchar(i);
	// try neighbor pixels in the cube to see if they are better:
	improve(p,e,r,g,b,i+FL_NUM_RED*FL_NUM_GREEN);
	improve(p,e,r,g,b,i-FL_NUM_RED*FL_NUM_GREEN);
	improve(p,e,r,g,b,i+FL_NUM_GREEN);
	improve(p,e,r,g,b,i-FL_NUM_GREEN);
	improve(p,e,r,g,b,i+1);
	improve(p,e,r,g,b,i-1);
	// try the gray ramp:
	i = (g*(FL_NUM_GRAY-1)+8)/16+FL_GRAY_RAMP;
	improve(p,e,r,g,b,i);
	improve(p,e,r,g,b,i+1);
	improve(p,e,r,g,b,i-1);
      }
    }
  }
}

// Error-diffusion state shared by the dithering converters.  It persists
// across calls, so it carries over from one converted row to the next.
static int dir;	// direction-alternator: toggled on every converter call
static int ri,gi,bi;	// keep error for "randomness" (left over from the last call)

static void psuedo_converter(const uchar *from, uchar *to, int w, int delta) {
  if (!filled_color_cube) fill_color_cube();
  int r=ri, g=gi, b=bi;
  int d, td;
  if (dir) {
    dir = 0;
    from = from+(w-1)*delta;
    to = to+(w-1);
    d = -delta;
    td = -1;
  } else {
    dir = 1;
    d = delta;
    td = 1;
  }
  for (; w--; from += d, to += td) {
    r += from[0]; if (r < 0) r = 0; else if (r>255) r = 255;
    g += from[1]; if (g < 0) g = 0; else if (g>255) g = 255;
    b += from[2]; if (b < 0) b = 0; else if (b>255) b = 255;
    int i = cube[((r<<4)&0xf00)+(g&0xf0)+(b>>4)];
    *to = fl_xmap[i].pixel;
    r -= fl_xmap[i].r;
    g -= fl_xmap[i].g;
    b -= fl_xmap[i].b;
  }
  ri = r; gi = g; bi = b;
}

static void psuedo_mono_converter(const uchar *from, uchar *to, int w, int delta) {
  if (!filled_color_cube) fill_color_cube();
  int r=ri;
  int d, td;
  if (dir) {
    dir = 0;
    from = from+(w-1)*delta;
    to = to+(w-1);
    d = -delta;
    td = -1;
  } else {
    dir = 1;
    d = delta;
    td = 1;
  }
  for (; w--; from += d, to += td) {
    r += from[0]; if (r < 0) r = 0; else if (r>255) r = 255;
    int i = cube[(r>>4)*0x111];
    *to = fl_xmap[i].pixel;
    r -= fl_xmap[i].g;
  }
  ri = r;
}
#endif

////////////////////////////////////////////////////////////////
// 16 bit TrueColor converter

// The HAVE_XCOLORMAP section above declares this state when it is compiled
// in; declare it here otherwise, since the 16-bit converters use it too.
#if !HAVE_XCOLORMAP
static int dir;	// direction-alternator: toggled on every converter call
static int ri,gi,bi;	// keep error for "randomness" (left over from the last call)
#endif

static void short_mask_converter(const uchar *from, uchar *to, int w, int delta) {
  U16 *t = (U16 *)to;
  int d, td;
  if (dir) {
    dir = 0;
    from = from+(w-1)*delta;
    t = t+(w-1);
    d = -delta;
    td = -1;
  } else {
    dir = 1;
    d = delta;
    td = 1;
  }
  int r=ri, g=gi, b=bi;
  for (; w--; from += d, t += td) {
    r = (r&~fl_redmask)  +from[0]; if (r>255) r = 255;
    g = (g&~fl_greenmask)+from[1]; if (g>255) g = 255;
    b = (b&~fl_bluemask) +from[2]; if (b>255) b = 255;
    *t = (
      ((r&fl_redmask)<<fl_redshift)+
      ((g&fl_greenmask)<<fl_greenshift)+
      ((b&fl_bluemask)<<fl_blueshift)
      ) >> fl_extrashift;
  }
  ri = r; gi = g; bi = b;
}

static void short_mono_mask_converter(const uchar *from,uchar *to,int w, int delta) {
  U16 *t = (U16 *)to;
  int d, td;
  if (dir) {
    dir = 0;
    from = from+(w-1)*delta;
    t = t+(w-1);
    d = -delta;
    td = -1;
  } else {
    dir = 1;
    d = delta;
    td = 1;
  }
  uchar mask = fl_redmask & fl_greenmask & fl_bluemask;
  int r=ri;
  for (; w--; from += d, t += td) {
    uchar m = *from & mask;
    *t = (
      (m<<fl_redshift)+
      (m<<fl_greenshift)+
      (m<<fl_blueshift)
      ) >> fl_extrashift;
  }
  ri = r;
}

////////////////////////////////////////////////////////////////
// hi-speed 32bit TrueColor conversion, assume error diffusion unnecessary:

// Copy one row of RGB pixels into 4-byte destination pixels laid out as
// R,G,B,pad.  The pad byte is left untouched.  Returns at once when the
// source and destination are the same address.
//   from  - first source pixel;  to - first destination pixel
//   w     - number of pixels;    delta - bytes between source pixels
static void rgbx_converter(const uchar *from, uchar *to, int w, int delta) {
  if (to == from) return;
  while (w--) {
    to[0] = from[0];
    to[1] = from[1];
    to[2] = from[2];
    to += 4;
    from += delta;
  }
}

static void xrgb_converter(const uchar *from, uchar *to, int w, int delta) {
  int d = delta-3;
  for (; w--; from += d) {
    uchar r = *from++;
    uchar g = *from++;
    uchar b = *from++;
    to++;
    *to++ = r;
    *to++ = g;
    *to++ = b;
  }
}

static void xbgr_converter(const uchar *from, uchar *to, int w, int delta) {
  int d = delta-3;
  for (; w--; from += d) {
    uchar r = *from++;
    uchar g = *from++;
    uchar b = *from++;
    to++;
    *to++ = b;
    *to++ = g;
    *to++ = r;
  }
}

static void bgrx_converter(const uchar *from, uchar *to, int w, int delta) {
  int d = delta-3;
  for (; w--; from += d) {
    uchar r = *from++;
    uchar g = *from++;
    uchar b = *from++;
    *to++ = b;
    *to++ = g;
    *to++ = r;
    to++;
  }
}

static void rrrx_converter(const uchar *from, uchar *to, int w, int delta) {
  for (; w--; from += delta) {
    *to++ = *from;
    *to++ = *from;
    *to++ = *from;
    to++;
  }
}

static void xrrr_converter(const uchar *from, uchar *to, int w, int delta) {
  for (; w--; from += delta) {
    to++;
    *to++ = *from;
    *to++ = *from;
    *to++ = *from;
  }
}

// arbitrary 32-bit converters.
// To speed this up I assume at least 8 bits of each color are used.
// Use #define SLOW to get the truly arbitrary version:

static void
long_mask_converter(const uchar *from, uchar *to, int w, int delta)
{
  U32 *t = (U32 *)to;
  for (; w > 0; from += delta, t++, w--) {
    *t =
#ifdef SLOW
	 (((from[0]&fl_redmask) << fl_redshift)+
	  ((from[1]&fl_greenmask)<<fl_greenshift)+
	  ((from[2]&fl_bluemask)<< fl_blueshift)
	  ) >> fl_extrashift;
#else
	 (from[0]<<fl_redshift)+
	 (from[1]<<fl_greenshift)+
	 (from[2]<<fl_blueshift);
#endif
  }
}

static void
long_mono_mask_converter(const uchar *from,uchar *to,int w, int delta)
{
  U32 *t = (U32 *)to;
#ifdef SLOW
  uchar mask = fl_redmask & fl_greenmask & fl_bluemask;
#endif
  for (; w > 0; from += delta, t++, w--) {
    *t =
#ifdef SLOW
	 (((*from & mask) << fl_redshift)+
	  ((*from & mask) << fl_greenshift)+
	  ((*from & mask) << fl_blueshift)
	  ) >> fl_extrashift;
#else
	 (*from << fl_redshift)+
	 (*from << fl_greenshift)+
	 (*from << fl_blueshift);
#endif
  }
}

////////////////////////////////////////////////////////////////

static void figure_out_visual() {
  visual_id = fl_visual->visual->visualid;

  static XPixmapFormatValues *pfvlist;
  static int FL_NUM_pfv;
  if (!pfvlist) pfvlist = XListPixmapFormats(fl_display,&FL_NUM_pfv);
  XPixmapFormatValues *pfv;
  for (pfv = pfvlist; pfv < pfvlist+FL_NUM_pfv; pfv++)
    if (pfv->depth == fl_visual->depth) break;
  i.format = ZPixmap;
  i.byte_order = ImageByteOrder(fl_display);
//i.bitmap_unit = 8;
//i.bitmap_bit_order = MSBFirst;
//i.bitmap_pad = 8;
  i.depth = fl_visual->depth;
  i.bits_per_pixel = pfv->bits_per_pixel;

#if HAVE_XCOLORMAP
  if (i.bits_per_pixel!=8 && i.bits_per_pixel!=16 && i.bits_per_pixel!=32)
#else
  if (i.bits_per_pixel!=16 && i.bits_per_pixel!=32)
#endif
    Fl::abort("Can't do %d bits_per_pixel",i.bits_per_pixel);
  bytes_per_pixel = i.bits_per_pixel/8;

  int n = pfv->scanline_pad/8;
  if (pfv->scanline_pad & 7 || (n&(n-1)))
    Fl::abort("Can't do scanline_pad of %d",pfv->scanline_pad);
  if (n<=1) n = 4; // always pad to multiples of 4
  scanline_add = n-1;
  scanline_mask = -n;

#if HAVE_XCOLORMAP
  if (bytes_per_pixel == 1) {
    // this works even for 8-bit TrueColor or other visuals & we get dithering
    converter = psuedo_converter;
    mono_converter = psuedo_mono_converter;
    return;
  }
  if (!fl_visual->red_mask)
    Fl::abort("Can't use colormap of more than 8 bits");
#endif

  // otherwise it is a TrueColor visual:
  fl_xpixel(0,0,0); // setup fl_redmask, etc, in fl_rgbcolor.C

  int rs = fl_redshift;
  int gs = fl_greenshift;
  int bs = fl_blueshift;
  if (::i.byte_order) {rs = 24-rs; gs = 24-gs; bs = 24-bs;}

  if (bytes_per_pixel == 2) {
    // All 16-bit TrueColor visuals are supported on any machine with
    // 24 or more bits per integer.  We may want to special-case the
    // very common 5-6-5 arrangment, though:
    ::i.byte_order = WORDS_BIGENDIAN;
    converter = short_mask_converter;
    mono_converter = short_mono_mask_converter;

  } else if (rs == 0 && gs == 8 && bs == 16) {
    converter = rgbx_converter;
    mono_converter = rrrx_converter;
  } else if (rs == 24 && gs == 16 && bs == 8) {
    converter = xbgr_converter;
    mono_converter = xrrr_converter;
  } else if (rs == 8 && gs == 16 && bs == 24) {
    converter = xrgb_converter;
    mono_converter = xrrr_converter;
  } else if (rs == 16 && gs == 8 && bs == 0) {
    converter = bgrx_converter;
    mono_converter = rrrx_converter;
  } else {
    // arbitrary 32-bit color mask:
    ::i.byte_order = WORDS_BIGENDIAN;
    converter = long_mask_converter;
    mono_converter = long_mono_mask_converter;
  }
}

// internal interface: return a buffer to put rgbx data into of given
// size, this buffer's contents will be overwritten when fl_draw_image
// is called on it.  Output parameter W is delta between lines.
// Type of buffer is U32 to force word alignment.

// Return the shared conversion buffer, grown if necessary so that it holds
// h rows of w four-byte pixels, each row padded to the display's scanline
// unit.  The row stride, in U32 words, is stored in W.  The buffer is
// owned by this file and is reused by later calls.
U32* fl_image_buffer(int w, int h, int& W) {
  if (!visual_id) figure_out_visual();

  const int row_bytes = (w*4+scanline_add)&scanline_mask;
  W = row_bytes/4;

  if (buffer_size < W*h) {
    delete[] buffer;
    buffer_size = W*h;
    buffer = new U32[buffer_size];
  }
  return buffer;
}

// Draw an image of 8-bit RGB pixels into the current window.
//   buf       - first pixel of the top row (bytes 0,1,2 are r,g,b)
//   x, y      - window position of the top-left pixel
//   w, h      - size in pixels
//   delta     - bytes between consecutive pixels in a row
//   linedelta - bytes between consecutive rows; 0 means w*delta
// The image is clipped to fl_current_clip when fl_clipped is set, and
// nothing is drawn if the clipped area is empty.  Rows are normally
// translated by the current converter into the shared buffer and sent with
// XPutImage.  When the data is already in the display's 32-bit RGBX layout
// it is handed to X in place.
void fl_draw_image(const uchar *buf, int x, int y, int w, int h,
		   int delta, int linedelta) {

  if (!linedelta) linedelta = w*delta;

  if (fl_clipped) {
    // trim the left and top edges by advancing buf, the right and bottom
    // edges by shrinking w and h
    if (fl_current_clip.x > x) {
      buf += delta*(fl_current_clip.x-x);
      w -= (fl_current_clip.x-x);
      x = fl_current_clip.x;
    }
    if (fl_current_clip.r < x+w) {
      w = fl_current_clip.r-x;
    }
    if (fl_current_clip.y > y) {
      buf += linedelta*(fl_current_clip.y - y);
      h -= (fl_current_clip.y - y);
      y = fl_current_clip.y;
    }
    if (fl_current_clip.b < y+h) {
      h = fl_current_clip.b-y;
    }
  }
  if (w<=0 || h<=0) return;

  if (!visual_id) figure_out_visual();
  i.width = w;
  i.height = h;


  // row stride of the converted image, in U32 words
  int W = ((w*bytes_per_pixel+scanline_add)&scanline_mask)/4;
  if (delta==4 && converter==rgbx_converter && scanline_add==3) {
    // short-cut if data happens to be in correct format...
    // notice this will set bytes_per_line negative if image is bottom-to-top
    // I tested it on Linux, but it may fail on other Xlib implementations:
    // Row 0 of the XImage must be the row at buf, stepping by linedelta,
    // exactly as the conversion loop below walks the data.  The original
    // pointed data at the last row with a negated stride, which drew the
    // image upside-down.
    i.data = (char *)buf;
    i.bytes_per_line = linedelta;
  } else {
    if (W*h > buffer_size) {
      delete[] buffer;
      buffer_size = W*h;
      buffer = new U32[buffer_size];
    }
    if (buf == (uchar*)buffer) dir = 0; // backwards does not work first line
    i.data = (char *)buffer;
    i.bytes_per_line = W*4;
    U32 *to = buffer;
    for (int j = h; j--;) {
      converter(buf, (uchar*)to, w, delta);
      buf += linedelta;
      to += W;
    }
  }
  XPutImage(fl_display, fl_window, fl_gc, &i, 0, 0, x, y, w, h);
}

void fl_draw_image_mono(const uchar *buf, int x, int y, int w, int h,
			int delta, int linedelta) {
  if (!visual_id) figure_out_visual();
  void (*saved_converter)(const uchar *from, uchar *to, int w, int delta);
  saved_converter = converter;
  converter = mono_converter;
  fl_draw_image(buf,x,y,w,h,delta,linedelta);
  converter = saved_converter;
}

// End of fl_draw_image.C
