#include <stdlib.h>
#include <stdio.h>
#include <math.h>

#include "mac_video.h"

#ifndef MAX
#define MAX(a,b) ((a) > (b) ? (a) : (b))
#endif
#ifndef MIN
#define MIN(a,b) ((a) < (b) ? (a) : (b))
#endif

#ifndef M_PI
#define M_PI 3.1415926536
#endif

/*
 * Playback position counter. mac_demo_music_play() adds the number of
 * frames it was asked for while g_music_play is non-zero, and
 * plasma_update() divides it by 22050 to derive the animation time
 * (presumably the audio sample rate -- confirm against the audio setup).
 */
int g_music_pos = 0;
/*
 * Non-zero once playback has started; set to 1 by the first call to
 * mac_demo_update() and read by mac_demo_music_play().
 */
int g_music_play = 0;

/*
 * Lookup tables for the plasma effect. Both arrays are heap-allocated and
 * filled by plasma_init() and read by plasma_update().
 */
struct plasma {
	int8_t *sin256_fp7;		/* 256 entries covering one sine period: sin(2*pi*i/256) scaled by 127 */
	uint16_t *sqrt_fp;		/* sqrt(i)*256, indexed by i = (x*x + y*y) >> 8; sized for 320x240 by plasma_init() */
};

/* The single table set used by the mac_demo_* entry points. */
struct plasma g_plasma;


/*
 * Allocate and fill the lookup tables used by plasma_update().
 *
 *   p->sin256_fp7  256 entries, one full sine period, scaled by 127.
 *   p->sqrt_fp     sqrt(i) * 256 for every i that (x*x + y*y) >> 8 can
 *                  take on a 320x240 surface (625 entries).
 *
 * The square-root table is filled with one linear pass over its indices,
 * so every allocated entry is initialised and sqrt() is called once per
 * entry instead of once per pixel.
 *
 * On allocation failure the function prints "malloc failed" and calls
 * exit(1); it never returns with a NULL table. The caller owns both
 * arrays and releases them with free().
 */
void plasma_init(struct plasma *p) {
	enum { SIN_ENTRIES = 256, TABLE_W = 320, TABLE_H = 240 };
	/* One slot per possible value of (x*x + y*y) >> 8 with x < TABLE_W, y < TABLE_H. */
	const int sqrt_entries = (TABLE_W*TABLE_W + TABLE_H*TABLE_H) >> 8;
	int i;

	p->sin256_fp7 = malloc(sizeof p->sin256_fp7[0] * SIN_ENTRIES);
	if (!p->sin256_fp7) {
		printf("malloc failed\n");
		exit(1);
	}
	for (i = 0; i < SIN_ENTRIES; ++i) {
		/* Fraction of a full turn; i/256 is exactly representable as a float. */
		float x = i/256.f;
		p->sin256_fp7[i] = (int8_t)(sin(2*M_PI*x)*127.0);
	}

	p->sqrt_fp = malloc(sizeof p->sqrt_fp[0] * sqrt_entries);
	if (!p->sqrt_fp) {
		printf("malloc failed\n");
		exit(1);
	}
	for (i = 0; i < sqrt_entries; ++i) {
		/* Largest value is sqrt(624)*256, which fits comfortably in 16 bits. */
		p->sqrt_fp[i] = (uint16_t)(sqrt(i)*256);
	}
}

/*
 * Selects which per-pixel implementation plasma_update() is compiled with:
 *   0  pure floating point (reference, slow)
 *   1  lookup tables with floating-point phases
 *   2  lookup tables with fixed-point phases
 */
#define VERSION 2

/*
 * Render one plasma frame into fb.
 *
 * p   lookup tables prepared by plasma_init().
 * fb  destination surface; fb->w x fb->h 16-bit pixels are written,
 *     starting at fb->b and advancing fb->rowbytes bytes per row.
 *
 * The animation phase is derived from g_music_pos / 22050.
 *
 * Each pixel is packed as bit 15 set, then r, g and b shifted to bits
 * 10, 5 and 0. r = 31 * cv exceeds the 5-bit channel range whenever
 * cv > 1 and g = 31 - r is then negative, so the fields overlap; the
 * packing is done in unsigned arithmetic so that overlap is well defined
 * (left-shifting a negative int is undefined behaviour) while producing
 * the same low 16 bits as a two's-complement shift.
 *
 * NOTE(review): p->sqrt_fp is indexed with (x*x + y*y) >> 8 without a
 * bounds check, and plasma_init() sizes that table for 320x240; a larger
 * fb would read past the end of the table.
 */
void plasma_update(struct plasma *p, struct framebuffer *fb) {
	int x, y;
	int w = fb->w;
	int h = fb->h;
	float tf = g_music_pos / 22050.f;
	int tf_fp = tf * 8;		/* time in 1/8 units for the fixed-point path */
	uint8_t *dst_row = fb->b;

	for (y = 0; y < h; ++y) {
		uint16_t *dst = (void *)dst_row;
		for (x = 0; x < w; ++x) {
			int r, g, b;
			int cv = 0;
			int sqrt_arg = (x*x + y*y) >> 8;
#if VERSION == 0
			/* pure float, easy to experiment with but very slow */
			float w0 = ((x+y) * 0.05 + tf);
			float w1 = (256*sqrt(sqrt_arg) * 0.05 + tf);
			cv += 127 * sin( w0 );
			cv += 127 * sin( w1 );
			cv = ((cv / 4) + 127) * 31/255;
			r = 31 * cv;
			g = 31 - r;
			b = 31 * (0.5 + 0.5 * sin(cv * 3.14));
#elif VERSION == 1
			/* intermediate, get lut entries, check all works */
			float w0 = (((x+y) >> 3) + tf);
			int sqrti = p->sqrt_fp[ sqrt_arg ];
			float w1 = ((sqrti >> 3) + tf);
			float k = 256.0/(2*M_PI);	/* radians -> sine table index */
			int w0i = (int)(w0*k) & 255;
			int w1i = (int)(w1*k) & 255;
			/* cv is still 0 here, so w2i is always 0 */
			int w2i = (int)(cv * 3.14 * k) & 255;
			cv += p->sin256_fp7[w0i];
			cv += p->sin256_fp7[w1i];
			cv = (((cv >> 2) + 127) << 5) >> 8;
			r = 31 * cv;
			g = 31 - r;
			b = 31 * (0.5 + 0.5 * p->sin256_fp7[w2i]/127.0);
#elif VERSION == 2
			/*	approximate values and using only fixed point
				NOTE: a lot more could be done to make this much faster
			*/
			int k = 40;		/* integer stand-in for 256/(2*pi) */
			int w0 = ((x+y) + tf_fp) >> 3;
			int sqrti = p->sqrt_fp[ sqrt_arg ];
			int w1 = (sqrti + tf_fp) >> 3;
			int w0i = (w0*k) & 255;
			int w1i = (w1*k) & 255;
			/* cv is still 0 here, so w2i is always 0 */
			int w2i = (cv * 3 * k) & 255;
			cv += p->sin256_fp7[w0i];
			cv += p->sin256_fp7[w1i];
			cv = ((cv << 3) + 4063) >> 8;
			r = 31 * cv;
			g = 31 - r;
			b = 31 * (0.5 + 0.5 * p->sin256_fp7[w2i]/127.0);
#else
#error "VERSION must be 0, 1 or 2"
#endif
			/* Unsigned shifts: g may be negative, and the result is truncated to 16 bits. */
			*dst++ = (uint16_t)((1u << 15)
				| ((unsigned)r << 10)
				| ((unsigned)g << 5)
				| (unsigned)b);
		}
		dst_row += fb->rowbytes;
	}
}

/*
 * Audio fill routine: writes `frames` frames of silence to dst, two
 * 16-bit samples per frame (presumably interleaved left/right), and,
 * while playback is active, advances g_music_pos by `frames`.
 */
void mac_demo_music_play(void *dst, int frames) {
	/* Sample the flag once, before the buffer is touched. */
	const int playing = g_music_play;
	uint16_t *out = dst;
	int remaining;

	for (remaining = frames; remaining > 0; --remaining) {
		out[0] = 0;
		out[1] = 0;
		out += 2;
	}

	if (playing) {
		g_music_pos += frames;
	}
}

/*
 * Demo start-up hook: builds the plasma lookup tables in g_plasma and
 * rewinds the music position. The framebuffer description passed in is
 * not used by this implementation.
 */
void mac_demo_init(int fb_pages, int fb_w, int fb_h, int fb_rowbytes) {
	(void)fb_pages;
	(void)fb_w;
	(void)fb_h;
	(void)fb_rowbytes;

	plasma_init(&g_plasma);
	g_music_pos = 0;
}

void mac_demo_cleanup(void) {
}

/*
 * Per-frame hook: marks playback as started on the first call (so
 * mac_demo_music_play() begins advancing g_music_pos), then draws the
 * plasma into fb.
 */
void mac_demo_update(struct framebuffer *fb) {
	if (g_music_play == 0) {
		g_music_play = 1;
	}

	plasma_update(&g_plasma, fb);
}
