; Build switches: a line starting with ';' is disabled.
%define FREEDOS				;use the fcomi/fcmovnb min; comment out for DOSBOX (need to enable XORPATTERN then)
%define CHECK_ESC			;if you want the keyboard port polled once per frame so that ESC exits
;%define RESTORE_TEXTMODE	;does what it says (int 10h mode 03h before exiting)
;%define ASPECT				;if you want aspect correction
%define DITHER				;triple interlace (DI advances by 3 per pixel instead of 1)
%define COLORS				;for non-grayscale palette
;%define XORPATTERN  		;DOSBOX requires this define (see FREEDOS define); plain xor texture instead of crc32
%define FPUABUSE			;'accepting' fpu stack overflow. not sure what i did there but it worked :D

org 100h                       

;--------------------------------------------------------------------
; init
; AX=0000 BX=0000 CX=00FF DX=CS DS=CS 
; SI=0100 DI=FFFE BP=09xx SP=FFFE (or FFFC for DOS child process)
; byte  [SI] = 104 = 0x68 = opcode byte for push
; dword [SI] = ~1.24 (float) = aspect ratio, using the aas as the high byte (0x3f) sign/exponent
;--------------------------------------------------------------------
; Entry point: point ES at the (shifted) VGA segment and switch to mode 13h.
; The bytes of these instructions are reused later: the first byte (68h, the
; push opcode) is read as the constant 104 via [si], and the area from 102h
; onwards is overwritten as scratch storage once the main loop is running.
start:
	push    0A000h - 70		; modified to center to 160,100
%ifdef ASPECT	
	aas						; make aspect ratio float of ~1.24
%endif	
	pop     es              ; ES -> ScreenPointer
	mov     al,13h          ; only AL is loaded: relies on AH = 0 (AX=0000 at entry, see header)
	int     10h          	; mode 13h
;--------------------------------------------------------------------		
; create palette 64 colors (clamped after 63)
;--------------------------------------------------------------------
	; Palette upload. Every pass writes three bytes to port 3C9h:
	; AL, AL, and a third byte that is AL rotated right by one bit when COLORS
	; is defined (plain AL otherwise). AL then counts up until it reaches 63
	; and stays there. The pass count is CX (00FFh at entry per the init header).
	; NOTE(review): port 3C8h is never written, so the starting palette index
	; is whatever the mode set left behind - presumably 0, confirm.
	salc						; AL = 0FFh if carry is set, else 00h (undocumented opcode)
palette:						
	mov 	dx,0x3c9	
	out 	dx,al
	out 	dx,al
%ifdef COLORS	
	ror		al,1
%endif	
	out 	dx,al
%ifdef COLORS
	rol		al,1				; undo the rotate so AL is the plain counter again
%endif
	cmp		al,63
	je		saturated			; clamp: stop incrementing at 63
_im7616	equ $					; the word at this address (inc ax / loop opcode bytes) is read as an integer by fimul in the ray setup
	inc		ax
saturated:	
	loop	palette				; leaves CX = 0, the initial frame counter; the bytes of this instruction are also part of _mp1
;--------------------------------------------------------------------
; intro mainloop		
;--------------------------------------------------------------------
; Per-pixel entry. DI = pixel offset, CX = frame counter, SI = 100h, BX = 0.
; Derives screen coordinates from DI, saves all registers with pusha so the
; saved values can be read/written as memory operands through [bx-N]
; (assuming BX = 0 and SP = FFFEh as listed in the init header), picks the
; marcher step counts and loads the scaled time onto the FPU stack.
mainloop:
_mp1 equ $-3				; -0.117 ~ -0.1 (float overlaid on the 3 code bytes before mainloop plus the first byte of the mov below)
	mov		bp,_Data		; BP -> constant table
	mov 	ax,0xCCCD		;Rrrola's trick ...
	mul		di				;... to approximate centered coords from DI in range [0..65535]
_i13354 equ $				; the two bytes of the next instruction are also read as an integer word by fimul in the ray setup
	sub 	dh,[si]			;align vertically (subtract 104)
	pusha 					; saved AX/CX/DX are addressed as [bx-4]/[bx-6]/[bx-8] from here on
	; marcher iterations (animated to structure fadein and light enable after 80 frames)
	; DL = number of marching steps, DH = step index at which the light ray is set up.
	; Frames 0..0x4F: DX = CX, so DH = 0 and the step count grows with the frame number.
	; Later frames:   DX = 0x2850, i.e. 0x50 steps with the light set up at step 0x28.
	; The compare is unsigned so that frame counts >= 0x8000 still take the
	; "later frames" path instead of being treated as negative (< 0x50).
	mov		dx,cx
	cmp		dx,0x50
	jb		maxiter
	mov		dx,0x2850
maxiter:
	lodsw										;si += 2	(AX is overwritten; SI now addresses scratch at start+2)
%ifndef FPUABUSE 	
	fldz										;tt		;POTENTIAL: can be skipped if we use fist instead fistp at the end
%endif	
	; time
	fild	word [bx-6]							;time	tt		(saved CX = frame counter, loaded as signed 16-bit)
	fmul	dword[bp+_Speed-_Data]
	fst		dword[bp+si]						; keep the scaled time in a scratch slot at [bp+si]; the light setup reads it back
	fld		st0									;time	time	tt	
	fsincos										;st		ct		time	tt		
;--------------------------------------------------------------------
; ray setup
;--------------------------------------------------------------------			
	; ray direction	
	; Three floats are produced and later consumed by updatep:
	;   [si+8] -> added to z,  [bx] -> added to y,  [si+0] -> added to x.
	; SI is 102h here (100h at entry per the init header, +2 from lodsw), so
	; [si+0] and [si+8] overlay init code that has already run, and
	; si+LABEL-start-2 is simply the absolute address of LABEL.
	; [bx-8] is the saved DX from pusha; [bx-9] is the same data shifted by one
	; byte (saved DL ends up in the high byte of the word). Both assume BX = 0
	; and SP = FFFEh at entry.
	mov		di,bp								; DI = _Data as well, so [bp+di] can serve as another scratch slot
	fld		dword[bp+_p5-_Data]					;Z		st		ct		time	tt
	fstp	dword[si+8]							;st		ct		time	tt
	fimul	word[si+_im7616-start-2]			;st/4	ct		time	tt	 	(multiplies by the integer formed by code bytes at _im7616)
	fiadd 	word [bx-8]							;X		ct		time	tt
	fmul	dword[di+_RayNorm-_Data]			;X		ct		time	tt
%ifdef ASPECT	
	; aspect ratio correction to get closer to 16:9
	; NOTE(review): SI was advanced by lodsw above, so this reads the dword at
	; start+2 rather than at start - confirm before enabling ASPECT.
	fmul	dword [si]							;X		ct		time	tt
%endif
	fstp	dword[bx]							;ct		time	tt				;abuse first 4 PSP bytes for storage of y direction
	fimul	word[si+_i13354-start-2]			;ct/4	time	tt			(multiplies by the integer formed by code bytes at _i13354)
	fiadd 	word [bx-9]							;Y		time	tt
	fmul	dword[di+_RayNorm-_Data]			;Y		time	tt
	fstp	dword[si+0]							;time	tt
	; position (z = time)
	fldz										;0		time	tt
	fldz										;0		0		time	tt
; One marching step, part 1: evaluate the two distance terms at (x,y,z).
; modspheresdf runs three times; because each pass leaves one more value on
; the stack, "fld st3" picks z on the first pass, y on the second and x on
; the third. Each coordinate is offset, reduced with fprem against 1.0,
; re-centred by subtracting _p5 and squared; the three squares are summed.
marcherloop:									;x		y		z		tt
	mov		cx,3								; CX is free here: the frame counter lives in the pusha frame
modspheresdf:	
	fld1										;1		x		y		z		tt
	fld		st3									;z		1		x		y		z		tt
	fsub	dword[bp+_MinusLarge_p5-_Data]		;z+.5	1		x		y		z		tt	(subtracting a large negative constant, i.e. adding its magnitude)
	fprem										;fract	1		x		y		z		tt
	fsub	dword[bp+_p5-_Data]					;fract'	1		x		y		z		tt
	fmul	st0,st0								;dotz	1		x		y		z		tt
	fstp	st1									;dotz	x		y		z		tt		(overwrites the 1.0 and pops)
	loop	modspheresdf						;dotx	doty	dotz	x		y		z		tt
	faddp	st1,st0
	faddp	st1,st0								;dot	x		y		z		tt
	fsubr	dword[bp+_p4-_Data]					;.4-dot	x		y		z		tt	
planesdf:	
	fld		dword[bp+_p25-_Data]				;.25	.4-dot	x		y		z		tt
	fsub	st3									;.25-y	.4-dot	x		y		z		tt	
; One marching step, part 2: keep the smaller of st0 and st1 and drop the other.
; Both variants leave a single value "nt" on top of x y z tt.
min:	
%ifndef FREEDOS
	; variant using fcom + status word, AX preserved around it
	push	ax
	fcom 	st1
	fnstsw 	ax
	sahf	
	ja 		_min0								; st0 > st1: pop st0, st1 (the smaller) becomes the top
_min1:
	fxch 	st1									; st0 <= st1: bring the larger to the top so it is the one popped
_min0:
	fstp 	st0
	pop		ax
%else
	; variant using fcomi/fcmovnb (sets CPU flags directly, no AX needed)
	fcomi 	st0,st1
	fcmovnb	st0,st1								; if st0 is not below st1, replace it with st1
	fstp	st1									;nt		x		y		z		tt
%endif
; One marching step, part 3: advance along the ray by the step length nt.
; tt accumulates the travelled distance; each coordinate gets nt times the
; matching direction component ([si+8] -> z, [bx] -> y, [si+0] -> x).
updatett:	
	fadd	st4,st0								;nt		x		y		z		tt'
updatep:	
; _MinusLarge_p5 is a float overlaid on the code bytes starting one byte into
; the fld below and ending with the following fmul, so changing either
; instruction changes that constant.
_MinusLarge_p5 equ $+1	
	fld		dword[si+8]
	fmul	st0,st1
	faddp	st4,st0								;nt		x		y		z'		tt'
	fld		dword[bx]
	fmul	st0,st1
	faddp	st3,st0								;nt		x		y'		z'		tt'
	fld		dword[si+0]
	fmulp	st1,st0								; nt is consumed here: st0 = nt * x-direction
	faddp	st1,st0								;x'		y'		z'		tt'
	; setup light ray after half of the marching steps
	; (runs on the one step where DL == DH; with DX = 0x2850 that is step 0x28
	; of 0x50, during the first 0x50 frames DH is 0 so it runs on the last step)
	cmp		dl,dh
	jne		lightsetupdone
		; xor texture for hit point in ax
		; Each coordinate is scaled, stored as an integer at [si] and folded
		; into (E)AX. CX is 0 after the modspheresdf loop, so loading CL is
		; enough to get a count of 3. The store to [si] clobbers the x
		; direction kept at [si+0]; it is rewritten at the end of this block.
%ifndef XORPATTERN						
		xor		eax,eax
%endif		
		mov		cl,3
	xortexloop:			
		fld		st2								; picks z, then y, then x as the stack grows by one per pass
%ifdef XORPATTERN		
		fmul	dword[bp+_m31p9-_Data]
%else		
		fimul	word [di+_10-_Data]
%endif		
		fist	dword[si]
%ifdef XORPATTERN				
		xor		ax,word [si]	
%else		
		crc32	eax, byte [si]	
%endif		
		loop	xortexloop						;x'		y'		z'		x		y		z		tt		
		fcompp									; drop the three scaled copies again (2 here + 1 below)
		fstp	st0								;x		y		z		tt		
		; distance to hit = p.z - time 
		fld		st2								;z		x		y		z		tt
		fsub	dword[bp+si]					;z-t	x		y		z		tt
		fstp	dword[bp+di]					;x		y		z		tt		(kept in scratch, added to the final brightness in marcher_done)
		; slight offset for the hit position for the lightloop
		fld		dword[bp+_p01-_Data]			;.01	x		y		z		tt
		fsub	st2,st0							;.01	x		y-.01	z		tt
		fsub	st3,st0							;.01	x		y-.01	z-.01	tt
		; 'reset' tt
		; (the distance accumulated so far is discarded; from here on tt
		; measures travel along the light ray only)
		fstp	st4								;x		y		z		0.01
		; ray direction points towards rotating lightloop
		; new direction: x = first fsincos result, y = _mp1, z = second fsincos result
		fld		dword[bp+si]					;t		x		y		z		tt
		fsincos									;c		s		x		y		z		tt
		fstp	dword[si+0]						;s		x		y		z		tt
		fld		dword[si+_mp1-start-2]			;-.1	s		x		y		z		tt
		fstp	dword[bx]						;s		x		y		z		tt
		fstp	dword[si+8]						;x		y		z		tt		
; End of one marching step: count DL down and repeat while it is still >= 0.
; Afterwards turn the accumulated distances into the pixel value, which is
; written into the saved-AX slot of the pusha frame ([bx-4], assuming BX = 0
; and SP = FFFEh at entry) so that popa delivers it in AL.
lightsetupdone:
	dec		dl
	jns		marcherloop
marcher_done:									;x		y		z		st
	and		al,0x07								; keep 3 bits of the texture value
	fcompp
	fstp	st0									;st
	; combine shadow travel distance and hit distance
	fadd	dword[bp+di]						;st + pt
	fimul	word [di+_10-_Data]
%ifdef FPUABUSE	
	fist	word [bx-4]							; no pop: the value stays on the FPU stack and takes the tt slot for the next pixel
%else
	fistp	word [bx-4]							;POTENTIAL: can be fist if we skip the very first fldz above
%endif	
	; combine light with texture
	add		word [bx-4],ax						; only the low byte is used as the pixel (stosb after popa)
;------------------------------------------------------------
; output pixel (al) and go to pixelloop until di == 0 again
;------------------------------------------------------------
; Restore all registers (AL now holds the pixel value written into the pusha
; frame, DI the pixel offset), store the pixel and advance DI. The frame is
; finished when DI wraps to 0.
outputpixel:
	popa
	stosb					; AL -> pixel, increment DI
%ifdef DITHER
	inc		di				; together with stosb: DI += 3 per pixel
	inc		di
%else
	and		di,di			; sets ZF from DI without changing it
%endif
jumpmainindirection:	
	jnz		mainloop		; also the landing point for the CHECK_ESC jump further down
_m31p9		equ $-3			; float overlaid on the last 3 bytes of the jnz plus the byte that follows; only used with XORPATTERN
;------------------------------------------------------------
; increase time counter
;------------------------------------------------------------
	inc		cx					; frame counter, read as time and for the step schedule at the top of mainloop
;------------------------------------------------------------
; check for ESC or render next frame
;------------------------------------------------------------
%ifdef CHECK_ESC	
	in      al,60h          	; read the keyboard data port
	dec     al					; result is zero only when the port read 1
	jnz 	jumpmainindirection	; not 1: hop to the jnz above, which is taken again because ZF is still clear
%else
	jmp		mainloop
%endif
;------------------------------------------------------------
; restore text mode
;------------------------------------------------------------
%ifdef RESTORE_TEXTMODE	
	; NOTE(review): AH is whatever popa restored from the pixel word, so it is
	; not guaranteed to be 0 here - confirm before enabling this switch.
	mov     al,0x0003       ; AH must be 00h
	int     10h             ; mode 03h         
%endif	
;------------------------------------------------------------
; exit
;------------------------------------------------------------
	;ret						; opcode = 0xC3, can be arranged with first data byte from below
	int		20h					; using [0] as storage, so using int 20 directly here
;==============================================================================	
; constants
;==============================================================================	
; Constant table, 14 bytes. The floats overlap: each labelled dword takes its
; low word from its own line and its high word from the line that follows,
; so every value is only an approximation of the number in its name.
_Data:
_RayNorm:							; dword 0A,00,80,37 -> 0x3780000A ~ 1.0 / 65536.0
_10:		db	0x0A, 0x00			; word  0A,00       -> 0x000A     =  10 (integer operand for fimul)
_Speed:								; same dword as _p01
_p01:		db	0x80, 0x37			; dword 80,37,23,3C -> 0x3C233780 ~ 0.01
_p4:		db	0x23, 0x3C			; dword 23,3C,CC,3E -> 0x3ECC3C23 ~ 0.4
_p5:		db	0xCC, 0x3E			; dword CC,3E,00,3F -> 0x3F003ECC ~ 0.5
_p25:		db	0x00, 0x3F			; dword 00,3F,80,3E -> 0x3E803F00 ~ 0.25
			db	0x80, 0x3E			; high word of _p25
