;
;		Flick FLI-format Animation Viewer v1.2		  19 Feb 1994
;		--------------------------------------
;
;
;This program plays FLI/FLC-format bitmapped animation files on any ECS
;or AGA Amiga running OS2.04 or higher.  FLI/FLC-format files are
;produced by Autodesk Animator and Autodesk 3D Studio on a PC, as well
;as by other programs.
;
;The files in this archive may be distributed anywhere provided they are
;unmodified and are not sold for profit.
;
;Ownership and copyright of all files remains with the author:
;
;	Peter McGavin, 86 Totara Crescent, Lower Hutt, New Zealand.
;	e-mail: peterm@maths.grace.cri.nz
;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;		xdef	_chunky2planar

; Basically the same as peterm/chunky6.s, except that the loops are unrolled
; as much as possible.  This is much better optimised for a 68040,
; but inefficient on lower processors.

;-----------------------------------------------------------------------------
; Set Macro68 defaults
;
; Each "default" directive names an assembler setting and the value to use
; when the source gives no explicit size or mode.  Judging by the option
; names they select word-sized branches and displacements, and PC-relative
; encoding for absolute references.
; NOTE(review): confirm the exact meaning of each option in the Macro68 manual.

		default	_branch,_word
		default	_adrbasedisp,_word
		default	_pcbasedisp,_word
		default	_outerdisp,_word
		default	_absolute,_pcrel

;-----------------------------------------------------------------------------
; chunky2planar:	(new Motorola syntax)
;  a0 -> chunky pixels
;  a1 -> plane0 (assume other 7 planes are allocated contiguously)
;  d1 = size of one plane in bytes, i.e. width*height/8 (if generic is defined)


	ifnd generic			; fixed-size build only (generic builds get the plane size in d1)
; plsiz = size of one bitplane in bytes.  width and height are not defined
; in this file; presumably they are supplied by the build (TODO confirm).
plsiz		equ	width*height/8
	endc


; Offset used with a6 for the jsr that follows the code relocation.
_LVOCacheClearU equ	-636

	ifd generic			; generic build: the entry name depends on depth only
	ifeq depth-8
_c2p_8_040::
	else
	ifeq depth-6
_c2p_6_040::
	else
	ifeq depth-4
_c2p_4_040::
	endc
	endc
	endc
	else
; Fixed-size build: the entry name encodes resolution and depth.
; NOTE(review): with depth 8 or 6 and a width other than 320 no entry label
; is defined and the "die" below is not reached; height is never checked
; even though the label names say 200.  Likewise a generic build with a depth
; other than 8, 6 or 4 gets no entry label.
	ifeq depth-8
		ifeq	width-320
_c2p320x200x8_040::
		endc
	else
	ifeq depth-6
		ifeq	width-320
_c2p320x200x6_040::
		endc
	else
		die	"Unrecognised resolution"
	endc
	endc
	endc

		movem.l	d2-d7/a2-a6,-(sp)	; save all registers used below other than d0/d1/a0/a1

; Bit 0 of firsttimeflag is tested and set by a single instruction; it is
; found already set on every call after the first, in which case the main
; loop has been relocated before and the copy is skipped.

		bset	#0,(firsttimeflag)
		bne.b	skip_relocate	; branch if not being called 1st time

; relocate the mainloop to a quad-longword boundary (for 030/040 cache line)
;
; a2 = begincode rounded up to a multiple of 16 (destination of the copy)
; a3 = mainloop (source); everything up to endcode is copied word by word.
; begincode is followed by 16 bytes of nops before mainloop, so the
; destination is always below the source and the ascending copy cannot
; overwrite words it has not read yet.  The two branches inside the copied
; range (to done and to mainloop) target labels that move with it.

		lea	(begincode,pc),a2
		adda.w	#15,a2
		move.l	a2,d0
		and.w	#~15,d0		; clears the low 4 bits; only the low word needs masking
		movea.l	d0,a2
		lea	(mainloop,pc),a3
		move.w	#(endcode-mainloop)/2-1,d0	; number of words - 1, as dbra expects
1$:		move.w	(a3)+,(a2)+
		dbra	d0,1$

; flush the caches
;
; The code has just been rewritten in memory, so the caches are cleared
; before it is executed.  a0, a1 and d1 carry the caller's arguments and are
; preserved around the call.  a6 is loaded from absolute address 4
; (presumably ExecBase -- standard on the Amiga) and is reloaded with a
; buffer pointer later, before the conversion starts.

		movem.l	a0/a1/d1,-(sp)
		movea.l	(4).w,a6
		jsr	(_LVOCacheClearU,a6)
		movem.l	(sp)+,a0/a1/d1

skip_relocate:	move.w	sp,d0
; d0 = (sp AND 15) + 64, so that sp - d0 is a multiple of 16 and leaves
; 64 bytes (8 planes x 8 bytes) above it for the conversion buffer.
		and.w	#15,d0
		add.w	#64,d0		; make room on stack for
		suba.w	d0,sp		; 64-byte quad-longword aligned buffer
		movea.l	sp,a3		; pointed to by a3
		move.w	d0,-(sp)	; and save the allocated size
;
; Stack variables pushed below the saved size:
;   generic build:    (sp).w  = loop counter = plsiz/4 - 1
;                     (2,sp).l = 7*plsiz (depth > 4) or 3*plsiz
;                     (6,sp).l = plsiz (value passed in d1)
;   fixed-size build: (sp).w  = loop counter = plsiz/4 - 1
;
; NOTE(review): the counter is a 16-bit word and is only tested after being
; decremented, so the code as written needs plsiz to be a multiple of 4,
; at least 8, and plsiz/4 - 1 to fit in 16 bits.
;
	ifd generic
		move.l	d1,-(sp)	; plsiz on stack at (6,sp)
		move.l	d1,d0
	ifgt depth-4
		lsl.l	#3,d0
	else
		lsl.l	#2,d0
	endc
		sub.l	d1,d0		; d0 = 8*plsiz - plsiz or 4*plsiz - plsiz
		move.l	d0,-(sp)	; 7*plsiz or 3*plsiz on stack at (2,sp)
		lsr.l	#2,d1		; plsiz/4 = number of longwords per plane
		subq.l	#1,d1		; the first longword is converted before the loop
		move.w	d1,-(sp)	; outer loop counter on stack at (sp)
	else
		move.w	#plsiz/4-1,-(sp) ; outer loop counter on stack at (sp)
	endc

; Point a4-a6 at their slices of the stack buffer that starts at a3.
; Each register owns 16 bytes: 8 for an even plane followed by 8 for the
; odd plane above it.

		lea	(16,a3),a4	; a4 -> buffer for planes 2 and 3
	ifgt depth-4
		lea	(32,a3),a5	; a5 -> buffer for planes 4 and 5
		lea	(48,a3),a6	; a6 -> buffer for planes 6 and 7
	endc

; Mask constants held in registers for the whole conversion
; (used by the part1 and part2 macros)

		move.l	#$3333cccc,d7	; d7 = mask for the 2-bit exchange
		move.l	#$55555555,d6	; d6 = mask for the 1-bit exchange
		move.l	#$0f0f0f0f,d5	; d5 = low-nibble mask

; Macros part1 and part2 together convert 8 pixels from chunky to stack buffers
;
; part1: fetches 8 chunky pixels (two longwords) through a0 and performs the
; nibble merge and the 2-bit exchange.
;   in:   a0 -> chunky pixels (advanced by 8)
;         d5 = $0f0f0f0f, d6 = $55555555, d7 = $3333cccc
;   out:  d3 = intermediate built from the low nibbles of the 8 pixels
;         d0, d1, d2 = intermediates built from the high nibbles
;                      (only when depth > 4)
; part2 consumes these registers, so nothing between the two macros may
; alter d0-d3.
; The number at the start of each comment appears to be a cycle count
; (NOTE(review): the timing model is not stated in this file).

part1	macro
		move.l	(a0)+,d2	; 12 get next 4 chunky pixels in d2
		move.l	(a0)+,d3	; 12 get next 4 chunky pixels in d3

	ifgt depth-4
		move.l	d2,d0		;  4
		and.l	d5,d2		;  8 d5=$0f0f0f0f
		move.l	d3,d1		;  4
		and.l	d5,d3		;  8 d5=$0f0f0f0f
		eor.l	d2,d0		;  8 d0 = high nibbles of the first 4 pixels
		eor.l	d3,d1		;  8 d1 = high nibbles of the next 4 pixels
		lsr.l	#4,d1		; 16
		or.l	d1,d0		;  8 d0 = high nibbles of all 8 pixels
	endc
		lsl.l	#4,d2		; 16
		or.l	d3,d2		;  8 d2 = low nibbles of all 8 pixels
		move.l	d2,d3		;  4
		and.l	d7,d3		;  8 d7=$3333cccc
		eor.l	d3,d2		;  8 d2 keeps the bits not selected by d7
		lsr.w	#2,d3		; 10 selected bits: shift the low word right,
		swap	d3		;  4 exchange the words,
		lsl.w	#2,d3		; 10 shift the former high word left
		or.l	d2,d3		;  8
	ifgt depth-4
		move.l	d0,d1		;  4 same 2-bit exchange for the high nibbles
		and.l	d7,d1		;  8 d7=$3333cccc
		eor.l	d1,d0		;  8
		lsr.w	#2,d1		; 10
		swap	d1		;  4
		lsl.w	#2,d1		; 10
		or.l	d0,d1		;  8
		move.l	d1,d2		;  4 start of the 1-bit exchange, finished in part2
		lsr.l	#7,d2		; 22
		move.l	d1,d0		;  4
		and.l	d6,d0		;  8 d6=$55555555
	endc
	endm

part2	macro
; Finishes the conversion started by part1 and stores one byte per plane
; into the stack buffers.
;   in:   d3 (and d0, d1, d2 when depth > 4) as left by part1
;         d6 = $55555555
;         a3-a6 -> current byte of the even plane in each 16-byte buffer
;                  slice; the odd plane lives 8 bytes further on
;   out:  a3 and a4 advanced by 1; a5 advanced by 1 when depth > 4;
;         a6 advanced by 1 when depth > 6
;   uses: d0-d4
	ifgt depth-4
		eor.l	d0,d1		;  8
		move.l	d2,d4		;  4
		and.l	d6,d4		;  8 d6=$55555555
		eor.l	d4,d2		;  8
		or.l	d4,d1		;  8
		lsr.l	#1,d1		; 10
		move.b	d1,(8,a5)	; 12 plane 5
	ifgt depth-6
		swap	d1		;  4
		move.b	d1,(8,a6)	; 12 plane 7
	endc
		or.l	d0,d2		;  8
		move.b	d2,(a5)+	;  8 plane 4
	ifgt depth-6
		swap	d2		;  4
		move.b	d2,(a6)+	;  8 plane 6
	endc
	endc
		move.l	d3,d2		;  4 same 1-bit exchange for planes 0-3
		lsr.l	#7,d2		; 22
		move.l	d3,d0		;  4
		and.l	d6,d0		;  8 d6=$55555555
		eor.l	d0,d3		;  8
		move.l	d2,d4		;  4
		and.l	d6,d4		;  8 d6=$55555555
		eor.l	d4,d2		;  8
		or.l	d4,d3		;  8
		lsr.l	#1,d3		; 10
		move.b	d3,(8,a3)	; 12 plane 1
		swap	d3		;  4
		move.b	d3,(8,a4)	; 12 plane 3
		or.l	d0,d2		;  8
		move.b	d2,(a3)+	;  8 plane 0
		swap	d2		;  4
		move.b	d2,(a4)+	;  8 plane 2
	endm

; subao: step a1 back by one bitplane (a1 = a1 - plsiz), choosing the
; operand form that suits the build.

subao	macro
	ifnd generic
	iflt plsiz-32768
		suba.w	#plsiz,a1	; constant fits in a word
	else
		suba.l	#plsiz,a1	; constant needs a longword
	endc
	else
		suba.l	(6,sp),a1	; generic build: plsiz is kept on the stack
	endc
	endm

; convert the first 32 pixels to stack buffers as a special case
;
; Four part1/part2 pairs of 8 pixels each.  Nothing is written to the planes
; here; each buffer pointer that part2 uses ends up advanced by 4.

	rept	4
		part1
		part2
	endr

; 16 bytes of nops: the relocation at entry copies mainloop..endcode down to
; the first 16-byte boundary at or after begincode, so the copy starts
; somewhere inside this padding and execution falls through the remaining
; nops into the relocated loop.

begincode:	rept	8		; space for mainloop code relocation
		nop
		endr

; main loop (starts here) processes 64 chunky pixels at a time
;
; On entry a3-a6 each point 4 bytes into their 16-byte buffer slice, so
; (-4,ax) is the finished longword of the even plane and (4,ax) that of the
; odd plane.  Every part2 advances the pointers by one byte, which is why
; the displacements below shrink by one after each part2.

mainloop:

; Process the next 32 pixels from chunky to stack buffers while at the same
; time moving the result of the previous 32 pixels from stack buffers to
; Chip ram planes.
; Chip writes are spaced as widely apart as possible, so that there is
; always something useful happening while waiting for the Chip bus.
;
; a1 is stepped from plane 0 up to plane 7 (or plane 3) and then back down
; one plane at a time with subao; the final write uses (a1)+ so that a1 is
; left on the next longword of plane 0.  When depth is 6 the same steps are
; taken but the two writes to planes 7 and 6 are not assembled.

	ifgt depth-4
	ifd generic
		adda.l	(2,sp),a1		; add 7*plsiz
	else
		adda.l	#7*plsiz,a1		; a1 moves from plane 0 to plane 7
	endc
	ifgt depth-6
		move.l	(4,a6),(a1)		; plane 7
	endc
		part1
		subao
	ifgt depth-6
		move.l	(-4,a6),(a1)		; plane 6
	endc
		part2
		subao
		move.l	(3,a5),(a1)		; plane 5
		part1
		subao
		move.l	(-5,a5),(a1)		; plane 4
		part2
		subao
		move.l	(2,a4),(a1)		; plane 3
		part1
		subao
		move.l	(-6,a4),(a1)		; plane 2
		part2
		subao
		move.l	(1,a3),(a1)		; plane 1
		part1
		subao
		move.l	(-7,a3),(a1)+		; plane 0
		part2
	else
	ifd generic
		adda.l	(2,sp),a1		; add 3*plsiz
	else
		adda.l	#3*plsiz,a1		; a1 moves from plane 0 to plane 3
	endc
		move.l	(4,a4),(a1)		; plane 3
		part1
		part2
		subao
		move.l	(-5,a4),(a1)		; plane 2
		part1
		part2
		subao
		move.l	(2,a3),(a1)		; plane 1
		part1
		part2
		subao
		move.l	(-7,a3),(a1)+		; plane 0
		part1
		part2
	endc

; check if finished
;
; The counter at (sp) counts 32-pixel groups still to be converted.  When it
; reaches zero here the buffer pointers are 8 bytes into their slices and
; the group just converted is in the second longword of every plane.

		sub.w	#1,(sp)
		beq.w	done

; restore stack buffer pointers
;
; The first half left a3-a6 8 bytes into their slices; bring them back to
; the start so that the next 32 pixels overwrite the first longword of each
; plane buffer.

		subq.l	#8,a3
		subq.l	#8,a4
	ifgt depth-4
		subq.l	#8,a5
		subq.l	#8,a6
	endc

; Process the next 32 pixels from chunky to stack buffers while at the same
; time moving the result of the previous 32 pixels from stack buffers to
; Chip ram planes.
;
; The longwords sent to the planes here are the second ones of each plane
; buffer (offsets 4 and 12 from the start of a slice), filled by the first
; half of the loop.

	ifgt depth-4
	ifd generic
		adda.l	(2,sp),a1		; add 7*plsiz
	else
		adda.l	#7*plsiz,a1		; a1 moves from plane 0 to plane 7
	endc
	ifgt depth-6
		move.l	(12,a6),(a1)		; plane 7
	endc
		part1
		subao
	ifgt depth-6
		move.l	(4,a6),(a1)		; plane 6
	endc
		part2
		subao
		move.l	(11,a5),(a1)		; plane 5
		part1
		subao
		move.l	(3,a5),(a1)		; plane 4
		part2
		subao
		move.l	(10,a4),(a1)		; plane 3
		part1
		subao
		move.l	(2,a4),(a1)		; plane 2
		part2
		subao
		move.l	(9,a3),(a1)		; plane 1
		part1
		subao
		move.l	(1,a3),(a1)+		; plane 0
		part2
	else
	ifd generic
		adda.l	(2,sp),a1		; add 3*plsiz
	else
		adda.l	#3*plsiz,a1		; a1 moves from plane 0 to plane 3
	endc
		move.l	(12,a4),(a1)		; plane 3
		part1
		part2
		subao
		move.l	(3,a4),(a1)		; plane 2
		part1
		part2
		subao
		move.l	(10,a3),(a1)		; plane 1
		part1
		part2
		subao
		move.l	(1,a3),(a1)+		; plane 0
		part1
		part2
	endc

; check if finished, go back for more

		sub.w	#1,(sp)
		bne.w	mainloop

; The loop ends here (rather than at the test after the first half) only
; when the total number of 32-pixel groups is odd.
;
; No pointer correction is needed on this path.  The four part2 expansions
; of the second half leave a3-a6 at slice+4, and the group they have just
; converted occupies bytes 0-3 (even plane) and 8-11 (odd plane) of every
; slice -- exactly the longwords that done fetches with (-4,ax) and (4,ax).
; Adding 4 to the pointers here, as this code used to do, made done send
; bytes 4-7 / 12-15 instead: the previous group, which the second half had
; already written to the planes, so the final 32 pixels were lost.

; write the last longword from stack buffer to planes
;
; The last group converted has not been sent to the planes yet.  The reads
; below expect a3-a6 to point 4 bytes past the start of that group's
; longword for the even plane: (-4,ax) fetches the even plane and (4,ax)
; the odd plane of each pair.

done:	ifd generic
		adda.l	(2,sp),a1		; add 7*plsiz or 3*plsiz
	ifgt depth-4
	ifgt depth-6
		move.l	(4,a6),(a1)		; plane 7
	endc
		subao
	ifgt depth-6
		move.l	(-4,a6),(a1)		; plane 6
	endc
		subao
	endc
	else
		adda.l	#(depth-1)*plsiz,a1	; a1 moves from plane 0 to the highest plane in use
	ifgt depth-6
		move.l	(4,a6),(a1)		; plane 7
		subao
		move.l	(-4,a6),(a1)		; plane 6
		subao
	endc
	endc
	ifgt depth-4
		move.l	(4,a5),(a1)		; plane 5
		subao
		move.l	(-4,a5),(a1)		; plane 4
		subao
	endc
		move.l	(4,a4),(a1)		; plane 3
		subao
		move.l	(-4,a4),(a1)		; plane 2
		subao
		move.l	(4,a3),(a1)		; plane 1
		subao
		move.l	(-4,a3),(a1)+		; plane 0

; all done!  restore stack and return

	ifd generic
		adda.w	#4+4+2,sp		; remove stack variables
	else
		addq.w	#2,sp			; remove outer loop counter
	endc
		adda.w	(sp)+,sp		; remove the 64-byte buffer plus alignment padding (size saved at entry)
		movem.l	(sp)+,d2-d7/a2-a6	; restore the registers saved at entry

		rts

endcode:
; endcode marks the end of the range (mainloop..endcode) that is copied by
; the relocation at entry.

; Bit 0 is set by the bset at entry; it reads as clear only on the first
; call, which is when the relocation is performed.  The byte sits directly
; after the code and is written at run time.
firsttimeflag:	dc.b	0
		even

;-----------------------------------------------------------------------------

		end
