
; This file includes routines for decoding a mono IMA ADPCM stream to either
;  8-bit or 14-bit (separate high 8-bit / low 6-bit buffers) mono output.
;  Decoding is done through a callback function that should be called each vblank.
;
; Currently the routine probably takes something like 3% of CPU time on 68060/50
;  and <10% on 68030/50.
; Potential areas for improvement:
;  make chipwrites in parallel with decoding logic
;  write longwords instead of words
;  pair decoding instructions (for 060)


	section	code,code

;------------------------------------------------------------------------------
; in	a0	WAV 16bit mono file
;		d0.w	replay period
; out	a0/a1 stuff to send to PaulaOutput

AdpcmSource_Init_16BitMonoInput_8BitMonoOutput
	; Reset the 8-bit decoder state block:
	;   +0.l  input read pointer
	;   +4.w  step index (kept pre-scaled, see AdpcmSource_InitTables)
	;   +8.l  predicted sample
	lea	AdpcmSource_16BitMonoInput_8BitMonoOutput_MixState,a1
	moveq	#0,d1			; d1 is free here, it is reloaded with the mode below
	move.l	d1,8(a1)
	move.w	d1,4(a1)
	move.l	a0,(a1)

	; AdpcmSource_InitTables uses d0 as a loop counter, so park the
	; replay period on the stack across the call.
	move.w	d0,-(sp)
	bsr	AdpcmSource_InitTables
	move.w	(sp)+,d0

	; Hand callback (a0), state block (a1), output mode (d1) and
	; period (d0.w) over to PaulaOutput_Init.
	lea	AdpcmSource_16BitMonoInput_8BitMonoOutput_MixState,a1
	lea	AdpcmSource_16BitMonoInput_8BitMonoOutput_MixSamples,a0
	moveq	#PaulaOutput_Mode_8BitMono,d1

	bsr	PaulaOutput_Init
	rts

;------------------------------------------------------------------------------
; in	a0	WAV 16bit mono file
;	d0.w	replay period (preserved across table setup and passed on, as in the 8-bit variant)
; out	a0/a1 stuff to send to PaulaOutput

AdpcmSource_Init_16BitMonoInput_14BitMonoOutput
	; Reset the 14-bit decoder state block:
	;   +0.l  input read pointer
	;   +4.w  step index (kept pre-scaled, see AdpcmSource_InitTables)
	;   +8.l  predicted sample
	lea	AdpcmSource_16BitMonoInput_14BitMonoOutput_MixState,a1
	moveq	#0,d1			; d1 is free here, it is reloaded with the mode below
	move.l	d1,8(a1)
	move.w	d1,4(a1)
	move.l	a0,(a1)

	; AdpcmSource_InitTables uses d0 as a loop counter, so park the
	; low word of d0 on the stack across the call.
	move.w	d0,-(sp)
	bsr	AdpcmSource_InitTables
	move.w	(sp)+,d0

	; Hand callback (a0), state block (a1), output mode (d1) and
	; d0.w over to PaulaOutput_Init.
	lea	AdpcmSource_16BitMonoInput_14BitMonoOutput_MixState,a1
	lea	AdpcmSource_16BitMonoInput_14BitMonoOutput_MixSamples,a0
	moveq	#PaulaOutput_Mode_14BitMono,d1

	bsr	PaulaOutput_Init
	rts


;------------------------------------------------------------------------------
; in	d0	number of samples to mix
;	d1	current mix position
;	a0	output samples
;	a4	state

AdpcmSource_16BitMonoInput_8BitMonoOutput_MixSamples
	; Mix callback for 8-bit output: loads the decoder state from (a4),
	; decodes d0 samples into the buffer at a0 and writes the state back.
	; d1 (mix position) is not used. d2-d7/a2-a6 are preserved.
	movem.l	d2-d7/a2-a6,-(sp)	; frame: d2-d7, a2, a3, a4, ... -> a4 sits at 8*4(sp)

	; Move everything into the decoder's register convention.
	move.l	a0,a1			; a1 = output buffer
	move.l	8(a4),d6		; d6 = predicted sample
	move.w	4(a4),d2		; d2 = step index
	move.l	(a4),a0			; a0 = input read pointer

	bsr.s	AdpcmSource_DecodeSamples_8BitMonoOutput

	; The decoder overwrites a4, so fetch the state pointer back from
	; the register frame saved on entry.
	move.l	8*4(sp),a4

	move.l	a0,(a4)
	move.w	d2,4(a4)
	move.l	d6,8(a4)
	movem.l	(sp)+,d2-d7/a2-a6
	rts

;------------------------------------------------------------------------------
; in	d0.l numsamples
;	d2.w state part 1
;	d6.l state part 2
;	a0 input (state part 3)
;	a1 output
; out	d2.w state part 1
;	d6.l state part 2
;	a0	input (state part 3)	

AdpcmSource_DecodeSamples_8BitMonoOutput
		; Decodes d0/2 input bytes (two 4-bit codes each, high nibble
		; first) and stores the high byte of each 16-bit predicted sample
		; to (a1)+. An odd trailing sample is not decoded.
		;
		; Register use inside the loop:
		;   d2.w  step index, pre-scaled by 64 (= byte offset of the
		;         16-entry delta row in AdpcmSource_StepTable)
		;   d6.l  predicted sample, clamped to -32768..32767
		;   d5.w  maximum step index (88<<6)
		;   a4/a5 lower/upper clamp bounds for d6
		;   a2    AdpcmSource_StepTable, a3 AdpcmSource_IndexTable
		; Clobbers d0/d1/d3/d4/d5/d7/a2-a5; a0 and a1 are advanced.
		move.l	d0,d7
		lsr.l	#1,d7			; number of sample pairs (= input bytes)
		beq.w	.noSamples		; no complete pair: without this check d7
						; would become -1 and the dbf pair below
						; would run for ~2^32 iterations
		subq.l	#1,d7			; dbf terminates at -1

		lea	AdpcmSource_IndexTable,a3
		lea	AdpcmSource_StepTable,a2

		moveq	#0,d0
		move.w	#88<<6,d5
		move.l	#-32768,a4
		move.l	#32767,a5

		; 32-bit loop counter built from two dbf's: the inner dbf counts
		; the low word of d7, the outer one the high word.
		swap	d7
.samplePair2	swap	d7
.samplePair
		move.b	(a0)+,d0		; 8
		move.w	d0,d1			; 4
		and.b	#$f,d1			; 8	; d1 = low nibble (second sample)
		lsr.b	#4,d0			; 16	; d0 = high nibble (first sample)
						; = 36

.firstSample
		add.b	d0,d0			; 4
		add.b	d0,d0			; 4	; code*4 = longword offset
		move.w	d2,d3			; 4
		add.w	d0,d3			; 4	; d3 = offset of delta for (index, code)

		add.w	2(a3,d0.w),d2		; 16	; low word of the index adjustment
		spl	d4			; 4
		ext.w	d4			; 4
		and.w	d4,d2			; 4	; negative index -> 0
		cmp.w	d5,d2			; 4		; d5 == 88<<6
		bls.s	.indexClamp0Done	; 8 or 12	; clamp index against 88<<6
		move.w	d5,d2
.indexClamp0Done

		add.l	(a2,d3.w),d6		; 16	; apply the precomputed signed delta
		cmp.l	a4,d6			; 8
		bge.s	.clampMin0Done		; 8 or 12
		move.l	a4,d6
.clampMin0Done
		cmp.l	a5,d6			; 8
		ble.s	.clampMax0Done		; 8 or 12
		move.l	a5,d6
.clampMax0Done
		ror.w	#8,d6			; bring the high byte down ...
		move.b	d6,(a1)+		; 8	; ... store it as the 8-bit sample ...
		ror.w	#8,d6			; ... and restore the predictor

.secondSample
		add.b	d1,d1			; 4
		add.b	d1,d1			; 4
		move.w	d2,d3			; 4
		add.w	d1,d3			; 4

		add.w	2(a3,d1.w),d2		; 16
		spl	d4			; 4
		ext.w	d4			; 4
		and.w	d4,d2			; 4
		cmp.w	d5,d2			; 4		; d5 == 88<<6
		bls.s	.indexClamp1Done	; 8 or 12	; clamp index against 88<<6
		move.w	d5,d2
.indexClamp1Done

		add.l	(a2,d3.w),d6		; 16
		cmp.l	a4,d6			; 8
		bge.s	.clampMin1Done		; 8 or 12
		move.l	a4,d6
.clampMin1Done
		cmp.l	a5,d6			; 8
		ble.s	.clampMax1Done		; 8 or 12
		move.l	a5,d6
.clampMax1Done
		ror.w	#8,d6
		move.b	d6,(a1)+		; 8
		ror.w	#8,d6

						; = 224 approx

		dbf	d7,.samplePair		; 12
		swap	d7
		dbf	d7,.samplePair2
.noSamples
		rts

;------------------------------------------------------------------------------
; in	d0	number of samples to mix
;	d1	current mix position
;	a0	output samples (hi 8bits)
;	a1	output samples (low 6bits)
;	a4	state

AdpcmSource_16BitMonoInput_14BitMonoOutput_MixSamples
	; Mix callback for 14-bit output: loads the decoder state from (a4),
	; decodes d0 samples into the two buffers at a0 (high 8 bits) and
	; a1 (low 6 bits) and writes the state back.
	; d1 (mix position) is not used. d2-d7/a2-a6 are preserved.
	movem.l	d2-d7/a2-a6,-(sp)	; frame: d2-d7, a2, a3, a4, ... -> a4 sits at 8*4(sp)

	; a0/a1 already hold the output pointers the decoder expects.
	move.l	8(a4),d6		; d6 = predicted sample
	move.w	4(a4),d2		; d2 = step index
	move.l	(a4),a2			; a2 = input read pointer

	bsr	AdpcmSource_DecodeSamples_14BitMonoOutput

	; The decoder overwrites a4, so fetch the state pointer back from
	; the register frame saved on entry.
	move.l	8*4(sp),a4

	move.l	a2,(a4)
	move.w	d2,4(a4)
	move.l	d6,8(a4)

	movem.l	(sp)+,d2-d7/a2-a6
	rts


AdpcmSource_InitTables
		; Converts AdpcmSource_IndexTable and AdpcmSource_StepTable, in
		; place, into the pre-scaled lookup form used by the decoders.
		; Clobbers d0/d1/a0/a1 (only d1/a0 when the tables are already
		; converted); d2-d4 are preserved.
		;
		; Both Init routines call this, and the conversion must run only
		; once: a second pass would shift the index table again and expand
		; already-expanded step data. A pristine index table starts with
		; -1 (it reads -1<<6 after conversion), so that first entry doubles
		; as the "not converted yet" marker.
		lea	AdpcmSource_IndexTable,a0
		moveq	#-1,d1
		cmp.l	(a0),d1
		bne.s	.tablesReady

		movem.l	d2-d4,-(sp)

		; Index table: scale every adjustment by 64 (16 deltas * 4 bytes),
		; so the step index can be used directly as a byte offset of a
		; delta row in the expanded step table.
		moveq	#16-1,d0
.index
		move.l	(a0),d1
		lsl.l	#6,d1
		move.l	d1,(a0)+
		dbf	d0,.index
		
		; Step table: expand the 89 raw step sizes into 89 rows of 16
		; signed deltas, one per 4-bit code. Working from the end of both
		; tables backwards lets the expansion happen in place: every raw
		; step is read before its row (or any later write) can reach it.
		lea	AdpcmSource_StepTable+89*4,a0		; end of raw steps
		lea	AdpcmSource_StepTable+89*16*4,a1	; end of expanded table
		moveq	#89-1,d0
.index2
		move.l	-(a0),d1		; d1 = step size
		moveq	#16-1,d2		; d2 = 4-bit code, 15 down to 0
.delta
		; delta = step*bit2 + (step>>1)*bit1 + (step>>2)*bit0 + (step>>3),
		; negated when bit 3 (sign) of the code is set.
		move.l	d1,d3
		moveq	#0,d4
		btst	#2,d2
		beq.s	.nBit2
		add.l	d3,d4
.nBit2
		asr.l	#1,d3
		btst	#1,d2
		beq.s	.nBit1
		add.l	d3,d4
.nBit1
		asr.l	#1,d3
		btst	#0,d2
		beq.s	.nBit0
		add.l	d3,d4
.nBit0
		asr.l	#1,d3
		add.l	d3,d4
		btst	#3,d2
		beq.s	.nBit3
		neg.l	d4
.nBit3
		move.l	d4,-(a1)
		
		dbf	d2,.delta
		dbf	d0,.index2

		movem.l	(sp)+,d2-d4
.tablesReady
		rts


;------------------------------------------------------------------------------
; in	d0.l numsamples
;	d2.w state part 1
;	d6.l state part 2
;	a0 output (hi 8bit)
;	a1 output (low 6bit)
;	a2 input (state part 3)
; out	d2.w state part 1
;	d6.l state part 2
;	a2	input (state part 3)	

AdpcmSource_DecodeSamples_14BitMonoOutput
		; Decodes d0/2 input bytes (two 4-bit codes each, high nibble
		; first). For every pair of 16-bit predicted samples one word of
		; the two high bytes goes to (a0)+ and one word of the two low
		; bytes, each shifted right by 2 and masked to 6 bits, goes to
		; (a1)+. An odd trailing sample is not decoded.
		;
		; Register use inside the loop:
		;   d2.w  step index, pre-scaled by 64 (= byte offset of the
		;         16-entry delta row in AdpcmSource_StepTable)
		;   d6.l  predicted sample, clamped to -32768..32767
		;   d5.w  first sample of the current pair
		;   a4/a5 lower/upper clamp bounds for d6
		;   a6    AdpcmSource_StepTable, a3 AdpcmSource_IndexTable
		; Clobbers d0/d1/d3/d4/d5/d7/a3-a6; a0, a1 and a2 are advanced.
		move.l	d0,d7
		lsr.l	#1,d7			; number of sample pairs (= input bytes)
		beq.w	.noSamples		; no complete pair: without this check the
						; subq/bne at the loop end would wrap and
						; run for ~2^32 iterations

		lea	AdpcmSource_IndexTable,a3
		lea	AdpcmSource_StepTable,a6

		moveq	#0,d0
		move.l	#-32768,a4
		move.l	#32767,a5
.samplePair
		move.b	(a2)+,d0
		move.w	d0,d1
		and.b	#$f,d1			; d1 = low nibble (second sample)
		lsr.b	#4,d0			; d0 = high nibble (first sample)

.firstSample
		lsl.b	#2,d0			; code*4 = longword offset
		move.w	d2,d3
		add.w	d0,d3			; d3 = offset of delta for (index, code)

		add.w	2(a3,d0.w),d2		; low word of the index adjustment
		spl	d4
		ext.w	d4
		and.w	d4,d2			; negative index -> 0
		cmp.w	#88<<6,d2
		bls.s	.indexClamp0Done
		move.w	#88<<6,d2
.indexClamp0Done

		add.l	(a6,d3.w),d6		; apply the precomputed signed delta
		cmp.l	a4,d6
		bge.s	.clampMin0Done
		move.l	a4,d6
.clampMin0Done
		cmp.l	a5,d6
		ble.s	.clampMax0Done
		move.l	a5,d6
.clampMax0Done
		move.w	d6,d5			; keep first sample for packing below

.secondSample
		lsl.b	#2,d1
		move.w	d2,d3
		add.w	d1,d3

		add.w	2(a3,d1.w),d2
		spl	d4
		ext.w	d4
		and.w	d4,d2
		cmp.w	#88<<6,d2
		bls.s	.indexClamp1Done
		move.w	#88<<6,d2
.indexClamp1Done

		add.l	(a6,d3.w),d6
		cmp.l	a4,d6
		bge.s	.clampMin1Done
		move.l	a4,d6
.clampMin1Done
		cmp.l	a5,d6
		ble.s	.clampMax1Done
		move.l	a5,d6
.clampMax1Done

		; Pack the pair: d5 = hi1:lo1 (first), d6 = hi2:lo2 (second).
		ror.w	#8,d5			; d5 = lo1:hi1
		move.w	d6,d3			; d3 = hi2:lo2
		move.b	d5,d3			; d3 = hi2:hi1
		move.b	d6,d5			; d5 = lo1:lo2
		ror.w	#8,d3			; d3 = hi1:hi2
		lsr.w	#2,d5			; shift both low bytes right by 2 ...
		and.w	#$3f3f,d5		; ... and drop the bits that crossed bytes

		move.w	d3,(a0)+
		move.w	d5,(a1)+

		subq.l	#1,d7
		bne.s	.samplePair

.noSamples
		rts

		section	data,data

; Step-index adjustment for each of the 16 possible 4-bit codes, one longword
; per code. AdpcmSource_InitTables shifts every entry left by 6 in place; the
; decoders then add the low word of an entry (offset 2) to the step index.
AdpcmSource_IndexTable
		dc.l	-1, -1, -1, -1, 2, 4, 6, 8
		dc.l	-1, -1, -1, -1, 2, 4, 6, 8

; The 89 raw step sizes, followed by reserved space. AdpcmSource_InitTables
; expands this in place to 89 rows of 16 longword deltas (one per 4-bit code),
; which is why room for 89*15 additional longwords follows the initial values.
AdpcmSource_StepTable
		dc.l	7, 8, 9, 10, 11, 12, 13, 14, 16, 17
		dc.l	19, 21, 23, 25, 28, 31, 34, 37, 41, 45
		dc.l	50, 55, 60, 66, 73, 80, 88, 97, 107, 118
		dc.l	130, 143, 157, 173, 190, 209, 230, 253, 279, 307
		dc.l	337, 371, 408, 449, 494, 544, 598, 658, 724, 796
		dc.l	876, 963, 1060, 1166, 1282, 1411, 1552, 1707, 1878, 2066
		dc.l	2272, 2499, 2749, 3024, 3327, 3660, 4026, 4428, 4871, 5358
		dc.l	5894, 6484, 7132, 7845, 8630, 9493, 10442, 11487, 12635, 13899
		dc.l	15289, 16818, 18500, 20350, 22385, 24623, 27086, 29794, 32767
		ds.l	89*15			; space for the in-place expansion
		
		section	bss,bss

; Decoder state blocks, one per output mode. Layout as used by the Init and
; MixSamples routines:
;   +0.l  input read pointer
;   +4.w  step index
;   +6.w  (not accessed)
;   +8.l  predicted sample
AdpcmSource_16BitMonoInput_8BitMonoOutput_MixState
		ds.l	3

AdpcmSource_16BitMonoInput_14BitMonoOutput_MixState
		ds.l	3
