;
; x86 format converters for HERMES
; Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at)
; This source code is licensed under the GNU LGPL
; 
; Please refer to the file COPYING.LIB contained in the distribution for
; licensing conditions		
; 

BITS 32

GLOBAL _ConvertI8_32
GLOBAL _ConvertI8_16
GLOBAL _ConvertI8_INDEX8

GLOBAL _ConvertI8_SetLookup
	
EXTERN _ConvertX86
EXTERN _x86return

SECTION .data

; File-local state for the converters in this file (neither symbol is
; declared GLOBAL). Both are statically allocated, so they are shared by
; every caller of these routines.
;
; store_ecx: scratch dword for holding a saved ECX value.
store_ecx dd 0			; This turned out to be faster than push/pop
; x86lookup: pointer stored by _ConvertI8_SetLookup and loaded as the
; lookup table base by _ConvertI8_32 and _ConvertI8_16.
x86lookup dd 0
			
SECTION .text


;; Convert_*
;; Parameters:
;;   ESI = source 
;;   EDI = dest
;;   ECX = amount (NOT 0!!! (the ConvertX86 routine checks for that though))
;; Destroys:
;;   EAX, EBX, EDX

_ConvertI8_32:
	; Expand 8-bit indices to 32-bit pixels, one table lookup per pixel.
	;   In:  ESI = source indices, EDI = dest, ECX = pixel count (non-zero)
	;   Out: ESI advanced by count, EDI advanced by count*4, ECX = 0
	;   Destroys: EAX, EBX, EDX
	; Leaves through _x86return rather than a plain ret.

	mov edx,[x86lookup]		; EDX = base of the dword lookup table

.next_pixel:
	movzx ebx,byte [esi]		; EBX = zero-extended index
	inc esi

	mov eax,[edx+ebx*4]		; fetch the table entry for this index

	mov [edi],eax
	add edi,4

	dec ecx
	jnz .next_pixel			; a zero count on entry would wrap, hence "non-zero"

	jmp _x86return
	
		
_ConvertI8_16:
	; Expand 8-bit indices to 16-bit pixels through the dword table whose
	; address is held in x86lookup.
	;   In:  ESI = source indices, EDI = dest, ECX = pixel count
	;   Out: ESI advanced by count, EDI advanced by count*2, ECX = 0
	;   Destroys: EAX, EBX, EDX, flags
	; Leaves through _x86return rather than a plain ret.
	;
	; Pixels are converted two at a time and written with a single 32-bit
	; store; an odd trailing pixel is written with a 16-bit store.
	; The low pixel's table entry is ORed in unmasked, so every table
	; entry must have its upper 16 bits clear.
	;
	; The pixel count lives in ECX for the whole routine, so no writable
	; static storage is touched while converting.

	xor ebx,ebx			; upper 24 bits stay zero; only BL is loaded below
	mov edx,[x86lookup]		; EDX = base of the dword lookup table

	cmp ecx,2
	jb .tail			; 0 or 1 pixels: skip the pair loop

.pair:
	mov bl,[esi+1]			; second pixel of the pair -> high 16 bits

	mov eax,[edx+ebx*4]

	shl eax,16
	mov bl,[esi]			; first pixel of the pair -> low 16 bits

	or eax,[edx+ebx*4]
	add esi,2

	mov [edi],eax
	add edi,4

	sub ecx,2
	cmp ecx,2
	jae .pair			; keep going while a full pair remains

.tail:
	test ecx,ecx			; ECX is 0 or 1 here
	jz .done

	movzx eax,byte [esi]		; draw the remaining pixel, no need to be
	mov ebx,[edx+eax*4]		; superfast
	mov [edi],bx			; one 16-bit store instead of two byte moves

	inc esi
	add edi,2

	xor ecx,ecx			; count fully consumed

.done:
	jmp _x86return

			

_ConvertI8_INDEX8:
	; Index8 -> index8 needs no conversion: copy ECX bytes from [ESI] to
	; [EDI], whole dwords first and then the 0..3 leftover bytes.
	;   In:  ESI = source, EDI = dest, ECX = byte count
	;   Out: ESI/EDI advanced by count, ECX = 0
	;   Destroys: EAX (AL, only when there are leftover bytes), EDX
	; Leaves through _x86return rather than a plain ret.

	mov edx,ecx			; EDX keeps the full count for the tail pass

	shr ecx,2			; ECX = number of whole 4-byte blocks
	jz .tail			; only 1, 2 or 3 bytes in total

	rep movsd			; NOTE(review): direction follows DF; presumably
					; the caller guarantees DF = 0 - confirm

.tail:
	mov ecx,edx

	and ecx,3			; ECX = bytes left over after the dword pass
	jz .done			; count was a multiple of 4

.copy_byte:
	mov al,[esi]
	mov [edi],al

	inc esi
	inc edi

	dec ecx
	jnz .copy_byte

.done:
	jmp _x86return



_ConvertI8_SetLookup:
	; Stack-argument entry point: stores its single dword argument in
	; x86lookup, from where _ConvertI8_32 and _ConvertI8_16 load their
	; table base.
	;   In:  [esp+4] = value to store (the table pointer)
	;   Out: EAX = the stored value
	; Returns with a plain ret, so the caller removes the argument.

	mov eax,[esp+4]			; first argument sits just above the return address
	mov [x86lookup],eax

	ret
			
	