;
; x86 format converters for HERMES
; Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at)
; This source code is licensed under the GNU LGPL
; 
; Please refer to the file COPYING.LIB contained in the distribution for
; licensing conditions		
;
; Some routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission
; 

	
BITS 32

GLOBAL _Convert32_32RGB888
GLOBAL _Convert32_16RGB565
GLOBAL _Convert32_16RGB555
GLOBAL _Convert32_8RGB332

GLOBAL _ConvertX86
GLOBAL _x86return
	
SECTION .text


;; _Convert_*
;; Parameters:	
;;   ESI = source 
;;   EDI = dest
;;   ECX = amount (NOT 0!!! (the _ConvertX86 routine checks for that though))
;; Destroys:
;;   EAX, EBX, EDX


_Convert32_32RGB888:
	;; Same pixel size on both sides: copy ECX dwords from [ESI] to [EDI]
	;; with a plain string move.  (An FPU based copy was planned for
	;; later; for now this is enough.)
	rep	movsd

	jmp	_x86return		; rejoin the row loop in _ConvertX86


;; 32 BIT RGB TO 16 BIT RGB 565
;; From:	00000000 | rrrrrrrr | gggggggg | bbbbbbbb
;;   To:	00000000 | 00000000 | rrrrrggg | gggbbbbb
;; Algorithm:
;;   - Runs of 16 pixels or fewer are converted one pixel at a time
;;   - Longer runs: if the low two bits of EDI are not zero, one head pixel
;;     is converted first; then ecx/2 pixel pairs are converted, each pair
;;     being written as a single dword
;;   - A single odd pixel that may be left is converted the slow way


;; Convert the one pixel at [ESI] to RGB565, store the two result bytes at
;; [EDI], then advance ESI by 4 and EDI by 2.  Scratch: EAX (AL/AH) and BL.
%macro CONVERT32_16RGB565_PIXEL 0
	mov	bl,[esi+0]		; blue
	mov	al,[esi+1]		; green
	mov	ah,[esi+2]		; red
	shr	ah,3			; ah = 000rrrrr
	and	al,0FCh			; al = gggggg00
	shl	eax,3			; ax = rrrrrggg ggg00000
	shr	bl,3			; bl = 000bbbbb
	add	al,bl			; ax = rrrrrggg gggbbbbb
	mov	[edi+0],al
	mov	[edi+1],ah
	add	esi,4
	add	edi,2
%endmacro


_Convert32_16RGB565:
	cmp	ecx,16			; more than 16 pixels?
	ja	.long_run

.short_loop:				; short run: one pixel per iteration
	CONVERT32_16RGB565_PIXEL
	dec	ecx
	jnz	.short_loop

	jmp	_x86return		; end of short run


.long_run:
	mov	ebx,edi
	and	ebx,3			; low two bits of the destination clear?
	jz	.pairs

	CONVERT32_16RGB565_PIXEL	; head pixel
	dec	ecx

.pairs:
	push	ecx			; saved for the odd-pixel test below

	shr	ecx,1			; ecx = number of pixel pairs

	lea	esi,[esi+ecx*8]		; point both arrays past the pairs ...
	lea	edi,[edi+ecx*4]

	neg	ecx			; ... and count up from -pairs to 0
	jmp	.pair_load

.pair_store:
	mov	[edi+ecx*4-4],eax	; store the pair finished last time round
.pair_load:
	mov	eax,[esi+ecx*8]		; first pixel of the pair

	shr	ah,2			; ah = 00gggggg
	mov	ebx,[esi+ecx*8+4]	; second pixel of the pair

	shr	eax,3
	mov	edx,[esi+ecx*8+4]	; second pixel again, for its red

	shr	bh,2			; bh = 00gggggg (second pixel)
	mov	dl,[esi+ecx*8+2]	; dl = red of the first pixel

	shl	ebx,13
	and	eax,000007FFh		; first pixel: green+blue in bits 0-10

	shl	edx,8
	and	ebx,07FF0000h		; second pixel: green+blue in bits 16-26

	and	edx,0F800F800h		; both reds: bits 11-15 and 27-31
	add	eax,ebx

	add	eax,edx
	inc	ecx

	jnz	.pair_store

	mov	[edi+ecx*4-4],eax	; ecx = 0 here: store the final pair

	pop	ecx
	test	ecx,1			; odd pixel left over?
	jz	.done

	CONVERT32_16RGB565_PIXEL	; tail pixel

.done:
	jmp	_x86return




;; 32 BIT RGB TO 16 BIT RGB 555
;; (called 16 bit not 15 because it writes two bytes)
;; From:	00000000 | rrrrrrrr | gggggggg | bbbbbbbb
;;   To:	00000000 | 00000000 | 0rrrrrgg | gggbbbbb
;; Same idea as the RGB565 routine, with these differences:
;;        Blue : everything stays the same
;;        Green: shifted right by 6 instead of 5, mask 03E0h (1111100000)
;;               instead of 11111100000
;;        Red  : shifted right by 9 instead of 8, mask 7C00h
;;               (111110000000000) instead of 1111100000000000
;; Pixels are converted in pairs (one dword store per pair); a single odd
;; pixel that may be left is converted on its own afterwards.
_Convert32_16RGB555:

	push	ecx			; full count, needed for the odd-pixel test

	and	ecx,0FFFFFFFEh		; ecx = count rounded down to an even number
	jz	.odd_pixel		; there was just one pixel to draw

.pair_loop:
	mov	eax,[esi]		; first pixel of the pair

	mov	edx,eax
	mov	ebx,eax

	shr	edx,6
	and	eax,0F8h		; top five bits of blue

	shr	al,3			; blue -> bits 0-4
	and	edx,03E0h		; green -> bits 5-9

	shr	ebx,9
	or	eax,edx

	and	ebx,7C00h		; red -> bits 10-14
	mov	edx,[esi+4]		; second pixel of the pair

	or	eax,ebx
	add	esi,8

	shl	eax,16			; park the first result in the upper word
	mov	ebx,edx

	shr	dl,3
	and	dl,1Fh			; blue -> bits 0-4

	shr	ebx,6

	and	ebx,03E0h		; green -> bits 5-9

	mov	al,dl
	or	eax,ebx

	shr	edx,9
	and	edx,7C00h		; red -> bits 10-14

	or	eax,edx

	ror	eax,16			; first pixel in the low word, second in the high

	mov	[edi],eax
	add	edi,4

	sub	ecx,2			; two pixels done

	jnz	.pair_loop


.odd_pixel:
	pop	ecx
	and	ecx,1			; odd pixel left over?
	jz	.done

	mov	ebx,[esi]

	mov	eax,ebx
	mov	edx,ebx

	shr	eax,3

	shr	edx,6

	and	eax,1Fh			; blue
	and	edx,03E0h		; green

	shr	ebx,9

	or	eax,edx

	and	ebx,7C00h		; red

	or	eax,ebx

	mov	[edi],al
	mov	[edi+1],ah

	add	edi,2
	add	esi,4

.done:
	jmp	_x86return




	
;; FROM 32 BIT RGB to 8 BIT RGB (rrrgggbb)
;; This routine writes FOUR pixels at once (one dword) for as long as it can
;; and then, if they exist, converts the up to three trailing pixels one by
;; one.
_Convert32_8RGB332:

	push	ecx			; full count, needed for the trailing pixels

	and	ecx,0FFFFFFFCh		; ecx = count rounded down to a multiple of 4
	jz	near .trailing		; line width = 1, 2 or 3

.quad_loop:
	mov	eax,[esi]		; first pair of pixels
	mov	edx,[esi+4]

	shr	dl,6			; blue of pixel 1
	mov	ebx,eax

	shr	al,6			; blue of pixel 0
	and	ah,0E0h			; green of pixel 0

	shr	ebx,16			; bl = red of pixel 0
	and	dh,0E0h			; green of pixel 1

	shr	ah,3
	and	bl,0E0h

	shr	dh,3

	or	al,bl

	mov	ebx,edx
	or	al,ah			; al = pixel 0 as rrrgggbb

	shr	ebx,16			; bl = red of pixel 1
	or	dl,dh

	and	bl,0E0h

	or	dl,bl			; dl = pixel 1 as rrrgggbb

	mov	ah,dl			; ax = pixel 1 : pixel 0



	mov	ebx,[esi+8]		; second pair of pixels

	mov	edx,ebx
	and	bh,0E0h			; green of pixel 2

	shr	bl,6			; blue of pixel 2
	and	edx,0E00000h		; red of pixel 2

	shr	edx,16

	shr	bh,3

	ror	eax,16			; move the first pair to the upper word
	or	bl,dl

	mov	edx,[esi+12]
	or	bl,bh

	mov	al,bl			; al = pixel 2 as rrrgggbb

	mov	ebx,edx
	and	dh,0E0h			; green of pixel 3

	shr	dl,6			; blue of pixel 3
	and	ebx,0E00000h		; red of pixel 3

	shr	dh,3
	mov	ah,dl

	shr	ebx,16
	or	ah,dh

	or	ah,bl			; ah = pixel 3 as rrrgggbb

	rol	eax,16			; first pair back to the low word
	add	esi,16

	mov	[edi],eax
	add	edi,4


	sub	ecx,4
	jnz	near .quad_loop		; out of range for a short jump

.trailing:

	pop	ecx
	and	ecx,3			; mask out number of pixels left to draw

	jz	.done			; nothing to do anymore

.single_loop:
	mov	eax,[esi]		; single pixel conversion for trailing pixels

	mov	ebx,eax

	shr	al,6			; blue
	and	ah,0E0h			; green

	shr	ebx,16			; bl = red

	shr	ah,3
	and	bl,0E0h

	or	al,ah
	or	al,bl

	mov	[edi],al

	inc	edi
	add	esi,4

	dec	ecx
	jnz	.single_loop

.done:
	jmp	_x86return



;; _ConvertX86:
;; EAX = ConverterInfo*   (the pointer is taken from EAX; nothing is read
;;                         from the stack)
;; --------------------------------------------------------------------------
;; ConverterInfo (ebp+..)
;;   0:	void *s_pixels
;;   4:	int s_width
;;   8:	int s_height
;;  12:	int s_add
;;  16:	void *d_pixels
;;  20:	int d_width
;;  24:	int d_height
;;  28:	int d_add
;;  32:	void (*converter_function)() 
;; --------------------------------------------------------------------------
;; Row loop: for every row, ECX is loaded with s_width and control jumps to
;; converter_function with ESI = source and EDI = dest.  The converter comes
;; back by jumping to _x86return, where s_add / d_add are added to ESI / EDI.
;;
;; Notes:
;;   - Returns immediately, touching nothing, when s_width or d_height is
;;     zero or negative.
;;   - d_height inside the caller's structure is the row counter: the loop
;;     decrements it in place until it reaches 0.
;;   - EBX, ESI, EDI and EBP are saved and restored here; EAX, ECX and EDX
;;     are not preserved.
_ConvertX86:
	cmp dword [eax+4],0		; no pixels per row?
	jle endconvert

	cmp dword [eax+24],0		; no rows?  (dec below would wrap otherwise)
	jle endconvert
	
	push ebp
	push ebx			; the converters use EBX, ESI and EDI freely
	push esi
	push edi

	mov ebp,eax			; ebp = ConverterInfo* for the whole loop
	
	mov esi,[ebp+0]			; s_pixels
	mov edi,[ebp+16]		; d_pixels
	
y_loop:	
	mov ecx,[ebp+4]			; ecx = s_width, the converter's pixel count

	jmp [ebp+32]			; converter_function; returns via _x86return

_x86return:	
	add esi,[ebp+12]		; s_add
	add edi,[ebp+28]		; d_add
	
	dec dword  [ebp+24]		; one row less in d_height
	jnz y_loop

	
	pop edi
	pop esi
	pop ebx
	pop ebp

endconvert:	
	ret		
