;
; MMX format converters for HERMES
; Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at)
; This source code is licensed under the GNU LGPL
; 
; Please refer to the file COPYING.LIB contained in the distribution for
; licensing conditions		
;
; COPYRIGHT NOTICE
; 
; This file partly contains code that is (c) Intel Corporation, specifically
; the mode detection routine, and the converter to 15 bit (8 pixel
; conversion routine from the mmx programming tutorial pages).
; 
	
BITS 32

GLOBAL _ConvertMMX
GLOBAL _CheckMMX
	
GLOBAL _ConvertMMX32_32RGB888
GLOBAL _ConvertMMX32_16RGB565
GLOBAL _ConvertMMX32_16RGB555

GLOBAL _mmxreturn

 
SECTION .data

ALIGN 16

; Constants for the 32 bit -> 15 bit (RGB555) MMX converter. Each one is a
; 64-bit value made of the same dword twice, so one MMX register handles
; two source pixels at a time.

; Keeps the top five bits of byte 0 and byte 2 of every pixel.
mmx32_rgb555_rb:	times 2 dd 0x00f800f8

; PMADDWD multipliers: low word * 0008h (shift left 3) plus
; high word * 2000h (shift left 13), summed into one dword per pixel.
mmx32_rgb555_add:	times 2 dd 0x20000008

; Keeps the top five bits of byte 1 of every pixel.
mmx32_rgb555_g:		times 2 dd 0x0000f800


; Scratch dword written by _CheckMMX with the EDX result of CPUID leaf 1.
cpu_flags:		dd 0
	
			
SECTION .text


;; _ConvertMMX32_32RGB888: straight copy of one row of 32 bit pixels
;; --------------------------------------------------------------------------
;; In:	esi = source row, edi = destination row, ecx = pixel count
;; Out:	esi/edi advanced by 4 bytes per pixel
;; Uses:	eax, ecx, edx, mm0, mm1
;; Entered by a jump and left by a jump to _mmxreturn (never by ret).
_ConvertMMX32_32RGB888:

	mov edx,ecx		; keep the full count for the tail

	shr ecx,2		; ecx = number of whole 4-pixel groups
	jz .tail		; fewer than 4 pixels in this row

.quad:
	movq mm0,[esi]		; pixels 0 and 1
	movq mm1,[esi+8]	; pixels 2 and 3
	add esi,16

	movq [edi],mm0
	movq [edi+8],mm1
	add edi,16

	dec ecx
	jnz .quad

.tail:
	mov ecx,edx

	and ecx,3		; 0..3 pixels left over
	jz .done

.single:
	mov eax,[esi]
	add esi,4

	mov [edi],eax
	add edi,4

	dec ecx
	jnz .single

.done:
	jmp _mmxreturn

	


;; _ConvertMMX32_16RGB565: placeholder for the 32 bit -> 16 bit (RGB565)
;; converter.
;; Gone for now, it didn't draw correctly AND was slower than the x86 routine.
;;
;; The symbol is still exported, but the body converts nothing: it reads no
;; source pixels, writes no destination pixels and leaves esi/edi untouched
;; before handing control straight back to the row loop at _mmxreturn.
_ConvertMMX32_16RGB565:
	
	jmp _mmxreturn		; no-op: return to the dispatcher immediately

	

	
;; _ConvertMMX32_16RGB555: convert one row of 32 bit pixels to 15 bit
;; --------------------------------------------------------------------------
;; In:	esi = source row (4 bytes/pixel), edi = destination row
;;	(2 bytes/pixel), ecx = pixel count
;; Out:	esi advanced by 4 bytes per pixel, edi by 2 bytes per pixel
;; Uses:	eax, ecx, edx, mm0-mm3, mm5-mm7 (ebx is used but preserved)
;; Entered by a jump and left by a jump to _mmxreturn (never by ret).
;;
;; Per pixel the result is
;;	((px >> 3) & 001fh) | ((px >> 6) & 03e0h) | ((px >> 9) & 7c00h)
;; i.e. the top five bits of source bytes 0, 1 and 2 packed into bits
;; 0-4, 5-9 and 10-14 (presumably blue, green, red of 0x00RRGGBB).
;;
;; The MMX path gets there as follows: bytes 0 and 2 are masked with
;; mmx32_rgb555_rb and PMADDWD with mmx32_rgb555_add moves them to bits
;; 6-10 and 16-20 of the dword; byte 1 masked with mmx32_rgb555_g already
;; sits at bits 11-15; OR-ing both and shifting right by 6 yields the 15 bit
;; value, and PACKSSDW squeezes four of those dwords into four words.
;;
;; Unlike a software-pipelined loop that preloads the next group, this loop
;; only ever reads the 32 bytes of the group it is converting, so nothing
;; beyond the last whole 8-pixel group is touched by the MMX path.
_ConvertMMX32_16RGB555:

	movq mm7,qword [mmx32_rgb555_add]	; PMADDWD multipliers
	movq mm6,qword [mmx32_rgb555_g]		; byte 1 mask
	movq mm5,qword [mmx32_rgb555_rb]	; byte 0 / byte 2 mask

	mov edx,ecx			; save the full pixel count

	and ecx,0fffffff8h		; clear lower three bits
	jz near .tail			; fewer than 8 pixels ("near" keeps
					; non-optimising assemblers from
					; emitting an out-of-range short jump)

.block8:
	; ---- pixels 0..3 -> [edi] -------------------------------------
	movq mm0,[esi]			; pixels 0,1
	movq mm2,[esi+8]		; pixels 2,3

	movq mm1,mm0
	movq mm3,mm2

	pand mm1,mm5			; bytes 0 and 2 of each pixel
	pand mm3,mm5

	pmaddwd mm1,mm7			; byte0<<3 + byte2<<13
	pmaddwd mm3,mm7

	pand mm0,mm6			; byte 1 of each pixel
	pand mm2,mm6

	por mm1,mm0
	por mm3,mm2

	psrld mm1,6			; drop into 15 bit position
	psrld mm3,6

	packssdw mm1,mm3		; 4 dwords -> 4 words, pixels 0..3
	movq [edi],mm1

	; ---- pixels 4..7 -> [edi+8] -----------------------------------
	movq mm0,[esi+16]		; pixels 4,5
	movq mm2,[esi+24]		; pixels 6,7

	movq mm1,mm0
	movq mm3,mm2

	pand mm1,mm5
	pand mm3,mm5

	pmaddwd mm1,mm7
	pmaddwd mm3,mm7

	pand mm0,mm6
	pand mm2,mm6

	por mm1,mm0
	por mm3,mm2

	psrld mm1,6
	psrld mm3,6

	packssdw mm1,mm3		; pixels 4..7
	movq [edi+8],mm1

	add esi,32
	add edi,16

	sub ecx,8
	jnz near .block8

.tail:
	mov ecx,edx

	and ecx,7			; 0..7 pixels left over
	jz .done

	push ebx			; the dispatcher does not save ebx,
					; so keep it intact for the caller

.pixel:
	mov ebx,[esi]
	add esi,4

	mov eax,ebx
	mov edx,ebx

	shr eax,3
	shr edx,6

	and eax,0000000000011111b	; byte 0 -> bits 0-4
	and edx,0000001111100000b	; byte 1 -> bits 5-9

	shr ebx,9

	or eax,edx

	and ebx,0111110000000000b	; byte 2 -> bits 10-14

	or eax,ebx

	mov [edi],ax
	add edi,2

	dec ecx
	jnz .pixel

	pop ebx

.done:
	jmp _mmxreturn

	
		
;; ConvertMMX: run a row converter over every row of a surface
;; --------------------------------------------------------------------------
;; In:	EAX = ConverterInfo*
;;
;; ConverterInfo (addressed through ebp once loaded)
;;   0:	void *s_pixels
;;   4:	int s_width
;;   8:	int s_height
;;  12:	int s_add
;;  16:	void *d_pixels
;;  20:	int d_width
;;  24:	int d_height
;;  28:	int d_add
;;  32:	void (*converter_function)() 
;;
;; For each row the converter is entered by an indirect jump with
;;	esi = current source position, edi = current destination position,
;;	ecx = s_width, ebp = ConverterInfo*
;; and must come back by jumping to _mmxreturn with esi/edi advanced past the
;; pixels it handled. s_add and d_add are then added to reach the next row.
;;
;; Side effects: d_height inside the structure is used as the row counter and
;; is 0 on return after a conversion. ebp is preserved; esi, edi, ecx and
;; whatever the converter uses are not.
;;
;; Nothing is converted when s_width or d_height is zero or negative. Without
;; the height check a zero d_height would wrap on the first decrement and the
;; row loop would run roughly 2^32 times.
_ConvertMMX:

	cmp dword [eax+4],0		; s_width <= 0: nothing to do
	jle endconvert

	cmp dword [eax+24],0		; d_height <= 0: nothing to do
	jle endconvert
	
	push ebp
	mov ebp,eax
	
	mov esi,[ebp+0]			; s_pixels
	mov edi,[ebp+16]		; d_pixels

	
y_loop:	
	mov ecx,[ebp+4]			; pixels per row for the converter

	jmp [ebp+32]			; converter_function, returns via jmp

_mmxreturn:	
	add esi,[ebp+12]		; skip s_add bytes to the next source row
	add edi,[ebp+28]		; skip d_add bytes to the next dest row
	
	dec dword  [ebp+24]		; one row done
	jnz y_loop

	
	pop ebp

endconvert:
	emms				; leave MMX state clean for FPU code
	ret		


	
;; _CheckMMX: probe the processor's feature flags
;; --------------------------------------------------------------------------
;; Out:	eax = 0 if the AC or ID bit of EFLAGS cannot be toggled (no CPUID
;;	available), otherwise the EDX result of CPUID leaf 1. The caller is
;;	expected to test the feature bit it needs (MMX is bit 23 according
;;	to Intel's CPUID documentation).
;; Uses:	eax, ecx, flags. ebx and edx are preserved around CPUID.
;;	The EDX result is also stored in cpu_flags.
;;
;; EFLAGS is put back to the caller's value after each probe, so neither the
;; AC nor the ID bit is left toggled on return.
_CheckMMX:
	pushfd
	pop eax

	mov ecx,eax			; ecx = caller's EFLAGS

	; --- can bit 18 (AC) be changed? If not this is a 386 ---------------
	xor eax,040000h
	push eax

	popfd
	pushfd

	pop eax				; eax = EFLAGS after the attempt

	push ecx
	popfd				; restore the caller's EFLAGS

	xor eax,ecx			; 0 if the bit did not stick
	jz .done			; Processor is 386

	; --- can bit 21 (ID) be changed? If not there is no CPUID -----------
	mov eax,ecx
	xor eax,200000h

	push eax
	popfd
	pushfd

	pop eax

	push ecx
	popfd				; restore the caller's EFLAGS again

	xor eax,ecx
	jz .done

	; --- CPUID leaf 1: feature flags come back in edx -------------------
	push ebx			; CPUID overwrites eax, ebx, ecx, edx
	push edx

	mov eax,1
	cpuid

	mov [cpu_flags],edx
	mov eax,edx

	pop edx
	pop ebx

.done:
	ret
	

