;    OpenGUI - Drawing & Windowing library

;    Copyright (C) 1996,2000  Marian Krivos

;    This library is free software; you can redistribute it and/or
;    modify it under the terms of the GNU Library General Public
;    License as published by the Free Software Foundation; either
;    version 2 of the License, or (at your option) any later version.

;    This library is distributed in the hope that it will be useful,
;    but WITHOUT ANY WARRANTY; without even the implied warranty of
;    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;    Library General Public License for more details.

;    You should have received a copy of the GNU Library General Public
;    License along with this library; if not, write to the Free Software
;    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

;    nezmar@internet.alcatel.sk

;    engine.asm - graphics kernel   


section	.text
				extern	 _transp_color
				extern	 _cx_maxwork
				extern	 _cy_maxwork
				extern	 _cx_work
				extern	 _cy_work
				extern	 _Y_width
				extern	 _X_width
				extern	_clip_x_min
				extern	_clip_y_min
				extern	_clip_x_max
				extern	_clip_y_max
				extern	_mmx_state
				extern	_Image
				extern	_cx_max
				extern	_cy_max
				extern	__CurrColor
				extern	__CurrBkColor
				extern	_fontw
				extern	_fonth
				extern	_fontn
				extern	_font
				extern	_driver
				extern	_mmx
				extern	_videobase
				extern	_videobasesegment
				extern	_VideoSelector
				extern	_lfb
				extern	_bpp
				extern	_granularity
				extern	_ppop

 				global	_draw_hline
				global	draw_hline2
 				global	__fill_rect
 				global	_L1RamToRam
 				global	_L1RamToRamPpop
 				global	_L1RamToVideo8
 				global	_L1VideoToRam8
 				global	_L1RamToVideo2
 				global	_draw_point
 				global	_get_point
				global	_test_mmx
				global	_rdtsc
				global	_set_mmx
				global	_reset_mmx
				global	_is_mmx
 				global	LinuxInit
 				global	_L1Box
 				global	_L1BoxX

				global 	_SetPalette256
				global	_peek
				global	_poke
 				global	_set_ppop
				global	__degraduj
 				global	_CharOutClip
				global	__setvideomode
				global	___dpmi_int
				global inpb
				global inp
				global inpw
				global outpb
				global outp
				global outpw
				global inpl
				global _inpl
				global outpl
				global _outpl
				global vga_setpage
				

%include		"lines.asm"

; inp / inpb - read one byte from an I/O port.
; In:  [esp+4] = port number (only the low 16 bits are used)
; Out: al = byte read; the remaining bits of eax are not modified
; edx is preserved.
inp:
inpb:		push	edx
			mov	dx,[esp+8]		; port argument (at [esp+4] before the push)
			in	al,dx
			pop	edx
			ret
		
; inpw - read one 16-bit word from an I/O port.
; In:  [esp+4] = port number (low 16 bits)
; Out: ax = word read; the upper half of eax is not modified
; edx is preserved.
inpw:		push	edx
			mov	dx,[esp+8]		; port argument (at [esp+4] before the push)
			in	ax,dx
			pop	edx
			ret
; inpl / _inpl - read one 32-bit dword from an I/O port.
; In:  [esp+4] = port number (low 16 bits)
; Out: eax = dword read
; edx is preserved.
_inpl:
inpl:		push	edx
			mov	dx,[esp+8]		; port argument (at [esp+4] before the push)
			in	eax,dx
			pop	edx
			ret

; outp / outpb - write one byte to an I/O port.
; In:  [esp+4] = port number (low 16 bits), [esp+8] = value (low byte)
; Out: al = byte written (the rest of eax is not modified)
; edx is preserved.
outp:
outpb:		push	edx
			mov	dx,[esp+8]		; port  (first argument, shifted by the push)
			mov	al,[esp+12]		; value (second argument)
			out	dx,al
			pop	edx
			ret
		
; outpw - write one 16-bit word to an I/O port.
; In:  [esp+4] = port number (low 16 bits), [esp+8] = value (low word)
; Out: ax = word written (the upper half of eax is not modified)
; edx is preserved.
outpw:		push	edx
			mov	dx,[esp+8]		; port  (first argument, shifted by the push)
			mov	ax,[esp+12]		; value (second argument)
			out	dx,ax
			pop	edx
			ret
; outpl / _outpl - write one 32-bit dword to an I/O port.
; In:  [esp+4] = port number (low 16 bits), [esp+8] = value
; Out: eax = dword written
; edx is preserved.
_outpl:
outpl:		push	edx
			mov	dx,[esp+8]		; port  (first argument, shifted by the push)
			mov	eax,[esp+12]	; value (second argument)
			out	dx,eax
			pop	edx
			ret
ends						; NOTE(review): not a NASM directive; presumably assembled as a stray label - confirm it can be dropped

; _SetPalette256 - send 768 bytes to the palette data port.
; In:  [esp+4] = pointer to the 768 bytes to send
; Writes 0 to port 3C8h (start index), then streams the buffer to port 3C9h
; with rep outsb.  esi is preserved; al, dx and ecx are changed.
; The direction flag is not touched here, so it must be clear on entry.
_SetPalette256:	mov		dx,0x3c8
				push	esi
				mov		esi,[esp+8]		; buffer argument (at [esp+4] before the push)
				xor		al,al			; first palette index = 0
				out		dx,al
				inc		dx				; dx = 3C9h
				mov		ecx,768			; 256 entries * 3 bytes
				rep		outsb
				pop		esi
				ret

; _init_ds_alias - obtain a selector from int 31h, ax=000Ah for the current
; code segment and remember it in _ds_alias.
; Out: ax = selector returned by the call (also stored in [_ds_alias])
; If the call returns with carry set the program is terminated through
; int 21h, ah=4Ch.  bx is preserved.
_init_ds_alias:
				push	bx
				mov		bx,cs			; selector to be aliased
				mov		ax,0x000a
				int		0x31
				jnc		ds_alias_ok
				mov		ah,0x4c			; failure: terminate the program
				int		0x21
ds_alias_ok:	mov		[_ds_alias],ax
				pop		bx
				ret
; ___dpmi_int / ___watcom_int - issue an interrupt through int 31h, ax=0300h.
; In:  [ebp+8]  = interrupt number (loaded whole into ebx)
;      [ebp+12] = pointer to the register structure passed in es:edi
; Out: eax = 0 if int 31h returns with carry clear, -1 if carry is set
; The flags, sp and ss fields of the structure are zeroed before the call.
; ebx, edi, es and ebp are preserved; ecx is zeroed.
___dpmi_int:
___watcom_int:	push	ebp
				mov		ebp,esp
				push	ebx
				push	edi
				push	es
				mov		ebx,[ebp+8]
				mov		ax,[_ds_alias]
				cmp		ax,0
				jne		_int_1
				call	_init_ds_alias	; first use: create the alias selector (returned in ax)
_int_1:			mov		es,ax
				xor		ecx,ecx
				mov		edi,[ebp+12]
				mov		word  [edi+0x20],0		; eflags
				mov		dword [edi+0x2e],0		; esp (the dword store also covers the word at +0x30)
				mov		word  [edi+0x30],0		; ss
				mov		ax,0x300 ; int 31h function number
				stc
				int		0x31
				mov		eax,0			; mov leaves the carry flag intact for the jnc below
				jnc		__int_ok
				dec		eax				; carry set: return -1
__int_ok:		pop		es
				pop		edi
				pop		ebx
				pop		ebp
				ret

; toram8 - copy ecx bytes from es:di to ds:esi (video -> RAM, banked variant).
; to call :	es:di  = source (es:edi when assembled with LINUX)
;			ds:esi = destination
;			ecx	   = byte counter
; after	  :	es:di &	ds:esi -> current position
;			eax	& ecx changed
;			ebx	& edx unchanged
;			mm0	changed	if MMX extension enabled
; Bytes are copied singly until edi is 4-byte aligned.  Once aligned and with
; at least 8 bytes left, the bulk is moved 8 bytes at a time (MMX) or 4 bytes
; at a time, and the remaining bytes are copied singly.
toram8:			and		ecx,ecx
				jz		rtrn	 		; return if	nothing	to do
toram88:		test	edi,3			; is edi 4-byte aligned?
				je		L30s
%ifdef LINUX
toram7:			mov		al,[es:edi]
%else
toram7:			mov		al,[es:di]
%endif
				mov		[esi],al
				inc		esi
				inc		edi
				dec		ecx
				jnz		toram8
rtrn:			ret

L30s:			cmp		ecx,8			; addr is rounded, less	than 8 byte?
				jb		toram7			; yes, back	to loop	without	test [edi]

				push	ebx
				mov		ebx,ecx			; keep the full count for the tail computation
				call	_is_mmx
				jz		L30				; no, goto no MMX

				shr		ecx,3			; yes, run MMX routine
%ifdef LINUX
tram11:			movq	mm0,[es:edi]
%else
tram11:			movq	mm0,[es:di]
%endif
				movq	[esi],mm0
				add		esi,8
				add		edi,8
				dec		ecx
				jnz		tram11
				and		ebx,7			; do rest bytes
				jmp		tram22

L30:			shr		cx,2			; dword count; only the low 16 bits of ecx are used here
%ifdef LINUX
L31:			mov		eax,[es:edi]
%else
L31:			mov		eax,[es:di]
%endif
				mov		[esi],eax
				add		esi,4
				add		edi,4
				dec		cx
				jne		L31
				and		ebx,3			; bytes left after the dword loop
tram22:			je		L33				; flags come from the preceding "and ebx,..."
%ifdef LINUX
tram33:			mov		al,[es:edi]
%else
tram33:			mov		al,[es:di]
%endif
				mov		[esi],al
				inc		esi
				inc		edi
				dec		ebx
				jnz		tram33
L33:			pop		ebx
				ret

; ltoram8 - copy ecx bytes from es:edi to ds:esi using full 32-bit addressing
; (called from the linear path of _L1VideoToRam8).
; In:  es:edi = source, ds:esi = destination, ecx = byte counter
; Out: edi and esi point past the copied data; eax and ecx are changed,
;      ebx is preserved, mm0 is changed when the MMX path runs.
; Same structure as toram8: bytes until edi is 4-byte aligned, then 8- or
; 4-byte moves, then the remaining bytes.
ltoram8:		and		ecx,ecx
				jz		lrtrn	 		; return if	nothing	to do
ltoram88:		test	edi,3			; is edi 4-byte aligned?
				je		lL30s
ltoram7:		mov		al,[es:edi]
				mov		[esi],al
				inc		esi
				inc		edi
				dec		ecx
				jnz		ltoram8
lrtrn:			ret

lL30s:			cmp		ecx,8			; addr is rounded, less	than 8 byte?
				jb		ltoram7			; yes, back	to loop	without	test [edi]

				push	ebx
				mov		ebx,ecx			; keep the full count for the tail computation
				call	_is_mmx
				jz		lL30				; no, goto no MMX

				shr		ecx,3			; yes, run MMX routine
ltram11:		movq	mm0,[es:edi]
				movq	[esi],mm0
				add		esi,8
				add		edi,8
				dec		ecx
				jnz		ltram11
				and		ebx,7			; do rest bytes
				jmp		ltram22

lL30:			shr		ecx,2			; number of whole dwords
lL31:			mov		eax,[es:edi]
				mov		[esi],eax
				add		esi,4
				add		edi,4
				dec		ecx
				jne		lL31
				and		ebx,3			; bytes left after the dword loop
ltram22:		je		lL33			; flags come from the preceding "and ebx,..."
ltram33:		mov		al,[es:edi]
				mov		[esi],al
				inc		esi
				inc		edi
				dec		ebx
				jnz		ltram33
lL33:			pop		ebx
				ret

; tset - copy ecx bytes from ds:esi to es:di (RAM -> video, plain SET).
; to call :	es:di  = dest (es:edi when assembled with LINUX)
;			ds:esi = src
;			ecx	   = byte counter
; after	  :	es:di &	ds:esi -> current position
;			eax	& ecx changed
;			ebx	& edx unchanged
;			mm0	changed	if MMX extension enabled
; Bytes are stored singly until edi is 4-byte aligned; with at least 8 bytes
; left the bulk goes through movq (MMX) or rep movsd, then the tail bytes.
tset:			and		ecx,ecx
				je		rtrn8			; nothing to do
tset88:			test	edi,3			; is edi 4-byte aligned?
				je		tset6
tset7:			mov		al,[esi]
%ifdef LINUX
				mov		[es:edi],al
%else
				mov		[es:di],al
%endif
				inc		esi
				inc		edi
				dec		ecx
				jnz		tset88
rtrn8:			ret

tset6:			cmp		ecx,8			; addr is rounded, less	than 8 byte?
				jb		tset7			; yes, back	to loop	without	test [edi]

tset0:			push	ebx
				mov		ebx,ecx			; keep the full count for the tail computation
				call	_is_mmx
				jz		tset1			; no, goto no MMX

				shr		ecx,3			; yes, run MMX routine
tset11:			movq	mm0,[esi]
%ifdef LINUX
				movq	[es:edi],mm0
%else
				movq	[es:di],mm0
%endif
				add		esi,8
				add		edi,8
				dec		ecx
				jnz		tset11
				and		ebx,7			; do rest bytes
				jmp		tset22

tset1:			shr		ecx,2			; number of whole dwords

; copy ecx*4 bytes from ds:esi -> es:edi

tset3:
%ifndef LINUX
				push	edi
				and		edi,0xFFFF		; rep movsd addresses through edi: keep only the 16-bit offset
%endif
				rep		movsd
%ifndef LINUX
				and		dword [esp],0xFFFF0000	; saved edi: keep the upper half ...
				add		dword [esp],edi			; ... and add the advanced offset
				pop		edi
%endif
				and		ebx,3			; bytes left after the dword copy
tset22:			je		tset4			; flags come from the preceding "and ebx,..."
tset5:			mov		al,[esi]
%ifdef LINUX
				mov		[es:edi],al
%else
				mov		[es:di],al
%endif
				inc		esi
				inc		edi
				dec		ebx
				jnz		tset5
tset4:			pop		ebx
rtrn9:			ret


; tand - AND ecx bytes from ds:esi into es:di (dest = dest AND src).
; to call :	es:edi  = dest (addressed as es:di unless assembled with LINUX)
;			ds:esi = src
;			ecx	   = byte counter
; after	  :	es:edi & ds:esi -> current position
;			eax	& ecx changed
;			ebx	& edx unchanged
;			mm0	changed	if MMX extension enabled
; Same structure as tset: bytes until edi is 4-byte aligned, then 8- or
; 4-byte steps, then the remaining bytes.

tand:			and		ecx,ecx
				je		rtrn4			; nothing to do
tand88:			test	edi,3			; is edi 4-byte aligned?
				je		tand6
tand7:			mov		al,[esi]
%ifdef LINUX
				and		[es:edi],al
%else		
				and		[es:di],al
%endif		
				inc		esi
				inc		edi
				dec		ecx
				jnz		tand88
rtrn4:			ret

tand6:			cmp		ecx,8			; addr is rounded, less	than 8 byte?
				jb		tand7			; yes, back	to loop	without	test [edi]

tand0:			push	ebx
				mov		ebx,ecx			; keep the full count for the tail computation
				call	_is_mmx
				jz		tand1			; no, goto no MMX

				shr		ecx,3			; yes, run MMX routine
tand11:			movq	mm0,[esi]
%ifdef LINUX
				pand	mm0,[es:edi]
				movq	[es:edi],mm0
%else		
				pand	mm0,[es:di]
				movq	[es:di],mm0
%endif		
				add		esi,8
				add		edi,8
				dec		ecx
				jnz		tand11
				and		ebx,7			; do rest bytes
				jmp		tand22

tand1:			shr		ecx,2			; number of whole dwords
tand3:			mov		eax,[esi]
%ifdef LINUX
				and		[es:edi],eax
%else		
				and		[es:di],eax
%endif		
				add		esi,4
				add		edi,4
				dec		ecx
				jnz		tand3
				and		ebx,3			; bytes left after the dword loop
tand22:			je		tand4			; flags come from the preceding "and ebx,..."
tand5:			mov		al,[esi]
%ifdef LINUX
				and		[es:edi],al
%else				 
		and		[es:di],al
%endif		
				inc		esi
				inc		edi
				dec		ebx
				jnz		tand5
tand4:			pop		ebx
				ret


; txor - XOR ecx bytes from ds:esi into es:di (dest = dest XOR src).
; to call :	es:di  = dest (es:edi when assembled with LINUX)
;			ds:esi = src
;			ecx	   = byte counter
; after	  :	es:di &	ds:esi -> current position
;			eax	& ecx changed
;			ebx	& edx unchanged
;			mm0	changed	if MMX extension enabled
; Same structure as tset: bytes until edi is 4-byte aligned, then 8- or
; 4-byte steps, then the remaining bytes.

txor:			and		ecx,ecx
				je		rtrn6			; nothing to do
txor88:			test	edi,3			; is edi 4-byte aligned?
				je		txor6
txor7:			mov		al,[esi]
%ifdef LINUX
				xor		[es:edi],al
%else		
				xor		[es:di],al
%endif		
				inc		esi
				inc		edi
				dec		ecx
				jnz		txor88
rtrn6:			ret

txor6:			cmp		ecx,8			; addr is rounded, less	than 8 byte?
				jb		txor7			; yes, back	to loop	without	test [edi]

txor0:			push	ebx
				mov		ebx,ecx			; keep the full count for the tail computation
				call	_is_mmx
				jz		txor1			; no, goto no MMX

				shr		ecx,3			; yes, run MMX routine
txor11:			movq	mm0,[esi]
%ifdef LINUX
				pxor	mm0,[es:edi]
				movq	[es:edi],mm0
%else
				pxor	mm0,[es:di]
				movq	[es:di],mm0
%endif		
				add		esi,8
				add		edi,8
				dec		ecx
				jnz		txor11
				and		ebx,7			; do rest bytes
				jmp		txor22

txor1:			shr		ecx,2			; number of whole dwords
				push	edx				; edx is used as scratch in the dword loop
txor3:			mov		eax,[esi]
				add		esi,4
%ifdef LINUX
				mov		edx,[es:edi]
				xor		eax,edx
				mov		[es:edi],eax
%else		
				mov		edx,[es:di]
				xor		eax,edx
				mov		[es:di],eax
%endif		
				add		edi,4
				dec		ecx
				jnz		txor3
				pop		edx
				and		ebx,3			; bytes left after the dword loop
txor22:			je		txor4			; flags come from the preceding "and ebx,..."
txor5:			mov		al,[esi]
%ifdef LINUX
				xor		[es:edi],al
%else		
				xor		[es:di],al
%endif		
				inc		esi
				inc		edi
				dec		ebx
				jnz		txor5
txor4:			pop		ebx
				ret

; tor - OR ecx bytes from ds:esi into es:di (dest = dest OR src).
; to call :	es:di  = dest (es:edi when assembled with LINUX)
;			ds:esi = src
;			ecx	   = byte counter
; after	  :	es:di &	ds:esi -> current position
;			eax	& ecx changed
;			ebx	& edx unchanged
;			mm0	changed	if MMX extension enabled
; Same structure as tset: bytes until edi is 4-byte aligned, then 8- or
; 4-byte steps, then the remaining bytes.

tor:			and		ecx,ecx
				je		rtrn5			; nothing to do
tor88:			test	edi,3			; is edi 4-byte aligned?
				je		tor6
tor7:			mov		al,[esi]
%ifdef LINUX
				or	   [es:edi],al
%else		
				or	   [es:di],al
%endif		
				inc		esi
				inc		edi
				dec		ecx
				jnz		tor88
rtrn5:			ret

tor6:			cmp		ecx,8			; addr is rounded, less	than 8 byte?
				jb		tor7			; yes, back	to loop	without	test [edi]

tor0:			push	ebx
				mov		ebx,ecx			; keep the full count for the tail computation
				call	_is_mmx
				jz		tor1			; no, goto no MMX

				shr		ecx,3			; yes, run MMX routine
tor11:			movq	mm0,[esi]
%ifdef LINUX
				por	mm0,[es:edi]
				movq	[es:edi],mm0
%else		
				por	mm0,[es:di]
				movq	[es:di],mm0
%endif		
				add		esi,8
				add		edi,8
				dec		ecx
				jnz		tor11
				and		ebx,7			; do rest bytes
				jmp		tor22

tor1:			shr		ecx,2			; number of whole dwords
tor3:			mov		eax,[esi]
%ifdef LINUX
				or	   [es:edi],eax
%else		
				or	   [es:di],eax
%endif		
				add		esi,4
				add		edi,4
				dec		ecx
				jnz		tor3
				and		ebx,3			; bytes left after the dword loop
tor22:			je		tor4			; flags come from the preceding "and ebx,..."
tor5:			mov		al,[esi]
%ifdef LINUX
				or	   [es:edi],al
%else		
				or	   [es:di],al
%endif		
				inc		esi
				inc		edi
				dec		ebx
				jnz		tor5
tor4:			pop		ebx
				ret

; _CharOutClip - render one glyph, 8 bytes per row, into a byte-per-pixel buffer.
; Taken only when byte [_fontw] == 8 and byte [_bpp] == 1; every other case
; is handed to _CharOutClip12.
; In:  [esp+4]  = pointer that is written (loaded into edi)
;      [esp+8]  = pointer that is read    (loaded into esi), 8 bytes per row
;      [esp+12] = value added to edi after each row
; Each output dword is (src AND foreground) OR (NOT src AND background), with
; __CurrColor / __CurrBkColor replicated into all four bytes.
; presumably the source holds one 00h/FFh mask byte per pixel - TODO confirm
; [_fonth] rows are produced.  edi, esi, ebx, ecx and ebp are preserved;
; eax and edx are changed.
_CharOutClip:   cmp		byte [_fontw], 8
				jnz		near _CharOutClip12
				cmp		byte [_bpp], 1
				jnz		near _CharOutClip12
; optimize for 8bit and 8 pixels wide
				mov		edx,[esp+8]
				mov		eax,[esp+4]
				push	edi
				push	esi
				push	ebx
				push	ecx
				push	ebp
				cld
				mov		edi,eax		; first argument: written to
				mov		esi,edx		; second argument: read from
				mov		ecx,[_fonth]; microline	counter
				xchg	eax,ebx
				mov		al,[__CurrColor]	; ebx
				mov		ah,al
				push	ax			; two 16-bit pushes followed by one 32-bit pop
				push	ax			; replicate the colour byte into all of eax
				pop		eax
				xchg	eax,ebx		; ebx = foreground pattern
				xchg	eax,edx
				mov		al,[__CurrBkColor]	; edx
				mov		ah,al
				push	ax
				push	ax
				pop		eax
				xchg	eax,edx		; edx = background pattern
				push	ds
				pop		es
				call	_is_mmx
				jz		L127		; no MMX
				mov		[mmxtmp],ebx
				mov		[mmxtmp+4],ebx
				movq	mm0,[mmxtmp]	; foreground
				mov		[mmxtmp],edx
				mov		[mmxtmp+4],edx
				movq	mm1,[mmxtmp]	; background
L127_2:			movq	mm3,[esi]		; get font 8 byte
				movq	mm5,mm3
				pand	mm3,mm0			; src AND foreground
				pandn	mm5,mm1			; NOT src AND background
				por		mm3,mm5
				add		esi,8
				movq	[edi],mm3
				add		edi,[esp+32]	; third argument (five registers are pushed)
				dec		ecx
				jnz		L127_2
				jmp		L127_1

L127:			push	ecx
				push	edi
				lodsd					; first four bytes of the row
				mov		ebp,eax
				and		eax,ebx
				xchg	eax,ebp			; ebp = src AND foreground, eax = src
				not		eax
				and		eax,edx			; NOT src AND background
				or		eax,ebp
				stosd
				lodsd					; second four bytes of the row
				mov		ebp,eax
				and		eax,ebx
				xchg	eax,ebp
				not		eax
				and		eax,edx
				or		eax,ebp
				stosd
				pop		edi
				add		edi,[esp+36]	; third argument (ecx is still on the stack)
				pop		ecx
				dec		ecx
				jnz		L127
L127_1:			pop		ebp
				pop		ecx
				pop		ebx
				pop		esi
				pop		edi
				ret

; LC8 - load the byte at [eax] and replicate it into all four bytes of eax.
; In:  eax = address of the colour value    Out: eax = replicated value
LC8:			mov		al,[eax]	; ebx
				mov		ah,al
				push	ax			; two 16-bit pushes ...
				push	ax
				pop		eax			; ... popped as one dword
				ret
; LC16 - load the word at [eax] and replicate it into both halves of eax.
; In:  eax = address of the colour value    Out: eax = replicated value
LC16:			mov		ax,[eax]	; ebx
				push	ax			; two 16-bit pushes ...
				push	ax
				pop		eax			; ... popped as one dword
				ret
; LC32 - load the dword at [eax].
; In:  eax = address of the colour value    Out: eax = value
LC32:			mov		eax,[eax]	; ebx
				ret
				
; _CharOutClip12 - general glyph renderer (any font width / pixel size).
; In:  [esp+4]  = pointer that is written (loaded into edi)
;      [esp+8]  = pointer that is read    (loaded into esi)
;      [esp+12] = value added to edi after each row; it is shifted left by
;                 (_bpp >> 1) in place on the stack
; Foreground / background patterns come from __CurrColor / __CurrBkColor
; through the routine in [LoadColor].  Each row consists of
; (_fontw * _bpp) / 4 dwords, each computed as
; (src AND foreground) OR (NOT src AND background).
; Row and dword counters live in cl / ch, so only their low 8 bits are used.
; edi, esi, ebx, ecx and ebp are preserved; eax and edx are changed.
_CharOutClip12:	mov		edx,[esp+8]
				mov		eax,[esp+4]
				push	edi
				push	esi
				push	ebx
				push	ecx
				push	ebp
				mov		cl,[_bpp]
				shr		cl,1
				shl		dword [esp+32],cl ; correct pixelsize
				cld
				mov		edi,eax		; first argument: written to
				mov		esi,edx		; second argument: read from
				xchg	eax,ebx
				mov		eax,__CurrColor	; ebx
				call	[LoadColor]
				xchg	eax,ebx		; ebx = foreground pattern
				xchg	eax,edx
				mov		eax,__CurrBkColor	; edx
				call	[LoadColor]
				xchg	eax,edx		; edx = background pattern
				push	ds
				pop		es
				mov		ecx,[_fonth]; microline	counter
				mov		eax,[_fontw]
				push	edx			; mul overwrites dx
				mul		word [_bpp]
				pop		edx
				shr		al,2   ; div 4
				mov		ch,al		; ch = dwords per row, cl = rows

L12712:			push	ecx
				push	edi
L12713:			lodsd
				mov		ebp,eax
				and		eax,ebx
				xchg	eax,ebp		; ebp = src AND foreground, eax = src
				not		eax
				and		eax,edx		; NOT src AND background
				or		eax,ebp
				stosd
				dec		ch
				jnz		L12713
				pop		edi
				pop		ecx
				add		edi,[esp+32]	; third argument (already scaled above)
				dec		cl
				jnz		L12712

       			pop		ebp
				pop		ecx
				pop		ebx
				pop		esi
				pop		edi
				ret

; rorr - single byte: [es:edi] = [esi] OR [es:edi].  al is changed.
rorr:			mov		al,[esi]
				or		al,byte	[es:edi]
				mov		[es:edi],al
				ret

; randd - single byte: [es:edi] = [esi] AND [es:edi].  al is changed.
randd:			mov		al,[esi]
				and		al,byte	[es:edi]
				mov		[es:edi],al
				ret

; rset - single byte: [es:edi] = [esi].  al is changed.
rset:			mov		al,byte	[esi]
				mov		[es:edi],al
				ret

; rxorr - single byte: [es:edi] = [esi] XOR [es:edi].  al is changed.
rxorr:			mov		al,[esi]
				xor		al,byte	[es:edi]
				mov		[es:edi],al
				ret

; rpluss - single byte: [es:edi] = [esi] + [es:edi] (8-bit wrap-around).  al is changed.
rpluss:			mov		al,[esi]
				add		al,byte	[es:edi]
				mov		[es:edi],al
				ret

; rminuss - single byte: [es:edi] = [esi] - [es:edi] (source minus destination,
; 8-bit wrap-around).  al is changed.
rminuss:		mov		al,[esi]
				sub		al,byte	[es:edi]
				mov		[es:edi],al
				ret

; rnott - single byte: invert [es:edi] in place; the source byte is not read.
rnott:			not		byte [es:edi]
				ret

; rtransp - single byte: copy [esi] to [es:edi] unless it equals the low byte
; of _transp_color.  al is changed.
rtransp:        mov     al,[esi]
                cmp		al,[_transp_color]
				je		rtransp1		; transparent: leave the destination alone
                mov     [es:edi],al
rtransp1:       ret

; set hardware
; L132 - select the bank-switch routine for the current driver and perform
; driver-specific register setup.
; [set_bank_proc] = L257[_driver].  When [vmode] >= 256 and the routine is
; L5 or L3, a VGA register is programmed as shown below; the remaining
; branches all end at the ret without doing anything.
; eax is changed; dx is changed on the L5 / L3 paths.
L132:           mov		eax,[_driver]
				mov		eax,[eax*4+L257]
				mov		[set_bank_proc],eax
				cmp		word [vmode],256
				jc		L134			; mode below 256: nothing more to do
okpaging:		cmp		eax,L5
				jne		L133
				mov		dx,03ceH
				mov		ax,0406H		; al = register index 6, ah = bit mask 04h
				out		dx,al			; select index 6 on port 3CEh
				inc		dl				; data port 3CFh
				in		al,dx
				and		al,0fbH			; clear bit 2 ...
				or		al,ah			; ... then set it from ah
				out		dx,al
				jmp		L134
L133:			cmp		eax,L3
				jne		L1342
				mov		dx,03c4H
				mov		ax,1206H		; index 6 <- 12h on port 3C4h
				out		dx,ax
				jmp		L134
L1342:			cmp		byte [_driver],4 ;; S3 v2
				jne		L1341
				jmp		L134
L1341:			cmp		eax,nVidia ;; nVidia
				jne		L1343

				jmp		L134
L1343:
L134:			ret

%ifndef LINUX
; __setvideomode - switch the display to the requested video mode.
; In:  [esp+4] = mode number (16 bits).  Modes below 100h are set with
;                int 10h, ax = mode; modes from 100h up with int 10h,
;                ax = 4F02h, bx = mode.
; Out: eax = 1 on success, 0 on failure.
; After a successful switch to anything but mode 3 with _lfb == 0, L132 is
; called and a selector for _videobasesegment is requested through int 31h,
; ax = 0002h and stored in _VideoSelector.
; Fix: that call reports failure through the carry flag; previously only
; ax == 0 was treated as an error, so a failed request was returned as
; success with the error code stored as the selector.
; ebx and edi are preserved; edx is changed.
__setvideomode:	mov		ax,[esp+4]
				mov		[vmode],ax
				push	edi
				push	ebx
				mov		bx,ax
				cmp		ax,100h
				jnc		_setv2

				int		10h				; mode below 256
				mov		eax,1			; force	ok
				cmp		word [vmode],3	; is text mode?
				jz		_setv4			;
				jmp		L154 			; no, make fonts etc.

_setv2:			mov		ax,4f02H
				int		10H
				cmp		ax,004fH		; 004Fh = call supported and successful
				mov		eax,1			; mov keeps the flags of the cmp above
				je		L154
				dec		eax				; mode set failed: return 0
				jmp		_setv4

L154:			push	eax				; provisional result (1)
				push	es
				cmp		byte [_lfb],0
                jnz		seg_done		; linear frame buffer: no selector needed here
				call	L132
				push	ebx
				push	edx
				xor		ebx,ebx
				mov		ax,2
				mov		bx,word	[_videobasesegment]
				int		31h
				jnc		_setv_selok
				xor		eax,eax			; carry set: request failed, force the error path
_setv_selok:	mov		[_VideoSelector],ax
				pop		edx
				pop		ebx
seg_done:		pop		es
				pop		edx				; edx = provisional result
				movzx	eax,ax
				test	ax,ax			; test;	zero->err
				jz		_setv4
_setv3:			mov		eax,edx
_setv4:			pop		ebx
				pop		edi
				ret		   	; return eax=0 if error

; _peek - read a dword through an arbitrary selector.
; In:  [esp+4] = selector (low 16 bits), [esp+8] = offset
; Out: eax = dword at selector:offset
; es and edi are preserved.
_peek:			push	es
				push	edi
				mov		es,[esp+12]		; selector argument (two dwords are pushed)
				mov		edi,[esp+16]	; offset argument
				mov		eax,[es:edi]
				pop		edi
				pop		es
				ret

; _poke - write a dword through an arbitrary selector.
; In:  [esp+4] = selector (low 16 bits), [esp+8] = offset, [esp+12] = value
; Out: eax = value written
; es and edi are preserved.
_poke:			push	es
				push	edi
				mov		es,[esp+12]		; selector argument (two dwords are pushed)
				mov		edi,[esp+16]	; offset argument
				mov		eax,[esp+20]	; value argument
				mov		[es:edi],eax
				pop		edi
				pop		es
				ret
%endif

;
; Set draw mode
;
; _set_ppop - select the raster operation and refresh the routine vectors.
; In:  [esp+4] = new mode; values whose low byte is 9 or more are replaced by 0
; Out: eax = previous contents of the dword at _ppop
; Vectors rewritten: DrawPointVector, MemPixel, fill, lfill, GetPointVector,
; LoadColor and L243.  Tables indexed by pixel size use (_bpp - 1).
; With _lfb == 0 DrawPointVector and L243 are then replaced from the tables
; L245 and L246.  edx is changed.
; Only the low byte of _ppop is stored but it is read back as a dword, so
; the upper three bytes of _ppop must already be zero.
;
_set_ppop:		mov		ax,[esp+4]
				cmp		al,9
				jc		_set_ppop2
				xor		eax,eax			; out of range: fall back to mode 0
_set_ppop2:		push	dword [_ppop]	; previous mode, popped into eax on exit
				mov		[_ppop],al

				movzx	eax,byte [_bpp]
				dec		al
				mov		edx,[eax*4+tlL245]	; per-pixel-size table of point routines
				mov		eax,[_ppop]
				mov		eax,[eax*4+edx]
				mov		[DrawPointVector],eax
				mov		[MemPixel],eax
				movzx	eax,byte [_bpp]
				dec		al
				mov		edx,[eax*4+lftable]	; per-pixel-size table of fill routines
				mov		eax,[_ppop]
				mov		eax,[eax*4+edx]
				mov		[fill],eax

				movzx	eax,byte [_bpp]
				dec		al
				mov		eax,[eax*4+lftable]
				mov		eax,[eax]			; entry 0 of the fill table, regardless of _ppop
				mov		[lfill],eax

				movzx	eax,byte [_bpp]
				dec		al
				mov		eax,[eax*4+GetPointVectorTable]
				mov		[GetPointVector],eax
				
				movzx	eax,byte [_bpp]
				dec		al
				mov		eax,[eax*4+LoadColorTable]
				mov		[LoadColor],eax
				
				mov		eax,[_ppop]
				mov		eax,[eax*4+L246l] ; ramtovideo2
				mov		[L243],eax

				cmp		byte [_lfb],0
				jnz		L25

				mov		eax,[_ppop]
				mov		eax,[eax*4+L245]
				mov		[DrawPointVector],eax

				movzx	eax, byte [_bpp]	; NOTE(review): indexed by _bpp (without dec) while L246l above is indexed by _ppop - confirm intended
				mov		eax,[eax*4+L246]
				mov		[L243],eax

L25:			pop		eax
				ret

; _L1BoxX - fill a rectangle in memory through the routine in [fill].
; In:  [esp+4]  = destination pointer
;      [esp+8]  = width in pixels   (shifted left by _bpp >> 1 on the stack)
;      [esp+12] = number of rows
;      [esp+16] = value added to edi after each row, in pixels (scaled likewise)
; The colour is loaded from __CurrColor through [LoadColor]; es is set to ds
; and not restored.  ebx and edi are preserved.
; The row loop is "dec ebx / jnz", so a row count of 0 is not handled.
_L1BoxX:		push	ebx
				push	edi
				cld
				call	_set_mmx
				mov		cl,[_bpp]
				shr		cl,1
				shl		dword [esp+16],cl ; correct pixelsize
				shl		dword [esp+24],cl ; correct pixelsize
				mov		ebx,[esp+20]	; rows
				mov		ecx,[esp+16]	; bytes per row
				mov		edi,[esp+12]	; destination
				mov		eax,__CurrColor
				call	[LoadColor]
				push	ds
				pop		es
_L1BoxX1:		push	ebx
				push	ecx
				call	[fill]
				pop		ecx
				pop		ebx
				add		edi,[esp+24]
				dec		ebx
				jnz		_L1BoxX1
				call	_reset_mmx
				pop		edi
				pop		ebx
				ret

; _L1Box - fill a rectangle in memory through the routine in [lfill]
; (the entry-0 fill routine chosen by _set_ppop, independent of _ppop).
; In:  [esp+4]  = destination pointer
;      [esp+8]  = width in pixels   (shifted left by _bpp >> 1 on the stack)
;      [esp+12] = number of rows
;      [esp+16] = value added to edi after each row, in pixels (scaled likewise)
; The colour is loaded from __CurrColor through [LoadColor]; es is set to ds
; and not restored.  ebx and edi are preserved.
; The row loop is "dec ebx / jnz", so a row count of 0 is not handled.
_L1Box:			push	ebx
				push	edi
				call	_set_mmx
				cld
				mov		cl,[_bpp]
				shr		cl,1
				shl		dword [esp+16],cl ; correct pixelsize
				shl		dword [esp+24],cl ; correct pixelsize
				mov		ebx,[esp+20]	; rows
				mov		ecx,[esp+16]	; bytes per row
				mov		edi,[esp+12]	; destination
				mov		eax,__CurrColor
				call	[LoadColor]
				push	ds
				pop		es
_L1Box1:		push	ebx
				push	ecx
				call	[lfill]
				pop		ecx
				pop		ebx
				add		edi,[esp+24]
				dec		ebx
				jnz		_L1Box1
				call	_reset_mmx
				pop		edi
				pop		ebx
				ret

; _L1RamToRamPpop - copy a rectangle between two memory buffers, combining
; source and destination according to the current raster operation _ppop.
; In:  [ebp+8]  = source pointer                       (esi)
;      [ebp+12] = destination pointer                  (edi)
;      [ebp+16] = row width in pixels
;      [ebp+20] = number of rows
;      [ebp+24] = value added to edi after each row, in pixels
;      [ebp+28] = value added to esi after each row, in pixels
; The pixel quantities are shifted left by (_bpp >> 1) in place on the stack.
; _ppop == 0 is forwarded to _L1RamToRam.  Modes 1, 2 and 3 use the XOR, AND
; and OR loops, mode 8 is delegated to lttransp for the whole row, and the
; bytes left after the wide loops go through rtable[_ppop].
; NOTE(review): every other mode takes the plain SET path for the wide part
; while its tail bytes still go through rtable[_ppop] - confirm intended.
; All general registers and es are preserved.
;
; Fixes:
;  * al is loaded with _ppop before the "fewer than 8 bytes" branch; rows of
;    4..7 bytes used to reach the dispatcher at L33x1p with a stale al.
;  * the dword count is computed with "shr ecx,2" because the loops that
;    follow count in the full ecx.
_L1RamToRamPpop:cmp		byte [_ppop],0
				jz		near _L1RamToRam ;; jump to fastest code
				push	ebp
				mov		ebp,esp
				push	eax
				push	ebx
				push	ecx
				push	edx
				push	esi
				push	edi
				mov		cl,[_bpp]
				shr		cl,1
				shl		dword [ebp+16],cl ; correct pixelsize
;				shl		dword [ebp+20],cl
				shl		dword [ebp+24],cl
				shl		dword [ebp+28],cl
				mov		ebx,[ebp+16]	; bytes per row
				mov		ecx,[ebp+20]	; rows
				mov		esi,[ebp+8]
				mov		edi,[ebp+12]
				push	es
				push	ds
				pop		es
				cld
				call	_set_mmx
L34p:			push	ecx
				push	ebx
				mov		ecx,ebx
				cmp		ecx,4
				jb		near L35p		; fewer than 4 bytes: byte loop only

				mov		al,[_ppop]		; mode for the dispatcher at L33x1p
				cmp		ecx,8			; minimum 8 bytes for mmx
				jc		near L33x1p
				call	_is_mmx
				mov		al,[_ppop]		; reloaded after the call; mov keeps ZF for the jz below
				jz		near L33x1p
				shr		ecx,3			; number of 8-byte groups
				cmp		al,1
				je		L32x1pxor
				cmp		al,2
				je		L32x1pand
				cmp		al,3
				je		L32x1por
                cmp     al,8
                je      near L35transpx

L32x1pset:		movq	mm0,[esi]
				movq	[es:edi],mm0
				add		esi,8
				add		edi,8
				dec		ecx
				jnz		L32x1pset
				and		ebx,7
				jmp		L33x2p

L32x1pxor:		movq	mm0,[esi]
				pxor	mm0,[es:edi]
				movq	[es:edi],mm0
				add		esi,8
				add		edi,8
				dec		ecx
				jnz		L32x1pxor
				and		ebx,7
				jmp		L33x2p

L32x1pand:		movq	mm0,[esi]
				pand	mm0,[es:edi]
				movq	[es:edi],mm0
				add		esi,8
				add		edi,8
				dec		ecx
				jnz		L32x1pand
				and		ebx,7
				jmp		L33x2p

L32x1por:		movq	mm0,[esi]
				por		mm0,[es:edi]
				movq	[es:edi],mm0
				add		esi,8
				add		edi,8
				dec		ecx
				jnz		L32x1por
				and		ebx,7
				jmp		L33x2p

L33x1p:			shr		ecx,2			; number of whole dwords (al = _ppop here)
				cmp		al,1
				je		L35xor
				cmp		al,2
				je		L35and
				cmp		al,3
				je		L35or
                cmp     al,8
                je      L35transpx
; else set mode
				repe	movsd
				jmp		L35p

L35xor:			mov		eax,[esi]
				add		esi,4
				xor		eax,[es:edi]
				mov		[es:edi],eax
				add		edi,4
				dec		ecx
				jnz		L35xor
				jmp		L35p

L35and:			mov		eax,[esi]
				add		esi,4
				and		eax,[es:edi]
				mov		[es:edi],eax
				add		edi,4
				dec		ecx
				jnz		L35and
				jmp		L35p

L35or:			mov		eax,[esi]
				add		esi,4
				or		eax,[es:edi]
				mov		[es:edi],eax
				add		edi,4
				dec		ecx
				jnz		L35or
				jmp		L35p

L35transpx:     mov		ecx,ebx			; whole row, in bytes
				call	lttransp
				jmp		L36p
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; The code from here to the final "jmp L35p" is unreachable (it follows an
; unconditional jump and its labels are used only inside it).  It is kept
; unchanged as the earlier inline transparent copy.
				push	ebx
				mov		ebx,[_transp_color]
				shr		ecx,2
L35transp:      mov     eax,[esi]
				mov     edx,[es:edi]
				cmp		eax,ebx
				je		nomove1

				cmp		al,0
				jne		nomove2
				mov		al,dl
nomove2:		cmp		ah,0
				jne		nomove3
				mov		ah,dh
nomove3:		rol		eax,16
				rol		edx,16
				cmp		al,0
				jne		nomove4
				mov		al,dl
nomove4:		cmp		ah,0
				jne		nomove5
				mov		ah,dh
nomove5:        rol		eax,16
				rol		edx,16
                mov     [es:edi],eax
nomove1:        add		esi,4
                add		edi,4
                dec		ecx
				jnz		L35transp
				pop		ebx
                jmp     L35p

L35p:			and		ebx,3			; bytes left after the dword loops
L33x2p:			mov		ecx,ebx			; mov keeps the flags of the "and ebx,..." just executed
				je		L36p
				mov		ebx,[_ppop]
L355:			call	[rtable+ebx*4]	; per-byte raster operation
				inc		esi
				inc		edi
				dec		cx
				jnz		L355
L36p:			add		esi,[ebp+28]
				add		edi,[ebp+24]
				pop		ebx
				pop		ecx
				dec		ecx
				jnz		near L34p
				call	_reset_mmx
				pop		es
				pop		edi
				pop		esi
				pop		edx
				pop		ecx
				pop		ebx
				pop		eax
				pop		ebp
				ret

;; ------------------------------------------------
; _L1RamToVideo2 - copy a rectangle from RAM to video memory through the
; row routine in [L243] (chosen by _set_ppop).
; In:  [ebp+8]  = source pointer                        (esi)
;      [ebp+12] = video destination                     (edi)
;      [ebp+16] = row width in pixels
;      [ebp+20] = number of rows
;      [ebp+24] = value added to esi after each row, in pixels
;      [ebp+28] = value added to edi after each row, in pixels
; The pixel quantities are shifted left by (_bpp >> 1) in place on the stack.
; With _lfb != 0 the rows are written linearly (linRTV2).  Otherwise the
; upper 16 bits of edi are passed in ax to [set_bank_proc] and di addresses
; the 64K window; a row that crosses the window end is written in two parts
; with a bank switch in between.
; ebx, ecx, esi, edi, es and ebp are preserved; eax is changed.
_L1RamToVideo2:	push	ebp
				mov		ebp,esp
				push	ebx
				push	ecx
				call    _set_mmx
				mov		cl,[_bpp]
				shr		cl,1
				shl		dword [ebp+16],cl ; correct pixelsize
;				shl		dword [ebp+20],cl
				shl		dword [ebp+24],cl
				shl		dword [ebp+28],cl
				mov		ebx,[ebp+16]	; bytes per row
				mov		ecx,[ebp+20]	; rows
				push	esi
				push	edi
				push	es
				mov		esi,[ebp+8]
				mov		edi,[ebp+12]
				mov		es,[_VideoSelector]
				cmp		byte [_lfb],0
				jnz		near linRTV2			; use linear code
				cld
%ifdef LINUX
				sub	edi,[_videobase]	; work with an offset relative to the frame buffer
%endif		
				rol		edi,10H
				mov		ax,di			; ax = upper 16 bits of the offset = bank
				rol		edi,10H
				call	[set_bank_proc]
L41:			push	ecx
				push	ebx
				mov		ecx,ebx
				add		di,cx			; end offset inside the window (wraps if the row crosses it)
				mov		bx,di			; bx = wrapped end = bytes that belong to the next bank
				sub		di,cx			; restore di; a borrow occurs exactly when the add wrapped
				jae		L42				; no wrap: the row lies in one bank
				sub		cx,bx			; bytes that still fit into the current bank
%ifdef LINUX
				push	edi
				and	edi,0xFFFF	;
				add	edi,[_videobase]
%endif		
				call	[L243]
%ifdef LINUX
				pop		edi
				and		edi,0xFFFF0000
				add		edi,0x10000		; start of the next bank
%endif		
				rol		edi,10H
				mov		ax,di
				rol		edi,10H
				call	[set_bank_proc]
				mov		cx,bx			; remaining bytes of the row
%ifdef LINUX
				push	edi
				push	ecx
				and	edi,0xFFFF
				add	edi,[_videobase]
%endif		
				call	[L243]
				jmp		L43
L42:
%ifdef LINUX
				push	edi
				push	ecx
				and	edi,0xFFFF
				add	edi,[_videobase]
%endif		
				call	[L243]
L43:
%ifdef LINUX
				pop		ecx
				pop		edi
				add		edi,ecx			; advance the saved offset by the bytes just written
%endif		
				add		di,[ebp+28]		; step to the next row inside the window
				jae		L44
				add		edi,00010000H	; carried out of the window: next bank
				rol		edi,10H
				mov		ax,di
				rol		edi,10H
				call	[set_bank_proc]
L44:			add		esi,[ebp+24]
				pop		ebx
				pop		ecx
				dec		ecx
				jne		near L41
RTV2end:
				call	_reset_mmx
				pop		es
				pop		edi
				pop		esi
				pop		ecx
				pop		ebx
				pop		ebp
				ret
; linear code ram to video SET
linRTV2:		push	ecx
				push	ebx
				mov		ecx,ebx			; bytes per row
				call	[L243]
				add		edi,[ebp+28]
     			add		esi,[ebp+24]
				pop		ebx
				pop		ecx
				dec		ecx
				jne		near linRTV2
				jmp		RTV2end

;; -------------------------------------------
;
;
; _L1RamToVideo8 - copy a rectangle from RAM to video memory with a plain
; SET (tset in banked mode, ltset in linear mode).
; In:  [ebp+8]  = source pointer                        (esi)
;      [ebp+12] = video destination                     (edi)
;      [ebp+16] = row width in pixels
;      [ebp+20] = number of rows
;      [ebp+24] = value added to esi after each row, in pixels
;      [ebp+28] = value added to edi after each row, in pixels
; The pixel quantities are shifted left by (_bpp >> 1) in place on the stack.
; Bank handling is the same as in _L1RamToVideo2: the upper 16 bits of edi
; select the bank, di addresses the 64K window, and a row that crosses the
; window end is written in two parts.
; ebx, ecx, esi, edi, es and ebp are preserved; eax is changed.
_L1RamToVideo8:	push	ebp
				mov		ebp,esp
				push	ebx
				push	ecx
				call	_set_mmx
				mov		cl,[_bpp]
				shr		cl,1
				shl		dword [ebp+16],cl ; correct pixelsize
;				shl		dword [ebp+20],cl
				shl		dword [ebp+24],cl
				shl		dword [ebp+28],cl
				mov		ebx,[ebp+16]	; bytes per row
				mov		ecx,[ebp+20]	; rows
				push	esi
				push	edi
				mov		esi,[ebp+8]
				mov		edi,[ebp+12]
				push	es
				mov		es,[_VideoSelector]
				cld
				cmp		byte [_lfb],0
				jnz		near linRTV8			; use linear code
%ifdef LINUX
				sub	edi,[_videobase]	; work with an offset relative to the frame buffer
%endif		
				rol		edi,10H
				mov		ax,di			; ax = upper 16 bits of the offset = bank
				rol		edi,10H
				call	[set_bank_proc]
L37x:			push	ecx	; edi = full relative address
				push	ebx
				mov		ecx,ebx
				add		di,cx			; end offset inside the window (wraps if the row crosses it)
				mov		bx,di			; bx = wrapped end = bytes that belong to the next bank
				sub		di,cx			; restore di; a borrow occurs exactly when the add wrapped
				jae		L38x			; no wrap: the row lies in one bank
				sub		cx,bx	; the row crosses the bank end, so write it in two parts
%ifdef LINUX
				push	edi
				and	edi,0xFFFF	;
				add	edi,[_videobase]
%endif		
				call	tset
%ifdef LINUX
				pop		edi
				and		edi,0xFFFF0000
				add		edi,0x10000		; start of the next bank
%endif		
				rol		edi,10H
				mov		ax,di
				rol		edi,10H
				call	[set_bank_proc]
				mov		cx,bx			; remaining bytes of the row
%ifdef LINUX
				push	edi
				push	ecx
				and	edi,0xFFFF
				add	edi,[_videobase]
%endif		
				call	tset
				jmp		L39x
L38x:			
%ifdef LINUX
				push	edi
				push	ecx
				and	edi,0xFFFF
				add	edi,[_videobase]
%endif		
				call	tset
L39x:			
%ifdef LINUX
				pop		ecx
				pop		edi
				add		edi,ecx			; advance the saved offset by the bytes just written
%endif		
				add		di,[ebp+28]		; step to the next row inside the window
				jae		L40x
				add		edi,0x10000		; carried out of the window: next bank
				rol		edi,10H
				mov		ax,di
				rol		edi,10H
				call	[set_bank_proc]
L40x:			add		esi,[ebp+24]
				pop		ebx
				pop		ecx
				dec		ecx
				jnz		near L37x
RTVend:
				call	_reset_mmx
				pop		es
				pop		edi
				pop		esi
				pop		ecx
				pop		ebx
				pop		ebp
				ret

; linear code ram to video SET
linRTV8:		push	ecx
				push	ebx
				mov		ecx,ebx			; bytes per row
				call	ltset
				add		edi,[ebp+28]
     			add		esi,[ebp+24]
				pop		ebx
				pop		ecx
				dec		ecx
				jne		near linRTV8
				jmp		RTVend

;; -------------------------------------------

; _L1VideoToRam8 - copy a rectangle from video memory to RAM (toram8 in
; banked mode, ltoram8 in linear mode).
; In:  [ebp+8]  = video source                          (edi)
;      [ebp+12] = RAM destination                       (esi)
;      [ebp+16] = row width in pixels
;      [ebp+20] = number of rows
;      [ebp+24] = value added to esi after each row, in pixels
;      [ebp+28] = value added to edi after each row, in pixels
; The pixel quantities are shifted left by (_bpp >> 1) in place on the stack.
; Bank handling mirrors _L1RamToVideo8: the upper 16 bits of edi select the
; bank, di addresses the 64K window, and a row that crosses the window end
; is read in two parts.
; ebx, ecx, esi, edi, es and ebp are preserved; eax is changed.
_L1VideoToRam8:	push	ebp
				mov		ebp,esp
				push	ebx
				push	ecx
				push	esi
				push	edi
%ifndef NO_MMX
				call	_set_mmx
%endif
				mov		cl,[_bpp]
				shr		cl,1
				shl		dword [ebp+16],cl ; correct pixelsize
;				shl		dword [ebp+20],cl
				shl		dword [ebp+24],cl
				shl		dword [ebp+28],cl
				mov		ecx,[ebp+20]	; rows
				mov		ebx,[ebp+16]	; bytes per row
				mov		edi,[ebp+8]		; video side
				mov		esi,[ebp+12]	; RAM side
				push	es
				mov		es,[_VideoSelector]
				cld
				cmp		byte [_lfb],0
				jnz		near linVTR8			; use linear code
%ifdef LINUX
				sub	edi,[_videobase]	; work with an offset relative to the frame buffer
%endif		
				rol		edi,10H
				mov		ax,di			; ax = upper 16 bits of the offset = bank
				rol		edi,10H
				call	[set_bank_proc]
L50:			push	ecx
				push	ebx
				mov		ecx,ebx
				add		di,cx			; end offset inside the window (wraps if the row crosses it)
				mov		bx,di			; bx = wrapped end = bytes that belong to the next bank
				sub		di,cx			; restore di; a borrow occurs exactly when the add wrapped
				jae		L51				; no wrap: the row lies in one bank
				sub		cx,bx			; bytes that still lie in the current bank
%ifdef LINUX
				push	edi
				and	edi,0xFFFF	;
				add	edi,[_videobase]
%endif		
				call	toram8
%ifdef LINUX
				pop		edi
				and		edi,0xFFFF0000
				add		edi,0x10000		; start of the next bank
%endif		
				rol		edi,10H
				mov		ax,di
				rol		edi,10H
				call	[set_bank_proc]
				mov		cx,bx			; remaining bytes of the row
%ifdef LINUX
				push	edi
				push	ecx
				and	edi,0xFFFF
				add	edi,[_videobase]
%endif		
				call	toram8
				jmp		L52
L51:
%ifdef LINUX
				push	edi
				push	ecx
				and	edi,0xFFFF
				add	edi,[_videobase]
%endif		
				call	toram8
L52:
%ifdef LINUX
				pop		ecx
				pop		edi
				add		edi,ecx			; advance the saved offset by the bytes just read
%endif		
				add		di,[ebp+28]		; step to the next row inside the window
				jae		L53
				add		edi,00010000H	; carried out of the window: next bank
				rol		edi,10H
				mov		ax,di
				rol		edi,10H
				call	[set_bank_proc]
L53:			add		esi,[ebp+24]
				pop		ebx
				pop		ecx
				dec		ecx
				jne		near L50

VTRend:
%ifndef NO_MMX
				call	_reset_mmx
%endif
				pop		es
				pop		edi
				pop		esi
				pop		ecx
				pop		ebx
				pop		ebp
				ret

; linear code video to ram SET
linVTR8:		push	ecx
				push	ebx
				mov		ecx,ebx			; bytes per row
				call	ltoram8
				add		edi,[ebp+28]
     			add		esi,[ebp+24]
				pop		ebx
				pop		ecx
				dec		ecx
				jne		near linVTR8
				jmp		VTRend

;; ---------------------------------------------

; _draw_hline - stack-argument entry point for draw_hline2.
; In:  [esp+4] = x, [esp+8] = y, [esp+12] = width in pixels
; The span is clipped and drawn by draw_hline2, bracketed by _set_mmx and
; _reset_mmx.
; Fix: ebx carries the width into draw_hline2 and used to be left modified
; on return; it is now saved and restored, as the other stack-argument
; entry points in this file do.  eax, ecx and edx are changed.
_draw_hline:	push	ebx
				mov		eax,[esp+8]		; x      (arguments shifted by the push)
				mov		edx,[esp+12]	; y
				mov		ebx,[esp+16]	; width
				call	_set_mmx
				call	draw_hline2
				call	_reset_mmx
				pop		ebx
				ret

; draw_hline2 - clip a horizontal span against the x clip range and fill it
; with the current colour.
; In:  eax = x, edx = y, ebx = width in pixels (signed)
; The span is limited to [_clip_x_min, _clip_x_max); y is not clipped here.
; With _lfb != 0 the address is ((_X_width * y + x) << (_bpp >> 1)) +
; _videobase and the span is drawn with one call to [fill].  Otherwise the
; banked path computes a bank:offset pair, calls [set_bank_proc] and splits
; the span when it crosses the end of the 64K window.
; eax, ebx, ecx and edx are changed; edi, es and ebp are preserved.
;
; Changes against the previous version:
;  * the width test is signed (jle), like every other comparison here, so a
;    negative width is rejected instead of reaching [fill];
;  * a span starting exactly at _clip_x_max returns at once (its clipped
;    width was always zero) instead of calling [fill] with a zero count;
;  * the no-op "mov ebx,ebx" and the "sub edi / xor edi,edi" pair whose two
;    %ifdef arms were identical are reduced to the single effective
;    instruction.
draw_hline2:	cmp		ebx,0
				jle		near L147	; null or negative width
				cmp		eax,[_clip_x_max]
				jge		near L147			; starts at or right of the RIGHT border
				add		ebx,eax 	; ebx = x + width (exclusive end)
				cmp		ebx,[_clip_x_min]
				jle		near L147	; ends at or left of the LEFT border
				cmp		ebx,[_clip_x_max]
				jle		okxx1
				mov		ebx,[_clip_x_max]	; cut the end at the right border

okxx1:			sub		ebx,eax		; back to a width
				cmp		eax, dword [_clip_x_min]
				jg		okxx

				add		ebx,eax		; start is left of the clip range:
				sub		ebx,[_clip_x_min]	; shorten the span ...
				mov		eax,[_clip_x_min]	; ... and start at the left border
				
okxx:			cmp		byte [_lfb],0
				jz		bank_draw_line

				mov		ecx,eax		; ecx = x
				mov		eax,[_X_width]
				imul	edx			; eax = _X_width * y
				add		eax,ecx
				mov		ecx,[_bpp]
				shr		ecx,1
				shl		eax,cl		; pixel index -> byte offset
				add		eax,[_videobase]
				shl		ebx,cl		; width -> byte count
				push	edi
				mov		edi,eax
				mov		ecx,ebx
				mov		eax,__CurrColor
				call	[LoadColor]
				cld
                push	es
				mov		es,[_VideoSelector]
				call	[fill]
                pop		es
				pop		edi
				ret

bank_draw_line:	push	ebp
				push	es
				push	edi
				push	ebx			; width, popped again below
				mov		ebx,eax		; ebx = x
				mov		ax,[_X_width]
				mul		dx			; dx:ax = _X_width * y
				add		ax,bx
				adc		dx,0
				mov		di,dx
				rol		edi,10H
				mov		di,ax		; edi = bank:offset
				mov		es,[_VideoSelector]

				mov		eax,__CurrColor
				call	[LoadColor]
				xchg	eax,edx		; eax = bank, edx = colour
				push	eax
				call	[set_bank_proc]
				pop		eax
				xchg	eax,edx		; eax = colour, edx = bank
				cld
				pop		ebx			; width
				and		edi,0000ffffH
				add		di,bx		; end offset inside the window (wraps if the span crosses it)
				movzx	ecx,di		; ecx = wrapped end = pixels that belong to the next bank
				sub		di,bx		; restore di; a borrow occurs exactly when the add wrapped
				jae		L141
				sub		ebx,ecx		; pixels that still fit into the current bank
				push	ecx
				xchg	ecx,ebx
				add	edi,[_videobase]
				call	[fill]
				pop		ebx			; remaining pixels
				inc		dl			; next bank
				xor		edi,edi		; which starts at offset 0
L141:			movzx	ecx,bx
				xchg	eax,edx		; ax = bank for set_bank_proc
				call	[set_bank_proc]
				xchg	eax,edx
				add	edi,[_videobase]
				call	[fill]
				pop		edi
				pop		es
				pop		ebp
L147:			ret

; __fill_rect - fill a rectangle by drawing one clipped span per row.
; In:  [esp+4] = x, [esp+8] = y, [esp+12] = width, [esp+16] = height
; Each row is drawn with draw_hline2 (eax = x, edx = y, ebx = width).
; ebx and ecx are preserved; eax and edx are changed.
; Fix: a height of zero or less now draws nothing.  The row loop is
; "dec ecx / jnz", so a zero height used to wrap around and keep drawing.
__fill_rect:	push	ebx
				push	ecx
				call	_set_mmx
				cld
				mov		ecx,[esp+24]	; height
				mov		ebx,[esp+20]	; width
				mov		edx,[esp+16]	; y
				mov		eax,[esp+12]	; x
				test	ecx,ecx
				jle		__fill_rect_done	; nothing to draw
__fill_rect2:	push	eax
				push	ebx
				push	ecx
				push	edx
				call	draw_hline2			; changes eax, ebx, ecx and edx
				pop		edx
				pop		ecx
				pop		ebx
				pop		eax
				inc		edx					; next row
				dec		ecx
				jnz		__fill_rect2
__fill_rect_done:
				call	_reset_mmx
				pop		ecx
				pop		ebx
				ret

;
; linux	ready
;
; _L1RamToRam - copy a rectangle between two memory buffers (plain SET).
; In:  [ebp+8]  = source pointer                       (esi)
;      [ebp+12] = destination pointer                  (edi)
;      [ebp+16] = row width in pixels
;      [ebp+20] = number of rows
;      [ebp+24] = value added to edi after each row, in pixels
;      [ebp+28] = value added to esi after each row, in pixels
; The pixel quantities are shifted left by (_bpp >> 1) in place on the stack.
; Per row: bytes are copied until edi is 4-byte aligned, then 8 bytes at a
; time through mm0 (16 bytes or more and MMX enabled) or with rep movsd,
; then the remaining bytes with rep movsb.
; All general registers and es are preserved.
; Fix: the dword count is computed with "shr ecx,2".  The previous
; "shr cx,2" left the upper half of ecx untouched while rep movsd counts in
; the full ecx, so a row of 64 KB or more overran both buffers.
_L1RamToRam:	push	ebp
				mov		ebp,esp
				push	eax
				push	ebx
				push	ecx
				push	edx
				push	esi
				push	edi
				mov		cl,[_bpp]
				shr		cl,1
				shl		dword [ebp+16],cl ; correct pixelsize
;				shl		dword [ebp+20],cl
				shl		dword [ebp+24],cl
				shl		dword [ebp+28],cl
				mov		ebx,[ebp+16]	; bytes per row
				mov		ecx,[ebp+20]	; rows
				mov		esi,[ebp+8]
				mov		edi,[ebp+12]
				push	es
				push	ds
				pop		es
				cld
%ifndef NO_MMX
				call	_set_mmx
%endif
L34:			push	ecx
				push	ebx
				and		ebx,ebx
				jz		L36	 		; return if	nothing	to do
toramx: 		test	edi,3		; is edi 4-byte aligned?
				jz		L34xx
        		mov		cl,[esi]
				mov		[edi],cl
				inc		esi
				inc		edi
				dec		ebx
				jnz		toramx
				jmp		L36

L34xx:			mov		ecx,ebx
%ifndef NO_MMX
				cmp		ecx,16			; minimum 16 bytes for mmx
				jc		L33x1
				call	_is_mmx
				jz		L33x1
				shr		ecx,3			; number of 8-byte groups
L32x1:			movq	mm0,[esi]
				movq	[edi],mm0
				add		esi,8
				add		edi,8
				dec		ecx
				jnz		L32x1
				and		ebx,7
				jmp		L33x2
%endif
L33x1:			cmp		ecx,4
				jb		L35
				shr		ecx,2			; number of whole dwords (full 32-bit count)
				repe	movsd
L35:			and		ebx,3			; bytes left after the dword copy
L33x2:			mov		ecx,ebx			; mov keeps the flags of the "and ebx,..." just executed
				je		L36
				repe	movsb

L36:			add		esi,[ebp+28]
				add		edi,[ebp+24]
				pop		ebx
				pop		ecx
				dec		ecx
				jnz		near L34

%ifndef NO_MMX
				call	_reset_mmx
%endif
				pop		es
				pop		edi
				pop		esi
				pop		edx
				pop		ecx
				pop		ebx
				pop		eax
				pop		ebp
				ret


; _draw_point - plot one pixel through the routine in [DrawPointVector].
; In:  [ebp+8] = x, [ebp+12] = y
; x and y are stored in oldx / oldy before clipping.  Nothing is drawn
; unless _clip_x_min <= x < _clip_x_max and _clip_y_min <= y < _clip_y_max.
; The routine is called with es = _VideoSelector and ebp = pixel offset +
; _videobase:
;   _lfb != 0: offset = (_X_width * y + x) << (_bpp >> 1)
;   _lfb == 0: offset = 16-bit _X_width * y + x, as bank:offset in ebp
; ebx, ecx, edi, es and ebp are preserved; eax and edx are changed.
_draw_point:	push	ebp
				mov		ebp,esp
				mov		eax,[ebp+8]
				mov		edx,[ebp+12]
				mov		[oldx],eax
				mov		[oldy],edx
				cmp		eax,[_clip_x_min]
				jl		L45
				cmp		eax,[_clip_x_max]
				jae		L45				; unsigned test: the upper bound is exclusive
				cmp		edx,[_clip_y_min]
				jl		L45
				cmp		edx,[_clip_y_max]
				jae		L45
				push	ebx
				push	ecx
				push	edi
				mov		ebx,eax			; ebx = x
				cmp		byte [_lfb],0
				jnz		_draw_point_linear
				mov		ax,[_X_width]
				mul		dx				; dx:ax = _X_width * y
				add		ax,bx
				adc		dx,0000H
				mov		di,dx
				rol		edi,10H
				mov		di,ax			; edi = bank:offset
				jmp		_draw_point_linear2
_draw_point_linear:
				mov		eax,[_X_width]
				mov		cl,[_bpp]
				shr		cl,1
				imul	edx				; eax = _X_width * y
				add		eax,ebx
				shl		eax,cl			; pixel index -> byte offset
				mov		edi,eax
_draw_point_linear2:
				push	es
				mov		es,[_VideoSelector]
				xchg	edi,ebp			; address goes into ebp; the frame pointer is reloaded by "pop ebp" at L45
				add		ebp,[_videobase]
				call	[DrawPointVector]
				pop		es
				pop		edi
				pop		ecx
				pop		ebx
L45:			pop		ebp
				ret

;; -----------------------------------------------------
; gperr - error exit of _get_point: returns 0x80000000 in eax.
gperr:			mov     eax,0x80000000
				ret
                                
; _get_point - read one pixel.
; Stack arguments: [esp+4] = x, [esp+8] = y.
; Only the low 16 bits of x and y are compared (unsigned) with _X_width and
; _Y_width; out-of-range coordinates return through gperr.
; Returns the pixel value in eax; ebx, edi and es are preserved.
; NOTE(review): the _lfb != 0 path uses X_width*y + x without the bpp shift
; that _draw_point_linear applies - confirm this is correct for 16/32 bpp.
_get_point:		mov		eax,[esp+4]
				mov		edx,[esp+8]
                cmp     ax,[_X_width]
                jnc     gperr
                cmp		dx,[_Y_width]
                jnc     gperr
				push	ebx
				push	edi
				push	es
				
				mov		es,[_VideoSelector]
				mov		ebx,eax
				mov		ax,[_X_width]
				mul		dx					; dx:ax = width * y
				add		ax,bx
				adc		dx,0000H			; dx:ax = width * y + x
				cmp		byte [_lfb],0
				jz		getpoint2
				rol		edx,16				; _lfb != 0: merge dx:ax into edx
				mov		dx,ax ;	edx	= ptr
				add		edx,[_videobase]
				mov		edi,edx
				xor		eax,eax				; clear result before the reader fills al/ax/eax
				call	[GetPointVector]
				jmp		getpoint3
getpoint2:		mov		di,ax				; _lfb == 0: di = low word, dx = high word
				mov		ax,dx
				call	[set_bank_proc]		; called with ax = high word of the offset
				xor		eax,eax
				add		edi,[_videobase]
				mov		al,[es:di]			; 16-bit addressed byte read
getpoint3:		pop		es
				pop		edi
				pop		ebx
				ret
; Pixel readers for the linear path of _get_point (8/16/32-bit variants).
; In:  es:edi = pixel address
; Out: al / ax / eax = value read; wider bits of eax are left as they were.
GetPoint8:
        mov     al, [es:edi]
        ret

GetPoint16:
        mov     ax, [es:edi]
        ret

GetPoint32:
        mov     eax, [es:edi]
        ret
;; -----------------------------------------------------

;
; common DRAW_POINT routines
;
; es:ebp = color
; banked OR
;
; porr - banked OR plot: [pixel] |= low byte of __CurrColor.
; In:  es:ebp = pixel address.  ax is loaded with the high 16 bits of the
;      offset and passed to [set_bank_proc] before the pixel is touched.
; LINUX build: the offset is taken relative to _videobase and ebp is preserved.
porr:
%ifdef LINUX
        push    ebp
        sub     ebp, [_videobase]
        rol     ebp, 16
        mov     ax, bp                  ; ax = offset >> 16
        rol     ebp, 16
        and     ebp, 0xFFFF             ; keep the low 16 bits of the offset
        add     ebp, [_videobase]
        call    [set_bank_proc]
        mov     al, [es:ebp]
        or      al, [__CurrColor]
        mov     [es:ebp], al
        pop     ebp
%else
        rol     ebp, 16
        mov     ax, bp                  ; ax = offset >> 16
        rol     ebp, 16
        call    [set_bank_proc]
        mov     al, [es:bp]
        or      al, [__CurrColor]
        mov     [es:bp], al
%endif
        ret

; banked AND
; pandd - banked AND plot: [pixel] &= low byte of __CurrColor.
; In:  es:ebp = pixel address; the high word of the offset goes to
;      [set_bank_proc] in ax.  LINUX build preserves ebp.
pandd:
%ifdef LINUX
        push    ebp
        sub     ebp, [_videobase]
        rol     ebp, 16
        mov     ax, bp                  ; ax = offset >> 16
        rol     ebp, 16
        and     ebp, 0xFFFF
        add     ebp, [_videobase]
        call    [set_bank_proc]
        mov     al, [es:ebp]
        and     al, [__CurrColor]
        mov     [es:ebp], al
        pop     ebp
%else
        rol     ebp, 16
        mov     ax, bp                  ; ax = offset >> 16
        rol     ebp, 16
        call    [set_bank_proc]
        mov     al, [es:bp]
        and     al, [__CurrColor]
        mov     [es:bp], al
%endif
        ret

; banked SET
; pset - banked SET plot: [pixel] = low byte of __CurrColor.
; In:  es:ebp = pixel address; the high word of the offset goes to
;      [set_bank_proc] in ax.  LINUX build preserves ebp.
pset:
%ifdef LINUX
        push    ebp
        sub     ebp, [_videobase]
        rol     ebp, 16
        mov     ax, bp                  ; ax = offset >> 16
        rol     ebp, 16
        and     ebp, 0xFFFF
        add     ebp, [_videobase]
        call    [set_bank_proc]
        mov     al, [__CurrColor]
        mov     [es:ebp], al
        pop     ebp
%else
        rol     ebp, 16
        mov     ax, bp                  ; ax = offset >> 16
        rol     ebp, 16
        call    [set_bank_proc]
        mov     al, [__CurrColor]
        mov     [es:bp], al
%endif
        ret

; banked REPLACE
; preplace - banked REPLACE plot: the pixel is overwritten with the low byte
; of __CurrColor only when its current value is >= 16; values below 16 are
; left untouched.
; In:  es:ebp = pixel address; the high word of the offset goes to
;      [set_bank_proc] in ax.
; Fix: in the LINUX build the "pixel below 16" path used to jump straight to the
; ret and skip the pop of the saved ebp, so the return used the saved ebp as
; its return address.  Both paths now leave through the pop.
preplace:
%ifdef LINUX
        push    ebp
        sub     ebp, [_videobase]
        rol     ebp, 16
        mov     ax, bp                  ; ax = offset >> 16
        rol     ebp, 16
        and     ebp, 0xFFFF
        add     ebp, [_videobase]
        call    [set_bank_proc]
        mov     al, [__CurrColor]
        cmp     byte [es:ebp], 16
        jc      prepl2                  ; below 16: keep the pixel
        mov     [es:ebp], al
prepl2:
        pop     ebp                     ; balance the push on every path
        ret
%else
        rol     ebp, 16
        mov     ax, bp                  ; ax = offset >> 16
        rol     ebp, 16
        call    [set_bank_proc]
        mov     al, [__CurrColor]
        cmp     byte [es:bp], 16
        jc      prepl2                  ; below 16: keep the pixel
        mov     [es:bp], al
prepl2:
        ret
%endif

; banked XOR
; pxorr - banked XOR plot: [pixel] ^= low byte of __CurrColor.
; In:  es:ebp = pixel address; the high word of the offset goes to
;      [set_bank_proc] in ax.  LINUX build preserves ebp.
pxorr:
%ifdef LINUX
        push    ebp
        sub     ebp, [_videobase]
        rol     ebp, 16
        mov     ax, bp                  ; ax = offset >> 16
        rol     ebp, 16
        and     ebp, 0xFFFF
        add     ebp, [_videobase]
        call    [set_bank_proc]
        mov     al, [es:ebp]
        xor     al, [__CurrColor]
        mov     [es:ebp], al
        pop     ebp
%else
        rol     ebp, 16
        mov     ax, bp                  ; ax = offset >> 16
        rol     ebp, 16
        call    [set_bank_proc]
        mov     al, [es:bp]
        xor     al, [__CurrColor]
        mov     [es:bp], al
%endif
        ret

; banked PLUS
; ppluss - banked PLUS plot: [pixel] += low byte of __CurrColor (8-bit wrap).
; In:  es:ebp = pixel address; the high word of the offset goes to
;      [set_bank_proc] in ax.  LINUX build preserves ebp.
ppluss:
%ifdef LINUX
        push    ebp
        sub     ebp, [_videobase]
        rol     ebp, 16
        mov     ax, bp                  ; ax = offset >> 16
        rol     ebp, 16
        and     ebp, 0xFFFF
        add     ebp, [_videobase]
        call    [set_bank_proc]
        mov     al, [es:ebp]
        add     al, [__CurrColor]
        mov     [es:ebp], al
        pop     ebp
%else
        rol     ebp, 16
        mov     ax, bp                  ; ax = offset >> 16
        rol     ebp, 16
        call    [set_bank_proc]
        mov     al, [es:bp]
        add     al, [__CurrColor]
        mov     [es:bp], al
%endif
        ret

; banked MINUS
; pminuss - banked MINUS plot: [pixel] -= low byte of __CurrColor (8-bit wrap).
; In:  es:ebp = pixel address; the high word of the offset goes to
;      [set_bank_proc] in ax.  LINUX build preserves ebp.
pminuss:
%ifdef LINUX
        push    ebp
        sub     ebp, [_videobase]
        rol     ebp, 16
        mov     ax, bp                  ; ax = offset >> 16
        rol     ebp, 16
        and     ebp, 0xFFFF
        add     ebp, [_videobase]
        call    [set_bank_proc]
        mov     al, [es:ebp]
        sub     al, [__CurrColor]
        mov     [es:ebp], al
        pop     ebp
%else
        rol     ebp, 16
        mov     ax, bp                  ; ax = offset >> 16
        rol     ebp, 16
        call    [set_bank_proc]
        mov     al, [es:bp]
        sub     al, [__CurrColor]
        mov     [es:bp], al
%endif
        ret
; banked NOT
; pnott - banked NOT plot: inverts the byte at the pixel address.
; In:  es:ebp = pixel address; the high word of the offset goes to
;      [set_bank_proc] in ax.  LINUX build preserves ebp.
pnott:
%ifdef LINUX
        push    ebp
        sub     ebp, [_videobase]
        rol     ebp, 16
        mov     ax, bp                  ; ax = offset >> 16
        rol     ebp, 16
        and     ebp, 0xFFFF
        add     ebp, [_videobase]
        call    [set_bank_proc]
        not     byte [es:ebp]
        pop     ebp
%else
        rol     ebp, 16
        mov     ax, bp                  ; ax = offset >> 16
        rol     ebp, 16
        call    [set_bank_proc]
        not     byte [es:bp]
%endif
        ret
;; -----------------------------------------------------
; Linear 8-bit point operations.
; In:  es:ebp = pixel address.  Each routine combines the byte there with the
;      low byte of __CurrColor; al is used as scratch (lpnott touches no register).

; AND
lpandd:
        mov     al, [es:ebp]
        and     al, [__CurrColor]
        mov     [es:ebp], al
        ret

; OR
lporr:
        mov     al, [es:ebp]
        or      al, [__CurrColor]
        mov     [es:ebp], al
        ret

; SET
lpset:
        mov     al, [__CurrColor]
        mov     [es:ebp], al
        ret

; REPLACE - only pixels whose current value is >= 16 are overwritten
lpreplace:
        cmp     byte [es:ebp], 16
        jb      prepl3
        mov     al, [__CurrColor]
        mov     [es:ebp], al
prepl3:
        ret

; XOR
lpxorr:
        mov     al, [es:ebp]
        xor     al, [__CurrColor]
        mov     [es:ebp], al
        ret

; PLUS
lppluss:
        mov     al, [es:ebp]
        add     al, [__CurrColor]
        mov     [es:ebp], al
        ret

; MINUS
lpminuss:
        mov     al, [es:ebp]
        sub     al, [__CurrColor]
        mov     [es:ebp], al
        ret

; NOT
lpnott:
        not     byte [es:ebp]
        ret

;; -----------------------------------------------------
; Linear 16-bit point operations.
; In:  es:ebp = pixel address.  Each routine combines the word there with the
;      low word of __CurrColor; ax is used as scratch (lpnott16 touches no register).

; AND16
lpandd16:
        mov     ax, [es:ebp]
        and     ax, [__CurrColor]
        mov     [es:ebp], ax
        ret

; OR16
lporr16:
        mov     ax, [es:ebp]
        or      ax, [__CurrColor]
        mov     [es:ebp], ax
        ret

; SET16
lpset16:
        mov     ax, [__CurrColor]
        mov     [es:ebp], ax
        ret

; XOR16
lpxorr16:
        mov     ax, [es:ebp]
        xor     ax, [__CurrColor]
        mov     [es:ebp], ax
        ret

; PLUS16
lppluss16:
        mov     ax, [es:ebp]
        add     ax, [__CurrColor]
        mov     [es:ebp], ax
        ret

; MINUS16
lpminuss16:
        mov     ax, [es:ebp]
        sub     ax, [__CurrColor]
        mov     [es:ebp], ax
        ret

; NOT16
lpnott16:
        not     word [es:ebp]
        ret

;; -----------------------------------------------------
; Linear 32-bit point operations.
; In:  es:ebp = pixel address.  Each routine combines the dword there with
;      the dword at __CurrColor; eax is used as scratch (lpnott32 touches no register).

; AND32
lpandd32:
        mov     eax, [es:ebp]
        and     eax, [__CurrColor]
        mov     [es:ebp], eax
        ret

; OR32
lporr32:
        mov     eax, [es:ebp]
        or      eax, [__CurrColor]
        mov     [es:ebp], eax
        ret

; SET32
lpset32:
        mov     eax, [__CurrColor]
        mov     [es:ebp], eax
        ret

; XOR32
lpxorr32:
        mov     eax, [es:ebp]
        xor     eax, [__CurrColor]
        mov     [es:ebp], eax
        ret

; PLUS32
lppluss32:
        mov     eax, [es:ebp]
        add     eax, [__CurrColor]
        mov     [es:ebp], eax
        ret

; MINUS32
lpminuss32:
        mov     eax, [es:ebp]
        sub     eax, [__CurrColor]
        mov     [es:ebp], eax
        ret

; NOT32
lpnott32:
        not     dword [es:ebp]
        ret

;; -----------------------------------------------------
; freplace - REPLACE fill: over ecx bytes at es:edi, overwrite with al every
; byte whose current value is >= 16; bytes below 16 are left as they are.
; In:  es:edi = destination, ecx = byte count, al = colour.
; Out: edi advanced by the count, ecx = 0.
freplace:
        test    ecx, ecx
        jz      near rtrn3              ; nothing to do
freplace2:
        cmp     byte [es:edi], 16
        jb      freplace3               ; below 16: keep
        mov     byte [es:edi], al
freplace3:
        inc     edi
        dec     ecx
        jnz     freplace2
        ret

; ttransp - transparent byte copy: copies ecx bytes from [esi] to the
; destination, skipping every source byte equal to the low byte of
; _transp_color.
; In:  esi = source, es:edi (es:di outside the LINUX build) = destination,
;      ecx = byte count.
; Out: esi/edi advanced, ecx = 0, al clobbered, ebx preserved.
ttransp:
        test    ecx, ecx
        jz      rtrn_x                  ; nothing to do
        push    ebx
        mov     ebx, [_transp_color]    ; bl = colour to skip
ttransp2:
        mov     al, [esi]
        cmp     al, bl
        jz      ttransp3                ; transparent: do not store
%ifdef LINUX
        mov     [es:edi], al
%else
        mov     [es:di], al
%endif
ttransp3:
        inc     esi
        inc     edi
        dec     ecx
        jnz     ttransp2
        pop     ebx
rtrn_x:
        ret

; lttransp - linear transparent copy, entry point.
; In:  esi = source, es:edi = destination, ecx = length in BYTES.
; Dispatches on _bpp: 2 -> lttransp16, greater than 2 -> lttransp32,
; otherwise the byte loop below.  Source pixels equal to _transp_color are
; not stored.  ebx is saved here and restored by whichever loop runs.
;
; lttransp16 / lttransp32 are the word / dword loops; they are entered from
; lttransp with ebx already pushed and holding _transp_color, and they pop it.
; They step ecx by 2 / 4 and stop only when it reaches exactly zero.
lttransp16:
        mov     ax, [esi]
        cmp     ax, bx
        jz      lttransp163             ; transparent: skip the store
        mov     [es:edi], ax
lttransp163:
        add     esi, 2
        add     edi, 2
        sub     ecx, 2
        jnz     lttransp16
        pop     ebx
        ret

lttransp32:
        mov     eax, [esi]
        cmp     eax, ebx
        jz      lttransp323             ; transparent: skip the store
        mov     [es:edi], eax
lttransp323:
        add     esi, 4
        add     edi, 4
        sub     ecx, 4
        jnz     lttransp32
        pop     ebx
        ret

lttransp:
        test    ecx, ecx
        jz      lrtrn_x                 ; nothing to do
        push    ebx
        mov     ebx, [_transp_color]
        cmp     byte [_bpp], 2
        jz      lttransp16
        jae     lttransp32

lttransp2:
        mov     al, [esi]
        cmp     al, bl
        jz      lttransp3               ; transparent: skip the store
        mov     [es:edi], al
lttransp3:
        inc     esi
        inc     edi
        dec     ecx
        jnz     lttransp2
        pop     ebx
lrtrn_x:
        ret

; 8bit transparent routine
;				mov		ebx,ecx
;				shr		ecx,3
;				cmp		ecx,0
;				je		lrtrn_y
;				mov		eax,_transp_color
;				call	loadmmx
;lttransp2:		movq	mm1,[esi]		; mm0 = src
;				movq	mm2,mm0			; mm3 = dst
;				movq	mm3,[es:edi]		; mm2 = tmp color
;				pcmpeqb	mm2,mm1
;				add		esi,8
;				pand	mm3,mm2			; back
;				add		edi,8
;				pandn	mm2,mm1
;          		dec		ecx
;				por		mm3,mm2
;                movq    [es:edi-8],mm3
;				jnz		lttransp2

;lrtrn_y:		mov		ecx,ebx
;				and		ecx,7
;				jz		lrtrn_z
;
;				mov		ebx,[_transp_color]
;lttransp8:		mov     al,[esi]
;				cmp		al,bl
;				jz		lttransp3
;                mov     [es:edi],al
;lttransp3:		inc     esi
;                inc     edi
;                dec		ecx
;				jnz		lttransp8
;
;lrtrn_z:		pop		ebx
;lrtrn_x:		ret

; fset - fill ecx bytes at es:edi with the colour.
; In:  es:edi = destination, ecx = byte count, eax = colour pattern
;      (al is stored bytewise, eax dwordwise on the non-MMX path).
; The MMX state must already be enabled by the caller.
; Out: edi advanced, ebx preserved; eax is reloaded when the MMX path runs.
fset:
        test    ecx, ecx
        jz      rtrn2                   ; nothing to do
fill88:
        test    edi, 3                  ; destination dword aligned yet?
        jz      fill1
fill2:
        stosb
        dec     ecx
        jnz     fill88
rtrn2:
        ret

fill1:
        cmp     ecx, 8
        jb      fill2                   ; short run: finish bytewise
        push    ebx
        mov     ebx, ecx                ; keep the count for the remainder
        call    _is_mmx
        jz      fill3                   ; ZF set: no MMX
        call    mmx_color               ; mm0 = colour pattern
        shr     ecx, 3                  ; 8 bytes per iteration
fill4:
        movq    [es:edi], mm0
        add     edi, 8
        dec     ecx
        jnz     fill4
        and     ebx, 7                  ; remaining bytes
        jmp     fill5

fill3:
        shr     ecx, 2                  ; 4 bytes per store
        rep stosd
        and     ebx, 3                  ; remaining bytes
fill5:
        jz      fill9
        mov     ecx, ebx
        rep stosb
fill9:
        pop     ebx
        ret

; fxor - XOR ecx bytes at es:edi with the colour.
; In:  es:edi = destination, ecx = byte count, eax = colour pattern
;      (al is applied bytewise, eax dwordwise on the non-MMX path).
; The MMX state must already be enabled by the caller.
; Out: edi advanced, ebx preserved.

fxor:
        test    ecx, ecx
        jz      rtrn2                   ; nothing to do
fxor88:
        test    edi, 3                  ; destination dword aligned yet?
        jz      fxor1
fxor2:
        xor     [es:edi], al
        inc     edi
        dec     ecx
        jnz     fxor88
        ret

fxor1:
        cmp     ecx, 8
        jb      fxor2                   ; short run: finish bytewise
        push    ebx
        mov     ebx, ecx                ; keep the count for the remainder
        call    _is_mmx
        jz      fxor3                   ; ZF set: no MMX
        call    mmx_color               ; mm0 = colour pattern
        shr     ecx, 3                  ; 8 bytes per iteration
fxor4:
        movq    mm1, [es:edi]
        pxor    mm1, mm0
        movq    [es:edi], mm1
        add     edi, 8
        dec     ecx
        jnz     fxor4
        and     ebx, 7                  ; remaining bytes
        jmp     fxor5

fxor3:
        shr     ecx, 2                  ; 4 bytes per iteration
fxor6:
        xor     [es:edi], eax
        add     edi, 4
        dec     ecx
        jnz     fxor6
        and     ebx, 3                  ; remaining bytes
fxor5:
        jz      fxor9
fxor7:
        xor     [es:edi], al
        inc     edi
        dec     ebx
        jnz     fxor7
fxor9:
        pop     ebx
        ret

; for - OR ecx bytes at es:edi with the colour.
; In:  es:edi = destination, ecx = byte count, eax = colour pattern
;      (al is applied bytewise, eax dwordwise on the non-MMX path).
; The MMX state must already be enabled by the caller.
; Out: edi advanced, ebx preserved.

for:
        test    ecx, ecx
        jz      rtrn3                   ; nothing to do
for88:
        test    edi, 3                  ; destination dword aligned yet?
        jz      for1
for2:
        or      [es:edi], al
        inc     edi
        dec     ecx
        jnz     for88
rtrn3:
        ret

for1:
        cmp     ecx, 8
        jb      for2                    ; short run: finish bytewise
        push    ebx
        mov     ebx, ecx                ; keep the count for the remainder
        call    _is_mmx
        jz      for3                    ; ZF set: no MMX
        call    mmx_color               ; mm0 = colour pattern
        shr     ecx, 3                  ; 8 bytes per iteration
for4:
        movq    mm1, [es:edi]
        por     mm1, mm0
        movq    [es:edi], mm1
        add     edi, 8
        dec     ecx
        jnz     for4
        and     ebx, 7                  ; remaining bytes
        jmp     for5

for3:
        shr     ecx, 2                  ; 4 bytes per iteration
for6:
        or      [es:edi], eax
        add     edi, 4
        dec     ecx
        jnz     for6
        and     ebx, 3                  ; remaining bytes
for5:
        jz      for9
for7:
        or      [es:edi], al
        inc     edi
        dec     ebx
        jnz     for7
for9:
        pop     ebx
        ret

; fplus - add the colour byte to ecx bytes at es:edi (per-byte wraparound).
; In:  es:edi = destination, ecx = byte count, al = colour byte.
; The MMX state must already be enabled by the caller.
; Out: edi advanced, ebx preserved.
; The non-MMX path stays bytewise (there is no dword loop here).

fplus:
        test    ecx, ecx
        jz      rtrn3                   ; nothing to do
fplus88:
        test    edi, 3                  ; destination dword aligned yet?
        jz      fplus1
fplus2:
        add     [es:edi], al
        inc     edi
        dec     ecx
        jnz     fplus88
        ret

fplus1:
        cmp     ecx, 8
        jb      fplus2                  ; short run: finish bytewise
        push    ebx
        mov     ebx, ecx                ; keep the count for the remainder
        call    _is_mmx
        jz      fplus3                  ; ZF set: no MMX
        call    mmx_color               ; mm0 = colour pattern
        shr     ecx, 3                  ; 8 bytes per iteration
fplus4:
        movq    mm1, [es:edi]
        paddb   mm1, mm0
        movq    [es:edi], mm1
        add     edi, 8
        dec     ecx
        jnz     fplus4
        and     ebx, 7                  ; remaining bytes
        jmp     fplus5

fplus3:
fplus6:
        add     [es:edi], al            ; no MMX: whole run one byte at a time
        inc     edi
        dec     ecx
        jnz     fplus6
        jmp     fplus9
fplus5:
        jz      fplus9
fplus7:
        add     [es:edi], al
        inc     edi
        dec     ebx
        jnz     fplus7
fplus9:
        pop     ebx
        ret

; fnot - invert ecx bytes at es:edi.
; In:  es:edi = destination, ecx = byte count.
; The MMX state must already be enabled by the caller.
; Out: edi advanced, ebx preserved.

fnot:
        test    ecx, ecx
        jz      near rtrn3              ; nothing to do
fnot88:
        test    edi, 3                  ; destination dword aligned yet?
        jz      fnot1
fnot2:
        not     byte [es:edi]
        inc     edi
        dec     ecx
        jnz     fnot88
        ret

fnot1:
        cmp     ecx, 8
        jb      fnot2                   ; short run: finish bytewise
        push    ebx
        mov     ebx, ecx                ; keep the count for the remainder
        call    _is_mmx
        jz      fnot3                   ; ZF set: no MMX
        movq    mm0, [minusone]         ; all-ones mask: xor with it inverts
        shr     ecx, 3                  ; 8 bytes per iteration
fnot4:
        movq    mm1, [es:edi]
        pxor    mm1, mm0
        movq    [es:edi], mm1
        add     edi, 8
        dec     ecx
        jnz     fnot4
        and     ebx, 7                  ; remaining bytes
        jmp     fnot5

fnot3:
        shr     ecx, 2                  ; 4 bytes per iteration
fnot6:
        not     dword [es:edi]
        add     edi, 4
        dec     ecx
        jnz     fnot6
        and     ebx, 3                  ; remaining bytes
fnot5:
        jz      fnot9
fnot7:
        not     byte [es:edi]
        inc     edi
        dec     ebx
        jnz     fnot7
fnot9:
        pop     ebx
        ret

; fminus - subtract the colour byte from ecx bytes at es:edi (per-byte wrap).
; In:  es:edi = destination, ecx = byte count, al = colour byte.
; The MMX state must already be enabled by the caller.
; Out: edi advanced, ebx preserved.
; The non-MMX path stays bytewise (there is no dword loop here).

fminus:
        test    ecx, ecx
        jz      rtrn7                   ; nothing to do
fminus88:
        test    edi, 3                  ; destination dword aligned yet?
        jz      fminus1
fminus2:
        sub     [es:edi], al
        inc     edi
        dec     ecx
        jnz     fminus88
rtrn7:
        ret

fminus1:
        cmp     ecx, 8
        jb      fminus2                 ; short run: finish bytewise
        push    ebx
        mov     ebx, ecx                ; keep the count for the remainder
        call    _is_mmx
        jz      fminus3                 ; ZF set: no MMX
        call    mmx_color               ; mm0 = colour pattern
        shr     ecx, 3                  ; 8 bytes per iteration
fminus4:
        movq    mm1, [es:edi]
        psubb   mm1, mm0
        movq    [es:edi], mm1
        add     edi, 8
        dec     ecx
        jnz     fminus4
        and     ebx, 7                  ; remaining bytes
        jmp     fminus5

fminus3:
fminus6:
        sub     [es:edi], al            ; no MMX: whole run one byte at a time
        inc     edi
        dec     ecx
        jnz     fminus6
        jmp     fminus9
fminus5:
        jz      fminus9
fminus7:
        sub     [es:edi], al
        inc     edi
        dec     ebx
        jnz     fminus7
fminus9:
        pop     ebx
        ret

; fand - AND ecx bytes at es:edi with the colour.
; In:  es:edi = destination, ecx = byte count, eax = colour pattern
;      (al is applied bytewise, eax dwordwise on the non-MMX path).
; The MMX state must already be enabled by the caller.
; Out: edi advanced, ebx preserved.
; Fix: the non-MMX path used to push ebx a second time before the remainder
; loop; only one pop follows, so the ret took the saved ebx as its return
; address.  The extra push is removed, matching fxor/for/fnot.

fand:
        and     ecx, ecx
        je      rtrn7                   ; nothing to do
fand88:
        test    edi, 3                  ; destination dword aligned yet?
        je      fand1
fand2:
        and     [es:edi], al
        inc     edi
        dec     ecx
        jnz     fand88
        ret

fand1:
        cmp     ecx, 8
        jb      fand2                   ; short run: finish bytewise
        push    ebx
        mov     ebx, ecx                ; keep the count for the remainder
        call    _is_mmx
        jz      fand3                   ; ZF set: no MMX
        call    mmx_color               ; mm0 = colour pattern
        shr     ecx, 3                  ; 8 bytes per iteration
fand4:
        movq    mm1, [es:edi]
        pand    mm1, mm0
        movq    [es:edi], mm1
        add     edi, 8
        dec     ecx
        jnz     fand4
        and     ebx, 7                  ; remaining bytes
        jmp     fand5

fand3:
        shr     ecx, 2                  ; 4 bytes per iteration
fand6:
        and     [es:edi], eax
        add     edi, 4
        dec     ecx
        jnz     fand6
        and     ebx, 3                  ; remaining bytes (sets ZF for fand5)
fand5:
        je      fand9
fand7:
        and     [es:edi], al
        inc     edi
        dec     ebx
        jnz     fand7
fand9:
        pop     ebx                     ; pairs with the single push in fand1
        ret

%ifndef NO_MMX
; _rdtsc - execute RDTSC and return; the counter is left in edx:eax.
_rdtsc:
        rdtsc
        ret

; _test_mmx - detect MMX support.
; Toggles EFLAGS bit 21; if the bit can be changed, CPUID leaf 1 is executed
; and edx bit 23 is tested.  The result (1 = MMX, 0 = none) is stored in
; _mmx and returned in eax.  ebx is preserved; ecx and edx are clobbered
; when CPUID runs.
_test_mmx:
        pushfd
        pop     eax                     ; eax = current EFLAGS
        push    ebx                     ; cpuid overwrites ebx
        mov     edx, eax
        xor     eax, 0x00200000         ; flip bit 21
        push    eax
        popfd
        pushfd
        pop     eax                     ; read EFLAGS back
        cmp     eax, edx
        mov     eax, 0                  ; mov leaves the cmp flags intact
        jz      mmx2                    ; bit did not change: no CPUID
        mov     eax, 1
        cpuid
        test    edx, 0x00800000         ; feature bit 23
        mov     eax, 0                  ; mov leaves the test flags intact
        jz      mmx2
        inc     eax                     ; eax = 1
        emms
        emms
mmx2:
        mov     [_mmx], eax
        pop     ebx
        ret

; _reset_mmx - when _mmx is non-zero: execute EMMS and decrement _mmx_state.
; Does nothing when _mmx is zero.  All general registers are preserved.
_reset_mmx:
        cmp     dword [_mmx], 0
        jz      _set_mmx22              ; no MMX: nothing to undo
        emms
        dec     dword [_mmx_state]
_set_mmx22:
        ret

; _set_mmx - when _mmx is non-zero: execute EMMS and increment _mmx_state.
; Does nothing when _mmx is zero.  All general registers are preserved.
_set_mmx:
        cmp     dword [_mmx], 0
        jz      _set_mmx2               ; no MMX: nothing to set up
        emms
        inc     dword [_mmx_state]
_set_mmx2:
        ret

; _is_mmx - report the MMX flag through ZF.
; Out: ZF = 1 when _mmx is zero (no MMX), ZF = 0 otherwise.
;      All general registers are preserved.
_is_mmx:
        cmp     dword [_mmx], 0
        ret

; mmx_color - load mm0 with the current colour pattern.
;   Puts the ADDRESS of __CurrColor in eax, then continues into loadmmx.
; loadmmx   - entry with eax already set by the caller: calls the routine in
;   LoadColor, stores the eax it returns into both dwords of mmxtmp and loads
;   that qword into mm0.
;   (presumably LoadColor turns the address in eax into a replicated colour
;   dword - its routines are not visible here; confirm.)
; Both entries fall through into the ret at 'dummy'.
mmx_color:		mov		eax,__CurrColor
loadmmx:		call	[LoadColor]
				mov		[mmxtmp],eax
				mov		[mmxtmp+4],eax
				movq	mm0,[mmxtmp]
%endif
; dummy - a bare ret; also the initial value of set_bank_proc.
dummy:			ret
;; ------------------------------------------------
; fset16 - fill ecx BYTES (16-bit pixels) at es:edi with the colour.
; In:  es:edi = destination, ecx = byte count (stepped by 2), eax = colour
;      pattern (ax stored wordwise, eax dwordwise on the non-MMX path).
; Out: edi advanced, ebx preserved.
fset16:
        test    ecx, ecx
        jz      rtrn216                 ; nothing to do
fill8816:
        test    edi, 2                  ; destination dword aligned yet?
        jz      fill116
fill216:
        stosw
        sub     ecx, 2
        jnz     fill8816
rtrn216:
        ret

fill116:
        push    ebx
        cmp     ecx, 8
        jb      fill716                 ; short run: words only
        mov     ebx, ecx                ; keep the count for the remainder
        call    _is_mmx
        jz      fill316                 ; ZF set: no MMX
        call    mmx_color               ; mm0 = colour pattern
        shr     ecx, 3                  ; 8 bytes per iteration
fill416:
        movq    [es:edi], mm0
        add     edi, 8
        dec     ecx
        jnz     fill416
        and     ebx, 7                  ; remaining bytes
        jmp     fill516

fill316:
        shr     ecx, 2                  ; 4 bytes per store
        rep stosd
        and     ebx, 3                  ; remaining bytes
fill516:
        jz      fill916
        mov     ecx, ebx
fill716:
        shr     ecx, 1                  ; bytes -> words
        rep stosw
fill916:
        pop     ebx
        ret

; fset32 - fill ecx BYTES (32-bit pixels) at es:edi with the colour.
; eax is loaded by calling [LoadColor] with the address of __CurrColor.
; Out: edi advanced, ecx = 0.
fset32:
        test    ecx, ecx
        jz      rtrn232                 ; nothing to do
        mov     eax, __CurrColor        ; address, not value
        call    [LoadColor]
        shr     ecx, 2                  ; bytes -> dwords
        rep stosd
rtrn232:
        ret

; for32 - OR the colour into ecx BYTES (32-bit pixels) at es:edi.
; The colour comes from [LoadColor] called with the address of __CurrColor.
; Out: edi advanced, ebx preserved, eax clobbered.
for32:
        test    ecx, ecx
        jz      rtrn232                 ; nothing to do
        mov     eax, __CurrColor        ; address, not value
        call    [LoadColor]
        push    ebx
        mov     ebx, eax                ; ebx = colour
        shr     ecx, 2                  ; bytes -> dwords
for232:
        mov     eax, [es:edi]
        or      eax, ebx
        stosd
        dec     ecx
        jnz     for232
        pop     ebx
        ret

; fxor32 - XOR the colour into ecx BYTES (32-bit pixels) at es:edi.
; The colour comes from [LoadColor] called with the address of __CurrColor.
; Out: edi advanced, ebx preserved, eax clobbered.
fxor32:
        test    ecx, ecx
        jz      rtrn232                 ; nothing to do
        mov     eax, __CurrColor        ; address, not value
        call    [LoadColor]
        push    ebx
        mov     ebx, eax                ; ebx = colour
        shr     ecx, 2                  ; bytes -> dwords
fxor232:
        mov     eax, [es:edi]
        xor     eax, ebx
        stosd
        dec     ecx
        jnz     fxor232
        pop     ebx
rtrn2326:
        ret

; fand32 - AND the colour into ecx BYTES (32-bit pixels) at es:edi.
; The colour comes from [LoadColor] called with the address of __CurrColor.
; Out: edi advanced, ebx preserved, eax clobbered.
fand32:
        test    ecx, ecx
        jz      rtrn2326                ; nothing to do
        mov     eax, __CurrColor        ; address, not value
        call    [LoadColor]
        push    ebx
        mov     ebx, eax                ; ebx = colour
        shr     ecx, 2                  ; bytes -> dwords
fand232:
        mov     eax, [es:edi]
        and     eax, ebx
        stosd
        dec     ecx
        jnz     fand232
        pop     ebx
        ret

; fplus32 - add the colour to ecx BYTES (32-bit pixels) at es:edi.
; The colour comes from [LoadColor] called with the address of __CurrColor.
; Out: edi advanced, ebx preserved, eax clobbered.
fplus32:
        test    ecx, ecx
        jz      rtrn2326                ; nothing to do
        mov     eax, __CurrColor        ; address, not value
        call    [LoadColor]
        push    ebx
        mov     ebx, eax                ; ebx = colour
        shr     ecx, 2                  ; bytes -> dwords
fplus232:
        mov     eax, [es:edi]
        add     eax, ebx
        stosd
        dec     ecx
        jnz     fplus232
        pop     ebx
rtrn233:
        ret

; fminus32 - subtract the colour from ecx BYTES (32-bit pixels) at es:edi.
; The colour comes from [LoadColor] called with the address of __CurrColor.
; Out: edi advanced, ebx preserved, eax clobbered.
fminus32:
        test    ecx, ecx
        jz      rtrn233                 ; nothing to do
        mov     eax, __CurrColor        ; address, not value
        call    [LoadColor]
        push    ebx
        mov     ebx, eax                ; ebx = colour
        shr     ecx, 2                  ; bytes -> dwords
fminus232:
        mov     eax, [es:edi]
        sub     eax, ebx
        stosd
        dec     ecx
        jnz     fminus232
        pop     ebx
        ret

; fnot32 - invert ecx BYTES (32-bit pixels) at es:edi.
; In:  es:edi = destination, ecx = byte count (a multiple of 4).
; Out: edi advanced, ecx = 0; no other register is touched.
; Fix: removed a leftover 'and eax,ebx' from the loop.  It read ebx, which
; this routine never loads, and its result was never used.
fnot32:
        and     ecx, ecx
        je      rtrn233                 ; nothing to do
        shr     ecx, 2                  ; bytes -> dwords
fnot232:
        not     dword [es:edi]
        add     edi, 4
        dec     ecx
        jnz     fnot232
        ret


; fand16 - AND the colour into ecx BYTES (16-bit pixels) at es:edi.
; In:  es:edi = destination, ecx = byte count (stepped by 2), eax = colour
;      pattern (ax applied wordwise, eax dwordwise on the non-MMX path).
; The MMX state must already be enabled by the caller.
; Out: edi advanced, ebx preserved.
; Fix: the non-MMX path used to push ebx a second time before the remainder
; loop; only one pop follows, so the ret took the saved ebx as its return
; address.  The extra push is removed, matching fxor16/for16/fnot16.

fand16:
        and     ecx, ecx
        je      rtrn716                 ; nothing to do
fand8816:
        test    edi, 1                  ; odd address: stay in the word loop
        je      fand116
fand216:
        and     [es:edi], ax
        add     edi, 2
        sub     ecx, 2
        jnz     fand8816
rtrn716:
        ret

fand116:
        cmp     ecx, 8
        jb      fand216                 ; short run: finish wordwise
        push    ebx
        mov     ebx, ecx                ; keep the count for the remainder
        call    _is_mmx
        jz      fand316                 ; ZF set: no MMX
        call    mmx_color               ; mm0 = colour pattern
        shr     ecx, 3                  ; 8 bytes per iteration
fand416:
        movq    mm1, [es:edi]
        pand    mm1, mm0
        movq    [es:edi], mm1
        add     edi, 8
        dec     ecx
        jnz     fand416
        and     ebx, 7                  ; remaining bytes
        jmp     fand516

fand316:
        shr     ecx, 2                  ; 4 bytes per iteration
fand616:
        and     [es:edi], eax
        add     edi, 4
        dec     ecx
        jnz     fand616
        and     ebx, 3                  ; remaining bytes (sets ZF for fand516)
fand516:
        je      fand916
        shr     ebx, 1                  ; bytes -> words
fand716:
        and     [es:edi], ax
        add     edi, 2
        dec     ebx
        jnz     fand716
fand916:
        pop     ebx                     ; pairs with the single push in fand116
        ret

; fxor16 - XOR the colour into ecx BYTES (16-bit pixels) at es:edi.
; In:  es:edi = destination, ecx = byte count (stepped by 2), eax = colour
;      pattern (ax applied wordwise, eax dwordwise on the non-MMX path).
; Out: edi advanced, ebx preserved.
fxor16:
        test    ecx, ecx
        jz      rtrn916                 ; nothing to do
fxor8816:
        test    edi, 1                  ; odd address: stay in the word loop
        jz      fxor116
fxor216:
        xor     [es:edi], ax
        add     edi, 2
        sub     ecx, 2
        jnz     fxor8816
rtrn916:
        ret

fxor116:
        cmp     ecx, 8
        jb      fxor216                 ; short run: finish wordwise
        push    ebx
        mov     ebx, ecx                ; keep the count for the remainder
        call    _is_mmx
        jz      fxor316                 ; ZF set: no MMX
        call    mmx_color               ; mm0 = colour pattern
        shr     ecx, 3                  ; 8 bytes per iteration
fxor416:
        movq    mm1, [es:edi]
        pxor    mm1, mm0
        movq    [es:edi], mm1
        add     edi, 8
        dec     ecx
        jnz     fxor416
        and     ebx, 7                  ; remaining bytes
        jmp     fxor516

fxor316:
        shr     ecx, 2                  ; 4 bytes per iteration
fxor616:
        xor     [es:edi], eax
        add     edi, 4
        dec     ecx
        jnz     fxor616
        and     ebx, 3                  ; remaining bytes
fxor516:
        jz      fxor916
        shr     ebx, 1                  ; bytes -> words
fxor716:
        xor     [es:edi], ax
        add     edi, 2
        dec     ebx
        jnz     fxor716
fxor916:
        pop     ebx
        ret

; for16 - OR the colour into ecx BYTES (16-bit pixels) at es:edi.
; In:  es:edi = destination, ecx = byte count (stepped by 2), eax = colour
;      pattern (ax applied wordwise, eax dwordwise on the non-MMX path).
; The MMX state must already be enabled by the caller.
; Out: edi advanced, ebx preserved.

for16:
        test    ecx, ecx
        jz      rtrn316                 ; nothing to do
for8816:
        test    edi, 1                  ; odd address: stay in the word loop
        jz      for116
for216:
        or      [es:edi], ax
        add     edi, 2
        sub     ecx, 2
        jnz     for8816
rtrn316:
        ret

for116:
        cmp     ecx, 8
        jb      for216                  ; short run: finish wordwise
        push    ebx
        mov     ebx, ecx                ; keep the count for the remainder
        call    _is_mmx
        jz      for316                  ; ZF set: no MMX
        call    mmx_color               ; mm0 = colour pattern
        shr     ecx, 3                  ; 8 bytes per iteration
for416:
        movq    mm1, [es:edi]
        por     mm1, mm0
        movq    [es:edi], mm1
        add     edi, 8
        dec     ecx
        jnz     for416
        and     ebx, 7                  ; remaining bytes
        jmp     for516

for316:
        shr     ecx, 2                  ; 4 bytes per iteration
for616:
        or      [es:edi], eax
        add     edi, 4
        dec     ecx
        jnz     for616
        and     ebx, 3                  ; remaining bytes
for516:
        jz      for916
        shr     ebx, 1                  ; bytes -> words
for716:
        or      [es:edi], ax
        add     edi, 2
        dec     ebx
        jnz     for716
for916:
        pop     ebx
        ret

; fnot16 - invert ecx BYTES (16-bit pixels) at es:edi.
; In:  es:edi = destination, ecx = byte count (stepped by 2).
; Out: edi advanced, ebx preserved.
fnot16:
        test    ecx, ecx
        jz      near rtrn316            ; nothing to do
fnot8816:
        test    edi, 1                  ; odd address: stay in the word loop
        jz      fnot116
fnot216:
        not     word [es:edi]
        add     edi, 2
        sub     ecx, 2
        jnz     fnot8816
        ret

fnot116:
        cmp     ecx, 8
        jb      fnot216                 ; short run: finish wordwise
        push    ebx
        mov     ebx, ecx                ; keep the count for the remainder
        call    _is_mmx
        jz      fnot316                 ; ZF set: no MMX
        movq    mm0, [minusone]         ; all-ones mask: xor with it inverts
        shr     ecx, 3                  ; 8 bytes per iteration
fnot416:
        movq    mm1, [es:edi]
        pxor    mm1, mm0
        movq    [es:edi], mm1
        add     edi, 8
        dec     ecx
        jnz     fnot416
        and     ebx, 7                  ; remaining bytes
        jmp     fnot516

fnot316:
        shr     ecx, 2                  ; 4 bytes per iteration
fnot616:
        not     dword [es:edi]
        add     edi, 4
        dec     ecx
        jnz     fnot616
        and     ebx, 3                  ; remaining bytes
fnot516:
        jz      fnot916
        shr     ebx, 1                  ; bytes -> words
fnot716:
        not     word [es:edi]
        add     edi, 2
        dec     ebx
        jnz     fnot716
fnot916:
        pop     ebx
        ret

; ltset - copy ecx bytes from [esi] to es:edi.
; Bytes are copied singly until edi is dword aligned, then in 8-byte MMX
; moves (or rep movsd without MMX), then the remaining bytes singly.
; Out: esi/edi advanced; eax and ecx changed; ebx preserved;
;      mm0 changed on the MMX path.
ltset:
        test    ecx, ecx
        jz      lrtrn9                  ; nothing to do
ltset88:
        test    edi, 3                  ; destination dword aligned yet?
        jz      ltset6
ltset7:
        mov     al, [esi]
        mov     [es:edi], al
        inc     esi
        inc     edi
        dec     ecx
        jnz     ltset88
lrtrn9:
        ret

ltset6:
        cmp     ecx, 8                  ; aligned: at least 8 bytes left?
        jb      ltset7                  ; no: finish in the byte loop

ltset0:
        push    ebx
        mov     ebx, ecx                ; keep the count for the remainder
        call    _is_mmx
        jz      ltset1                  ; ZF set: no MMX

        shr     ecx, 3                  ; 8 bytes per iteration
ltset11:
        movq    mm0, [esi]
        movq    [es:edi], mm0
        add     esi, 8
        add     edi, 8
        dec     ecx
        jnz     ltset11
        and     ebx, 7                  ; remaining bytes
        jmp     ltset22

ltset1:
        shr     ecx, 2                  ; bytes -> dwords

ltset3:
        rep movsd                       ; ds:esi -> es:edi, ecx dwords
        and     ebx, 3                  ; remaining bytes
ltset22:
        jz      ltset4
ltset5:
        mov     al, [esi]
        mov     [es:edi], al
        inc     esi
        inc     edi
        dec     ebx
        jnz     ltset5
ltset4:
        pop     ebx
        ret

; ltand - AND ecx bytes from [esi] into es:edi.
; Bytewise until edi is dword aligned, then 8 bytes per step with MMX (or 4
; bytes per step without), then the remaining bytes singly.
; Out: esi/edi advanced; eax and ecx changed; ebx preserved;
;      mm0 changed on the MMX path.
ltand:
        test    ecx, ecx
        jz      lrtrn4                  ; nothing to do
ltand88:
        test    edi, 3                  ; destination dword aligned yet?
        jz      ltand6
ltand7:
        mov     al, [esi]
        and     [es:edi], al
        inc     esi
        inc     edi
        dec     ecx
        jnz     ltand88
lrtrn4:
        ret

ltand6:
        cmp     ecx, 8                  ; aligned: at least 8 bytes left?
        jb      ltand7                  ; no: finish in the byte loop

ltand0:
        push    ebx
        mov     ebx, ecx                ; keep the count for the remainder
        call    _is_mmx
        jz      ltand1                  ; ZF set: no MMX

        shr     ecx, 3                  ; 8 bytes per iteration
ltand11:
        movq    mm0, [esi]
        pand    mm0, [es:edi]
        movq    [es:edi], mm0
        add     esi, 8
        add     edi, 8
        dec     ecx
        jnz     ltand11
        and     ebx, 7                  ; remaining bytes
        jmp     ltand22

ltand1:
        shr     ecx, 2                  ; 4 bytes per iteration
ltand3:
        mov     eax, [esi]
        and     [es:edi], eax
        add     esi, 4
        add     edi, 4
        dec     ecx
        jnz     ltand3
        and     ebx, 3                  ; remaining bytes
ltand22:
        jz      ltand4
ltand5:
        mov     al, [esi]
        and     [es:edi], al
        inc     esi
        inc     edi
        dec     ebx
        jnz     ltand5
ltand4:
        pop     ebx
        ret
;; ----------------------------------------
; ltxor - XOR ecx bytes from [esi] into es:edi.
; Bytewise until edi is dword aligned, then 8 bytes per step with MMX (or 4
; bytes per step without), then the remaining bytes singly.
; Out: esi/edi advanced; eax and ecx changed; ebx and edx preserved;
;      mm0 changed on the MMX path.
ltxor:
        test    ecx, ecx
        jz      lrtrn6                  ; nothing to do
ltxor88:
        test    edi, 3                  ; destination dword aligned yet?
        jz      ltxor6
ltxor7:
        mov     al, [esi]
        xor     [es:edi], al
        inc     esi
        inc     edi
        dec     ecx
        jnz     ltxor88
lrtrn6:
        ret

ltxor6:
        cmp     ecx, 8                  ; aligned: at least 8 bytes left?
        jb      ltxor7                  ; no: finish in the byte loop

ltxor0:
        push    ebx
        mov     ebx, ecx                ; keep the count for the remainder
        call    _is_mmx
        jz      ltxor1                  ; ZF set: no MMX

        shr     ecx, 3                  ; 8 bytes per iteration
ltxor11:
        movq    mm0, [esi]
        pxor    mm0, [es:edi]
        movq    [es:edi], mm0
        add     esi, 8
        add     edi, 8
        dec     ecx
        jnz     ltxor11
        and     ebx, 7                  ; remaining bytes
        jmp     ltxor22

ltxor1:
        shr     ecx, 2                  ; 4 bytes per iteration
        push    edx                     ; edx is the scratch for the destination dword
ltxor3:
        mov     eax, [esi]
        add     esi, 4
        mov     edx, [es:edi]
        xor     eax, edx
        mov     [es:edi], eax
        add     edi, 4
        dec     ecx
        jnz     ltxor3
        pop     edx
        and     ebx, 3                  ; remaining bytes
ltxor22:
        jz      ltxor4
ltxor5:
        mov     al, [esi]
        xor     [es:edi], al
        inc     esi
        inc     edi
        dec     ebx
        jnz     ltxor5
ltxor4:
        pop     ebx
        ret

; ltor - OR ecx bytes from [esi] into es:edi.
; In:    es:edi = destination, ds:esi = source, ecx = byte count
; Out:   es:edi and ds:esi point just past the processed data
;        eax and ecx changed; ebx and edx unchanged
;        mm0 changed when the MMX path runs
; Bytewise until edi is dword aligned, then 8 bytes per step with MMX (or 4
; bytes per step without), then the remaining bytes singly.

ltor:
        test    ecx, ecx
        jz      lrtrn5                  ; nothing to do
ltor88:
        test    edi, 3                  ; destination dword aligned yet?
        jz      ltor6
ltor7:
        mov     al, [esi]
        or      [es:edi], al
        inc     esi
        inc     edi
        dec     ecx
        jnz     ltor88
lrtrn5:
        ret

ltor6:
        cmp     ecx, 8                  ; aligned: at least 8 bytes left?
        jb      ltor7                   ; no: finish in the byte loop

ltor0:
        push    ebx
        mov     ebx, ecx                ; keep the count for the remainder
        call    _is_mmx
        jz      ltor1                   ; ZF set: no MMX

        shr     ecx, 3                  ; 8 bytes per iteration
ltor11:
        movq    mm0, [esi]
        por     mm0, [es:edi]
        movq    [es:edi], mm0
        add     esi, 8
        add     edi, 8
        dec     ecx
        jnz     ltor11
        and     ebx, 7                  ; remaining bytes
        jmp     ltor22

ltor1:
        shr     ecx, 2                  ; 4 bytes per iteration
ltor3:
        mov     eax, [esi]
        or      [es:edi], eax
        add     esi, 4
        add     edi, 4
        dec     ecx
        jnz     ltor3
        and     ebx, 3                  ; remaining bytes
ltor22:
        jz      ltor4
ltor5:
        mov     al, [esi]
        or      [es:edi], al
        inc     esi
        inc     edi
        dec     ebx
        jnz     ltor5
ltor4:
        pop     ebx
        ret

%ifdef LINUX
				extern ptr_vga_setpage

; set_vga_banking - LINUX bank switch, installed into set_bank_proc by
; LinuxInit.  Calls the routine stored in ptr_vga_setpage with one stack
; argument: the low 8 bits of eax.  All six general registers saved here
; (eax, ebx, ecx, edx, esi, edi) are restored before returning.
set_vga_banking:
        push    eax
        push    ebx
        push    ecx
        push    edx
        push    esi
        push    edi
        and     eax, 0xFF               ; page number = low byte
        push    eax                     ; single argument
        call    [ptr_vga_setpage]
        pop     eax                     ; drop the argument
        pop     edi
        pop     esi
        pop     edx
        pop     ecx
        pop     ebx
        pop     eax
vga_setpage:
        ret

; LinuxInit - store the current ds selector in _VideoSelector and install
; set_vga_banking as the bank-switch routine.  Clobbers eax.
LinuxInit:
        mov     ax, ds
        mov     [_VideoSelector], ax
        mov     eax, set_vga_banking
        mov     [set_bank_proc], eax
        ret
%endif

; L1 - bank switch through INT 10h, AX=4F05h.
; In:  al = bank number.  Returns immediately when al equals the cached bank
;      in L253.  Otherwise the bank is cached, rotated left by _granularity
;      and passed in dx to two INT 10h calls (bx = 0, then bx = 1).
; ebx, ecx, edx are preserved; eax is clobbered.
L1:
        cmp     al, [L253]
        jz      L2                      ; bank already selected
        mov     [L253], al
        push    ebx
        push    ecx
        push    edx
        xor     ah, ah
        mov     cl, [_granularity]
        rol     al, cl
        mov     dx, ax                  ; dx = scaled bank number
        mov     ax, 0x4F05
        xor     bx, bx                  ; first call: bx = 0
        push    edx                     ; keep dx across the interrupt
        int     0x10
        pop     edx
        mov     ax, 0x4F05
        mov     bx, 1                   ; second call: bx = 1
        int     0x10
        pop     edx
        pop     ecx
        pop     ebx
L2:
        ret

; matrox_page - bank switch for the Matrox series.
; In:  al = bank number; skipped when it equals the cached bank in L253.
; Writes index 4 / data = bank as one word to port 0x3DE.
; dx is preserved; ax is clobbered.
matrox_page:
        cmp     al, [L253]
        jz      L2                      ; bank already selected
        mov     [L253], al
        push    dx
        mov     dx, 0x3DE
        mov     ah, al                  ; ah = bank
        mov     al, 4                   ; al = register index
        out     dx, ax
        pop     dx
        ret

; intel740 - bank switch for the I740.
; In:  al = bank number; skipped when it equals the cached bank in L253.
; Reads register 0xE2 through ports 0x3D6/0x3D7; when its high nibble is
; 0x60 the bank is shifted left by 2.  The bank is then written to index
; 0x0E on port 0x3D6.  eax and edx are preserved.
intel740:
        cmp     al, [L253]
        jz      L2                      ; bank already selected
        mov     [L253], al
        push    eax
        push    edx

        mov     ah, al                  ; ah = bank
        mov     al, 0xE2
        mov     dx, 0x3D6
        out     dx, al                  ; select register 0xE2
        inc     dx
        in      al, dx                  ; read it
        dec     dx

        and     al, 0xF0
        cmp     al, 0x60
        mov     al, 0x0E                ; mov leaves the cmp flags intact
        jnz     intel2
        shl     ah, 2
intel2:
        out     dx, ax                  ; index 0x0E, data = bank

        pop     edx
        pop     eax
        ret

; permedia_page - bank switch for the Permedia 2.
; In:  al = bank number; skipped when it equals the cached bank in L253.
; Saves the current index of port 0x3CE, selects index 9, replaces the low
; three bits of its data register with (bank & 7), then restores the index.
; ax, bx, dx are preserved.
permedia_page:
        cmp     al, [L253]
        jz      L4                      ; bank already selected
        mov     [L253], al
        push    ax
        push    bx
        push    dx
        mov     bl, 0xF8                ; mask: keep the upper five bits
        mov     bh, al
        and     bh, 7                   ; page 0..7
        mov     dx, 0x3CE
        mov     ah, 9
        in      al, dx                  ; al = previous index
        xchg    ah, al                  ; al = 9, ah = previous index
        out     dx, al
        inc     dx
        in      al, dx
        and     al, bl
        or      al, bh
        out     dx, al
        dec     dx
        xchg    ah, al                  ; al = previous index
        out     dx, al                  ; restore it
        pop     dx
        pop     bx
        pop     ax
        ret

; wd_page / L3 - bank switch for Western Digital / Paradise / Cirrus Logic.
; In:  al = bank number; skipped when it equals the cached bank in L253.
; Writes index 9 / data = (bank << 4) as one word to port 0x3CE.
; dx is preserved; ax is clobbered.
wd_page:
L3:
        cmp     al, [L253]
        jz      L4                      ; bank already selected
        mov     [L253], al
        push    dx
        mov     dx, 0x3CE
        shl     al, 4
        mov     ah, al                  ; ah = bank << 4
        mov     al, 9                   ; al = register index
        out     dx, ax
        pop     dx
L4:
        ret

; L5 - bank switch through ports 0x3C4/0x3C5, index 0x0E.
; In:  al = bank number; skipped when it equals the cached bank in L253.
; The low nibble of the data register is replaced by (bank xor 2).
; edx is preserved; ax is clobbered.
L5:
        cmp     al, [L253]
        jz      L6                      ; bank already selected
        mov     [L253], al
        mov     ah, al
        xor     ah, 2                   ; ah = bank with bit 1 inverted
        push    edx
        mov     dx, 0x3C4
        mov     al, 0x0E
        out     dx, al                  ; select index 0x0E
        inc     dx
        in      al, dx
        and     al, 0xF0                ; keep the high nibble
        or      al, ah
        out     dx, al
        pop     edx
L6:
        ret

; L7 - bank switch through ports 0x3D4/0x3D5.
; In:  al = bank number; skipped when it equals the cached bank in L253.
; Writes 0x48 to index 0x38 and 0xA5 to index 0x39, sets bits 0 and 3 of
; index 0x31, puts the low four bank bits into the low nibble of index 0x35
; and bank bits 4-5 into bits 2-3 of index 0x51.
; cx and dx are preserved; ax is clobbered.
L7:
        cmp     al, [L253]
        jz      L8                      ; bank already selected
        mov     [L253], al
        push    cx
        push    dx
        mov     dx, 0x3D4
        mov     ch, al                  ; ch = full bank number
        and     al, 0x0F
        mov     cl, al                  ; cl = low four bits
        mov     ax, 0x4838
        out     dx, ax
        mov     ax, 0xA539
        out     dx, ax
        mov     al, 0x31
        out     dx, al
        inc     dl
        in      al, dx
        or      al, 0x09
        out     dx, al
        dec     dl
        mov     al, 0x35
        out     dx, al
        inc     dl
        in      al, dx
        and     al, 0xF0
        or      al, cl
        out     dx, al
        dec     dl
        mov     al, 0x51
        out     dx, al
        inc     dl
        in      al, dx
        and     al, 0xF3
        shr     ch, 2
        and     ch, 0x0C
        or      al, ch
        out     dx, al
        pop     dx
        pop     cx
L8:
        ret

; L9 - bank switch through port 0x3D4.
; In:  al = bank number; skipped when it equals the cached bank in L253.
; Writes 0x48 to index 0x38 and 0xA5 to index 0x39, then the bank number to
; index 0x6A.  cx and dx are preserved; ax is clobbered.
L9:
        cmp     al, [L253]
        jz      L10                     ; bank already selected
        mov     [L253], al
        push    cx
        push    dx
        mov     dx, 0x3D4
        mov     cl, al                  ; cl = bank
        mov     ax, 0x4838
        out     dx, ax
        mov     ax, 0xA539
        out     dx, ax
        mov     al, 0x6A
        mov     ah, cl
        out     dx, ax                  ; index 0x6A, data = bank
        pop     dx
        pop     cx
L10:
        ret

; L11 - bank switch through port 0x6002.
; In:  al = bank number; skipped when it equals the cached bank in L253.
; Writes the bank in both bytes of ax as one word to port 0x6002.
; dx is preserved; ah is clobbered.
L11:
        cmp     al, [L253]
        jz      L12                     ; bank already selected
        mov     [L253], al
        push    dx
        mov     ah, al
        mov     dx, 0x6002
        out     dx, ax
        pop     dx
L12:
        ret

; L13 - bank switch through port 0x3CD.
; In:  al = bank number; skipped when it equals the cached bank in L253.
; Writes bank | (bank << 4) to the port.
; edx is preserved; ax is clobbered.
L13:
        cmp     al, [L253]
        jz      L14                     ; bank already selected
        mov     [L253], al
        mov     ah, al
        shl     ah, 4
        or      al, ah                  ; same bank in both nibbles
        push    edx
        mov     dx, 0x3CD
        out     dx, al
        pop     edx
L14:
        ret

; L15 - bank switch through ports 0x1CE/0x1CF, index 0xB2.
; In:  al = bank number; skipped when it equals the cached bank in L253.
; Reads index 0xB2, keeps the bits selected by mask 0xE1, ORs in
; (bank << 1) and writes the result back.  Interrupts are disabled around
; the port sequence.  edx is preserved; ax is clobbered.
L15:
        cmp     al, [L253]
        jz      L16                     ; bank already selected
        mov     [L253], al
        mov     ah, al
        cli
        push    edx
        mov     edx, 0x1CE
        mov     al, 0xB2
        out     dx, al                  ; select index 0xB2
        inc     dl
        in      al, dx
        and     al, 0xE1
        shl     ah, 1
        or      ah, al                  ; ah = new register value
        mov     al, 0xB2
        dec     dl
        out     dx, ax
        pop     edx
        sti
L16:
        ret

; L17 - bank switch through port 0x3CD.
; In:  al = bank number; skipped when it equals the cached bank in L253.
; Writes bank | (bank << 3) | 0x40 to the port.
; edx is preserved; ax is clobbered.
L17:
        cmp     al, [L253]
        jz      L18                     ; bank already selected
        mov     [L253], al
        mov     ah, al
        shl     ah, 3
        or      al, ah
        or      al, 0x40
        push    edx
        mov     dx, 0x3CD
        out     dx, al
        pop     edx
L18:
        ret

; L19 - bank switch through ports 0x3D6/0x3D7, index 0x10.
; In:  al = bank number; skipped when it equals the cached bank in L253.
; The bank is shifted left by _granularity and ORed into the register after
; masking it with 0xC0.  ecx and edx are preserved; ax is clobbered.
L19:
        cmp     al, [L253]
        jz      L20                     ; bank already selected
        mov     [L253], al
        push    ecx
        mov     cl, [_granularity]
        shl     al, cl
        mov     ah, al                  ; ah = scaled bank
        push    edx
        mov     dx, 0x3D6
        mov     al, 0x10
        out     dx, al                  ; select index 0x10
        inc     dx
        in      al, dx
        and     al, 0xC0                ; keep the top two bits
        or      al, ah
        out     dx, al
        pop     edx
        pop     ecx
L20:
        ret

; L21 - BANSHEE bank switch.
; In:  al = bank number; skipped when it equals the cached bank in L253.
; Reads index 0x1C of port 0x3D4 and uses the value as the high byte of an
; I/O address whose low byte is 0x2C.  The dword at that port keeps its top
; 12 bits; (bank * 2) is placed in bits 0-15 and again, shifted left by 10,
; ORed in on top.  eax, ebx and dx are preserved.
L21:
        cmp     al, [L253]
        jz      L23                     ; bank already selected
        mov     [L253], al
        push    eax
        push    ebx
        push    dx
        sub     ebx, ebx
        mov     bl, al
        shl     bx, 1                   ; ebx = bank * 2
        mov     dx, 0x3D4
        mov     al, 0x1C
        out     dx, al                  ; select index 0x1C
        mov     ah, al
        inc     dx
        in      al, dx                  ; al = register contents
        dec     dx
        xchg    al, ah                  ; ah = register contents
        mov     dh, ah                  ; dx = (contents << 8) | 0x2C
        mov     dl, 0x2C
        in      eax, dx
        and     eax, 0xFFF00000
        mov     ax, bx
        shl     ebx, 10
        or      eax, ebx
        out     dx, eax
        pop     dx
        pop     ebx
        pop     eax
L23:
        ret

;nVidia:			cmp		al,[L253]
;				je		L23
;				mov		[L253],al
;				push	dx
;				push	ax
;				mov		ah,al
;				shl	   	ah,1
;				mov		dx,0x3d4
;				mov	   	al,0x1D
;				out	   	dx,ax
;				inc	   	al
;				out	   	dx,ax
;				pop	   	ax
;				pop	   	dx
;				ret

; nVidia - bank switch.
; In:  al = bank number; skipped when it equals the cached bank in L253.
; Step 1: save the index of port 0x3C4, read a word from port 0x3D0 and
;         write 0x5706 either to 0x3C4 (when the word read is 0xFFFF) or to
;         0x3D0 (otherwise), then restore the 0x3C4 index.
; Step 2: save the index of port 0x3D4, write (bank << 1) to index 0x1D and
;         to the following index, then restore the 0x3D4 index.
; ax and dx are preserved.
; Fix: the branch that skips nVidia4 was 'jp' (jump if parity).  It was taken
; only because PF is still set from the equal 'cmp ax,0xffff'; it is now the
; unconditional 'jmp' that the control flow requires.
nVidia:
        cmp     al, [L253]
        je      L23                     ; bank already selected
        mov     [L253], al

        push    dx
        push    ax                      ; save the bank number
        mov     dx, 0x3c4
        in      al, dx
        push    ax                      ; save the 0x3C4 index
        push    dx
        mov     dx, 0x3D0
        in      ax, dx
        cmp     ax, 0xffff
        jne     nVidia4

        pop     dx                      ; dx = 0x3C4
        mov     ax, 0x5706
        out     dx, ax
        pop     ax
        out     dx, al                  ; restore the 0x3C4 index
        pop     ax                      ; ax = bank number
        jmp     nVidia2

nVidia4:
        mov     ax, 0x5706
        out     dx, ax                  ; dx is still 0x3D0 here
        pop     dx                      ; dx = 0x3C4
        pop     ax
        out     dx, al                  ; restore the 0x3C4 index
        pop     ax                      ; ax = bank number

nVidia2:
        push    ax
        mov     ah, al
        shl     ah, 1                   ; ah = bank << 1
        mov     dx, 0x3d4
        in      al, dx
        push    ax                      ; save the 0x3D4 index
        mov     al, 0x1D
        out     dx, ax
        inc     ax                      ; next index, same data
        out     dx, ax
        pop     ax
        out     dx, al                  ; restore the 0x3D4 index
        pop     ax
        pop     dx
        ret

; rendition - bank switch.
; In:  al = bank number; skipped when it equals the cached bank in L253.
; Reads a byte from port 0x3DF and uses it as the high byte of an I/O
; address whose low byte is 0x74; the bank number is written there in bits
; 16-23 of a dword.  eax and dx are preserved.
rendition:
        cmp     al, [L253]
        jz      L208                    ; bank already selected
        mov     [L253], al
        push    eax
        push    dx
        mov     ah, al                  ; park the bank in ah
        mov     dx, 0x3DF
        in      al, dx
        mov     dh, al                  ; dx = (value read << 8) | 0x74
        mov     dl, 0x74
        mov     al, ah
        shl     eax, 16                 ; bank moves to bits 16-23
        out     dx, eax
        pop     dx
        pop     eax
L208:
        ret
				
; __degraduj - step a buffer of per-byte countdown values and merge it with
; a second buffer into an output buffer, one dword (four bytes) at a time.
; Stack arguments (after the four pushes below):
;   [esp+20] = "dynamic" buffer (read and rewritten)
;   [esp+24] = "static" buffer (read only)
;   [esp+28] = output buffer
;   [esp+32] = number of dwords to process
; For every byte of a non-zero dynamic dword: the low nibble is a counter and
; the high nibble is kept as an id.  A zero counter clears the byte; otherwise
; the counter is decremented and recombined with the id, and the result is
; copied to the output byte when the static byte is zero.  Output bytes
; otherwise come from the static buffer.
; edi, esi, ecx, ebx are preserved; eax and edx are clobbered.
; A count of zero is not checked before the first iteration.
__degraduj:		push	edi
				push 	esi
				push 	ecx
				push 	ebx
				mov		edi,[esp+20] ; dynamic buffer
				mov		esi,[esp+24] ; static buffer
				mov		ecx,[esp+28] ; output buffer
				mov		ebx,[esp+32] ; dword count

degraduj2:		mov		eax,[edi]
				mov		edx,[esi]
				push	ebx			; park the loop counter, ebx becomes the id mask
				mov		ebx,eax
				and		ebx,0xF0F0F0F0	; keep the id (high) nibbles
				test	eax,eax	 	; anything dynamic in this dword?
				jz		nieje2			; no: store the static dword unchanged
				and		al,0x0F		; byte 0: counter non-zero?
				jz		byte2		; no: keep dl (al is now 0)
				dec		al
				or		al,bl
				or		dl,dl
				jnz		byte2		; static byte present: it wins
				mov		dl,al
byte2:			and		ah,0x0F		; byte 1: counter non-zero?
				jz		byte3
				dec		ah
				or		ah,bh
				or		dh,dh
				jnz		byte3
				mov		dh,ah
byte3:			rol		eax,16		; bring bytes 2 and 3 into the low word
				rol		edx,16
				rol		ebx,16
				and		al,0x0F		; byte 2: counter non-zero?
				jz		byte4
				dec		al
				or		al,bl
				or		dl,dl
				jnz		byte4
				mov		dl,al
byte4:			and		ah,0x0F		; byte 3: counter non-zero?
				jz		byte5
				dec		ah
				or		ah,bh
				or		dh,dh
				jnz		byte5
				mov		dh,ah
byte5:			rol		edx,16		; restore the original byte order
				rol		eax,16
nieje2:			mov		[ecx],edx		; write the merged dword
				mov		[edi],eax		; write back the stepped counters
				add		edi,4
				add		esi,4
				add		ecx,4
				pop		ebx			; loop counter back
				dec		ebx
				jnz		near degraduj2
				pop		ebx
				pop		ecx
				pop		esi
				pop		edi
				ret

section	.data
; Single-dword code pointers, each initialised to a default routine.
; NOTE(review): presumably re-pointed at run time when the mode or raster
; operation changes - the writers are outside this part of the file.
DrawPointVector	DD		pset	; default: pset
L243			DD		tset	; default: tset
fill			dd		fset	; default: fset
MemPixel		dd		lpset	; default: lpset
lMemPixel		dd		lpset	; default: lpset (same routine as MemPixel)

; L246 - nine-entry table of code addresses (the original comment calls it
; the "ppop table").  NOTE(review): presumably indexed by the raster-op
; code held in _ppop - confirm at the use site.
L246			DD		tset ; slot 0: set
				DD		txor ; slot 1: xor
				DD		tand ; slot 2: and
				DD		tor ; slot 3: or
				dd		tset ; slots 4-7 all point at tset
				dd		tset
				dd		tset
				dd		tset
				dd		ttransp ; slot 8: transparent variant

; L246l - same nine-slot layout as L246, but pointing at the "l"-prefixed
; routines.  NOTE(review): the prefix presumably means the linear-frame-
; buffer variants - confirm.
L246l			DD		ltset ; slot 0: set
				DD		ltxor ; slot 1: xor
				DD		ltand ; slot 2: and
				DD		ltor ; slot 3: or
				dd		ltset ; slots 4-7 all point at ltset
				dd		ltset
				dd		ltset
				dd		ltset
				dd		lttransp ; slot 8: transparent variant

; rtable - nine-entry table of code addresses for the "r"-prefixed
; routines.  NOTE(review): slot order mirrors the other raster-op tables;
; which transfer path uses it is not visible here.
rtable			DD		rset	; slot 0
				DD		rxorr	; slot 1
				DD		randd	; slot 2
				DD		rorr	; slot 3
				dd		rpluss	; slot 4
				dd		rminuss	; slot 5
				dd		rnott	; slot 6
				dd		rset	; slot 7: same routine as slot 0
				dd		rtransp	; slot 8

; Zero-initialised byte storage.  L247..L251 are 4 bytes each; L252 is
; 28 bytes (8+8+8+4).  The code that uses them is outside this chunk.
L247			DB		00H,00H,00H,00H
L248			DB		00H,00H,00H,00H
L249			DB		00H,00H,00H,00H
L250			DB		00H,00H,00H,00H
L251			DB		00H,00H,00H,00H
L252			DB		00H,00H,00H,00H,00H,00H,00H,00H
				DB		00H,00H,00H,00H,00H,00H,00H,00H
				DB		00H,00H,00H,00H,00H,00H,00H,00H
				DB		00H,00H,00H,00H

; LoadColor      - single code pointer, initialised to LC8.
; LoadColorTable - four candidate routines (LC8, LC16, LC32, LC32).
; NOTE(review): the names suggest one routine per pixel size, with the
; last two slots sharing the 32-bit one - confirm the index used.
LoadColor		dd		LC8
LoadColorTable  dd		LC8, LC16, LC32, LC32

; L253 - four bytes; the first one starts as 0xFF.  The rendition routine
; compares al with this byte and stores the new value here, so it acts as
; a "last value written" cache for that handler.
L253			DB		0ffH,00H,00H,00H
minusone		dd		-1,-1		; 8 bytes, all bits set

; Zero-initialised variables; their users are outside this chunk.
mmxtmp			dd		0,0			; 8 bytes
vmode			dd		0
oldx			dd		0
oldy			dd		0
_ds_alias		dw		0,0			; two 16-bit words
temp			dd		0

; set_bank_proc - single code pointer, initialised to the routine "dummy".
; L257          - 18-entry table of code addresses (slots 0..17).
; NOTE(review): the entry names (wd_page, matrox_page, nVidia, rendition,
; ...) suggest one bank-switch handler per video driver, presumably
; selected by a driver number and copied into set_bank_proc - confirm.
set_bank_proc	DD		dummy

L257			DD		L1		; slot 0
				DD		L3		; slot 1
				DD		L5		; slot 2
				DD		L7		; slot 3
				DD		L9		; slot 4
				DD		L11		; slot 5
				DD		L13		; slot 6
				DD		L15		; slot 7
				DD		L17		; slot 8
				DD		L19		; slot 9
				dd		wd_page	; slot 10
				dd		L21		; slot 11
				dd		nVidia	; slot 12
				dd		matrox_page	; slot 13
				dd		permedia_page	; slot 14
				dd		intel740	; slot 15
				dd		L1; slot 16: intel810 (same handler as slot 0)
				dd		rendition	; slot 17

; L245 - nine-entry table of code addresses for the "p"-prefixed pixel
; routines.  NOTE(review): presumably indexed by the raster-op code -
; confirm at the use site.
L245			DD		pset	; slot 0
				DD		pxorr	; slot 1
				DD		pandd	; slot 2
				DD		porr	; slot 3
				dd		ppluss	; slot 4
				dd		pminuss	; slot 5
				dd		pnott	; slot 6
				dd		preplace	; slot 7
				dd		pset	; slot 8: same routine as slot 0

;; Tables for draw_point (original note: "[es:ebp = al]").
;;
;; tlL245 holds four pointers to the nine-slot tables that follow; the
;; last two slots both select lL24532.  NOTE(review): the 16/32 suffixes
;; suggest one table per pixel size - confirm the index used.
;; Each nine-slot table is laid out like L245 (set, xor, and, or, plus,
;; minus, not, slot 7, slot 8).
;;
;; Cleanup only, emitted data is unchanged: the stray comma after the
;; last tlL245 operand is gone and the stand-alone labels now end in ':'
;; so the assembler no longer reports them as orphan labels.

tlL245:			dd		lL245,lL24516,lL24532,lL24532
lL245			DD		lpset
				DD		lpxorr
				DD		lpandd
				DD		lporr
				dd		lppluss
				dd		lpminuss
				dd		lpnott
				dd		lpreplace
				dd		lpset
lL24516:
     			DD		lpset16
				DD		lpxorr16
				DD		lpandd16
				DD		lporr16
				dd		lppluss16
				dd		lpminuss16
				dd		lpnott16
				dd		lpset16		; slot 7 points at the plain set routine
				dd		lpset16
lL24532:
				DD		lpset32
				DD		lpxorr32
				DD		lpandd32
				DD		lporr32
				dd		lppluss32
				dd		lpminuss32
				dd		lpnott32
				dd		lpset32		; slot 7 points at the plain set routine
				dd		lpset32
; Fill dispatch tables (original note: "banked fill").
; Each table has nine code addresses in the same slot order as the pixel
; tables: set, xor, and, or, plus, minus, not, slot 7, slot 8.
; The line below is a disabled table with the same contents as lftable.
;tftable			dd		ftable,	ftable16, ftable32, ftable32
ftable			DD		fset
				DD		fxor
				DD		fand
				DD		for
				dd		fplus
				dd		fminus
				dd		fnot
				dd		freplace
				dd		fset

; 16-bit variant: slots 4, 5, 7 and 8 all point at fset16.
ftable16		DD		fset16
				DD		fxor16
				DD		fand16
				DD		for16
				dd		fset16
				dd		fset16
				dd		fnot16
				dd		fset16
				dd		fset16

; 32-bit variant: slots 7 and 8 point at fset32.
ftable32		DD		fset32
				DD		fxor32
				DD		fand32
				DD		for32
				dd		fplus32
				dd		fminus32
				dd		fnot32
				dd		fset32
				dd		fset32

; lftable - four pointers to the tables above (the last two share ftable32).
; lfill   - single pointer, initialised to ftable (a table, not a routine).
; NOTE(review): presumably lfill is re-pointed from lftable when the pixel
; size is chosen - confirm at the use site.
lftable			dd		ftable, ftable16, ftable32, ftable32
lfill			dd		ftable

; GetPointVector      - single code pointer, initialised to GetPoint8.
; GetPointVectorTable - four candidate routines; the last two slots share
;                       GetPoint32.  NOTE(review): presumably one entry per
;                       pixel size, copied into GetPointVector - confirm.
; The table label now ends in ':' so the assembler no longer reports it
; as an orphan label; the emitted data is unchanged.
GetPointVector	dd		GetPoint8
GetPointVectorTable:
				dd		GetPoint8
				dd		GetPoint16
				dd		GetPoint32
				dd		GetPoint32

				END

