#
# x86 surface clear routines for HERMES
# Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at)
# This source code is licensed under the GNU LGPL
# 
# Please refer to the file COPYING.LIB contained in the distribution for
# licensing conditions
#

#BITS 32


.globl _ClearX86_32
.globl _ClearX86_24
.globl _ClearX86_16
.globl _ClearX86_8

.text

##   
## --------------------------------------------------------------------------
## HermesClearInterface (ebp+..)
##   0: char8 *dest
##   4: int32 value
##   8: unsigned int width (already checked to be >0!)
##  12: unsigned int height (already checked to be >0!)
##  16: int add


## --------------------------------------------------------------------------
## _ClearX86_32 - fill a rectangle with one 32 bit pixel value
##
## In:    first stack argument = pointer to a HermesClearInterface block
##          0: dest, 4: value, 8: width in pixels, 12: height in lines,
##         16: add (bytes added to dest after each line)
## Out:   nothing
## Clobb: eax, ecx, edx, flags.  edi and ebp are saved and restored.
## Note:  width and height must be non-zero (a zero height would wrap the
##        line counter).  stosl stores upwards, so the direction flag has
##        to be clear on entry, as the C calling conventions guarantee.
## --------------------------------------------------------------------------
_ClearX86_32:
        pushl %ebp
        movl %esp,%ebp
        pushl %edi              # callee-saved, but needed by stosl

        movl 8(%ebp),%ebp       # ebp = interface block from here on

        movl 4(%ebp),%eax       # eax = pixel value
        movl (%ebp),%edi        # edi = write pointer
        movl 12(%ebp),%edx      # edx = lines left

_ClearX86_32.L_y:
        movl 8(%ebp),%ecx       # ecx = pixels per line
        rep
        stosl                   # one dword per pixel

        addl 16(%ebp),%edi      # skip to the start of the next line

        decl %edx
        jnz _ClearX86_32.L_y

        popl %edi
        popl %ebp
        ret



## _ClearX86_24 - placeholder for the 24 bit clear routine.
## Returns immediately: the argument is not read and no memory is written,
## so nothing is cleared.
## NOTE(review): this looks unimplemented rather than deliberately empty -
## confirm that no caller relies on it for 24 bit surfaces.
_ClearX86_24: 
        ret



## --------------------------------------------------------------------------
## _ClearX86_16 - fill a rectangle with one 16 bit pixel value
##
## In:    first stack argument = pointer to a HermesClearInterface block
##          0: dest, 4: value (low 16 bits are used), 8: width in pixels,
##         12: height in lines, 16: add (bytes added to dest after each line)
## Out:   nothing
## Clobb: eax, ecx, edx, flags.  ebx, edi and ebp are saved and restored.
## Note:  width and height must be non-zero.  stosl stores upwards, so the
##        direction flag has to be clear on entry, as the C calling
##        conventions guarantee.
##
## Each line is written as: at most one leading pixel to reach a dword
## boundary, then pixel pairs with stosl, then at most one trailing pixel.
## The leading pixel only aligns the pointer when dest is 2-byte aligned;
## other addresses still produce correct output, just with unaligned stores.
## --------------------------------------------------------------------------
_ClearX86_16:
        pushl %ebp
        movl %esp,%ebp
        pushl %ebx              # callee-saved, used as scratch below
        pushl %edi              # callee-saved, but needed by stosl

        movl 8(%ebp),%ebp       # ebp = interface block from here on

        movl 4(%ebp),%eax       # eax = pixel value
        movl (%ebp),%edi        # edi = write pointer
        movl 12(%ebp),%edx      # edx = lines left

        movl %eax,%ebx          # Duplicate the pixel into both halves of eax
        shll $16,%eax
        andl $0x0ffff,%ebx
        orl %ebx,%eax

_ClearX86_16.L_y:
        movl 8(%ebp),%ecx       # ecx = pixels left on this line

        testl $3,%edi           # already on a dword boundary?
        jz _ClearX86_16.L_aligned

        movw %ax,(%edi)         # no: write one pixel on its own
        addl $2,%edi

        decl %ecx
        jz _ClearX86_16.L_endline

_ClearX86_16.L_aligned:
        movl %ecx,%ebx          # keep the count, bit 0 = odd pixel left over
        shrl $1,%ecx            # ecx = number of pixel pairs

        jz _ClearX86_16.L_last

        rep
        stosl                   # two pixels per store

_ClearX86_16.L_last:
        testb $1,%bl
        jz _ClearX86_16.L_endline

        movw %ax,(%edi)         # trailing odd pixel
        addl $2,%edi

_ClearX86_16.L_endline:
        addl 16(%ebp),%edi      # skip to the start of the next line

        decl %edx
        jnz _ClearX86_16.L_y

        popl %edi
        popl %ebx
        popl %ebp
        ret



## --------------------------------------------------------------------------
## _ClearX86_8 - fill a rectangle with one 8 bit pixel value
##
## In:    first stack argument = pointer to a HermesClearInterface block
##          0: dest, 4: value (low byte is used), 8: width in pixels,
##         12: height in lines, 16: add (bytes added to dest after each line)
## Out:   nothing
## Clobb: eax, ecx, edx, flags.  ebx, edi and ebp are saved and restored.
## Note:  width and height must be non-zero.  stosl stores upwards, so the
##        direction flag has to be clear on entry, as the C calling
##        conventions guarantee.
##
## Each line is written as: up to three leading bytes to reach a dword
## boundary, then groups of four pixels with stosl, then up to three
## trailing bytes.
## --------------------------------------------------------------------------
_ClearX86_8:
        pushl %ebp
        movl %esp,%ebp
        pushl %ebx              # callee-saved, used as scratch below
        pushl %edi              # callee-saved, but needed by stosl

        movl 8(%ebp),%ebp       # ebp = interface block from here on

        movzbl 4(%ebp),%eax     # eax = low byte of the pixel value
        imull $0x01010101,%eax  # copy that byte into all four bytes of eax

        movl (%ebp),%edi        # edi = write pointer
        movl 12(%ebp),%edx      # edx = lines left

_ClearX86_8.L_y:
        movl 8(%ebp),%ecx       # ecx = pixels left on this line

        movl %edi,%ebx
        negl %ebx
        andl $3,%ebx            # ebx = bytes up to the next dword boundary
        jz _ClearX86_8.L_aligned

_ClearX86_8.L_head:
        movb %al,(%edi)         # leading bytes, one at a time
        incl %edi

        decl %ecx
        jz _ClearX86_8.L_endline        # line ended before the boundary

        decl %ebx
        jnz _ClearX86_8.L_head

_ClearX86_8.L_aligned:
        movl %ecx,%ebx          # keep the count for the trailing bytes

        shrl $2,%ecx            # ecx = number of four pixel groups
        jz _ClearX86_8.L_last

        rep
        stosl                   # four pixels per store

_ClearX86_8.L_last:
        movl %ebx,%ecx

        andl $3,%ecx            # ecx = pixels still to write (0..3)
        jz _ClearX86_8.L_endline

_ClearX86_8.L_cleanup:
        movb %al,(%edi)         # trailing bytes, one at a time
        incl %edi

        decl %ecx
        jnz _ClearX86_8.L_cleanup

_ClearX86_8.L_endline:
        addl 16(%ebp),%edi      # skip to the start of the next line

        decl %edx
        jnz _ClearX86_8.L_y

        popl %edi
        popl %ebx
        popl %ebp
        ret


