;ASM file accompanying the ugraf unit
;Implements 386-optimized routines
;See ugraf.pas for more details

.model tpascal                                          ;Turbo Pascal memory model
.386                                                    ;386 instructions are used below (eax, movsd, stosd, lodsd)
.code

;Procedures exported by this file
public move
public fillchar
public filldouble
public move8bitse
public move8bitsl
public move16bitse
public move16bitsl
public move24bitse
public move24bitsl
public move32bits

;-----------------------------------------------------------------------
; move - copy "taille" bytes from source to dest, four bytes at a time
;
; In:       source = far pointer to the first byte to read
;           dest   = far pointer to the first byte to write
;           taille = number of bytes to copy
; Modifies: cx, dx, si, di, es, flags (ds is saved and restored)
;
; The copy always runs upward (cld); nothing special is done when the
; two areas overlap.
;
; Speed comparison with the system's routine (on a 486), as measured by
; the original author. Time of execution (in ticks):
;   system : 31+9*number
;   here   : 46+9*(number div 4) + 7*(number mod 4)
; For number=32768, execution time:
;   system : 294943
;   here   :  73774
; Note: data should be aligned for best performance (this is the case
; for bitmaps created with create_bitmap, but it may not be the case
; for TP variables that are not dynamically allocated).
;-----------------------------------------------------------------------
move proc far
arg  source : dword, dest : dword, taille : word

        cld                             ; string ops count upward
        push    ds
        les     di, dest                ; es:di -> destination
        lds     si, source              ; ds:si -> source
        mov     dx, taille              ; dx keeps the full byte count
        mov     cx, dx
        shr     cx, 2                   ; cx = number of whole dwords
        rep     movsd                   ; does nothing when cx = 0

        test    dl, 2                   ; bit 1 set: one word left
        jz      move_no_word
        movsw

move_no_word:
        test    dl, 1                   ; bit 0 set: one byte left
        jz      move_exit
        movsb

move_exit:
        pop     ds
        ret
move    endp

;-----------------------------------------------------------------------
; fillchar - fill "taille" bytes at dest with the byte "value"
;
; In:       dest   = far pointer to the first byte to write
;           taille = number of bytes to fill
;           value  = byte to store
; Modifies: eax, cx, dx, di, es, flags
;
; The byte is replicated into all four bytes of eax so that most of the
; area can be written one dword at a time; a trailing word and/or byte
; finish the job.
;-----------------------------------------------------------------------
fillchar proc far
arg      dest : dword, taille : word, value : byte

        cld                             ; string ops count upward
        les     di, dest                ; es:di -> destination

        mov     al, value
        mov     ah, al                  ; ax  = value:value
        mov     dx, ax
        shl     eax, 16                 ; move the pair to the high word
        mov     ax, dx                  ; eax = value in all four bytes

        mov     dx, taille              ; dx keeps the full byte count
        mov     cx, dx
        shr     cx, 2                   ; cx = number of whole dwords
        rep     stosd                   ; does nothing when cx = 0

        test    dl, 2                   ; bit 1 set: one word left
        jz      fillchar_no_word
        stosw

fillchar_no_word:
        test    dl, 1                   ; bit 0 set: one byte left
        jz      fillchar_exit
        stosb

fillchar_exit:
        ret
fillchar endp


;-----------------------------------------------------------------------
; filldouble - store the dword "value" "taille" times starting at dest
;
; In:       dest   = far pointer to the first dword to write
;           taille = number of dwords to store (not a byte count)
;           value  = dword to store
; Modifies: eax, cx, di, es, flags
;-----------------------------------------------------------------------
filldouble proc far
arg       dest : dword, taille : word, value : dword

        cld                             ; string ops count upward
        mov     cx, taille              ; cx = dword count
        mov     eax, value
        les     di, dest                ; es:di -> destination
        rep     stosd                   ; does nothing when cx = 0
        ret
filldouble endp

;Note: from here on, the routines expect "number" to be the total number of bytes to move, not the number of units to move
;-----------------------------------------------------------------------
; move8bitse - copy the low byte of each source dword to dest
;
; In:       source = far pointer to the dwords to read
;           dest   = far pointer to the bytes to write
;           number = number of bytes to write (one per source dword)
; Modifies: eax, cx, si, di, es, flags (ds is saved and restored)
;
; Reads 4 bytes and writes 1 byte per iteration. Does nothing when
; number is 0.
;-----------------------------------------------------------------------
move8bitse proc far
arg        source : dword, dest : dword, number : word

        cld                             ; string ops count upward
        push    ds
        les     di, dest                ; es:di -> destination
        lds     si, source              ; ds:si -> source
        mov     cx, number
        jcxz    mv8e_exit               ; nothing to do

mv8e_next:
        lodsd                           ; eax = next source dword
        stosb                           ; keep only its low byte
        dec     cx
        jnz     mv8e_next

mv8e_exit:
        pop     ds
        ret
move8bitse endp

;-----------------------------------------------------------------------
; move8bitsl - expand each source byte to a zero-extended dword in dest
;
; In:       source = far pointer to the bytes to read
;           dest   = far pointer to the dwords to write
;           number = number of bytes to read (one dword written per byte)
; Modifies: eax, cx, si, di, es, flags (ds is saved and restored)
;
; Reads 1 byte and writes 4 bytes per iteration. Does nothing when
; number is 0.
;-----------------------------------------------------------------------
move8bitsl proc far
arg        source : dword, dest : dword, number : word

        cld                             ; string ops count upward
        push    ds
        les     di, dest                ; es:di -> destination
        lds     si, source              ; ds:si -> source
        mov     cx, number
        jcxz    mv8l_exit               ; nothing to do
        xor     eax, eax                ; lodsb only replaces al, so the
                                        ; upper 24 bits stay zero

mv8l_next:
        lodsb                           ; al = next source byte
        stosd                           ; store it zero-extended
        dec     cx
        jnz     mv8l_next

mv8l_exit:
        pop     ds
        ret
move8bitsl endp


;-----------------------------------------------------------------------
; move16bitse - copy the low word of each source dword to dest
;
; In:       source = far pointer to the dwords to read
;           dest   = far pointer to the words to write
;           number = number of bytes to write (two per source dword);
;                    an odd trailing byte is ignored
; Modifies: eax, cx, si, di, es, flags (ds is saved and restored)
;
; Reads 4 bytes and writes 2 bytes per iteration.
;-----------------------------------------------------------------------
move16bitse proc far
arg        source : dword, dest : dword, number : word

        cld                             ; string ops count upward
        push    ds
        les     di, dest                ; es:di -> destination
        lds     si, source              ; ds:si -> source
        mov     cx, number
        shr     cx, 1                   ; bytes -> words; sets ZF if none
        jz      mv16e_exit

mv16e_next:
        lodsd                           ; eax = next source dword
        stosw                           ; keep only its low word
        dec     cx
        jnz     mv16e_next

mv16e_exit:
        pop     ds
        ret
move16bitse endp

;-----------------------------------------------------------------------
; move16bitsl - expand each source word to a zero-extended dword in dest
;
; In:       source = far pointer to the words to read
;           dest   = far pointer to the dwords to write
;           number = number of bytes to read (one dword written per two
;                    bytes); an odd trailing byte is ignored
; Modifies: eax, cx, si, di, es, flags (ds is saved and restored)
;
; Reads 2 bytes and writes 4 bytes per iteration.
;-----------------------------------------------------------------------
move16bitsl proc far
arg        source : dword, dest : dword, number : word

        cld                             ; string ops count upward
        push    ds
        les     di, dest                ; es:di -> destination
        lds     si, source              ; ds:si -> source
        mov     cx, number
        shr     cx, 1                   ; bytes -> words; sets ZF if none
        jz      mv16l_exit
        xor     eax, eax                ; lodsw only replaces ax, so the
                                        ; upper 16 bits stay zero

mv16l_next:
        lodsw                           ; ax = next source word
        stosd                           ; store it zero-extended
        dec     cx
        jnz     mv16l_next

mv16l_exit:
        pop     ds
        ret
move16bitsl endp


;-----------------------------------------------------------------------
; move24bitse - copy the low three bytes of each source dword to dest
;
; In:       source = far pointer to the dwords to read
;           dest   = far pointer to the packed 3-byte units to write
;           number = number of bytes to write (three per source dword);
;                    the remainder of number / 3 is ignored
; Modifies: eax, bx, cx, dx, si, di, es, flags (ds is saved and restored)
;
; Reads 4 bytes and writes 3 bytes per iteration.
;-----------------------------------------------------------------------
move24bitse proc far
arg        source : dword, dest : dword, number : word

        cld                             ; string ops count upward
        push    ds
        les     di, dest                ; es:di -> destination
        lds     si, source              ; ds:si -> source

        mov     ax, number
        xor     dx, dx                  ; dx:ax = number, ready for div
        mov     bx, 3
        div     bx                      ; ax = number / 3
        mov     cx, ax                  ; cx = number of 3-byte units
        jcxz    mv24e_exit              ; nothing to do

mv24e_next:
        lodsd                           ; eax = next source dword
        stosw                           ; bytes 0 and 1
        shr     eax, 16                 ; bring byte 2 down into al
        stosb                           ; byte 2
        dec     cx
        jnz     mv24e_next

mv24e_exit:
        pop     ds
        ret
move24bitse endp

;-----------------------------------------------------------------------
; move24bitsl - expand each packed 3-byte source unit to a dword in dest
;
; In:       source = far pointer to the packed 3-byte units to read
;           dest   = far pointer to the dwords to write
;           number = number of bytes to read (one dword written per
;                    three bytes); the remainder of number / 3 is ignored
; Modifies: ax, cx, dx, si, di, es, flags (ds is saved and restored)
;
; Reads 3 bytes and writes 4 bytes per iteration. The fourth byte of
; every destination dword is written as zero, so the result is
; zero-extended exactly like move8bitsl and move16bitsl do. (The pad
; byte used to be skipped with "inc di", which left whatever was in the
; destination buffer in the top byte of each dword.)
;-----------------------------------------------------------------------
move24bitsl proc far
arg        source : dword, dest : dword, number : word

           push ds
           cld                                  ;string ops count upward
           lds  si, source                      ;ds:si -> source
           les  di, dest                        ;es:di -> destination
           mov  ax, number
           xor  dx, dx                          ;dx:ax = number, ready for div
           mov  cx, 3
           div  cx                              ;ax = number / 3
           mov  cx, ax                          ;cx = number of 3-byte units
           or   cx, cx
           jz   fin_mv24l
           xor  al, al                          ;al = 0, the pad byte (movs does not touch it)

loopmv24l:
           movsw                                ;bytes 0 and 1
           movsb                                ;byte 2
           stosb                                ;byte 3 = 0
           dec  cx
           jnz  loopmv24l

fin_mv24l:
           pop  ds
           ret
move24bitsl endp


;-----------------------------------------------------------------------
; move32bits - copy whole dwords from source to dest
;
; In:       source = far pointer to the dwords to read
;           dest   = far pointer to the dwords to write
;           number = number of bytes to copy; the remainder of
;                    number / 4 is ignored
; Modifies: cx, si, di, es, flags (ds is saved and restored)
;-----------------------------------------------------------------------
move32bits proc far
arg        source : dword, dest : dword, number : word

        cld                             ; string ops count upward
        push    ds
        les     di, dest                ; es:di -> destination
        lds     si, source              ; ds:si -> source
        mov     cx, number
        shr     cx, 2                   ; bytes -> dwords
        rep     movsd                   ; does nothing when cx = 0
        pop     ds
        ret
move32bits endp

code ends
     end