; MoDeX HorizontalLine optimization by hULK

; This is the HorizontalLine routine for the 320x400 MoDeX
; The difference of this routine is that it uses a trick to speed up
; It writes to the video memory in a way u can't write to your system memory
; How can it be ?
; Now think of clearing the video memory
; What do we do for clearing ? sumthing like this :

;       mov     ax,0A000h
;       mov     es,ax
;       xor     di,di
;       mov     dx,3C4h
;       mov     ax,0f02h        ; select all planes
;       out     dx,ax
;       xor     eax,eax
;       mov     ecx,320*25      ; 8000 times
;       cld
;       rep     stosd           ; 16 pixels / stosd

; Hey, we cleared all 128000 pixels of the screen with only 8000 stosd
; But 8000 doublewords are only 32000 bytes of address space, not 128000
; 128000 pixels / 8000 stosd = 16 pixels per stosd (4 bytes x 4 planes)
; We wrote 16 pixels with each stosd instruction !!!!!!!!
; If we write a routine that takes the advantage of "SELECTING ALL PLANES"
; It would be very fast . It is not over yet !
; The next problem is the "OUT" instruction which is so slow under
; some environments like V86 or even PMode (low privilege)
; Then we must avoid using it much
; The second aim of this routine to minimize the number of "OUT"s executed
; If u use 2 screens in this mode , this will give a very hi performance
; In fact the solutions for these 2 problems are the same

; I wanted to optimize it for the p*ntium processor although i dont have one
; I dont know if i did it right , Anyway it works just fine ( fast! )
; Routine somehow seems long but not all the parts work for each call
; ( Tylisha will u size-optimize this , please ! )
; If u make any improvements to this routine , please lemme know !
; hulk@compclub.itu.edu.tr      or      ee93064@students.itu.edu.tr
; these wont be valid for so long , i am about to graduate

.386                                    ; YESSSSSSSSSSSS !!!!!
        assume  cs:CSEG,ds:CSEG,ss:STAC
;------------------------------------------------------------------------------
CSEG segment dword public use16 'CODE'

        public  main
main    proc    near

; Demo driver: switch to planar 320x400, fill video memory, then draw two
; groups of horizontal lines with HLine, waiting for a key before and after.
; The variables x1, x2 and COLOR live in this code segment, so DS is pointed
; at CSEG before anything reads them.

DEMO_VRAM_SEG   equ     0A000h          ; VGA graphics memory segment
DEMO_SEQ_PORT   equ     3C4h            ; Sequencer index port
DEMO_ALL_PLANES equ     0F02h           ; index 02h (Map Mask) = 0Fh
DEMO_FILL_DWORD equ     7d7d7d7dh       ; background value, one byte per pixel
DEMO_FILL_COUNT equ     16000           ; dwords written per plane by the fill
DEMO1_ROWS      equ     80              ; lines drawn by the first loop
DEMO2_FIRST_Y   equ     322             ; first scan line of the second loop
DEMO2_END_Y     equ     400             ; second loop stops when y reaches this

;Setup-----------------------------------------------------------------------

        mov     ax,CSEG
        mov     ds,ax                   ; DS -> x1 / x2 / COLOR

        call    set320x400

;Fill the video memory-------------------------------------------------------

        mov     ax,DEMO_VRAM_SEG
        mov     es,ax                   ; ES stays here for every HLine call
        mov     dx,DEMO_SEQ_PORT
        mov     ax,DEMO_ALL_PLANES
        out     dx,ax                   ; every store now hits all 4 planes
        cld                             ; string stores go upwards from here on
        xor     di,di
        mov     eax,DEMO_FILL_DWORD
        mov     ecx,DEMO_FILL_COUNT     ; use16 segment: REP counts with CX only
        rep     stosd

        xor     ax,ax                   ; AH = 0 : BIOS "read key"
        int     16h

;First figure: 80 lines, every 4th scan line, both ends moving right----------

        mov     ax,x1                   ; AX = running start column
        mov     dx,x2                   ; DX = running end column
        xor     di,di                   ; DI = running scan line
        mov     cx,DEMO1_ROWS

main_figure1_row:

        push    cx                      ; keep the row counter
        mov     cx,ax                   ; HLine wants the start column in CX

        pushad                          ; HLine destroys its inputs
        call    HLine
        popad

        add     ax,1                    ; start column moves 1 right
        add     dx,3                    ; end column moves 3 right
        add     di,4                    ; skip down 4 scan lines

        pop     cx
        dec     cx
        jnz     main_figure1_row

;Second figure: one line per scan line from 322 to 399, ends closing in-------

        mov     cx,0                    ; start column
        mov     dx,320                  ; end column
        mov     di,DEMO2_FIRST_Y        ; first scan line (below the end value)

main_figure2_row:

        pushad
        call    HLine
        popad

        add     cx,1
        sub     dx,1
        add     di,1

        cmp     di,DEMO2_END_Y
        jne     main_figure2_row

;End-------------------------------------------------------------------------

Exit2Dos:

        xor     ax,ax                   ; Wait for a key
        int     16h
        mov     ax,3                    ; BIOS set video mode 3
        int     10h

        mov     ax,4C00h ; Call DOS
        int     21h     ; to terminate

x1              dw      80      ; Start position for the first loop
x2              dw      81      ; End position
COLOR           dw      25

main    endp

;HLine  begins ----------------------------------------------------------------
;Input :        CX:x1   DX:x2   DI:y    (let it be EDI)

; HLine - draw a horizontal run of pixels in planar 320x400 video memory.
;
; In:   CX = x1, column of the first pixel
;       DX = x2, column just past the last pixel (length = x2 - x1)
;       DI = y, scan line (only the low 16 bits of EDI matter)
;       ES = segment of the video page to draw into
;       DS = segment that holds COLOR (its low byte is the pixel value)
;       DF = 0, because the body is written with STOS
; Out:  nothing
; Clobbers: EAX, BX, CX, DX, EDI, flags.  The sequencer Map Mask is left
;       at whatever the last OUT of the call selected.
;
; A run with x2 <= x1 draws nothing.  No clipping is done.
;
; Strategy: at most three OUTs per line.  The ragged head (up to the next
; 4-pixel boundary) and the ragged tail are each written with one masked
; byte store; everything in between is written with all four planes
; enabled, so one byte = 4 pixels, one word = 8, one dword = 16.

HLine   proc      near

        lea     edi,[edi+edi*4]         ; EDI = y * 5
        mov     bx,cx                   ; BX = x1
        shl     di,4                    ; DI = y * 80 (bytes per scan line)
        shr     bx,2                    ; BX = x1 / 4, byte offset in the line
        sub     dx,cx                   ; DX = length = x2 - x1
        add     di,bx                   ; ES:DI -> byte holding the first pixel
        cmp     dx,3                    ; Length < 3 ?
        jl      HLine_less_than_3       ; Signed: negative lengths go there too

        neg     cx                      ; CX = (-x1) and 3 = pixels needed
        and     cx,11b                  ;   to reach the next 4-pixel boundary
        jz      HLine_16lar_no3         ; x1 % 4 = 0 : no ragged head

        mov     ax,1111000000000010b    ; AH = F0h, AL = 02h (Map Mask index)
        sub     dx,cx                   ; DX = pixels left after the head
        shr     ah,cl                   ; Slide 1-bits into the top CL planes
        mov     cx,dx                   ; Keep the remaining count in CX
        and     ah,00001111b            ; Only low-4 bits are plane enables

        mov     dx,3c4h                 ; Sequencer index port
        mov     bx,COLOR                ; BL = COLOR
        out     dx,ax                   ; Enable just the head planes
        mov     es:[di],bl              ; Store the head pixels
        inc     di                      ; Next group of 4 pixels
        test    cx,cx                   ; No more left ?
        jne     HLine_16lar_continue    ; Still some pixels
        ret

HLine_16lar_no3:
        mov     cx,dx                   ; CX = pixels to draw (whole length)
        mov     bx,COLOR                ; BX = COLOR
        mov     dx,3c4h                 ; DX = sequencer index port

HLine_16lar_continue:
        mov     ax,0f02h                ; Map Mask = all planes
        mov     bh,bl                   ; BX = COLOR in both bytes
        out     dx,ax                   ; Select them now !
        mov     ax,bx                   ; AX = COLOR:COLOR
        mov     dx,cx                   ; DX = remaining pixels (for the tail)
        shl     eax,16                  ; Move it to the upper half of EAX
        shr     cx,3                    ; CF = bit 2 of the count (a 4-pixel byte)
        mov     ax,bx                   ; EAX = COLOR in each byte (flags kept)

        jnc     HLine_no_4_pixels       ; if no look for 8_pixels
        stosb                           ; Store 4 pixels at once if needed

HLine_no_4_pixels:
        shr     cx,1                    ; CF = bit 3 of the count (an 8-pixel word)
        jnc     HLine_no_8_pixels       ; if no look for 16_pixels
        stosw                           ; Store 8 pixels at once if needed

HLine_no_8_pixels:
        rep     stosd                   ; CX = count / 16 : 16 pixels per store
        and     dx,3                    ; Pixels left over for the tail
        jnz     HLine_1_pixels          ; Still some of them !!!
        ret                             ; Line ended on a 4-pixel boundary

HLine_1_pixels:                         ; Ragged tail
        mov     ax,0ff02h               ; AH = FFh, AL = 02h (Map Mask index)
        mov     cx,dx                   ; CL = 1, 2 or 3 pixels
        shl     ah,cl                   ; Clear the low CL bits ...
        mov     dx,3c4h                 ; Sequencer index port
        not     ah                      ; ... then invert: low CL planes enabled
        out     dx,ax                   ; Select them
        mov     es:[di],bl              ; Store 1,2 or 3 pixels at once !
        ret                             ; The End

HLine_less_than_3:                      ; Runs shorter than 3 pixels
        test    dx,dx                   ; Empty (x2 = x1) or reversed (x2 < x1) ?
        jle     HLine_end               ; Then there is nothing to draw
        mov     ax,0102h                ; AH = plane 0 bit, AL = Map Mask index
        and     cx,3                    ; CL = x1 % 4 = plane of the first pixel
        mov     bx,COLOR                ; BL = COLOR
        shl     ah,cl                   ; AH = enable bit of that plane
        dec     dx                      ; ZF = 1 if the run is a single pixel
        mov     dx,3c4h                 ; (MOV leaves the flags from DEC alone)
        jz      HLine_less_than_3_plot
        cmp     ah,8                    ; 2 pixels starting on plane 3 ?
        je      HLine_less_than_3_different_DI  ; 2nd pixel is in the next byte
        mov     ch,ah                   ; 2 pixels in the same byte :
        add     ah,ah                   ;   AH = AH * 3 enables this plane
        add     ah,ch                   ;   and the next one

HLine_less_than_3_plot:
        out     dx,ax
        mov     es:[di],bl
        ret

HLine_less_than_3_different_DI:
        out     dx,ax                   ; Plane 3 of this byte
        mov     es:[di],bl
        mov     ah,1                    ; Plane 0 ...
        inc     di                      ; ... of the next byte
        out     dx,ax
        mov     es:[di],bl

HLine_end:                              ; Common exit
        ret

HLine   endp

;End of HLine----------------------------------------------------------------

set320x400      proc    near

; Switch the display to BIOS mode 13h, then patch three groups of VGA
; registers by read-modify-write.  Each patch follows the same pattern:
; write the register index to the index port, step DX to the data port
; (index port + 1), read the current value, change only the wanted bits and
; write it back.
;
; In:   nothing
; Out:  nothing
; Clobbers: AX, DX, flags (plus whatever the BIOS mode set changes)
;
; The bit meanings named below follow the standard VGA register layout.

S400_GC_PORT    equ     03CEh           ; Graphics Controller index port
S400_SEQ_PORT   equ     03C4h           ; Sequencer index port
S400_CRTC_PORT  equ     03D4h           ; CRT Controller index port

; --- start from BIOS mode 13h ------------------------------------------------

        mov     ax,0013h
        int     10h

; --- Graphics Controller -----------------------------------------------------

        mov     dx,S400_GC_PORT
        mov     al,05h                  ; index 05h (Graphics Mode)
        out     dx,al
        inc     dx                      ; -> data port
        in      al,dx
        and     al,0EFh                 ; clear bit 4 (odd/even addressing)
        out     dx,al
        dec     dx                      ; -> index port

        mov     al,06h                  ; index 06h (Miscellaneous)
        out     dx,al
        inc     dx
        in      al,dx
        and     al,0FDh                 ; clear bit 1 (chain odd/even)
        out     dx,al

; --- Sequencer ---------------------------------------------------------------

        mov     dx,S400_SEQ_PORT
        mov     al,04h                  ; index 04h (Memory Mode)
        out     dx,al
        inc     dx
        in      al,dx
        and     al,0F7h                 ; clear bit 3 (chain-4)
        or      al,04h                  ; set bit 2 (odd/even disabled)
        out     dx,al

; --- CRT Controller ----------------------------------------------------------

        mov     dx,S400_CRTC_PORT
        mov     al,09h                  ; index 09h (Maximum Scan Line)
        out     dx,al
        inc     dx
        in      al,dx
        and     al,70h                  ; keep bits 4-6, clear bit 7 and bits 0-3
                                        ; NOTE(review): the scan-line count is
                                        ; bits 0-4, so this relies on bit 4
                                        ; already being 0 after the mode set
        out     dx,al
        dec     dx

        mov     al,14h                  ; index 14h (Underline Location)
        out     dx,al
        inc     dx
        in      al,dx
        and     al,0BFh                 ; clear bit 6 (doubleword addressing)
        out     dx,al
        dec     dx

        mov     al,17h                  ; index 17h (Mode Control)
        out     dx,al
        inc     dx
        in      al,dx
        or      al,40h                  ; set bit 6 (byte addressing)
        out     dx,al

        ret

set320x400      endp

CSEG    ends

;----------------------------------------------------------------------------

; Program stack.  The demo itself needs 36 bytes at its deepest point
; (push cx + pushad + near call), and every BIOS/DOS software interrupt and
; every hardware interrupt handler runs on this same stack, so it is sized
; at 1024 bytes (128 copies of the 8-byte marker string) to leave headroom.

STAC    segment para stack 'STACK'

        db   128 dup   ('S_T_A_C_')

STAC    ends

;----------------------------------------------------------------------------

        end main
