; math.inc - general math functions.

           public rotate        ; rotate using vmatrix
           public rotatenull    ; 16 bit rotate by ematrix
           public make3d        ; calculate 3d ?actual*?/z ( both x and y)
           public make3dx       ; xactual*x/z
           public make3dy       ; yactual*y/z
           public erotate       ; 32 bit rotate using ematrix
           public zsolve        ; solve single equation variable
           public ysolve
           public xsolve
           public cosign
           public sign
           public arctan
           public compound      ; generate rotation matrix (includes camera)
           public setsincose
           public temp_matrix
           public matrix_multiply

           public set_precal7
           public set_precal147
           public frotate
           public z16x
           public z16z

           public fzsolve
           public fxsolve
           public fysolve

           public precal1
           public precal4
           public precal7

           public sqrt          ; eax=sqr(eax), thanks to TRAN!
           public sqrax2bx2     ; ax = sqr(ax^2+bx^2)

           public pre_cal_lambert   ; scan object si and calculate surface normals
           public calc_normal       ; guess...from 3 points, returns vector ebx,ecx,ebp
           public lambert           ; calculate surface normal rotation matrix for object si
           public set_up_all_lambert; scans objects from si to di and calls pre_cal_lambert
           public lrotate           ; given normal for surface, figures out intensity

           public lx1               ; points to load up before calling calc_normal
           public ly1
           public lz1
           public lx2
           public ly2
           public lz2
           public lx3
           public ly3
           public lz3

; point rotation is 16 bit and uses vmatrix
; camera rotation is 32 bit and uses ematrix
; frotate uses rotation along a plane and uses ematrix with precal147

; point rotation
; bx = x   cx = y   bp = z    16 bit rotation!
; clobbers dx,si,ax

; remember , matrix offsets are:
;
;  0 1 2     multiply those by 4 four the doublewords
;  3 4 5
;  6 7 8
;
           align 16

rotate:
           mov ax,vmatrix+4    ; solve x = bx(0)+cx(1)+bp(2)
           imul bp
           shrd ax,dx,14
           movsx edi,ax
           mov ax,vmatrix+2
           imul cx
           shrd ax,dx,14
           movsx eax,ax
           add edi,eax
           mov ax,vmatrix+0
           imul bx
           shrd ax,dx,14
           movsx eax,ax
           add edi,eax   ; di = new x

           mov ax,vmatrix+10   ; solve y = bx(3)+cx(4)+bp(5)
           imul bp
           shrd ax,dx,14
           movsx esi,ax
           mov ax,vmatrix+8
           imul cx
           shrd ax,dx,14
           movsx eax,ax
           add esi,eax
           mov ax,vmatrix+6
           imul bx
           shrd ax,dx,14
           movsx eax,ax
           add esi,eax   ; si = new y

           mov ax,vmatrix+16   ; solve z = bx(6)+cx(7)+bp(8)
           imul bp
           shrd ax,dx,14
           movsx ebp,ax
           mov ax,vmatrix+14
           imul cx
           shrd ax,dx,14
           movsx eax,ax
           add ebp,eax
           mov ax,vmatrix+12
           imul bx
           shrd ax,dx,14
           movsx eax,ax
           add ebp,eax   ; bp = new z

           mov ecx,esi
           mov ebx,edi

           ret

; 16 bit rotate by ematrix - used when objects have no rotation/constant angle

           align 16

rotatenull:
           mov ax,w ematrix+4*2   ; solve x = bx(0)+cx(1)+bp(2)
           imul bp
           shrd ax,dx,14
           movsx edi,ax
           mov ax,w ematrix+2*2
           imul cx
           shrd ax,dx,14
           movsx eax,ax
           add edi,eax
           mov ax,w ematrix+0*2
           imul bx
           shrd ax,dx,14
           movsx eax,ax
           add edi,eax   ; di = new x

           mov ax,w ematrix+10*2   ; solve y = bx(3)+cx(4)+bp(5)
           imul bp
           shrd ax,dx,14
           movsx esi,ax
           mov ax,w ematrix+8*2
           imul cx
           shrd ax,dx,14
           movsx eax,ax
           add esi,eax
           mov ax,w ematrix+6*2
           imul bx
           shrd ax,dx,14
           movsx eax,ax
           add esi,eax   ; si = new y

           mov ax,w ematrix+16*2   ; solve z = bx(6)+cx(7)+bp(8)
           imul bp
           shrd ax,dx,14
           movsx ebp,ax
           mov ax,w ematrix+14*2
           imul cx
           shrd ax,dx,14
           movsx eax,ax
           add ebp,eax
           mov ax,w ematrix+12*2
           imul bx
           shrd ax,dx,14
           movsx eax,ax
           add ebp,eax   ; bp = new z

           mov ecx,esi
           mov ebx,edi

           ret

           align 16

; fast ratiox and ratioy are 320x and 464y
; multiplication has been substituted with fast lea

; trashes eax,edx,edi

make3d:                            ; bp must always be positive
           cmul eax,ebx,ratiox     ; use fast constant multiply

           idiv ebp
           mov ebx,eax
make3dy:
           cmul eax,ecx,ratioy

           idiv ebp
           mov ecx,eax

           ret

make3dx:                           ; bp must always be positive
           cmul eax,edi,ratiox

           idiv esi
           mov edi,eax

           ret

; checks if a side is visible.
; DI, SI, DX = x's
; BP, DS, ES = y's
; return: cx register...
; cx > 0: side visible...else not...routine courtesy of "RAZOR"
; eg:
;          call checkfront
;          cmp cx,0
;          jng dontdraw

           align 16

checkfront:
           cmp di,si
           jng s cfc2
           mov ax,di
           mov di,si
           mov si,dx
           mov dx,ax
           mov ax,bp
           mov bp,dsq
           mov bx,esq
           mov dsq,bx
           mov esq,ax
cfc:
           cmp di,si
           jng s cfc2
           mov ax,di
           mov di,si
           mov si,dx
           mov dx,ax
           mov ax,bp
           mov bp,dsq
           mov bx,esq
           mov dsq,bx
           mov esq,ax
cfc2:
           mov ax,dx                 ; ax = x3
           sub ax,di                 ; ax = x3 - x1
           mov bx,dsq                ; bx = y2
           sub bx,bp                 ; bx = y2 - y1
           movsx eax,ax              ; modification to allow large checks
           movsx ebx,bx
           imul ebx                  ; ax = (x3-x1)*(y2-y1)
           mov ecx,eax               ; save it...
           mov ax,si                 ; ax = x2
           sub ax,di                 ; ax = x2 - x1
           mov bx,esq                ; bx = y3
           sub bx,bp                 ; bx = y3 - y1
           movsx eax,ax
           movsx ebx,bx
           imul ebx                  ; ax = (x2-x1)*(y3-y1)
           sub ecx,eax               ; cx = (x3-x1)*(y2-y1)-(x2-x1)*(y3-y1)
           ret

; point rotation for eye - solves all x,y,z parameters
; ebx = x   ecx = y   ebp = z   32 bit rotation!
; clobbers dx,si,di,ax

           align 16

erotate:
           mov eax,ematrix+8
           imul ebp
           shrd eax,edx,14
           mov edi,eax
           if usez eq yes
           mov eax,ematrix+4
           imul ecx
           shrd eax,edx,14
           add edi,eax
           endif
           mov eax,ematrix+0
           imul ebx
           shrd eax,edx,14
           add edi,eax   ; di = new x

           mov eax,ematrix+20
           imul ebp
           shrd eax,edx,14
           mov esi,eax
           mov eax,ematrix+16
           imul ecx
           shrd eax,edx,14
           add esi,eax
           mov eax,ematrix+12
           imul ebx
           shrd eax,edx,14
           add esi,eax   ; si = new y

           mov eax,ematrix+32
           imul ebp
           shrd eax,edx,14
           mov ebp,eax
           mov eax,ematrix+28
           imul ecx
           shrd eax,edx,14
           add ebp,eax
           mov eax,ematrix+24
           imul ebx
           shrd eax,edx,14
           add ebp,eax   ; bp = new z

           mov ecx,esi
           mov ebx,edi

           ret

; solve z from ematrix - same as above erotate but only solves z for fast
; test of where object is - result is in esi

           align 16

zsolve:
           mov eax,ematrix+32
           imul ebp
           shrd eax,edx,14
           mov esi,eax
           mov eax,ematrix+28
           imul ecx
           shrd eax,edx,14
           add esi,eax
           mov eax,ematrix+24
           imul ebx
           shrd eax,edx,14
           add esi,eax   ; si = new z
           ret

; if object z test from above routine is positive, this routine will solve
; the rest of the rotation matrix.  this is so we don't waste time solving
; for x and y locations if the object is not within screen parameters.
; saves imuls

           align 16
xsolve:
           mov eax,ematrix+8
           imul ebp
           shrd eax,edx,14
           mov edi,eax
           if usez eq yes
           mov eax,ematrix+4
           imul ecx
           shrd eax,edx,14
           add edi,eax
           endif
           mov eax,ematrix+0
           imul ebx
           shrd eax,edx,14
           add edi,eax   ; di = new x
           ret

           align 16
ysolve:
           mov eax,ematrix+16
           imul ecx
           shrd eax,edx,14
           mov ecx,eax
           mov eax,ematrix+12
           imul ebx
           shrd eax,edx,14
           add ecx,eax
           mov eax,ematrix+20
           imul ebp
           shrd eax,edx,14
           add ecx,eax   ; cx = new y

           mov ebx,edi   ; final test, move into appropriate regs
           mov ebp,esi

           ret

; calculate sign into eax, from ax, smashes bx
; after imul by sign, shr eax,14 to compensate for decimal factor!

           align 16

cosign:
           add ax,4000h
sign:
           shr ax,2
           cmp ax,2000h
           jge s q3o4         ; quadrant 3 or 4

           cmp ax,1000h
           jl s q0            ; quad 1

           mov ebx,1fffh
           sub bx,ax
           jmp s halfsign     ; quad 2
q0:
           movzx ebx,ax
           jmp s halfsign
q3o4:
           cmp ax,3000h
           jl s q3
           mov ebx,3fffh
           sub bx,ax
           call halfsign      ; quad 4
           neg eax
           ret
q3:
           and ax,0fffh
           movzx ebx,ax       ; quad 3
           call halfsign
           neg eax
           ret
halfsign:
           movzx eax,w sinus[ebx*2]
           ret

; arctan, ecx=rise,eax=run, returns ax as angle of triangle
; smashes cx,ax,dx,si

           align 16

arctan:
           cmp eax,0
           jl s qd2or3
           cmp ecx,0
           jge s halftax      ; quadrant 1
           neg ecx            ; quadrant 4, ax=-ax
           call halftan
           neg ax
           shl ax,2
           ret
qd2or3:
           neg eax
           cmp ecx,0
           jge s qd2
           neg ecx            ; quad 3, ax=ax+8192
           call halftan
           add ax,8192
           shl ax,2
           ret
qd2:
           call halftan
           neg ax
           add ax,8192
           shl ax,2
           ret
halftax:
           call halftan
           shl ax,2
           ret

           align 16

halftan:
           xor edx,edx

; cx=rise  positive
; ax=run   positive

           cmp eax,ecx
           jl s opptan        ; greater than 45 degrees, other side...

           xchg ecx,eax       ; ax<cx
           shld edx,eax,11    ; *2048 edx = high dword for divide
           shl eax,11         ; *2048
           div ecx
           movzx esi,ax
           mov ax,w negtan[esi*2] ; resulting angle (0-512 is 0-45) in ax
           ret

           align 16

opptan:
           shld edx,eax,11    ; *2048 edx = high dword for divide
           shl eax,11         ; *2048

           div ecx
           movzx esi,ax       ; ax remainder
           mov cx,w negtan[esi*2]
           mov ax,1000h
           sub ax,cx          ; resulting angle (2048-4096 is 45-90) in ax
           ret

           align 16

; generate object matrix, 12 imul's first

;              x                         y                      z
;
;x=  cz * cy - sx * sy * sz   - sz * cy - sx * sy * cz     - cx * sy
;
;y=         sz * cx                   cx * cz                - sx
;
;z=  cz * sy + sx * sz * cy   - sy * sz + sx * cy * cz       cx * cy
;
;then perform matrix multiply by negative x and z matricies
;
; -x matrix                             -z matrix
;     x       y       z                   x       y       z
;
;x    1       0       0                cz     sz       0
;
;y    0      cx       sx              -sz     cz       0
;
;z    0     -sx       cx                0      0       1
;
; notice original object matrix takes 12 imuls, camera modify takes 24, can
; you do this faster? (less imuls)

compound:
           push esi

           mov ax,vxs[esi*2]
           neg ax
           push ax
           call cosign
           mov vcosx,ax
           pop ax
           call sign
           mov vsinx,ax
           mov ebp,eax            ; bp = sx
           neg ax
           mov [vmatrix+10],ax

           mov ax,vzs[esi*2]
           neg ax
           push ax
           call cosign
           mov vcosz,ax
           mov edi,eax            ; di = cz
           pop ax
           call sign
           mov vsinz,ax
           mov edx,eax            ; dx = sz

           mov ax,vys[esi*2]
           neg ax
           add ax,eyeay
           push ax
           call cosign
           mov vcosy,ax
           pop ax
           call sign
           mov vsiny,ax           ; ax = sy

           mov bx,dx              ; save sz

           mov cx,ax              ; save sy

           imul bx                ; bx = - sy * sz
           shrd ax,dx,14
           movsx ebx,ax
           neg bx
           mov [vmatrix+14],bx

           mov ax,cx              ; si = cz * sy
           imul di
           shrd ax,dx,14
           mov si,ax
           mov [vmatrix+12],si

           mov ax,vcosy

           imul di                ; di = cy * cz
           shrd ax,dx,14
           mov di,ax
           mov [vmatrix+0],di

           mov ax,vsinz
           mov cx,vcosy

           imul cx                ; cx = - sz * cy
           shrd ax,dx,14
           mov cx,ax
           neg cx
           mov [vmatrix+2],cx

           mov ax,bp
           imul si
           shrd ax,dx,14
           mov si,ax
           neg esi
           add [vmatrix+2],si

           mov ax,bp
           imul di
           shrd ax,dx,14
           mov di,ax
           add [vmatrix+14],di

           mov ax,bp
           imul bx
           shrd ax,dx,14
           mov bx,ax
           add [vmatrix+0],bx

           mov ax,bp
           imul cx
           shrd ax,dx,14
           mov cx,ax
           neg cx
           add [vmatrix+12],cx

           mov si,vcosx

           mov ax,vcosy
           imul si                   ; cx * cy
           shrd ax,dx,14
           mov [vmatrix+16],ax

           mov ax,vsiny
           imul si                   ;-cx * sy
           shrd ax,dx,14
           neg ax
           mov [vmatrix+4],ax

           mov ax,vsinz
           imul si                    ; cx * sz
           shrd ax,dx,14
           mov [vmatrix+6],ax

           mov ax,vcosz
           imul si                    ; cx * cz
           shrd ax,dx,14
           mov [vmatrix+8],ax

           mov edi,ecosx              ; now perform camera x rotation,12 imuls
           mov esi,esinx
           mov ebp,esi
           neg ebp

           mov ax,[vmatrix+6]
           imul di
           shrd ax,dx,14
           mov cx,ax

           mov ax,[vmatrix+12]
           imul si
           shrd ax,dx,14

           add ecx,eax                ; ecx = new vmatrix+12

           mov ax,[vmatrix+6]
           imul bp
           shrd ax,dx,14
           mov bx,ax

           mov ax,[vmatrix+12]
           imul di
           shrd ax,dx,14

           add ebx,eax                ; ebx = new vmatrix+24

           mov [vmatrix+6],cx
           mov [vmatrix+12],bx

           mov ax,[vmatrix+8]
           imul di
           shrd ax,dx,14
           mov cx,ax

           mov ax,[vmatrix+14]
           imul si
           shrd ax,dx,14

           add cx,ax                ; ecx = new vmatrix+16

           mov ax,[vmatrix+8]
           imul bp
           shrd ax,dx,14
           mov bx,ax

           mov ax,[vmatrix+14]
           imul di
           shrd ax,dx,14

           add bx,ax                ; ebx = new vmatrix+28

           mov [vmatrix+8],cx
           mov [vmatrix+14],bx

           mov ax,[vmatrix+10]
           imul di
           shrd ax,dx,14
           mov cx,ax

           mov ax,[vmatrix+16]
           imul si
           shrd ax,dx,14

           add ecx,eax                ; ecx = new vmatrix+20

           mov ax,[vmatrix+10]
           imul bp
           shrd ax,dx,14
           mov bx,ax

           mov ax,[vmatrix+16]
           imul di
           shrd ax,dx,14

           add ebx,eax                ; ebx = new vmatrix+32

           mov [vmatrix+10],cx
           mov [vmatrix+16],bx

           if usez eq yes

           mov edi,ecosz              ; now perform camera z rotation,12 imuls
           mov esi,esinz
           mov ebp,esi
           neg esi

           mov ax,[vmatrix+0]
           imul di
           shrd ax,dx,14
           mov cx,ax

           mov ax,[vmatrix+6]
           imul si
           shrd ax,dx,14

           add cx,ax

           mov ax,[vmatrix+0]
           imul bp
           shrd ax,dx,14
           mov bx,ax

           mov ax,[vmatrix+6]
           imul di
           shrd ax,dx,14
           movsx eax,ax

           add ebx,eax

           mov [vmatrix+0],cx
           mov [vmatrix+6],bx

           mov ax,[vmatrix+2]
           imul di
           shrd ax,dx,14
           mov cx,ax

           mov ax,[vmatrix+8]
           imul si
           shrd ax,dx,14

           add ecx,eax

           mov ax,[vmatrix+2]
           imul bp
           shrd ax,dx,14
           mov bx,ax

           mov ax,[vmatrix+8]
           imul di
           shrd ax,dx,14

           add bx,ax

           mov [vmatrix+2],cx
           mov [vmatrix+8],bx

           mov ax,[vmatrix+4]
           imul di
           shrd ax,dx,14
           mov cx,ax

           mov ax,[vmatrix+10]
           imul si
           shrd ax,dx,14

           add ecx,eax

           mov ax,[vmatrix+4]
           imul bp
           shrd ax,dx,14
           mov bx,ax

           mov ax,[vmatrix+10]
           imul di
           shrd ax,dx,14

           add bx,ax

           mov [vmatrix+4],cx
           mov [vmatrix+10],bx

           endif

           pop esi

           ret

; generate rotation matrix for  y,x,z  camera rotation
; called only once every frame.  completed in 12 multiplys
; matrix is also used for objects with no rotation (always angle 0,0,0)
;
; where is my postcard! see readme.doc for info.
;
;              x                    y                    z
;
;x=  cz * cy + sx * sy * sz     -cx * sz     - sy * cz + sx * cy * sz
;
;y=  sz * cy - sx * sy * cz      cx * cz     - sy * sz - sz * cy * cz
;
;z=         cx * sy                 sx                cx * cy
;

;  matrix offsets: (doublewords)
;
;     x  y  z
;
;x    0  4  8
;y   12 16 20
;z   24 28 32

           align 16

setsincose:

           mov ax,eyeax
           call cosign
           mov ecosx,eax          ; ecosx and such are used by object rotation
           mov ax,eyeax           ; ematrix is used to find where object is
           call sign
           mov esinx,eax
           mov [ematrix+28],eax
           mov ebp,eax            ; bp = sx

           if usez eq yes
           mov ax,eyeaz
           call cosign
           mov ecosz,eax
           mov edi,eax            ; di = cz
           mov ax,eyeaz
           call sign
           mov esinz,eax
           mov edx,eax            ; dx = sz
           endif

           if usez eq no
           mov edi,4000h          ; di = cos 0
           mov ecosz,4000h
           xor edx,edx            ; dx = sin 0
           mov esinz,0
           endif

           mov ax,eyeay
           call cosign
           mov ecosy,eax
           mov ax,eyeay
           call sign
           mov esiny,eax          ; ax = sy

           mov ebx,edx            ; save sz

           mov ecx,eax            ; save sy

           imul bx                ; bx = sy * sz
           shrd ax,dx,14
           movsx ebx,ax
           neg ebx
           mov [ematrix+20],ebx
           neg ebx

           mov eax,ecx            ; si = - (cz * sy)
           imul di
           shrd ax,dx,14
           movsx esi,ax
           neg esi
           mov [ematrix+8],esi

           mov eax,ecosy

           imul di                ; di = cy * cz
           shrd ax,dx,14
           movsx edi,ax
           mov [ematrix+0],edi

           mov eax,esinz
           mov ecx,ecosy

           imul cx                ; cx = sz * cy
           shrd ax,dx,14
           movsx ecx,ax
           mov [ematrix+12],ecx

           mov eax,ebp
           imul si
           shrd ax,dx,14
           movsx esi,ax
           add [ematrix+12],esi

           mov eax,ebp
           imul di
           shrd ax,dx,14
           movsx edi,ax
           neg edi
           add [ematrix+20],edi

           mov eax,ebp
           imul bx
           shrd ax,dx,14
           movsx ebx,ax
           add [ematrix+0],ebx

           mov eax,ebp
           imul cx
           shrd ax,dx,14
           movsx ecx,ax
           add [ematrix+8],ecx

           mov esi,ecosx

           mov eax,ecosy
           imul si                    ; cx * cy
           shrd ax,dx,14
           movsx eax,ax
           mov [ematrix+32],eax

           mov eax,esiny
           imul si                    ; cx * sy
           shrd ax,dx,14
           movsx eax,ax
           mov [ematrix+24],eax

           mov eax,esinz
           imul si                    ;-cx * sz
           shrd ax,dx,14
           movsx eax,ax
           neg eax
           mov [ematrix+4],eax

           mov eax,ecosz
           imul si                    ; cx * cz
           shrd ax,dx,14
           movsx eax,ax
           mov [ematrix+16],eax

           neg esinx                  ; reverse angles for object rotation
           neg esiny

           ret

; generate temp matrix, 12 imul's, from object esi

;              x                         y                      z
;
;x=  cz * cy - sx * sy * sz   - sz * cy - sx * sy * cz     - cx * sy
;
;y=         sz * cx                   cx * cz                - sx
;
;z=  cz * sy + sx * sz * cy   - sy * sz + sx * cy * cz       cx * cy
;

temp_matrix:
           push esi

           mov ax,vxs[esi*2]
           neg ax
           push ax
           call cosign
           mov vcosx,ax
           pop ax
           call sign
           mov vsinx,ax
           mov ebp,eax            ; bp = sx
           neg ax
           mov [tmatrix+10],ax

           mov ax,vzs[esi*2]
           neg ax
           push ax
           call cosign
           mov vcosz,ax
           mov edi,eax            ; di = cz
           pop ax
           call sign
           mov vsinz,ax
           mov edx,eax            ; dx = sz

           mov ax,vys[esi*2]
           neg ax
           push ax
           call cosign
           mov vcosy,ax
           pop ax
           call sign
           mov vsiny,ax           ; ax = sy

           mov bx,dx              ; save sz

           mov cx,ax              ; save sy

           imul bx                ; bx = - sy * sz
           shrd ax,dx,14
           movsx ebx,ax
           neg bx
           mov [tmatrix+14],bx

           mov ax,cx              ; si = cz * sy
           imul di
           shrd ax,dx,14
           mov si,ax
           mov [tmatrix+12],si

           mov ax,vcosy

           imul di                ; di = cy * cz
           shrd ax,dx,14
           mov di,ax
           mov [tmatrix+0],di

           mov ax,vsinz
           mov cx,vcosy

           imul cx                ; cx = - sz * cy
           shrd ax,dx,14
           mov cx,ax
           neg cx
           mov [tmatrix+2],cx

           mov ax,bp
           imul si
           shrd ax,dx,14
           mov si,ax
           neg esi
           add [tmatrix+2],si

           mov ax,bp
           imul di
           shrd ax,dx,14
           mov di,ax
           add [tmatrix+14],di

           mov ax,bp
           imul bx
           shrd ax,dx,14
           mov bx,ax
           add [tmatrix+0],bx

           mov ax,bp
           imul cx
           shrd ax,dx,14
           mov cx,ax
           neg cx
           add [tmatrix+12],cx

           mov si,vcosx

           mov ax,vcosy
           imul si                   ; cx * cy
           shrd ax,dx,14
           mov [tmatrix+16],ax

           mov ax,vsiny
           imul si                   ;-cx * sy
           shrd ax,dx,14
           neg ax
           mov [tmatrix+4],ax

           mov ax,vsinz
           imul si                    ; cx * sz
           shrd ax,dx,14
           mov [tmatrix+6],ax

           mov ax,vcosz
           imul si                    ; cx * cz
           shrd ax,dx,14
           mov [tmatrix+8],ax

           pop esi

           ret

; multiply tmatrix by vmatrix, [vmatrix]=[tmatrix][vmatrix]
;
; [ tmatrix+ 0 tmatrix+ 2 tmatrix+ 4 ] [ vmatrix+ 0 vmatrix+ 2 vmatrix+ 4 ]
; [                                  ] [                                  ]
; [ tmatrix+ 6 tmatrix+ 8 tmatrix+10 ] [ vmatrix+ 6 vmatrix+ 8 vmatrix+10 ]
; [                                  ] [                                  ]
; [ tmatrix+12 tmatrix+14 tmatrix+16 ] [ vmatrix+12 vmatrix+14 vmatrix+16 ]
;

matrix_multiply:

           mov bx,[tmatrix+0]
           mov cx,[tmatrix+2]
           mov bp,[tmatrix+4]

           mov ax,[vmatrix+0]
           imul bx
           shrd ax,dx,14
           mov si,ax

           mov ax,[vmatrix+6]
           imul cx
           shrd ax,dx,14
           add si,ax

           mov ax,[vmatrix+12]
           imul bp
           shrd ax,dx,14
           add si,ax

           push si             ; tmatrix+0

           mov ax,[vmatrix+2]
           imul bx
           shrd ax,dx,14
           mov si,ax

           mov ax,[vmatrix+8]
           imul cx
           shrd ax,dx,14
           add si,ax

           mov ax,[vmatrix+14]
           imul bp
           shrd ax,dx,14
           add si,ax

           push si             ; tmatrix+2

           mov ax,[vmatrix+4]
           imul bx
           shrd ax,dx,14
           mov si,ax

           mov ax,[vmatrix+10]
           imul cx
           shrd ax,dx,14
           add si,ax

           mov ax,[vmatrix+16]
           imul bp
           shrd ax,dx,14
           add si,ax

           push si             ; tmatrix+4

           mov bx,[tmatrix+6]
           mov cx,[tmatrix+8]
           mov bp,[tmatrix+10]

           mov ax,[vmatrix+0]
           imul bx
           shrd ax,dx,14
           mov si,ax

           mov ax,[vmatrix+6]
           imul cx
           shrd ax,dx,14
           add si,ax

           mov ax,[vmatrix+12]
           imul bp
           shrd ax,dx,14
           add si,ax

           push si             ; tmatrix+6

           mov ax,[vmatrix+2]
           imul bx
           shrd ax,dx,14
           mov si,ax

           mov ax,[vmatrix+8]
           imul cx
           shrd ax,dx,14
           add si,ax

           mov ax,[vmatrix+14]
           imul bp
           shrd ax,dx,14
           add si,ax

           push si             ; tmatrix+8

           mov ax,[vmatrix+4]
           imul bx
           shrd ax,dx,14
           mov si,ax

           mov ax,[vmatrix+10]
           imul cx
           shrd ax,dx,14
           add si,ax

           mov ax,[vmatrix+16]
           imul bp
           shrd ax,dx,14
           add si,ax

           push si             ; tmatrix+10

           mov bx,[tmatrix+12]
           mov cx,[tmatrix+14]
           mov bp,[tmatrix+16]

           mov ax,[vmatrix+0]
           imul bx
           shrd ax,dx,14
           mov si,ax

           mov ax,[vmatrix+6]
           imul cx
           shrd ax,dx,14
           add si,ax

           mov ax,[vmatrix+12]
           imul bp
           shrd ax,dx,14
           add si,ax

           push si             ; tmatrix+12

           mov ax,[vmatrix+2]
           imul bx
           shrd ax,dx,14
           mov si,ax

           mov ax,[vmatrix+8]
           imul cx
           shrd ax,dx,14
           add si,ax

           mov ax,[vmatrix+14]
           imul bp
           shrd ax,dx,14
           add si,ax

           push si             ; tmatrix+14

           mov ax,[vmatrix+4]
           imul bx
           shrd ax,dx,14
           mov si,ax

           mov ax,[vmatrix+10]
           imul cx
           shrd ax,dx,14
           add si,ax

           mov ax,[vmatrix+16]
           imul bp
           shrd ax,dx,14
           add si,ax

;          push si             ; tmatrix+16

;          pop si
           mov [vmatrix+16],si
           pop si
           mov [vmatrix+14],si
           pop si
           mov [vmatrix+12],si
           pop si
           mov [vmatrix+10],si
           pop si
           mov [vmatrix+ 8],si
           pop si
           mov [vmatrix+ 6],si
           pop si
           mov [vmatrix+ 4],si
           pop si
           mov [vmatrix+ 2],si
           pop si
           mov [vmatrix+ 0],si
           ret

;getroot:                      ; get square root of ax, where ax = 0-65535
;           cmp ax,0fe01h      ; since ax cannot be negative anyway!
;           jae sqr255         ; routine requires squares tables.
;           mov si,offset squares
;           mov cx,ax
;           inc cx
;           cld
;nextroot:
;           lodsw
;           cmp ax,cx
;           jbe  nextroot      ; jb is exact but jbe is better approximation
;           mov ax,si
;           sub ax,offset squares+3
;           sar ax,1
;           ret
;sqr255:
;           mov ax,255
;           ret

; routine courtesy TRAN
;
; square root
; in:
;   eax - number to take root of
; out:
;   eax - root
;
sqrtbasetbl db 0,1,4,9,16,25,36,49,64,81,100,121,144,169,196,225

           public sqrt

           align 16

sqrt:
           pushad
           mov ebp,eax
           bsr ebx,eax
           jnz short sqrtf0
           xor ebx,ebx
sqrtf0:
           shr ebx,3
           lea eax,[ebx*8]
           mov cl,32
           sub cl,al
           rol ebp,cl
           mov eax,ebp
           movzx eax,al
           mov edi,offset sqrtbasetbl
           mov ecx,10h
sqrtl0:
           scasb
           je short sqrtl0d
           jb short sqrtl0d2
           loop sqrtl0
           inc edi
sqrtl0d2:
           dec edi
           inc cl
sqrtl0d:
           movzx edx,byte ptr [edi-1]
           dec cl
           xor cl,0fh
           mov edi,ecx
           mov ecx,ebx
           jecxz short sqrtdone
           sub eax,edx
sqrtml:
           shld eax,ebp,8
           rol ebp,8
           mov ebx,edi
           shl ebx,5
           xor edx,edx
           mov esi,eax
           div ebx
           rol edi,4
           add edi,eax
           add ebx,eax
sqrtf2:
           imul eax,ebx
           mov edx,eax
           mov eax,esi
           sub eax,edx
           jc short sqrtf1
           loop sqrtml
sqrtdone:
           mov [esp+28],edi
           popad
           ret
sqrtf1:
           dec ebx
           dec edi
           movzx eax,bl
           and al,1fh
           jmp sqrtf2

; solve for z when x = bx (takes a point/object on a y plane and figures
; where it would be if z = 16, y = precal7, only good for square translations)
; uses ematrix as rotation matrix

; cs set if not possible

; formula is inverse of z = bx6+cx7+bp8. where precal7 is y*7
; (16-bx(6)-precal7)/(8)=z

            align 16

precal1     dd 0
precal4     dd 0
precal7     dd 0

z16x:
            cmp ematrix+32,0
            je abort_attempt

            mov eax,ematrix+24
            imul ebx
            shrd eax,edx,14

            neg eax
            add eax,16
            sub eax,precal7

            cdq
            shld edx,eax,14
            mov ebx,ematrix+32
            idiv ebx

            stc
            ret
abort_attempt:
            clc
            ret

; solve for x when z = bp (takes a point/object on a y plane and figures
; where it would be if z = 16, y = precal7, only good for square translations)
; uses ematrix as rotation matrix. output solves for z = 16

; cs set if not possible

; formula is inverse of z = bx6+cx7+bp8. where precal7 is y*7
; (16-bp(8)-precal7)/(6)=x

            align 16

z16z:
            cmp ematrix+24,0
            je abort_attempt

            mov eax,ematrix+32
            imul ebp
            shrd eax,edx,14

            neg eax
            add eax,16
            sub eax,precal7

            cdq
            shld edx,eax,14
            mov ebx,ematrix+24
            idiv ebx

            stc
            ret

; set precal7 for plane transformation - plane is ecx and allows above formulas
; to determine where a point/object would be along that plane if z is negative

; good for runway translations or super huge background polygons - not used
; by regular 3d.asm routines

; how to use: lets say you've got a billion background objects that are
; on the ground (or all on the same y plane).  you call set_precal147
; with that y plane location and use frotate instead of erotate to
; determine where points rotated along that plane will end up.  this
; speeds the routine up by 33% by cutting out 3 imuls.

            align 16

set_precal147:
            if usez eq yes
            mov eax,ecx
            sub eax,eyey
            imul ematrix+4
            shrd eax,edx,14
            mov precal1,eax
            endif

            mov eax,ecx
            sub eax,eyey
            imul ematrix+16
            shrd eax,edx,14
            mov precal4,eax

set_precal7:
            sub ecx,eyey
            mov eax,ecx
            imul ematrix+28
            shrd eax,edx,14
            mov precal7,eax
            ret

            align 16

; fast object/point rotation along pre-calculated y plane

frotate:
           mov eax,ematrix+8
           imul ebp
           shrd eax,edx,14
           mov edi,eax
           if usez eq yes
           add edi,precal1
           endif
           mov eax,ematrix+0
           imul ebx
           shrd eax,edx,14
           add edi,eax   ; di = new x

           mov eax,ematrix+20
           imul ebp
           shrd eax,edx,14
           mov esi,eax
           add esi,precal4
           mov eax,ematrix+12
           imul ebx
           shrd eax,edx,14
           add esi,eax   ; si = new y

           mov eax,ematrix+32
           imul ebp
           shrd eax,edx,14
           mov ebp,eax
           add ebp,precal7
           mov eax,ematrix+24
           imul ebx
           shrd eax,edx,14
           add ebp,eax   ; bp = new z

           mov ecx,esi
           mov ebx,edi

           ret

; fast solve for single matrix variable similar to erotate but uses frotate
; plane matrix with precal147
;
; remember , matrix offsets are:
;
;  0 1 2     multiply those by 4 for the doublewords
;  3 4 5
;  6 7 8
;
           align 16

fzsolve:
           mov eax,ematrix+32      ; solve z = bx(6)+cx(7)+bp(8)
           imul ebp
           shrd eax,edx,14
           mov esi,eax
           add esi,precal7
           mov eax,ematrix+24
           imul ebx
           shrd eax,edx,14
           add esi,eax   ; si = new z
           ret

           align 16
fxsolve:
           mov eax,ematrix+8        ; solve x = bx(0)+cx(1)+bp(2)
           imul ebp
           shrd eax,edx,14
           mov edi,eax
           if usez eq yes
           add edi,precal1
           endif
           mov eax,ematrix+0
           imul ebx
           shrd eax,edx,14
           add edi,eax   ; di = new x
           ret

           align 16
fysolve:
           mov eax,ematrix+20       ; solve y = bx(3)+cx(4)+bp(5)
           imul ebp
           shrd eax,edx,14
           mov ecx,eax
           add ecx,precal4
           mov eax,ematrix+12
           imul ebx
           shrd eax,edx,14
           add ecx,eax   ; cx = new y

           mov ebx,edi
           mov ebp,esi

           ret

; generate lambert shading 1x3 matrix, completed in 6 imuls
;
;z= ( sz ( cx + ( sx * cy )) + cz * sy ) * 45degrees  [x]
;   ( cz ( cx + ( sx * cy )) - sz * sy ) * 45degrees  [y]
;   ( cx * cy - sx ) * 45 degrees                     [z]
;
;note cos45=sin45=2d41h, but we will use 2d00h (99.2% accurate)
; you can change the y angle of the sun/light but not the x angle.
; changing the x angle would require a new formula.
;
lambert:
           mov ax,vxs[esi*2]
           neg ax
           push ax
           call cosign
           mov vcosx,ax
           pop ax
           call sign
           mov vsinx,ax
           mov bp,ax            ; bp = sx

           mov ax,vzs[esi*2]
           neg ax
           push ax
           call cosign
           mov vcosz,ax
           mov di,ax            ; di = cz
           pop ax
           call sign
           mov vsinz,ax
           mov dx,ax            ; edx = sz

           mov ax,vys[esi*2]
           neg ax
           add ax,y_angle_of_sun  ; 2000h = 45 degrees y angle for light source
           push ax
           call cosign
           mov vcosy,ax
           mov si,ax            ; si = cy
           pop ax
           call sign
           mov vsiny,ax          ; ax = sy

           mov bx,dx            ; bx = sz

           mov cx,ax            ; cx = sy

           mov ax,bp            ; get sx

           imul si                ; eax = sx * cy
           shrd ax,dx,14
           sub ax,vcosx          ; eax = cx + ( sx * cy)

           push ax

           imul bx                ; cx + ( sx * cy) * sz
           shrd ax,dx,14
           mov [lmatrix+0],ax

           pop ax

           imul di                ; di = cz
           shrd ax,dx,14
           mov [lmatrix+2],ax   ; cx + ( sx * cy) * cz

           mov ax,bx
           imul cx                ; - sz * sy
           shrd ax,dx,14
           sub [lmatrix+2],ax

           mov ax,di
           imul cx                ; cz * sy
           shrd ax,dx,14
           add [lmatrix+0],ax

           mov ax,vcosx          ; (cx * cy - sx) * 45deg
           imul si
           shrd ax,dx,14
           mov bx,ax
           add bx,bp

           movsx ebx,bx
           cmul eax,ebx,2d00h     ; * 45degrees
           shr eax,14
           movsx eax,ax
           mov [lmatrix+4],ax

           movsx ebx,[lmatrix+2]
           cmul eax,ebx,2d00h
           shr eax,14
           movsx eax,ax
           mov [lmatrix+2],ax

           movsx ebx,[lmatrix+0]
           cmul eax,ebx,2d00h
           shr eax,14
           mov [lmatrix+0],ax

           ret

; pre-calculate surface normals for object edi

; x2 = x2 - x1
; y2 = y2 - y1
; z2 = z2 - z1
;
; x3 = x3 - x1
; y3 = y3 - y1
; z3 = z3 - z1
;
; x = y2 * z3 - z2 * y3
; y = z2 * x3 - x2 * z3
; z = x2 * y3 - y2 * x3
;
; a = SQR(x ^ 2 + y ^ 2 + z ^ 2)
;
; x = INT(x / a * 256 + .5)
; y = INT(y / a * 256 + .5)
; z = INT(z / a * 256 + .5)

lx1  dd 0
ly1  dd 0
lz1  dd 0

lx2  dd 0
ly2  dd 0
lz2  dd 0

lx3  dd 0
ly3  dd 0
lz3  dd 0

finx dd 0
finy dd 0
finz dd 0

; precalculate surface normals for object di.  this  is  so  you  don't
; have to type them in when designing new objects. imagine, 400 points,
; with 350 surfaces, calculating them all manually?  this routine  also
; figures out the iteration skip offset (if you have surfaces dependant
; on other surfaces) and also sets bit 1 if it is a line (two  points),
; and sets bit 4 if 1 point.this routine also sets the number of points
; to skip if an iteration is found. it  counts  the  number  of  points
; within iterations (even iterations within iterations)  and  sets  the
; skip value so any iterations skipped will have a pre-calculated point
; offset.  did that make sense?
;
; things done here:
;
; set point command if only 1 connection
; set line  command if only 2 connections
; set normal bit in commands if shading used in texture
; calculate and set shading normals
; calculate offsets for iteration jumps (in case surface not visible)
; calculate number of points to skip for iterations (in case surface not visible)
;
; most of the above is done so the user (you) wont have to calculate this stuff
; yourself - makes object modification much easier.
;
; if you find the routine to be sloppy remember it is only used
; for object initialization.

pre_cal_lambert:
           movzx edi,di           ; in case user is lazy
           mov esi,objbase[edi*4]
more_reses:
           push esi               ; save header offset
           add esi,4
           lodsd
           add esi,eax            ; handle first resolution

           lodsw
           mov numpoints,ax
           lodsw
           mov numsides,ax
           add esi,50             ; skip future use bytes
           mov edi,2              ; edi=2 to skip center of gravity
           mov xp,0
           mov yp,0
           mov zp,0
lam_ap12:
           mov bx,w [esi]         ; load all the points into array
           mov cx,w [esi+2]
           mov bp,w [esi+4]
           mov xp[edi],bx
           mov yp[edi],cx
           mov zp[edi],bp
           add esi,6
           add di,2
           dec numpoints
           jne s lam_ap12         ; esi = address of sides now...

           mov pointindex,di

lam_loadsides:
           mov edi,esi            ; save in case of line adjust (+16)

           mov ax,[esi]           ; get command

           mov bx,ax              ; save command
           test ax,himap          ; test if bitmap
           jz s lam_notmap        ; no, skip through loop
           add esi,8+2            ; yes, bitmap, skip 4 words + command
           jmp s lam_test_iteration ; go to next side
lam_notmap:
           mov ax,[esi+2]         ; get texture for both sides
           or  ax,[esi+4]

           test ax,shade          ; test shading bit
           jnz s lam_calcit       ; yes, calculate shading normal

           add esi,4+4+2          ; skip 2 colour & 2 texture words & command

           lodsw                  ; get first point indexer
           mov cx,ax
           mov dx,0
lam_ldlp:
           lodsw                  ; count number of connection points
           inc dx
           cmp ax,cx
           jne lam_ldlp

           cmp dx,1               ; only 1 point?, set +64
           jne lam_test_line
           or  w [edi+2],point
           or  w [edi+4],point
           jmp lam_test_iteration

lam_test_line:
           cmp dx,2               ; only 2 points?, set +16
           jne lam_test_iteration
           or  w [edi+2],line
           or  w [edi+4],line

lam_test_iteration:
           test bx,iterate        ; test if iteration command used
           jnz lam_do_it          ; yes,solve internal iteration
lam_next:
           dec numsides
           jnz lam_loadsides

           pop esi
           lodsd
           add esi,4
           cmp eax,-1             ; last resolution?
           jne more_reses

           ret

lam_calcit:
           push esi               ; save command location
           add esi,4+4+2          ; skip colour and 2 future use words

           lodsw                  ; first point
           push ax
           movzx edi,ax
           shl edi,1

           movsx ebx,[xp+edi]
           movsx ecx,[yp+edi]
           movsx ebp,[zp+edi]

           mov lx1,ebx
           mov ly1,ecx
           mov lz1,ebp

           lodsw                  ; second point
           movzx edi,ax
           shl edi,1

           movsx ebx,[xp+edi]
           movsx ecx,[yp+edi]
           movsx ebp,[zp+edi]

           mov lx2,ebx
           mov ly2,ecx
           mov lz2,ebp

           lodsw                  ; third point
           movzx edi,ax
           shl edi,1

           movsx ebx,[xp+edi]
           movsx ecx,[yp+edi]
           movsx ebp,[zp+edi]

           mov lx3,ebx
           mov ly3,ecx
           mov lz3,ebp

           push esi

           call calc_normal

           pop esi

           pop dx                 ; now find shading normal storage, pop first connector

lam_ldl2:
           lodsw
           cmp ax,dx
           jne lam_ldl2

           mov edi,esi

           mov ax,bx
           stosw
           mov ax,cx
           stosw
           mov ax,bp
           stosw

           add esi,6

           pop edi         ; get original command location back
           or w [edi],normal
           mov bx,[edi]
           jmp lam_test_iteration

lam_surfc_cnt dw 0

; this finds the total number of points to skip if an iteration fails, dx = #
; remember, this is a pre-calculation routine so it doesn't need to be fast.

lam_do_it:
           mov lam_surfc_cnt,0
           mov dx,0
           push esi        ; this is our return address (continue from here+4)

           lodsw           ; get number of points.
           add dx,ax       ; save as TOTAL number of points to skip
           mov numpoints,ax

           lodsw           ; get number of surfaces
           add lam_surfc_cnt,ax ; count until this is zero
           add esi,25*2

           movzx edi,pointindex
           cmp numpoints,0
           je lam_test_check        ; only sides added, no additional points
lam_ap13:
           mov bx,w [esi]         ; load all the points into array
           mov cx,w [esi+2]       ; for calculation of gourad shadings
           mov bp,w [esi+4]
           mov xp[edi],bx
           mov yp[edi],cx
           mov zp[edi],bp
           add esi,6
           add di,2
           dec numpoints
           jne s lam_ap13         ; esi = address of sides now...

           mov pointindex,di

lam_test_check:
           cmp lam_surfc_cnt,0    ; test if user just wants to add points
           je lam_no_surfs        ; i dont know why anyone would want to do this?

lam_test_until_target:
           lodsw                  ; get command
           mov bx,ax
           test ax,himap          ; test if bitmap
           jz s lam_notmap_it     ; no, skip through loop
           add esi,8              ; yes, bitmap, skip 4 words
           jmp s lam_nog

lam_notmap_it:
           lodsw
           mov bp,ax
           lodsw
           or bp,ax               ; find if shading bit used, add esi,6 if so
           add esi,4              ; skip 2 colour words

           lodsw                  ; get first point indexer
           mov cx,ax
lam_ldl3:
           lodsw
           cmp ax,cx
           jne lam_ldl3

           test bp,shade          ; test if gouraud normal present
           jz lam_nog
           add esi,6              ; skip it if present
lam_nog:
           test bx,iterate        ; test if iteration command used
           jnz lam_re_lam         ; solve internal iteration again...
lam_next_it:
           dec lam_surfc_cnt
           jnz lam_test_until_target
lam_no_surfs:
           mov edi,esi            ; save current location
           pop esi                ; return original start location
           sub edi,esi            ; get difference between them
           sub di,8

           lodsw                  ; get number of points
           mov bx,ax

           lodsw
           add numsides,ax

           mov cx,dx
           mov ax,6
           imul bx
           movzx ebx,ax

           mov ax,di
           mov edi,esi
           stosw                  ; save offset
           mov ax,cx
           stosw                  ; save number of points found in iterations

           add esi,25*2           ; adjust for next load
           add esi,ebx

           jmp lam_next

lam_re_lam:
           lodsw           ; get number of points for recursed iteration
           add dx,ax       ; save as TOTAL number of points to skip
           mov cx,ax

           lodsw           ; get number of surfaces
           add lam_surfc_cnt,ax ; count until this is zero
           add esi,25*2

           mov eax,6
           imul cx
           add esi,eax

           jmp lam_next_it

; calculate surface normal given points l1,l2,l3.  result is
; vector in ebx,ecx,ebp.  this worked for me on the first try!

calc_normal:
           mov ebx,lx1
           mov ecx,ly1
           mov ebp,lz1

           sub lx2,ebx
           sub ly2,ecx
           sub lz2,ebp

           sub lx3,ebx
           sub ly3,ecx
           sub lz3,ebp

           mov eax,ly2
           mov ebx,lz3
           imul ebx
           mov ecx,eax

           mov eax,lz2
           mov ebx,ly3
           imul ebx
           sub ecx,eax

           mov finx,ecx ; save x of normal

           mov eax,lz2
           mov ebx,lx3
           imul ebx
           mov ecx,eax

           mov eax,lx2
           mov ebx,lz3
           imul ebx
           sub ecx,eax

           mov finy,ecx ; save y of normal

           mov eax,lx2
           mov ebx,ly3
           imul ebx
           mov ecx,eax

           mov eax,ly2
           mov ebx,lx3
           imul ebx
           sub ecx,eax

           mov finz,ecx ; save z of normal

calc_testloop:
           cmp finx,32768 ; make sure (normal^2)*2 is < 2^32
           jge calc_shrit
           cmp finz,32768
           jge calc_shrit
           cmp finz,32768
           jge calc_shrit

           cmp finx,-32768
           jle calc_shrit
           cmp finz,-32768
           jle calc_shrit
           cmp finz,-32768
           jg  ok_2_bite_dust

calc_shrit:
           shr finx,1   ; calculations will be too large if squared, div by 2
           test finx,40000000h
           jz no_neg_calc1
           or   finx,80000000h
no_neg_calc1:
           shr finy,1
           test finy,40000000h
           jz no_neg_calc2
           or   finy,80000000h
no_neg_calc2:
           shr finz,1
           test finz,40000000h
           jz no_neg_calc3
           or   finz,80000000h
no_neg_calc3:
           jmp calc_testloop

ok_2_bite_dust:
           mov eax,finx ; x^2
           mov edi,eax  ; objects
           imul edi
           mov edi,eax

           mov eax,finy ; y^2
           mov esi,eax
           imul esi
           mov esi,eax

           mov eax,finz ; z^2
           mov ebp,eax
           imul ebp

           add eax,esi
           add eax,edi

           call sqrt    ; get square root of number

           mov ecx,eax
           cmp ecx,0
           je lam_abort ; should never happen!

           mov eax,finx
           cdq
           shl eax,8    ; set unit vector to 256
           idiv ecx
           mov finx,eax

           mov eax,finy
           cdq
           shl eax,8
           idiv ecx
           mov finy,eax

           mov eax,finz
           cdq
           shl eax,8
           idiv ecx
           mov finz,eax

           mov ebx,finx
           mov ecx,finy
           mov ebp,finz

lam_abort:
           ret

; set up all lambert normals from object si to object di

set_up_all_lambert:
           movzx edi,di ; in case user is lazy
           movzx esi,si

           xchg edi,esi
set_lop:
           push esi edi
           call pre_cal_lambert
           pop edi esi
           inc edi
           cmp edi,esi
           jb  set_lop

           ret

; rotate surface normal through lambert matrix

lrotate:
           mov ax,w lmatrix+4    ; solve edi = bx(0)+cx(4)+bp(8)
           imul bp
           shrd ax,dx,14
           movsx edi,ax
           mov ax,w lmatrix+2
           imul cx
           shrd ax,dx,14
           movsx eax,ax
           add edi,eax
           mov ax,w lmatrix+0
           imul bx
           shrd ax,dx,14
           movsx eax,ax
           add edi,eax          ; di = new colour 0=255

           ret

; fast pathagorean solve, sqr(ax^2+bx^2) works well for numbers ax<90, bx<128
; uses huge table.  limit of solve is for 16 bit values:eg sqr(65535^2+65535^2)

pathagorean:
;          include pathagor.inc ; dont include this routine if you don't need it
                                ; 'cause it's a real big table.
sqrax2bx2:
           cmp ax,bx            ; set ax = smallest
           ja s pa_smal
           xchg ax,bx
pa_smal:
           cmp ax,90            ; check if parameters of triangle are within
           jae s pa_imul        ; table or are too big for fast load.
           cmp bx,128
           jae s pa_imul

           shl bx,7             ; *128
           add ax,bx            ; ax = bx*128+ax
           movzx esi,ax
           xor eax,eax
           mov al,b pathagorean[esi]
           ret
pa_imul:
           movzx eax,ax         ; variables too large, do mathematically
           movzx ebx,bx
           imul eax,eax
           imul ebx,ebx
           add eax,ebx
           jmp sqrt

