;PPMPX2P.ASM 3.3(for DPMI)
;	MACHINE LANGUAGE SUBROUTINES
;	FOR PPMPQS
;	PROTECT MODE part
;	1991-97 by YUJI KIDA
;
.386P
pmode	segment para public use32
	assume	cs:pmode,ds:pmode

	include	ppmpx.h


	org	100h
start:
pmodesetR1R2:
	; Entry point placed at offset 100h by the org above.
	; Walks the record table at [_primeadr] and lets setparam / setparamB
	; fill in the two start fields ([record+8], [record+12]) of each record.
	; The first two records (sign and 2) are skipped.
	; Register use inside the loops:
	;   esi = current record, advanced by primeunitbytes per record
	;   ebx = modulus handed to the helper
	;   ecx = records left; parked on the stack across each call because
	;         the helpers overwrite ecx
	; Leaves through a far return that carries a 66h operand-size prefix.

	; loop for primes
	; Word 0 of each record is added to ebx (which starts at 2), so ebx
	; accumulates the prime from the stored differences.
	; NOTE(review): there is no check that [_primes] > 2 before this loop;
	; a count of exactly 2 would wrap ecx to 0FFFFFFFFh - confirm the caller
	; guarantees more than two records.

	mov	esi,ds:[_primeadr]
	add	esi,2*primeunitbytes	;skip sign & 2
	mov	ecx,ds:[_primes]
	sub	ecx,2			;skip sign & 2
	mov	ebx,2
R1R2loop:
	push	ecx
	movzx	eax,word ptr [esi]
	add	ebx,eax			;ebx = this record's prime

	call	setparam
	add	esi,primeunitbytes
	pop	ecx
	dec	ecx
	jnz	R1R2loop

	; Second pass: [_powersof2] records handled by setparamB.
	; Here word 0 of the record is loaded as the modulus itself
	; (no accumulation). Skipped entirely when the count is zero.
	mov	ecx,ds:[_powersof2]
	jecxz	R1R2loopBout
R1R2loopB:
	push	ecx
	movzx	ebx,word ptr [esi]
	call	setparamB
	add	esi,primeunitbytes
	pop	ecx
	dec	ecx
	jnz	R1R2loopB
R1R2loopBout:

	; Third pass: the remaining [_primepowers]-[_powersof2] records,
	; again with word 0 used directly as the modulus, handled by setparam.
	; NOTE(review): only a difference of exactly zero is skipped; a
	; negative difference would be treated as a huge count.
	mov	ecx,ds:[_primepowers]
	sub	ecx,ds:[_powersof2]
	jz	R1R2loopCout
R1R2loopC:
	push	ecx
	movzx	ebx,word ptr [esi]
	call	setparam
	add	esi,primeunitbytes
	pop	ecx
	dec	ecx
	jnz	R1R2loopC
R1R2loopCout:

	; return to real mode
	; 66h is the operand-size override prefix: inside this use32 segment
	; the far return therefore uses the 16-bit operand size.

	db	66h
	retf


	;data structure
	; prime diff	2bytes
	; log(P) 	2bytes(higher=0)
	; sqrt(WN) @ P	4bytes
	; start1	4 bytes
	; start2	4 bytes
	; ( total	10h bytes=primeunitbytes)

	; calc parameters for each prime
	;
	; In:   ebx = modulus, esi = address of the table record
	; Out:  [esi+8] / [esi+12] = the two start values (each includes
	;       [_sievetop]); the larger of the two is stored at [esi+8]
	;       ds:[_MIA2P] = inverse returned by ax_modinv_bx
	; Keeps esi (pushed/popped) and ebx (only read).
	; Destroys eax, ecx, edx, edi (edi through ax_modinv_bx) and the flags;
	; DF is clear on return.
	; NOTE(review): if the value passed to ax_modinv_bx is 0 (modulus
	; divides it) that routine faults - presumably excluded by the caller.
setparam:
	push	esi

	; edx = (dwords at _D+12, _D+8, _D+4, most significant first) mod ebx.
	; std makes lodsd step esi downward; each div passes its remainder
	; to the next one through edx.
	mov	esi,_D+12
	xor	edx,edx
	std
  rept 3
	lodsd
	div	ebx
  endm
	cld
	mov	eax,edx
	shl	eax,1
	mul	edx			;edx:eax = 2*r*r, r = remainder above
	div	ebx
	mov	eax,edx			;eax = 2*r*r mod ebx

	call	ax_modinv_bx
	mov	ds:[_MIA2P],eax	;set 1/(2A) @ P

	; edx = (dwords at _B+20 .. _B+4, most significant first) mod ebx,
	; same downward division chain as above but five dwords long.
	mov	esi,_B+20
	xor	edx,edx
	std
  rept 5
  	lodsd
	div	ebx
  endm
	cld

	pop	esi
	mov	eax,[esi+4]		;get R = modsqrt(WN,p)
	add	edx,eax
	cmp	edx,ebx
	jbe	short setparamj1	;one conditional subtract; edx = ebx is left as is
	sub	edx,ebx
setparamj1:
	mov	eax,ebx
	sub	eax,edx			;(-B-R)@P  (in 0..ebx, reduced again by the div below)

	mul	dword ptr ds:[_MIA2P]
	div	ebx			;edx = (-B-R)@P/2A@P

	mov	eax,edx
	add	eax,ds:[_sievewidth]
	xor	edx,edx
	div	ebx
	mov	ecx,edx	;((-B-R)@P/2A+M)@P

	mov	eax,[esi+4]
	shl	eax,1
	mul	dword ptr ds:[_MIA2P]	;2R/2A
	div	ebx		;edx = 2R/2A@P
	mov	eax,ds:[_sievetop]
	add	edx,ecx		;second value = first value + 2R/2A
	cmp	edx,ebx
	jb	short setparamj3	;sum < ebx: no wrap, so edx >= ecx

	; wrapped past the modulus: after the subtract edx < ecx,
	; so ecx is the larger value and goes to [esi+8]
	sub	edx,ebx
	add	ecx,eax
	add	edx,eax
	mov	[esi+8],ecx
	mov	[esi+12],edx	;((-B+R)@P/2A+M)@P
	ret
setparamj3:
	; no wrap: edx is the larger value and goes to [esi+8]
	add	edx,eax
	add	ecx,eax
	mov	[esi+8],edx	;((-B+R)@P/2A+M)@P
	mov	[esi+12],ecx
	ret


; Variant of setparam for moduli that are powers of 2.
; Reduction modulo ebx is done by masking with ebx-1 (dec ebx / and / inc ebx)
; instead of dividing, which is only valid for a power-of-two ebx.
;
; In:   ebx = modulus (power of 2), esi = address of the table record
; Out:  [esi+8] / [esi+12] = the two start values (each includes
;       [_sievetop]); the larger of the two is stored at [esi+8]
;       ds:[_MIA2P] = inverse returned by ax_modinv_bx
; Keeps esi (pushed/popped); ebx is restored after every temporary dec.
; Destroys eax, ecx, edx, edi (edi through ax_modinv_bx) and the flags.
; Only the lowest dword of _D and of _B (at +4) is read.
setparamB:			;for powers of 2
	push	esi

	mov	esi,_D+4
	mov	eax,[esi]
	mul	eax			;eax = low 32 bits of the square
	dec	ebx
	and	eax,ebx			;reduce modulo the power of 2
	inc	ebx
	call	ax_modinv_bx
	mov	ds:[_MIA2P],eax		;set 1/A @ P

	mov	esi,_B+4
	mov	eax,[esi]
	dec	ebx
	and	eax,ebx			;eax = lowest dword of B, reduced
	inc	ebx
	mov	edx,eax

	pop	esi
	mov	eax,[esi+4]		;get R = modsqrt(WN,p)
	add	edx,eax
	cmp	edx,ebx
	jbe	short setparamBj1	;one conditional subtract; edx = ebx is left as is
	sub	edx,ebx
setparamBj1:
	mov	eax,ebx
	sub	eax,edx			;(-B-R)@P
	shr	eax,1			;(-B-R)@P/2

	dec	ebx			;ebx = mask until the inc below (mul does not use ebx)
	mul	dword ptr ds:[_MIA2P]
	and	eax,ebx			;eax = (-B-R)@P/2A@P
	add	eax,ds:[_sievewidth]
	and	eax,ebx
	inc	ebx
	mov	ecx,eax			;((-B-R)@P/2A+M)@P

	mov	eax,[esi+4]
	dec	ebx
	mul	dword ptr ds:[_MIA2P]	;R/A
	and	eax,ebx			;R/A@P
	inc	ebx
	mov	edx,eax
	mov	eax,ds:[_sievetop]
	add	edx,ecx			;second value = first value + R/A
	cmp	edx,ebx
	jb	short setparamBj3	;sum < ebx: no wrap, so edx >= ecx

	; wrapped past the modulus: after the subtract edx < ecx,
	; so ecx is the larger value and goes to [esi+8]
	sub	edx,ebx
	add	ecx,eax
	add	edx,eax
	mov	[esi+8],ecx
	mov	[esi+12],edx	;((-B+R)@P/2A+M)@P
	ret
setparamBj3:
	; no wrap: edx is the larger value and goes to [esi+8]
	add	edx,eax
	add	ecx,eax
	mov	[esi+8],edx	;((-B+R)@P/2A+M)@P
	mov	[esi+12],ecx
	ret


	org	280h

	; * sieve process

; Sieve one block.
;
; In:   no register arguments. Reads [_sieveBsize], [_sievetop], [_inilog],
;       [_primeadr], [_sieveover], [_primes], [_primes4], [_primepowers],
;       [_cutlog] and the start fields of every table record.
; Out:  every sieve byte in the block has had the log byte ([record+2]) of
;       each record that hits it subtracted;
;       the start fields [record+8] / [record+12] are advanced past the
;       block and lowered by [_sieveBsize];
;       sieveansarea: word at +0 = number of hits, dwords from +4 = offsets
;       (relative to [_sievetop]) of the bytes that are below [_cutlog].
; Keeps ebp (pushed/popped). Destroys eax, ebx, ecx, edx, esi, edi, flags.
; stosd / scasb address memory through es:edi and count upward
; (assumes es covers the same memory as ds and DF = 0 on entry - confirm).
; Register roles in the three record loops:
;   esi = record (saved on the stack while esi/edi walk the sieve)
;   ebx = step (prime or prime power), edx = [_sieveover] = end limit
;   ebp = [_sieveBsize], ecx = records left in the current loop
pmodesieve:
	; set initial value

	push	ebp
	mov	ecx,ds:[_sieveBsize]
	mov	edi,ds:[_sievetop]
	mov	ebp,ecx
	shr	ecx,2
	inc	ecx			;sieveBsize/4 + 1 dwords are filled
	mov	eax,ds:[_inilog]
	rep	stosd

	; sieve main1 start
	; Records whose step is small enough to hit the block repeatedly.
	; NOTE(review): the loop is entered without a zero check on ecx.

	mov	esi,ds:[_primeadr]
	add	esi,2*primeunitbytes	;skip sign & 2
	mov	ebx,2			;start from 2

	mov	edx,ds:[_sieveover]

	mov	ecx,ds:[_primes]
	sub	ecx,ds:[_primes4]	;cut primes > sieveBsize
	sub	ecx,2			;skip sign & 2

primeloop:
	movzx	eax,word ptr [esi]
	push	esi		;/*
	add	ebx,eax		;ebx = prime

	mov	al,[esi+2]	;log
	mov	edi,[esi+8]	;start address1
	mov	esi,[esi+12]	;start address2

	; Two-way unrolled: both pointers are stepped together and only edi
	; is tested, relying on esi <= edi (setparam stores the larger start
	; at +8). The first pair of subtractions is done without any test.
	align 4
setlogp10:
	sub	[edi],al
	lea	edi,[edi+ebx]
	sub	[esi],al
	lea	esi,[esi+ebx]
	cmp	edi,edx
	jae	setlogp20	;if (esi <=) edi < edx
	sub	[edi],al
	lea	edi,[edi+ebx]
	sub	[esi],al
	lea	esi,[esi+ebx]
	cmp	edi,edx
	jb	setlogp10	;if (esi <=) edi < edx
setlogp20:

setlogp30:
	; edi is past the limit; esi may still have one hit left.
	; After that extra step esi is the larger pointer, so the two are
	; swapped to keep the larger one in edi.
	cmp	esi,edx
	jae	short setlogp40	;if esi >= edx too
	sub	[esi],al
	add	esi,ebx
	xchg	esi,edi
setlogp40:
	mov	eax,esi
	pop	esi		;*/
	sub	edi,ebp		;ebp = [_sieveBsize]
	sub	eax,ebp
	mov	[esi+8],edi
	mov	[esi+12],eax

	dec	ecx
	lea	esi,[esi+primeunitbytes]	;lea keeps the flags of dec
	jnz	primeloop

	; sieve main1 end

	; sieve main5 start
	; 0 or 1 time
	; Each start is tested on its own; at most one subtraction per start.

sievemain5:
	mov	ecx,ds:[_primes4]
	jecxz	short sievepowers
align 4
primeloop5:
	xor	edi,edi
	mov	eax,[esi]
	mov	di,ax
	shr	eax,16		;al=[esi+2]=log
	add	ebx,edi		;ebx = prime

	push	esi		;/*
	mov	edi,[esi+8]	;start address1
	mov	esi,[esi+12]	;start address2
	cmp	edi,edx
	jae	short setlogp510
	sub	[edi],al
	add	edi,ebx
setlogp510:
	cmp	esi,edx
	jae	short setlogp520
	sub	[esi],al
	add	esi,ebx
	xchg	esi,edi
setlogp520:
	mov	eax,esi
	pop	esi		;*/
	sub	edi,ebp		;ebp = [_sieveBsize]
	sub	eax,ebp
	mov	[esi+8],edi
	mov	[esi+12],eax

	add	esi,primeunitbytes
	dec	ecx
	jnz	primeloop5

	; sieve main5 end

sievepowers:
	; Zero-extend the count explicitly: jecxz and dec ecx below use the
	; whole of ecx, so a plain 16-bit load into cx would only be correct
	; while the upper half of ecx happens to be zero here.
	; (Same encoded length as the 16-bit mov, so the code layout in front
	; of the next org is unchanged.)
	movzx	ecx,word ptr ds:[_primepowers]
	jecxz	short sievedone
primeloopB:
	xor	ebx,ebx
	mov	eax,[esi]
	mov	bx,ax		;ebx = primepower
	shr	eax,16		;al=[esi+2]=log

	push	esi		;/*
	mov	edi,[esi+8]	;start address1
	mov	esi,[esi+12]	;start address2

	; Same scheme as the main loop, not unrolled; the first pair of
	; subtractions is again done without a limit test.
	align 4
setlogpB10:
	sub	[edi],al
	add	edi,ebx
	sub	[esi],al
	add	esi,ebx
	cmp	edi,edx
	jb	setlogpB10

	cmp	esi,edx
	jae	short setlogpB20
	sub	[esi],al
	add	esi,ebx
	xchg	esi,edi
setlogpB20:
	mov	eax,esi
	pop	esi		;*/
	sub	edi,ebp		;ebp = [_sieveBsize]
	sub	eax,ebp
	mov	[esi+8],edi
	mov	[esi+12],eax

	add	esi,primeunitbytes
	dec	ecx
	jnz	primeloopB

	; sieve mainB end

sievedone:
	; Scan the block for bytes below [_cutlog] and list their offsets.
	;   ebx -> hit counter (word), esi -> next free result slot
	;   edi = scan pointer, ecx = bytes left, edx = [_sievetop]
	mov	al,ds:[_cutlog]
	mov	edx,ds:[_sievetop]

	mov	ebx,sieveansarea
	mov	word ptr [ebx],0
	mov	esi,sieveansarea+4
	mov	edi,edx			;ds:[_sievetop]
	mov	ecx,ebp			;ds:[_sieveBsize]
align 4
sieveanslp:
	scasb				;flags of al - [edi], then edi advances
	ja	short sieveansfind	;al > byte (unsigned): record it
sieveansnext:
	loop	sieveanslp

	; return to real mode
	; (66h prefix: the far return uses the 16-bit operand size)
sieveansret:
	pop	ebp

	db	66h
	retf


sieveansfind:
	; scasb has already moved edi one past the hit, hence the dec.
	sub	edi,edx		;ds:[_sievetop]
	dec	edi
	mov	[esi],edi	;result
	add	edi,edx		;ds:[_sievetop]
	inc	edi		;edi back to the scan position
	add	esi,4
	inc	word ptr [ebx]	;# of results
	jmp	sieveansnext


	org	480h
; Divide the number at [_absQ] by 2 and by the table primes.
;
; In:   [_absQ]   = address of a number in 16-bit format
;                   (word 0 = length in words, then the words, low first)
;       [_result] = value subtracted from [_sieveConst] to form the offset
;                   that is added to each record's start fields
;       table at [_primeadr] with [_primes] / [_primes4] as in pmodesieve
; Out:  the number at [_absQ] is overwritten, in the same 16-bit format,
;       with what is left after the divisions.
;       [_result] is overwritten with the address of the work area _W.
; Work: _W holds the number in dword format (dword 0 = length in dwords).
; Keeps ebp (pushed/popped). Destroys eax, ebx, ecx, edx, esi, edi, flags.
; DF: assumed clear on entry (lodsw/stosd/movsw count upward - confirm);
;     set from decomp4 on, cleared again at decompout.
; NOTE(review): the unrolled divisions handle at most ansDword+1 dwords;
; a longer number would be divided only partially.
decompose:
	push	ebp

	mov	esi,ds:[_absQ]	;absolute adr of W#
	push	esi		;* kept for the write-back at decompout

	mov	ebx,_W
	mov	edi,ebx		;set in _W by dword format
	xor	eax,eax
	lodsw			;ax = length in words
	mov	ecx,eax
	inc	eax
	shr	eax,1		;length in dwords = (words+1)/2
	stosd
	rep	movsw
	xor	eax,eax
	stosw			;zero word after the data (fills the top dword
				; when the word count is odd)

	; divide by p=2
	; First drop whole zero dwords at the low end (each is a factor 2^32).
decomp2:
	cmp	dword ptr [ebx+4],0
	jne	short decomp4
	mov	ecx,[ebx]
	dec	ecx
	mov	[ebx],ecx
	lea	edi,[ebx+4]
	lea	esi,[edi+4]
	rep	movsd		;shift the remaining dwords down by one
	jmp	decomp2
decomp4:
	std

	; ebp = 2^(number of trailing zero bits of the lowest dword).
	; bsf is used instead of a shift-and-count loop: with the lowest
	; dword equal to 80000000h such a loop would shift ebp out to 0 and
	; the division below would fault.
	mov	eax,[ebx+4]	;non-zero here (exit condition of decomp2)
	bsf	ecx,eax		;ecx = trailing zero bits, 0..31
	mov	ebp,1
	shl	ebp,cl
	cmp	ebp,1
	je	short decomp8	;already odd

	; Long division of _W by ebp, from the highest dword downward;
	; each quotient dword is stored back in place.
	mov	esi,ebx
	mov	eax,[esi]
	mov	ecx,eax
	shl	eax,2
	add	esi,eax		;highest dword adr
	xor	edx,edx
	lodsd
	div	ebp
	push	eax		;new highest value
	jmp	short decomp7
align 4
decomp6:
	lodsd
	div	ebp
decomp7:
	mov	[esi+4],eax	;lodsd already stepped esi down by 4
	loop	decomp6
	pop	eax
	or	eax,eax
	jnz	short decomp75
	dec	dword ptr [ebx]	;highest dword became 0: one dword shorter
decomp75:
	mov	eax,[ebx]
	or	eax,[ebx+4]
	dec	eax
	jz	decompout	;if result=1

	; divide by odd primes
	; ebx = [_sieveConst] - [_result] is added to each start field;
	; the pointer to _W is parked in [_result] meanwhile.
decomp8:
	mov	eax,ds:[_sieveConst]
	sub	eax,ds:[_result]
	mov	ds:[_result],ebx
	mov	ebx,eax

	mov	edi,ds:[_primeadr]
	mov	ecx,ds:[_primes]
	add	edi,2*primeunitbytes	;skip sign & 2
	sub	ecx,2			;
	sub	ecx,ds:[_primes4]
	mov	ebp,2
	; Loop A: a record is taken when (start + ebx) is a multiple of the
	; prime, for either start field.
decomp10A:
	movzx	eax,word ptr [edi]
	xor	edx,edx
	add	ebp,eax		;ebp = prime (accumulated differences)
	mov	eax,[edi+8]
	add	eax,ebx
	div	ebp
	or	edx,edx
	jz	godecompA	;divide exactly

	xor	edx,edx
	mov	eax,[edi+12]
	add	eax,ebx
	div	ebp
	or	edx,edx
	jz	godecompA	;divide exactly
decomp40A:
	add	edi,primeunitbytes
	loop	decomp10A

	mov	ecx,ds:[_primes4]
	jecxz	decomp100

	; Loop B (the last [_primes4] records): no division, the record is
	; taken only when (start + ebx) equals the prime itself.
decomp10B:
	movzx	eax,word ptr [edi]
;	xor	edx,edx
	add	ebp,eax
	mov	eax,[edi+8]
	add	eax,ebx
;	div	ebp
;	or	edx,edx
cmp	eax,ebp
	jz	godecompB	;divide exactly

;	xor	edx,edx
	mov	eax,[edi+12]
	add	eax,ebx
;	div	ebp
;	or	edx,edx
cmp	eax,ebp
	jz	godecompB	;divide exactly
decomp40B:
	add	edi,primeunitbytes
	loop	decomp10B

decomp100:
	mov	ebx,ds:[_result]	;ebx = _W again

decompout:
	; Write _W (at ebx) back to the caller's buffer in 16-bit format.
	cld

	mov	esi,ebx
	pop	ebx		;*transfer to 16bit format
	mov	edi,ebx
	lodsd
	shl	eax,1
	mov	ecx,eax		;word length
	stosw
	rep	movsw
	cmp	word ptr [edi-2],0
	jne	short decomp50	;if highest not 0
	dec	word ptr [ebx]	;top word is 0: report one word less
decomp50:
	pop	ebp

	; 66h prefix: the far return uses the 16-bit operand size
	db	66h
	retf


;decompose by small primes
; On entry to decomp90 / decomp200 the stack holds the loop's ecx (top)
; and ebx (below it), pushed at godecompA; ebx = address of _W.

decomp90:
	; Trial pass: remainder of _W mod ebp without storing anything.
	mov	esi,ebx
	mov	eax,[esi]
	mov	ecx,eax
	shl	eax,2
	add	esi,eax		;highest dword adr
	xor	edx,edx

  rept	ansDword
	lodsd
	div	ebp
	dec	ecx
	jz	short decomp96
  endm

decomp96:
	or	edx,edx
	jz	short decomp200	;divisible once more
	pop	ecx
	pop	ebx
	jmp	decomp40A
align 4
godecompA:
	push	ebx
	push	ecx
	mov	ebx,ds:[_result]
decomp200:
	; Divide _W by ebp in place (highest dword first).
	mov	esi,ebx
	mov	eax,[esi]
	mov	ecx,eax
	shl	eax,2
	add	esi,eax		;highest dword adr
	xor	edx,edx
	lodsd
	div	ebp
	push	eax		;new highest value

	mov	[esi+4],eax
	dec	ecx
	jz	short decomp125

  rept	ansDword
	lodsd
	div	ebp
	mov	[esi+4],eax
	dec	ecx
	jz	short decomp125
  endm

decomp125:
	pop	eax
	or	eax,eax
	jnz	short decomp130	;if same length
	dec	dword ptr [ebx]
decomp130:
	mov	eax,[ebx]
	or	eax,[ebx+4]
	dec	eax
	jz	short decomp140	;if result=1
	cmp	ebp,1024
	jbe	decomp90	;check divisible ^2,^3 ?
	pop	ecx
	pop	ebx
	jmp	decomp40A
decomp140:
	pop	eax		;dummy
	pop	eax		;dummy
	jmp	decompout	;if result=1

;decompose by large primes
; Same structure as above, returning into loop B.

decomp90B:
	mov	esi,ebx
	mov	eax,[esi]
	mov	ecx,eax
	shl	eax,2
	add	esi,eax		;highest dword adr
	xor	edx,edx

  rept	ansDword
	lodsd
	div	ebp
	dec	ecx
	jz	short decomp96B
  endm

decomp96B:
	or	edx,edx
	jz	short decomp200B
	pop	ecx
	pop	ebx
	jmp	decomp40B

align 4
godecompB:
	push	ebx
	push	ecx
	mov	ebx,ds:[_result]
decomp200B:
	mov	esi,ebx
	mov	eax,[esi]
	mov	ecx,eax
	shl	eax,2
	add	esi,eax		;highest dword adr
	xor	edx,edx
	lodsd
	div	ebp
	push	eax		;new highest value

	mov	[esi+4],eax
	dec	ecx
	jz	short decomp125B

  rept	ansDword
	lodsd
	div	ebp
	mov	[esi+4],eax
	dec	ecx
	jz	short decomp125B
  endm

decomp125B:
	pop	eax
	or	eax,eax
	jnz	short decomp130B	;if same length
	dec	dword ptr [ebx]
decomp130B:
	mov	eax,[ebx]
	or	eax,[ebx+4]
	dec	eax
	jz	short decomp140B	;if result=1
	cmp	ebp,1024
	; Must stay on the B path: decomp90 would continue in loop A
	; (decomp40A / decomp200) with loop B's counter.
	jbe	decomp90B	;check divisible ^2,^3 ?
	pop	ecx
	pop	ebx
	jmp	decomp40B
decomp140B:
	pop	eax		;dummy
	pop	eax		;dummy
	jmp	decompout	;if result=1


;
;  long integer @ 32bit integer
;  edx = [esi] @ ebx
;
; In:   esi = address of a number in dword format
;             (dword 0 = length in dwords, then the dwords, low first)
;       ebx = divisor, must not be 0
; Out:  edx = number mod ebx (0 when the length field is 0)
;       esi = unchanged for a non-zero length (it is moved to the highest
;             dword and stepped back down to the length field)
;       DF  = clear
; destroy : eax,ecx,edx,flags
;
; The division runs from the highest dword downward; each div leaves its
; remainder in edx as the high half of the next dividend. A counted loop
; is used so that any length works, not only 1..6 dwords.

longmod_bx:
	mov	ecx,[esi]		;ecx = length in dwords
	lea	esi,[esi+ecx*4]		;esi -> highest dword
	xor	edx,edx
	jecxz	short longmodret	;empty number: remainder 0
	std				;lodsd walks downward
longmodlp:
	lodsd
	div	ebx
	dec	ecx
	jnz	longmodlp
longmodret:
	cld
	ret

;
; inverse modulo prime
; 
; inp : eax = value, ebx = modulus
; out : eax = 1/eax mod ebx
; destroy : ecx,edx,esi,edi (nothing is touched when eax = 1 on entry)
; ebx is only read.
;
; Euclid-style remainder chain on (ebx, eax) with a pair of coefficients
; kept reduced modulo ebx:
;   eax = current dividend, ecx = current divisor
;   esi = coef1 (older), edi = coef2 (newer)
; In each step the remainder r of eax/ecx is examined: when 2r > ecx the
; quotient is rounded up and ecx-r is used as the next remainder
; (fall-through path), otherwise r itself is used (modinv60).
; The chain stops when the remainder is 1; coef2 is then returned.
; NOTE(review): eax = 0, or a value sharing a factor with ebx, never
; produces remainder 1; the remainder reaches 0 instead and the next
; "div ecx" divides by zero.

ax_modinv_bx:
	cmp	eax,1
	je	short modinvret	;1 is its own inverse
	mov	ecx,ebx
	cmp	eax,ebx
	jb	short modinv10
	xor	edx,edx
	div	ebx
	mov	eax,edx		;reduce the input modulo ebx first
	cmp	eax,1
	je	short modinvret
modinv10:
	xchg	eax,ecx		;eax = modulus, ecx = value
	xor	esi,esi		;coef1
	mov	edi,1		;coef2
modinv20:
	xor	edx,edx
	div	ecx		;eax = quotient, edx = remainder
	shl	edx,1
	cmp	edx,ecx
	jbe	short modinv60	;2*remainder <= divisor: keep the remainder

	; remainder above half the divisor: use divisor-remainder
	; together with quotient+1
	shr	edx,1
	sub	edx,ecx
	neg	edx
	push	edx		;new remainder
	inc	eax		;inc quotient

	mul	edi
	div	ebx		;edx = quotient*coef2 mod ebx
	mov	eax,edx
	sub	eax,esi
	jae	short modinv30
	add	eax,ebx		;bring the difference back into 0..ebx-1
modinv30:
	mov	esi,edi		;new coef1=old coef2
	mov	edi,eax		;new coef2=quotient*coef2-coef1

	mov	eax,ecx		;old divisor becomes the dividend
	pop	ecx		;new remainder becomes the divisor
	cmp	ecx,1
	jne	modinv20	;GCD must be 1, otherwise the remainder reaches 0 (see header)
	mov	eax,edi
modinvret:
	ret

modinv60:
	shr	edx,1		;undo the doubling
	push	edx		;remainder

	mul	edi
	div	ebx		;edx = quotient*coef2 mod ebx
	mov	eax,esi
	sub	eax,edx
	jae	short modinv70
	add	eax,ebx		;bring the difference back into 0..ebx-1
modinv70:
	mov	esi,edi		;new coef1=old coef2
	mov	edi,eax		;new coef2=coef1-quotient*coef2

	mov	eax,ecx		;old divisor becomes the dividend
	pop	ecx		;new remainder becomes the divisor
	cmp	ecx,1
	jne	modinv20	;GCD must be 1, otherwise the remainder reaches 0 (see header)
	mov	eax,edi
	ret

pmode	ends

end	start
