/* NVClock 0.6.2 - Linux overclocker for NVIDIA cards
 *
 * site: http://nvclock.sourceforge.net
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
 */

#include "backend.h"
#include "nvclock.h"

/* Memory type codes; in this file they are derived from bit 0 of PFB[0x200/4]. */
#define SDR 0
#define DDR 1

/* Reference (crystal) frequency in kHz used by CalcSpeed() and ClockSelect().
 * SetCoreSpeed(), SetMemorySpeed() and GetSpeeds() each assign it from the
 * PEXTDEV strap register before doing any clock arithmetic. */
int base_freq;


/* Tells ClockSelect() which clock a PLL value is being computed for.
 * NOTE(review): ClockSelect() does not currently look at this argument. */
typedef enum
{
    SETMEMSPEED = 2,    /* passed by SetMemorySpeed() */
    SETNVSPEED  = 3,    /* passed by SetCoreSpeed() */
} Action;


/* Convert PLL coefficients into a clock frequency.
 *
 * m: PLL divider
 * n: PLL multiplier
 * p: post divider, applied as a right shift
 *
 * Returns (n / m * base_freq) >> p as a float, i.e. in the same unit as the
 * global base_freq (kHz). Returns 0 when m is 0: dividing by zero would yield
 * infinity/NaN, and converting that to int is undefined behaviour.
 */
float CalcSpeed(int m, int n, int p)
{
    if(m == 0)
    {
        return 0.0f;
    }

    return (float)((int)((float)n / (float)m * base_freq) >> p);
}

/* Set both the memory clock and the core clock of one card.
 *
 * memclk: requested memory clock in MHz (converted to kHz for SetMemorySpeed)
 * nvclk:  requested core clock in MHz (converted to kHz for SetCoreSpeed)
 * number: card index, forwarded unchanged
 * debug:  debug flag, forwarded unchanged
 *
 * Both clocks are always attempted, memory first. Returns 0 when both calls
 * succeeded and -1 when at least one of them reported a failure.
 */
int ChangeSpeeds(float memclk, float nvclk, int number, int debug)
{
    int mem_status  = SetMemorySpeed((int)(memclk*1000), number, debug);
    int core_status = SetCoreSpeed((int)(nvclk*1000), number, debug);

    /* Do not hide a failed register update from the caller. */
    if(mem_status != 0 || core_status != 0)
    {
        return -1;
    }

    return 0;
}

/* Pick the PLL coefficients whose output is closest to the requested clock.
 *
 * clockIn: requested clock in kHz
 * PLL:     receives the register value (p << 16) + (n << 8) + m
 * action:  which clock is being programmed; not used by the search
 *
 * Only m and n are searched; the post divider p is always 0. The candidate
 * frequency is n / m * base_freq, so base_freq must be set before calling.
 * The search is seeded with the first candidate so that *PLL is always built
 * from valid coefficients, even when the request is lower than anything the
 * searched range can produce.
 */
static void ClockSelect(int clockIn, int *PLL, Action action)
{
    int m, n, mlow, mhigh, nlow, nhigh;
    int bestm, bestn, diff, bestdiff;
    const int p = 0;
    const float speed = (float)clockIn;

    (void)action;

    if(clockIn > 380000)
    {
        /* High clocks use a small divider and a reduced multiplier range. */
        mlow = 1;
        mhigh = 2;
        nlow = 14;
        nhigh = 93;
    }
    else if(base_freq == 14318)
    {
        mlow = 8;
        mhigh = 14;
        nlow = 14;
        nhigh = 255;
    }
    else
    {
        mlow = 7;
        mhigh = 13;
        nlow = 14;
        nhigh = 255;
    }

    /* Start from the first candidate instead of leaving the result unset. */
    bestm = mlow;
    bestn = nlow;
    bestdiff = (int)(speed - ((float)nlow / (float)mlow * base_freq));

    /*
    Many (m, n) pairs give nearly the same speed; keep the first pair whose
    difference with the requested speed is the smallest.
    */
    for(m = mlow; m <= mhigh; m++)
    {
        for(n = nlow; n <= nhigh; n++)
        {
            diff = (int)(speed - ((float)n / (float)m * base_freq));

            if(abs(diff) < abs(bestdiff))
            {
                bestdiff = diff;
                bestm = m;
                bestn = n;
            }
        }
    }

    *PLL = (p << 16) + (bestn << 8) + bestm;
}

/* Program the core clock of one card.
 *
 * clk:    requested core clock in kHz
 * number: index into the global card[] table
 * debug:  not used by this function
 *
 * Maps the chip registers, determines the reference frequency, computes new
 * PLL coefficients and writes them to PRAMDAC[0x500/4].
 * Returns 0 on success, -1 when the device could not be opened.
 *
 * NOTE(review): the descriptor and the three mappings are not released here;
 * confirm whether the backend expects the caller to clean them up.
 */
int SetCoreSpeed(unsigned int clk, int number, int debug)
{
    const struct card *nv_info = &card[number];
    unsigned long base = (long)nv_info->reg_address;
    riva_hw nv_card;
    int newPLL;
    int fd = openDevMem(number);

    (void)debug;

    if(fd == -1)
    {
        printf("Can't open /dev/mem or /dev/nvidia%d\n", number);
        return -1;
    }

    /* Map the registers of the nVidia chip */
    nv_card.PEXTDEV = mapDevMem(fd, base + 0x101000, 0x1000);
    nv_card.PFB     = mapDevMem(fd, base + 0x100000, 0x1000);
    nv_card.PRAMDAC = mapDevMem(fd, base + 0x680000, 0x2000);

    /* Reference frequency: strap bit 6 selects 14.318MHz on every chip; bit 22
     * (27MHz, Geforce 4 / Quadro 4) is only consulted for device ids >= 0x100. */
    if(nv_card.PEXTDEV[0x0000/4] & (1<<6))
    {
        base_freq = 14318;
    }
    else if(nv_info->device_id >= 0x100 && (nv_card.PEXTDEV[0x0000/4] & (1<<22)))
    {
        base_freq = 27000;
    }
    else
    {
        base_freq = 13500;
    }

    /* Select the new coefficients (clk is in kHz) and write them to the core PLL. */
    ClockSelect(clk, &newPLL, SETNVSPEED);
    nv_card.PRAMDAC[0x500/4] = newPLL;

    return 0;
}

/* Program the memory clock of one card.
 *
 * clk:    requested memory clock in kHz
 * number: index into the global card[] table
 * debug:  not used by this function
 *
 * Maps the chip registers, determines the memory type and the reference
 * frequency, computes new PLL coefficients and writes them to
 * PRAMDAC[0x504/4]. Returns 0 on success, -1 when the device could not be
 * opened.
 *
 * NOTE(review): the descriptor and the three mappings are not released here;
 * confirm whether the backend expects the caller to clean them up.
 */
int SetMemorySpeed(unsigned int clk, int number, int debug)
{
    const struct card *nv_info = &card[number];
    unsigned long base = (long)nv_info->reg_address;
    riva_hw nv_card;
    int newPLL, mem_type;
    int fd = openDevMem(number);

    (void)debug;

    if(fd == -1)
    {
        printf("Can't open /dev/mem or /dev/nvidia%d\n", number);
        return -1;
    }

    /* Map the registers of the nVidia chip */
    nv_card.PEXTDEV = mapDevMem(fd, base + 0x101000, 0x1000);
    nv_card.PFB     = mapDevMem(fd, base + 0x100000, 0x1000);
    nv_card.PRAMDAC = mapDevMem(fd, base + 0x680000, 0x2000);

    /* SDR or DDR; needed for some problematic GF2MX/GF4MX cards */
    if(nv_card.PFB[0x200/4] & 0x01)
    {
        mem_type = DDR;
    }
    else
    {
        mem_type = SDR;
    }

    /* Reference frequency: strap bit 6 selects 14.318MHz on every chip; bit 22
     * (27MHz, Geforce 4 / Quadro 4) is only consulted for device ids >= 0x100. */
    if(nv_card.PEXTDEV[0x0000/4] & (1<<6))
    {
        base_freq = 14318;
    }
    else if(nv_info->device_id >= 0x100 && (nv_card.PEXTDEV[0x0000/4] & (1<<22)))
    {
        base_freq = 27000;
    }
    else
    {
        base_freq = 13500;
    }

    /* Workaround for some Geforce2 MX / Geforce4 MX cards with SDR memory:
     * boards using 4x16 SDR memory report clocks twice as high as the real
     * one ("fake ddr"). The case is recognised from the memory type, the pci
     * id and the current clock; when it applies the request is doubled.
     */
    if(mem_type == SDR
       && (nv_info->device_id == 0x110 || nv_info->device_id == 0x111
           || nv_info->device_id == 0x172 || nv_info->device_id == 0x17a))
    {
        int cur_m = nv_card.PRAMDAC[0x504/4] & 0xff;
        int cur_n = (nv_card.PRAMDAC[0x504/4] >> 8) & 0xff;
        int cur_p = (nv_card.PRAMDAC[0x504/4] >> 16) & 0x0f;

        if(CalcSpeed(cur_m, cur_n, cur_p) > 280000)
        {
            clk *= 2;
        }
    }

    /* Select the new coefficients (clk is in kHz) and write them to the memory PLL. */
    ClockSelect(clk, &newPLL, SETMEMSPEED);
    nv_card.PRAMDAC[0x504/4] = newPLL;

    return 0;
}

/* Read back the current memory and core clocks of one card.
 *
 * clkout:  receives the memory clock in MHz (from PRAMDAC[0x504/4])
 * clkout2: receives the core clock in MHz (from PRAMDAC[0x500/4])
 * number:  index into the global card[] table
 * debug:   when 1, the raw PLL coefficients and clocks (kHz) are printed
 *
 * Returns 0 on success, -1 when the device could not be opened; the outputs
 * are left untouched in that case.
 *
 * NOTE(review): the descriptor and the three mappings are not released here;
 * confirm whether the backend expects the caller to clean them up.
 */
int GetSpeeds(float *clkout, float *clkout2, int number, int debug)
{
    unsigned long base;
    int m, mm, n, nn, p, pp, fd, mem_type;
    float nvclk, memclk;
    riva_hw nv_card;
    const struct card *nv_info;
    int factor = 1;

    nv_info = &card[number];

    base = (long)nv_info->reg_address;

    if( (fd = openDevMem(number)) == -1 )
    {
        printf("Can't open /dev/mem or /dev/nvidia%d\n", number);
        return -1;
    }

    /* Map the registers of the nVidia chip */
    nv_card.PEXTDEV = mapDevMem(fd, base + 0x101000, 0x1000);
    nv_card.PFB     = mapDevMem(fd, base + 0x100000, 0x1000);
    nv_card.PRAMDAC = mapDevMem(fd, base + 0x680000, 0x2000);

    /* Check if memory is SDR or DDR, needed for some problematic GF2MX/GF4MX
     * cards. The detected value is used as is, matching SetMemorySpeed(), so
     * that DDR boards are not treated as "fake ddr" SDR boards. */
    mem_type = (nv_card.PFB[0x200/4] & 0x01) ? DDR : SDR;

    /* find out the reference frequency
     * NOTE(review): this threshold (> 0x29) differs from the >= 0x100 used by
     * SetCoreSpeed()/SetMemorySpeed(); confirm which one is intended. */
    if(nv_info->device_id > 0x29)
    {
        /* The Geforce 4 and Quadro 4 use a base frequency of 27MHz */
        base_freq = (nv_card.PEXTDEV[0x0000/4] &(1<<6) ) ? 14318 : (nv_card.PEXTDEV[0x0000/4] & (1<<22)) ? 27000 : 13500;
    }
    else
    {
        base_freq = (nv_card.PEXTDEV[0x0000/4] & 0x40) ? 14318 : 13500;
    }

    /* m, n and p for the core */
    m = nv_card.PRAMDAC[0x500/4] & 0xff;
    n = (nv_card.PRAMDAC[0x500/4] >> 8) & 0xff;
    p = (nv_card.PRAMDAC[0x500/4] >> 16) & 0x0f;

    /* m, n and p for the memory */
    mm = nv_card.PRAMDAC[0x504/4] & 0xff;
    nn = (nv_card.PRAMDAC[0x504/4] >> 8) & 0xff;
    pp = (nv_card.PRAMDAC[0x504/4] >> 16) & 0x0f;

    /* Both clocks in kHz; computed once and reused below. */
    nvclk = CalcSpeed(m, n, p);
    memclk = CalcSpeed(mm, nn, pp);

    /* This is a workaround meant for some Geforce2 MX/Geforce4 MX cards
    *  using SDR memory. Gf2MX/Gf4MX cards using 4x16 SDR memory report
    *  clockspeeds twice as high as the real ones ("fake ddr").
    *  The case is recognised from the memory type, pci id and clockspeed;
    *  when it applies the memory clock is divided by 2 below.
    */
    if(mem_type == SDR && ( nv_info->device_id == 0x110 ||
        nv_info->device_id == 0x111 ||
        nv_info->device_id == 0x172 || nv_info->device_id == 0x17a))
    {
        if(memclk > 280000)
        {
            factor = 2;
        }
    }

    if(debug == 1)
    {
        printf("MPLL=  %i    m=%i n=%i p=%i\n", (int)memclk, mm, nn, pp);
        printf("NVPLL=  %i    m=%i n=%i p=%i\n", (int)nvclk, m, n, p);
    }

    /* Convert kHz to MHz for the caller. */
    *clkout = ( memclk / factor ) / 1000;
    *clkout2 = nvclk / 1000;

    return 0;
}
