/*
 *  linux/mm/mlock.c
 *
 *  Copyright (C) 1995, Ralf Haller
 */

/*
 *  This file contains functions to lock pages in memory as described
 *  in POSIX.4
 */

#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/head.h>
#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/fs.h>
#include <linux/mlock.h>
#include <linux/malloc.h>

#include <asm/dma.h>
#include <asm/system.h> /* for cli()/sti() */
#include <asm/bitops.h>
#include <asm/pgtable.h>


/*
 *  Print the vma list starting at vma (for debugging only).
 *
 *  One line is printed per area: owning pid, address range, flags, inode
 *  and offset.  If the page backing the address vm_start+1024 is resident,
 *  its mem_map and lock_map entries and its map number are appended.
 *
 *  vma: first area of the list to print; NULL prints nothing
 */
static void printk_list (struct vm_area_struct * vma)
{
  pgd_t *pgd;
  pmd_t *pmd;
  pte_t *pte;
  unsigned long address, page;

  for ( ; vma ; vma = vma->vm_next ) {
    printk("pid:%d %08lX-%08lX flags:0x%X inode:0x%p offset:%ld", 
	   vma->vm_task->pid,
	   vma->vm_start,vma->vm_end,vma->vm_flags,
	   vma->vm_inode,vma->vm_offset);

    /*
     * Walk the page tables of the task that owns the area (the same task
     * whose pid was just printed).  Each level is only dereferenced when
     * the level above it exists, and the map arrays are only indexed for
     * a present page; otherwise the page number would be garbage.
     */
    address = vma->vm_start+1024;
    pgd = pgd_offset( vma->vm_task, address );
    if( !pgd_none(*pgd) ) {
      pmd = pmd_offset( pgd, address );
      if( !pmd_none(*pmd) ) {
	pte = pte_offset( pmd, address );
	if( pte_present(*pte) ) {
	  page = pte_page(*pte);
	  printk(" mem:%u lock:%u page:%lu",
		 mem_map[MAP_NR(page)],lock_map[MAP_NR(page)],MAP_NR(page));
	}
      }
    }

    printk("\n");
  }
}

/*
 *  The requested range covers the whole vma: nothing has to be split,
 *  the new flags and the new protection are simply stored in the area.
 *
 *  Always returns 0.
 */
static inline int mlock_fixup_all(struct vm_area_struct * vma,
	int newflags, pgprot_t prot)
{
	vma->vm_page_prot = prot;
	vma->vm_flags = newflags;

	return 0;
}

/*
 *  The requested range covers the beginning of the vma up to "end".
 *
 *  A new area is allocated for the head [vm_start, end) and receives the
 *  new flags and protection.  The existing vma is shrunk to the tail
 *  [end, vm_end) and keeps its old flags; its file offset is advanced by
 *  the size of the head.
 *
 *  Returns 0 on success or -ENOMEM if the new area cannot be allocated.
 */
static inline int mlock_fixup_start(struct vm_area_struct * vma,
	unsigned long end,
	int newflags, pgprot_t prot)
{
	struct vm_area_struct * head;

	head = (struct vm_area_struct *) kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
	if (!head)
		return -ENOMEM;

	/* the head starts out as a copy and is then cut off at "end" */
	*head = *vma;
	head->vm_end = end;
	head->vm_flags = newflags;
	head->vm_page_prot = prot;

	/* move the offset first, while vm_start still holds the old start */
	vma->vm_offset += end - vma->vm_start;
	vma->vm_start = end;

	/* the copy is one more user of the inode and of the mapping */
	if (head->vm_inode)
		head->vm_inode->i_count++;
	if (head->vm_ops && head->vm_ops->open)
		head->vm_ops->open(head);

	insert_vm_struct(current, head);
	return 0;
}

/*
 *  The requested range covers the vma from "start" up to its end.
 *
 *  A new area is allocated for the tail [start, vm_end) and receives the
 *  new flags and protection; its file offset is advanced by the distance
 *  from the old start.  The existing vma is shrunk to the head
 *  [vm_start, start) and keeps its old flags.
 *
 *  Returns 0 on success or -ENOMEM if the new area cannot be allocated.
 */
static inline int mlock_fixup_end(struct vm_area_struct * vma,
	unsigned long start,
	int newflags, pgprot_t prot)
{
	struct vm_area_struct * tail;

	tail = (struct vm_area_struct *) kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
	if (!tail)
		return -ENOMEM;

	/* the tail starts out as a copy and is then moved up to "start" */
	*tail = *vma;
	tail->vm_offset += start - vma->vm_start;
	tail->vm_start = start;
	tail->vm_flags = newflags;
	tail->vm_page_prot = prot;

	/* what is left of the original area ends where the tail begins */
	vma->vm_end = start;

	/* the copy is one more user of the inode and of the mapping */
	if (tail->vm_inode)
		tail->vm_inode->i_count++;
	if (tail->vm_ops && tail->vm_ops->open)
		tail->vm_ops->open(tail);

	insert_vm_struct(current, tail);
	return 0;
}

/*
 *  The requested range [start, end) lies strictly inside the vma.
 *
 *  Two new areas are allocated: "left" for [vm_start, start) and "right"
 *  for [end, vm_end); both keep the old flags.  The existing vma is shrunk
 *  to the middle piece [start, end) and receives the new flags and
 *  protection.  File offsets of the middle and right pieces are advanced
 *  by their distance from the old start.
 *
 *  Returns 0 on success or -ENOMEM if either allocation fails (in which
 *  case the vma is left untouched).
 */
static inline int mlock_fixup_middle(struct vm_area_struct * vma,
	unsigned long start, unsigned long end,
	int newflags, pgprot_t prot)
{
	struct vm_area_struct * left, * right;
	unsigned long old_start;

	left = (struct vm_area_struct *) kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
	if (!left)
		return -ENOMEM;
	right = (struct vm_area_struct *) kmalloc(sizeof(struct vm_area_struct), GFP_KERNEL);
	if (!right) {
		kfree(left);
		return -ENOMEM;
	}

	old_start = vma->vm_start;

	/* left piece: copy of the original, cut off at "start" */
	*left = *vma;
	left->vm_end = start;

	/* right piece: copy of the original, beginning at "end" */
	*right = *vma;
	right->vm_start = end;
	right->vm_offset += end - old_start;

	/* middle piece: the original area itself, with the new attributes */
	vma->vm_offset += start - old_start;
	vma->vm_start = start;
	vma->vm_end = end;
	vma->vm_flags = newflags;
	vma->vm_page_prot = prot;

	/* two more areas now refer to the inode and to the mapping */
	if (vma->vm_inode)
		vma->vm_inode->i_count += 2;
	if (vma->vm_ops && vma->vm_ops->open) {
		vma->vm_ops->open(left);
		vma->vm_ops->open(right);
	}

	insert_vm_struct(current, left);
	insert_vm_struct(current, right);
	return 0;
}

/*
 * lock/unlock the page containing address in physical memory
 *
 * address: the address
 * vma    : vma-struct of the process
 * mode   : ML_LOCK for locking the memory, ML_UNLOCK for unlocking it
 */
static void lock_page( unsigned long address, struct vm_area_struct *vma, int mode )
{
  pgd_t *pgd;
  pmd_t *pmd;
  pte_t *pte;
  unsigned long page;

  /*
   * Find the matching page entry...
   */
  pgd = pgd_offset( current, address );
  pmd = pmd_offset( pgd, address );
  pte = pte_offset( pmd, address );
  
  page = pte_page(*pte);
    
  /*
   * Load pages in physical memory. This is achieved by simply
   * faking page faults.
   */
  if( mode == ML_LOCK ) {

    /*
     * copy on write ?
     */
    if( !(vma->vm_flags & VM_DENYWRITE) && !(pte_val(*pte) & _PAGE_RW) ) {
      do_wp_page(vma, address, _PAGE_PRESENT|_PAGE_RW|_PAGE_USER );
      page = pte_page(*pte);
    }
    
    /*
     * page not present ?
     */
    if( pte_none(*pte) || !pte_present(*pte) ) {
      do_no_page(vma, address, _PAGE_RW|_PAGE_USER );
      page = pte_page(*pte);
    }
  
    /*
     * Increment the lock count only if page is not already locked.
     */
    if( !(vma->vm_flags & VM_LOCKED) ) {
      lock_map[MAP_NR(page)]++;
    }
  }
  else if( (vma->vm_flags & VM_LOCKED) && lock_map[MAP_NR(page)]) {
    lock_map[MAP_NR(page)]--;
  }
}

/*
 * Lock and unlock some pages in physical memory...
 *
 * addr: the starting address of the memory to lock/unlock
 * len : the length of the memory to lock/unlock
 * mode: ML_LOCK for locking the memory, ML_UNLOCK for unlocking it
 *
 * return: 0 on success (also for len == 0),
 *         -EPERM  if the caller is not the superuser,
 *         -ENOMEM if the range is not valid/mapped, if a vma could not be
 *                 split, or if more than half of the total memory
 *                 reported by si_meminfo() would be locked,
 *         -EAGAIN if more than half of the memory that is not locked yet
 *                 would be locked
 *
 * If an error occurs after some areas have already been processed, those
 * areas stay processed; nothing is rolled back.
 */ 
int do_mlock( const void *addr, size_t len, int mode )
{
  struct vm_area_struct *vma;
  unsigned long start, end, address, stop;
  unsigned int newflags;
  struct sysinfo i;
  pgprot_t newprot;
  int error = 0;

  /*
   * Check if superuser...
   */
  if( !suser() )
    return -EPERM;

  /*
   * Check if memory valid...
   */
  if( verify_area(VERIFY_READ, addr, len) )
    return -ENOMEM;

  /*
   * Check len...
   */
  if( !len )
    return 0;

  start = (unsigned long)addr;
  end = start + (unsigned long)len;

  /*
   * Align to page boundaries...
   */
  start = start & PAGE_MASK;
  end = (end + PAGE_SIZE-1) & PAGE_MASK;

  /*
   * ...and refuse a range that wrapped around the end of the address
   * space while being computed or rounded up.
   */
  if( end <= start )
    return -ENOMEM;

  /*
   * Test for memory a process wants to lock. If it's more than 50%
   * of the total physical memory, an error occurs. If a process tries
   * to lock more than 50% of the available physical memory, an error
   * is produced too.
   *
   * Plain integer arithmetic: floating point must not be used in the
   * kernel, and for integers "x > t/2" is the same test as "x > t*0.5".
   */
  if( mode == ML_LOCK ) {
    si_meminfo(&i);
    si_swapinfo(&i);

    if( end-start > i.totalram/2 )
      return -ENOMEM;
    if( end-start > (i.totalram-i.lockedram)/2 )
      return -EAGAIN;
  }

  /*
   * Set process swappable again. This is done because it is faster
   * to set the process unswappable than only locking all its pages
   */
  if( mode == ML_UNLOCK )
    current->mm->swappable = 1;

  /*
   * Run through the whole locking range, one vma at a time.
   */
  address = start;
  while( address < end ) {

    vma = find_vma(current, address);

    /*
     * Every page of the range has to lie inside some vma.
     */
    if( !vma || vma->vm_start > address ) {
      error = -ENOMEM;
      break;
    }

    /*
     * The part of the range that falls into this vma ends at "stop".
     */
    stop = vma->vm_end;
    if( stop > end )
      stop = end;

    /*
     * lock/unlock all pages of that part BEFORE the flags of the vma are
     * changed: lock_page() looks at VM_LOCKED in vma->vm_flags to decide
     * whether a page has to be counted, so it must see the old flags for
     * every page, not only for the first one.
     */
    for( ; address < stop ; address += PAGE_SIZE )
      lock_page( address, vma, mode );

    /*
     * set the flags...
     */
    newflags = vma->vm_flags;
    if( mode == ML_LOCK ) 
      newflags |= VM_LOCKED;
    else if( mode == ML_UNLOCK )
      newflags &= ~VM_LOCKED;

    newprot = protection_map[newflags & 0xf];

    /*
     * ...and split the vma up if the locked memory starts or ends
     * within it.
     */
    if( start <= vma->vm_start ) {
      if( end >= vma->vm_end )
	error = mlock_fixup_all(vma, newflags, newprot);
      else
	error = mlock_fixup_start(vma, end, newflags, newprot);
    }
    else if( end >= vma->vm_end )
      error = mlock_fixup_end(vma, start, newflags, newprot);
    else
      error = mlock_fixup_middle(vma, start, end, newflags, newprot);

    if( error )
      break;

  } /* while */

  /*
   *  merge matching vma-structs (also after an error, so that areas which
   *  were already processed do not stay split up needlessly)
   */
  merge_segments(current, start, end);

  return error;
}

/*
 * Lock and unlock the memmap of a process in physical memory
 *
 * task: the task to lock/unlock
 * how : MCL_CURRENT for locking all current pages and MCL_FUTURE for
 *       all future pages; the additional bits MCL_VMA, MCL_PROCESS and
 *       MCL_MEMORY only trigger debug output and are rejected afterwards
 *       when locking
 * mode: ML_LOCK for locking the memory, ML_UNLOCK for unlocking it
 *
 * return: 0 on success (also for a NULL task and for pid 1, which are
 *           silently ignored),
 *         -EPERM  if the caller is not the superuser,
 *         -EINVAL if how is invalid for ML_LOCK,
 *         -ENOMEM / -EAGAIN if the memory limits are exceeded
 *
 * NOTE(review): lock_page() walks the page tables of "current", so this
 * only works as intended for task == current, which is what both callers
 * in this file pass.
 */
int do_mlockall( struct task_struct *task, int how, int mode )
{
  struct vm_area_struct *vma;
  unsigned long address;
  struct sysinfo i;

  /*
   * task[1] has no locked memory...
   */
  if( task == NULL || task->pid == 1 )
    return 0;

  /*
   *  get system information
   */
  si_meminfo(&i);
  si_swapinfo(&i);

  /*
   * Check if superuser...
   */
  if( !suser() )
    return -EPERM;

  /*
   *  print debug information
   */
  if(how & MCL_VMA) printk_list(task->mm->mmap);
  if(how & MCL_PROCESS) {
    printk("maj_flt:%ld min_flt:%ld\n", 
	   task->mm->maj_flt,task->mm->min_flt);
    /*
     * One line of output. The format is built from adjacent string
     * literals; a literal must not contain a raw line break.
     */
    printk("start_code:0x%lx end_code:0x%lx end_data:0x%lx brk:0x%lx "
	   "start_stack:0x%lx %lx\n",
	   task->mm->start_code, task->mm->end_code,
	   task->mm->end_data, task->mm->brk,
	   task->mm->start_stack,task->kernel_stack_page);
  }
  if(how & MCL_MEMORY) {
    printk("nr_swap_pages:%d nr_free_pages:%d\n",
	   nr_swap_pages,nr_free_pages);
    printk("totalmem:%ld freemem:%ld lockedmem:%ld sharedmem:%ld\n",
	   i.totalram,i.freeram,i.lockedram,i.sharedram);
    printk("totalswap:%ld freeswap:%ld\n",
	   i.totalswap,i.freeswap);
  }

  /*
   * Check parameter how: when locking, at least one of MCL_CURRENT and
   * MCL_FUTURE has to be set and no other bit may be set.
   */
  if( mode == ML_LOCK &&
      ((how & ~(MCL_CURRENT|MCL_FUTURE)) || 
      !(how & (MCL_CURRENT|MCL_FUTURE))) )
      return -EINVAL;

  /*
   *  set future lock
   *
   *  NOTE(review): the flag stays set even if one of the memory checks
   *  below makes the call fail - confirm that this is intended.
   */
  if( mode == ML_LOCK && (how & MCL_FUTURE) )
    task->flags |= PF_FUTURE;

  /*
   *  clear future lock
   */
  if( mode == ML_UNLOCK )
    task->flags &= ~PF_FUTURE;

  /*
   *  only future pages requested: nothing else to do
   */
  if( mode == ML_LOCK && !(how & MCL_CURRENT) )
    return 0;

  /*
   * Test for memory a process wants to lock. If it's more than 50%
   * of the total physical memory, an error occurs. If a process tries
   * to lock more than 50% of the available physical memory, an error
   * is produced too. The brk value of the task is what gets compared
   * against the limits.
   *
   * Plain integer arithmetic: floating point must not be used in the
   * kernel, and for integers "x > t/2" is the same test as "x > t*0.5".
   */
  if( mode == ML_LOCK && task->mm->brk > i.totalram/2 )
    return -ENOMEM;
  if( mode == ML_LOCK && task->mm->brk > (i.totalram-i.lockedram)/2 )
    return -EAGAIN;

  /*
   * Set process unswappable, to allow faster paging.
   */
  if( mode == ML_LOCK )
    task->mm->swappable = 0;
  else if( mode == ML_UNLOCK )
    task->mm->swappable = 1;

  vma = task->mm->mmap;

  /*
   * Run through all vma-struct of the process
   */
  while( vma ) {

    /*
     * lock/unlock every page of the vma in physical memory. This has to
     * happen before VM_LOCKED is changed below, because lock_page()
     * looks at that flag to decide whether a page has to be counted.
     */
    for( address = vma->vm_start ; address < vma->vm_end ; address += PAGE_SIZE ) {
      lock_page( address, vma, mode );
    }

    /*
     * lock/unlock the vma...
     */
    if( mode == ML_LOCK )
      vma->vm_flags |= VM_LOCKED;
    if( mode == ML_UNLOCK )
      vma->vm_flags &= ~VM_LOCKED;

    vma = vma->vm_next;
  }

  return 0;
}

/*
 * mlock entry point: lock the len bytes starting at addr by passing the
 * request to do_mlock() with ML_LOCK; returns whatever do_mlock() returns.
 */
int sys_mlock( const void *addr, size_t len )
{
  int error;

  error = do_mlock( addr, len, ML_LOCK );
  return error;
}

/*
 * munlock entry point: unlock the len bytes starting at addr by passing
 * the request to do_mlock() with ML_UNLOCK; returns whatever do_mlock()
 * returns.
 */
int sys_munlock( const void *addr, size_t len )
{
  int error;

  error = do_mlock( addr, len, ML_UNLOCK );
  return error;
}

int sys_mlockall( int how )
{
  return do_mlockall( current, how, ML_LOCK );
}

int sys_munlockall( void )
{
  return do_mlockall( current, 0, ML_UNLOCK );
}
