/*
 *  linux/fs/super.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  super.c contains code to handle: - mount structures
 *                                   - super-block tables.
 *                                   - mount systemcall
 *                                   - umount systemcall
 *
 *  Version: $Id: super.c,v 1.2 1994/01/09 10:45:34 mvw Exp mvw $
 */

#include <linux/config.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mount.h>
#include <linux/malloc.h>
#include <linux/major.h>
#include <linux/stat.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/locks.h>

#include <asm/system.h>
#include <asm/segment.h>
 
/*
 * Table of known filesystem types.  Code in this file scans it until it
 * reaches an entry whose read_super pointer is null.
 */
extern struct file_system_type file_systems[];
/* Called in this file with a block-device major number; may return NULL. */
extern struct file_operations *get_blkfops(unsigned int);

extern void wait_for_keypress(void);
extern void fcntl_init_locks(void);

/* Mount flags that mount_root() applies to the root filesystem. */
extern int root_mountflags;

static int do_remount_sb(struct super_block *sb, int flags, char *data);

/* this is initialized in init/main.c */
dev_t ROOT_DEV = 0;
 
/* Fixed-size superblock table; read_super() treats s_dev == 0 as a free slot. */
struct super_block super_blocks[NR_SUPER];
/*
 * Singly linked list of mount records (head and tail), plus a
 * most-recently-used hint that lookup_vfsmnt() consults first.
 * NOTE(review): no code visible in this file ever assigns mru_vfsmnt.
 */
static struct vfsmount *vfsmntlist = (struct vfsmount *)0,
                       *vfsmnttail = (struct vfsmount *)0,
                       *mru_vfsmnt = (struct vfsmount *)0;

/* 
 * This part handles the management of the list of mounted filesystems.
 */
/*
 * Find the mount record belonging to a device.
 *
 * Returns the vfsmount whose mnt_dev equals dev, or a null pointer when
 * the mount list is empty or contains no entry for that device.
 */
struct vfsmount *lookup_vfsmnt(dev_t dev)
{
   struct vfsmount *entry = vfsmntlist;

   if (!entry)
      return (struct vfsmount *) 0;

   /* Try the most-recently-used hint before walking the whole list. */
   if (mru_vfsmnt && mru_vfsmnt->mnt_dev == dev)
      return mru_vfsmnt;

   while (entry) {
      if (entry->mnt_dev == dev)
         return entry;
      entry = entry->mnt_next;
   }
   return (struct vfsmount *) 0;
}

static struct vfsmount *add_vfsmnt(dev_t dev, const char *dev_name,
                                   const char *dir_name)
{
   register struct vfsmount *lptr;
   char *tmp;

   if ((lptr = (struct vfsmount *) kmalloc(sizeof(struct vfsmount),
               GFP_KERNEL)) == (struct vfsmount *) 0)
      return ((struct vfsmount *) 0);
   memset(lptr, 0, sizeof(struct vfsmount));
   lptr->mnt_dev = dev;
   if (dev_name) {
      if (!getname(dev_name, &tmp)) {
         if ((lptr->mnt_devname =
             (char *) kmalloc(strlen(tmp), GFP_KERNEL)) != (char *)0)
            strcpy(lptr->mnt_devname, tmp);
         putname(tmp);
      }
   }
   if (dir_name) {
      if (!getname(dir_name, &tmp)) {
         if ((lptr->mnt_dirname =
             (char *) kmalloc(strlen(tmp), GFP_KERNEL)) != (char *)0)
            strcpy(lptr->mnt_dirname, tmp);
         putname(tmp);
      }
   }
   if (vfsmntlist == (struct vfsmount *)0)
      vfsmntlist = vfsmnttail = lptr;
   else {
      vfsmnttail->mnt_next = lptr;
      vfsmnttail = lptr;
   }
   return (lptr);
}

/*
 * Unlink and free the mount record for dev.
 *
 * Does nothing if the list is empty or holds no record for dev; this can
 * happen because do_mount() completes a mount even when add_vfsmnt()
 * could not allocate a record.
 */
static void remove_vfsmnt(dev_t dev)
{
   register struct vfsmount *lptr, *tofree;

   if (vfsmntlist == (struct vfsmount *) 0)
      return;
   lptr = vfsmntlist;
   if (lptr->mnt_dev == dev) {
      tofree = lptr;
      vfsmntlist = lptr->mnt_next;
      /* Head was also the tail: the list is now empty. */
      if (vfsmnttail == tofree)
         vfsmnttail = (struct vfsmount *) 0;
   } else {
      while (lptr->mnt_next != (struct vfsmount *) 0) {
         if (lptr->mnt_next->mnt_dev == dev)
            break;
         lptr = lptr->mnt_next;
      }
      tofree = lptr->mnt_next;
      if (tofree == (struct vfsmount *) 0)
         return;        /* no record for this device */
      if (vfsmnttail == tofree)
         vfsmnttail = lptr;
      lptr->mnt_next = tofree->mnt_next;
   }
   /* Never leave the lookup hint pointing at freed memory. */
   if (mru_vfsmnt == tofree)
      mru_vfsmnt = (struct vfsmount *) 0;
   /* The names are null when add_vfsmnt() could not copy them. */
   if (tofree->mnt_devname != (char *) 0)
      kfree(tofree->mnt_devname);
   if (tofree->mnt_dirname != (char *) 0)
      kfree(tofree->mnt_dirname);
   kfree_s(tofree, sizeof(struct vfsmount));
}

/* 
 * This part handles the management of superblocks.
 */
struct file_system_type *get_fs_type(char *name)
{
   int cnt;
   
   if (!name)
      return &file_systems[0];
   for(cnt = 0 ; file_systems[cnt].read_super ; cnt++)
      if (!strcmp(name,file_systems[cnt].name))
         return(&file_systems[cnt]);
   return NULL;
}

/*
 * Sleep until sb->s_lock is clear.
 *
 * The current task is queued on sb->s_wait and calls schedule()
 * repeatedly for as long as s_lock reads non-zero; on return the task
 * has been taken off the queue again and is marked TASK_RUNNING.
 */
void __wait_on_super(struct super_block *sb)
{
   struct wait_queue wait = { current, NULL };

   add_wait_queue(&sb->s_wait, &wait);
repeat:
   /*
    * The state is set before s_lock is tested.
    * NOTE(review): presumably so that a wakeup arriving between the
    * test and schedule() is not lost - confirm against the unlock side.
    */
   current->state = TASK_UNINTERRUPTIBLE;
   if (sb->s_lock) {
      schedule();
      goto repeat;
   }
   remove_wait_queue(&sb->s_wait, &wait);
   current->state = TASK_RUNNING;
}

/*
 * Write back dirty superblocks.
 *
 * With dev == 0 every in-use superblock is considered; otherwise only
 * the one belonging to dev.  A superblock is written through its
 * write_super operation, if it has one and s_dirt is set.
 */
void sync_supers(dev_t dev)
{
   int slot;

   for (slot = 0; slot < NR_SUPER; slot++) {
      struct super_block *sb = &super_blocks[slot];

      if (!sb->s_dev || (dev && sb->s_dev != dev))
         continue;
      wait_on_super(sb);
      /* The slot is examined again after the wait. */
      if (!sb->s_dev || !sb->s_dirt)
         continue;
      if (dev && sb->s_dev != dev)
         continue;
      if (sb->s_op && sb->s_op->write_super)
         sb->s_op->write_super(sb);
   }
}

/*
 * Return the superblock currently bound to dev, or NULL if there is none
 * (dev == 0 always yields NULL).
 *
 * A matching slot is waited on first; if it no longer belongs to dev
 * after the wait, the search starts over from the beginning of the table.
 */
static struct super_block *get_super(dev_t dev)
{
   struct super_block *cur;
   struct super_block *end = super_blocks + NR_SUPER;

   if (!dev)
      return (struct super_block *)NULL;

   cur = super_blocks;
   while (cur < end) {
      if (cur->s_dev != dev) {
         cur++;
         continue;
      }
      wait_on_super(cur);
      if (cur->s_dev == dev)
         return cur;
      cur = super_blocks;     /* slot changed while waiting: rescan */
   }
   return (struct super_block *)NULL;
}

/*
 * Hand the superblock of dev to its filesystem's put_super operation.
 *
 * Refuses, with a console message, for the root device and for a
 * superblock that still has s_covered set.  Silently does nothing when
 * dev has no superblock or the filesystem provides no put_super.
 */
void put_super(dev_t dev)
{
   struct super_block *sb;

   if (dev == ROOT_DEV) {
      printk("VFS: Root device %d/%d: prepare for armageddon\n",
             MAJOR(dev), MINOR(dev));
   } else if ((sb = get_super(dev)) != NULL) {
      if (sb->s_covered)
         printk("VFS: Mounted device %d/%d - tssk, tssk\n",
                MAJOR(dev), MINOR(dev));
      else if (sb->s_op && sb->s_op->put_super)
         sb->s_op->put_super(sb);
   }
}

/*
 * Obtain the superblock for dev, reading it in if necessary.
 *
 * If dev already has a superblock it is returned unchanged.  Otherwise
 * the filesystem type is looked up by name, the first free slot of
 * super_blocks[] is claimed, and the type's read_super routine is asked
 * to fill it in.
 *
 * Returns NULL when dev is 0, the type is unknown, the table is full, or
 * the filesystem's read_super routine fails (the slot is then released).
 */
static struct super_block *read_super(dev_t dev,char *name,int flags,
                                      void *data, int silent)
{
   struct super_block *slot;
   struct super_block *end = super_blocks + NR_SUPER;
   struct file_system_type *fstype;

   if (!dev)
      return NULL;
   check_disk_change(dev);

   slot = get_super(dev);
   if (slot)
      return slot;

   fstype = get_fs_type(name);
   if (!fstype) {
      printk("VFS: on device %d/%d: get_fs_type(%s) failed\n",
              MAJOR(dev), MINOR(dev), name);
      return (struct super_block *)NULL;
   }

   /* Look for the first slot that is not bound to a device. */
   slot = super_blocks;
   while (slot < end && slot->s_dev)
      slot++;
   if (slot >= end)
      return (struct super_block *)NULL;

   slot->s_dev = dev;
   slot->s_flags = flags;
   if (!fstype->read_super(slot, data, silent)) {
      slot->s_dev = 0;
      return (struct super_block *)NULL;
   }
   slot->s_dev = dev;
   slot->s_covered = NULL;
   slot->s_rd_only = 0;
   slot->s_dirt = 0;
   return slot;
}

/*
 * This part handles the mount/umount systemcall.
 *
 * Unnamed block devices are dummy devices used by virtual
 * filesystems which don't use real block-devices.  -- jrs
 */
/* One in-use flag per unnamed-device minor number. */
static char unnamed_dev_in_use[256];

/*
 * Reserve an unnamed device number.
 *
 * Returns (UNNAMED_MAJOR << 8) | minor for the lowest minor not yet in
 * use, or 0 when every minor is taken.  The table is cleared on the
 * first call, and minor 0 is permanently marked as used.
 */
static dev_t get_unnamed_dev(void)
{
   static int initialised = 0;
   int minor = 0;

   if (!initialised) {
      initialised = 1;
      memset(unnamed_dev_in_use, 0, sizeof(unnamed_dev_in_use));
      unnamed_dev_in_use[0] = 1; /* minor 0 (nodev) is special */
   }
   while (minor < sizeof(unnamed_dev_in_use) /
          sizeof(unnamed_dev_in_use[0])) {
      if (unnamed_dev_in_use[minor] == 0) {
         unnamed_dev_in_use[minor] = 1;
         return (UNNAMED_MAJOR << 8) | minor;
      }
      minor++;
   }
   return 0;
}

/*
 * Give back an unnamed device number obtained from get_unnamed_dev().
 *
 * dev == 0 is ignored.  Releasing a minor that is not marked in use only
 * produces a console message.
 */
static void put_unnamed_dev(dev_t dev)
{
   /*
    * get_unnamed_dev() encodes the table index in the minor part of the
    * device number, so index by MINOR(dev) rather than by the whole
    * value; the latter only stays inside the table if UNNAMED_MAJOR is 0.
    * NOTE(review): assumes MINOR() yields 0..255 - confirm.
    */
   unsigned int minor = MINOR(dev);

   if (!dev)
      return;
   if (!unnamed_dev_in_use[minor]) {
      printk("VFS: put_unnamed_dev: freeing unused device %d/%d\n",
             MAJOR(dev), MINOR(dev));
      return;
   }
   unnamed_dev_in_use[minor] = 0;
}

/*
 * Detach the filesystem on dev.
 *
 * Returns 0 on success, -ENOENT if dev has no superblock or that
 * superblock is not mounted on anything, -EBUSY if fs_may_umount()
 * refuses, or the error from do_remount_sb() in the root-device case.
 */
static int do_umount(dev_t dev)
{
   struct super_block *sb;
   int retval;
   
#ifdef CONFIG_QUOTA
   quota_off(dev, -1); /* Turn off all quotas first */
#endif
   if (dev == ROOT_DEV) {
      /*
       * Special case for "unmounting" root. We just try to remount
       * it readonly, and sync() the device.
       */
      if (!(sb=get_super(dev)))
         return -ENOENT;
      if (!(sb->s_flags & MS_RDONLY)) {
         fsync_dev(dev);
         retval = do_remount_sb(sb, MS_RDONLY, 0);
         if (retval)
            return retval;
      }
      return 0;
   }
   if (!(sb=get_super(dev)) || !(sb->s_covered))
      return -ENOENT;
   /* Inconsistent state is only reported; the umount still proceeds. */
   if (!sb->s_covered->i_mount)
      printk("VFS: umount(%d/%d): mounted inode has i_mount=NULL\n",
             MAJOR(dev), MINOR(dev));
   if (!fs_may_umount(dev, sb->s_mounted))
      return -EBUSY;
   /* Unhook the covered directory, then drop both inode references. */
   sb->s_covered->i_mount = NULL;
   iput(sb->s_covered);
   sb->s_covered = NULL;
   iput(sb->s_mounted);
   sb->s_mounted = NULL;
   /* Write out a dirty superblock before it is handed to put_super(). */
   if (sb->s_op && sb->s_op->write_super && sb->s_dirt)
      sb->s_op->write_super(sb);
   put_super(dev);
   remove_vfsmnt(dev);
   return 0;
}

/*
 * Now umount can handle mount points as well as block devices.
 * This is important for filesystems which use unnamed block devices.
 *
 * There is a little kludge here with the dummy_inode.  The current
 * vfs release functions only use the r_dev field in the inode so
 * we give them the info they need without using a real inode.
 * If any other fields are ever needed by any block device release
 * functions, they should be faked here.  -- jrs
 *
 * name is resolved with namei(), falling back to lnamei().  It may name
 * either a block device or the root directory of a mounted filesystem.
 *
 * Returns 0 on success, -EPERM for non-superusers, -EACCES for a device
 * node on a nodev filesystem, -EINVAL if name is neither a block device
 * nor a mounted root, -ENXIO for an out-of-range major number, or the
 * error from the name lookup or from do_umount().
 */
asmlinkage int sys_umount(char *name)
{
   struct inode *inode;
   dev_t dev;
   int retval;
   struct inode dummy_inode;
   struct file_operations *fops;

   if (!suser())
      return -EPERM;
   retval = namei(name,&inode);
   if (retval) {
      retval = lnamei(name,&inode);
      if (retval)
         return retval;
   }
   if (S_ISBLK(inode->i_mode)) {
      dev = inode->i_rdev;
      if (IS_NODEV(inode)) {
         iput(inode);
         return -EACCES;
      }
   } else {
      /* inode was already dereferenced above, so it cannot be null here. */
      if (!inode->i_sb || inode != inode->i_sb->s_mounted) {
         iput(inode);
         return -EINVAL;
      }
      dev = inode->i_sb->s_dev;
      iput(inode);
      memset(&dummy_inode, 0, sizeof(dummy_inode));
      dummy_inode.i_rdev = dev;
      inode = &dummy_inode;
   }
   if (MAJOR(dev) >= MAX_BLKDEV) {
      /* The dummy inode lives on our stack and must never reach iput(). */
      if (inode != &dummy_inode)
         iput(inode);
      return -ENXIO;
   }
   if (!(retval = do_umount(dev)) && dev != ROOT_DEV) {
      fops = get_blkfops(MAJOR(dev));
      if (fops && fops->release)
         fops->release(inode,NULL);
      if (MAJOR(dev) == UNNAMED_MAJOR)
         put_unnamed_dev(dev);
   }
   if (inode != &dummy_inode)
      iput(inode);
   if (retval)
      return retval;
   fsync_dev(dev);
   return 0;
}

/*
 * do_mount() performs the mount proper once sys_mount() has finished
 * decoding its arguments.  (When everything uses the new mount()
 * parameters, sys_mount() can be cleaned up.)
 *
 * We cannot mount a filesystem if it has active, used, or dirty inodes.
 * We also have to flush all inode-data for this device, as the new mount
 * might need new info.
 *
 * Returns 0 on success, the namei() error if dir_name cannot be
 * resolved, -EPERM if it is not a directory, and -EBUSY if the directory
 * is in use or already a mount point, fs_may_mount() refuses, or no
 * usable superblock could be obtained.
 */
static int do_mount(dev_t dev, const char *dev_name, const char *dir_name,
                    char *type, int flags, void *data)
{
   struct vfsmount *mnt;
   struct super_block *sb;
   struct inode *dir_i;
   int error;

   error = namei(dir_name, &dir_i);
   if (error)
      return error;

   error = -EBUSY;      /* the failure code unless overridden below */
   if (dir_i->i_count != 1 || dir_i->i_mount)
      goto fail;
   if (!S_ISDIR(dir_i->i_mode)) {
      error = -EPERM;
      goto fail;
   }
   if (!fs_may_mount(dev))
      goto fail;
   sb = read_super(dev, type, flags, data, 0);
   if (!sb || sb->s_covered)
      goto fail;

   sb->s_covered = dir_i;
   dir_i->i_mount = sb->s_mounted;
   /* A missing mount record does not fail the mount. */
   mnt = add_vfsmnt(dev, dev_name, dir_name);
   if (mnt != (struct vfsmount *)0)
      mnt->mnt_sb = sb;
   return 0;      /* we don't iput(dir_i) - see umount */

fail:
   iput(dir_i);
   return error;
}


/*
 * Change the mount flags of an already mounted filesystem.  Only the
 * mount point is used as a reference - file system type and the device
 * are ignored.  FS-specific mount options can't be altered by remounting.
 *
 * Returns 0 on success, -EBUSY if a switch to read-only is refused by
 * fs_may_remount_ro(), or the error from the filesystem's remount_fs
 * operation.  Only the bits in MS_RMT_MASK are taken over from flags.
 */
static int do_remount_sb(struct super_block *sb, int flags, char *data)
{
   /* Going from read-write to read-only needs an extra permission check. */
   if (!(sb->s_flags & MS_RDONLY) && (flags & MS_RDONLY)) {
      if (!fs_may_remount_ro(sb->s_dev))
         return -EBUSY;
   }
   if (sb->s_op && sb->s_op->remount_fs) {
      int err = sb->s_op->remount_fs(sb, &flags, data);

      if (err)
         return err;
   }
   sb->s_flags = (sb->s_flags & ~MS_RMT_MASK) | (flags & MS_RMT_MASK);
   return 0;
}

/*
 * Remount the filesystem whose root directory is named by dir.
 *
 * Returns the namei() error if dir cannot be resolved, -EINVAL if dir is
 * not the root of a mounted filesystem, otherwise whatever
 * do_remount_sb() returns.
 */
static int do_remount(const char *dir,int flags,char *data)
{
   struct inode *dir_i;
   int retval = namei(dir, &dir_i);

   if (retval)
      return retval;
   if (dir_i == dir_i->i_sb->s_mounted)
      retval = do_remount_sb(dir_i->i_sb, flags, data);
   else
      retval = -EINVAL;
   iput(dir_i);
   return retval;
}

/*
 * Copy a user-supplied option block into a freshly allocated page.
 *
 * On success *where holds the page address (0 if data is null) and the
 * caller owns the page.  At most PAGE_SIZE-1 bytes are copied, and never
 * past the end of the memory area that contains data; the rest of the
 * page is zero-filled so the result is always NUL-terminated and carries
 * no stale page contents.
 *
 * Returns 0 on success, -EFAULT if data lies in no memory area of the
 * current task, or -ENOMEM if no page could be allocated.
 */
static int copy_mount_options (char *data, unsigned long *where)
{
   unsigned long len;
   unsigned long page;
   struct vm_area_struct *vma;

   *where = 0;
   if (!data)
      return 0;

   /* Find the memory area that contains the first byte of data. */
   for (vma = current->mmap ; ; vma = vma->vm_next) {
      if (!vma || (unsigned long) data < vma->vm_start)
         return -EFAULT;
      if ((unsigned long) data < vma->vm_end)
         break;
   }
   len = vma->vm_end - (unsigned long) data;
   if (len >= PAGE_SIZE)
      len = PAGE_SIZE-1;
   if (!(page = __get_free_page(GFP_KERNEL)))
      return -ENOMEM;
   memcpy_fromfs((void *) page,data,len);
   /* len <= PAGE_SIZE-1, so at least one terminating zero byte is written. */
   memset((char *) page + len, 0, PAGE_SIZE - len);
   *where = page;
   return 0;
}

/*
 * Flags is a 16-bit value that allows up to 16 non-fs dependent flags to
 * be given to the mount() call (ie: read-only, no-dev, no-suid etc).
 *
 * data is a (void *) that can point to any structure up to
 * PAGE_SIZE-1 bytes, which can contain arbitrary fs-dependent
 * information (or be NULL).
 *
 * NOTE! As old versions of mount() didn't use this setup, the flags
 * has to have a special 16-bit magic number in the high word:
 * 0xC0ED. If this magic word isn't present, the flags and data info
 * isn't used, as the syscall assumes we are talking to an older
 * version that didn't understand them.
 *
 * Returns 0 on success or a negative errno: -EPERM for non-superusers,
 * -ENODEV for an unknown filesystem type, -ENOTBLK / -EACCES / -ENXIO
 * for an unusable device node, -EMFILE when no unnamed device is left,
 * or the error from the name lookup, the option copy, the device open,
 * do_remount() or do_mount().
 */
asmlinkage int sys_mount(char * dev_name, char * dir_name, char * type,
                         unsigned long new_flags, void * data)
{
   struct file_system_type *fstype;
   struct file_operations *fops;
   struct inode *inode;
   ulong flags = 0;
   ulong page = 0;
   dev_t dev;
   int retval;
   char *t;

   if (!suser())
      return -EPERM;
   if ((new_flags &
       (MS_MGC_MSK | MS_REMOUNT)) == (MS_MGC_VAL | MS_REMOUNT)) {
      retval = copy_mount_options (data, &page);
      if (retval < 0)
         return retval;
      retval = do_remount(dir_name,
                          new_flags & ~MS_MGC_MSK & ~MS_REMOUNT,
                          (char *) page);
      free_page(page);
      return retval;
   }
   retval = copy_mount_options(type, &page);
   if (retval < 0)
      return retval;
   fstype = get_fs_type((char *) page);
   free_page(page);
   /*
    * Forget the freed page.  Without the magic number below, page is not
    * reloaded, and the stale address would be handed to do_mount() and
    * freed a second time.
    */
   page = 0;
   if (!fstype)      
      return -ENODEV;
   t = fstype->name;
   if (fstype->requires_dev) {
      retval = namei(dev_name,&inode);
      if (retval)
         return retval;
      if (!S_ISBLK(inode->i_mode)) {
         iput(inode);
         return -ENOTBLK;
      }
      if (IS_NODEV(inode)) {
         iput(inode);
         return -EACCES;
      }
      dev = inode->i_rdev;
      if (MAJOR(dev) >= MAX_BLKDEV) {
         iput(inode);
         return -ENXIO;
      }
   } else {
      if (!(dev = get_unnamed_dev()))
         return -EMFILE;
      inode = NULL;
   }
   fops = get_blkfops(MAJOR(dev));
   if (fops && fops->open) {
      retval = fops->open(inode, NULL);
      if (retval)
         goto out;         /* open failed: nothing to release */
   }
   if ((new_flags & MS_MGC_MSK) == MS_MGC_VAL) {
      flags = new_flags & ~MS_MGC_MSK;
      retval = copy_mount_options(data, &page);
      if (retval < 0)
         goto out_release; /* no page was allocated on failure */
   }
   retval = do_mount(dev, dev_name, dir_name, t, flags, (void *)page);
   free_page(page);
out_release:
   if (retval && fops && fops->release)
      fops->release(inode,NULL);
out:
   /* A failed mount must give back the unnamed device it reserved. */
   if (retval && !fstype->requires_dev)
      put_unnamed_dev(dev);
   iput(inode);
   return retval;
}

/*
 * Mount the root filesystem from ROOT_DEV.
 *
 * Clears the superblock table, then tries every device-backed
 * filesystem type in table order until one can read the superblock.
 * The root inode becomes the current task's root and working directory
 * and is recorded as covering itself.  Panics if no type succeeds.
 */
void mount_root(void)
{
   struct file_system_type *fs_type;
   struct vfsmount *vfsmnt;
   struct super_block *sb;
   struct inode *root_inode;

   memset(super_blocks, 0, sizeof(super_blocks));
   fcntl_init_locks();
   if (MAJOR(ROOT_DEV) == FLOPPY_MAJOR) {
      printk(KERN_NOTICE "VFS: Insert root floppy and press ENTER\n");
      wait_for_keypress();
   }
   for (fs_type = file_systems; fs_type->read_super; fs_type++) {
      if (!fs_type->requires_dev)
         continue;
      sb = read_super(ROOT_DEV,fs_type->name,root_mountflags,NULL,1);
      if (!sb)
         continue;         /* not this type - try the next one */

      root_inode = sb->s_mounted;
      root_inode->i_count += 3 ; /* NOTE! it is logically used 4 times, not 1 */
      sb->s_covered = root_inode;
      sb->s_flags = root_mountflags;
      current->pwd = root_inode;
      current->root = root_inode;
      printk("VFS: Mounted root (%s filesystem)%s.\n",
             fs_type->name,
            (sb->s_flags & MS_RDONLY) ? " readonly" : "");
      vfsmnt = add_vfsmnt(ROOT_DEV, "rootfs", "/");
      if (vfsmnt != (struct vfsmount *)0)
         vfsmnt->mnt_sb = sb;
      return;
   }
   panic("VFS: Unable to mount root");
}
