diff -urNp vanilla/linux-2.6.11/fs/Kconfig linux-2.6.11-fumount2/fs/Kconfig --- vanilla/linux-2.6.11/fs/Kconfig 2005-03-01 23:38:10.000000000 -0800 +++ linux-2.6.11-fumount2/fs/Kconfig 2005-08-17 13:27:16.000000000 -0700 @@ -429,6 +429,14 @@ config AUTOFS4_FS local network, you probably do not need an automounter, and can say N here. +config FUMOUNT + bool "Forced Unmount support" + help + This option gives the ability to (really) force unmount a file + system. It closes all the open files, flushes their contents, + releases file locks and tears down memory maps for the files. + If unsure, say N. + menu "CD-ROM/DVD Filesystems" config ISO9660_FS diff -urNp vanilla/linux-2.6.11/fs/coda/pioctl.c linux-2.6.11-fumount2/fs/coda/pioctl.c --- vanilla/linux-2.6.11/fs/coda/pioctl.c 2005-03-01 23:38:13.000000000 -0800 +++ linux-2.6.11-fumount2/fs/coda/pioctl.c 2005-08-17 13:27:16.000000000 -0700 @@ -14,6 +14,9 @@ #include #include #include +#ifdef CONFIG_FUMOUNT +#include +#endif #include #include #include @@ -66,6 +69,9 @@ static int coda_pioctl(struct inode * in * Look up the pathname. 
Note that the pathname is in * user memory, and namei takes care of this */ +#ifdef CONFIG_FUMOUNT + down_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ if ( data.follow ) { error = user_path_walk(data.path, &nd); } else { @@ -73,6 +79,9 @@ static int coda_pioctl(struct inode * in } if ( error ) { +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ return error; } else { target_inode = nd.dentry->d_inode; @@ -81,6 +90,9 @@ static int coda_pioctl(struct inode * in /* return if it is not a Coda inode */ if ( target_inode->i_sb != inode->i_sb ) { path_release(&nd); +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ return -EINVAL; } @@ -90,6 +102,9 @@ static int coda_pioctl(struct inode * in error = venus_pioctl(inode->i_sb, &(cnp->c_fid), cmd, &data); path_release(&nd); +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ return error; } diff -urNp vanilla/linux-2.6.11/fs/dcache.c linux-2.6.11-fumount2/fs/dcache.c --- vanilla/linux-2.6.11/fs/dcache.c 2005-03-01 23:37:48.000000000 -0800 +++ linux-2.6.11-fumount2/fs/dcache.c 2005-08-17 13:27:16.000000000 -0700 @@ -1027,10 +1027,17 @@ struct dentry * d_lookup(struct dentry * struct dentry * dentry = NULL; unsigned long seq; - do { - seq = read_seqbegin(&rename_lock); - dentry = __d_lookup(parent, name); - if (dentry) +#ifdef CONFIG_FUMOUNT + if (unlikely(parent == NULL)) { + DEBUG_FUMOUNT; + return dentry; + } +#endif + + do { + seq = read_seqbegin(&rename_lock); + dentry = __d_lookup(parent, name); + if (dentry) break; } while (read_seqretry(&rename_lock, seq)); return dentry; @@ -1361,6 +1368,17 @@ static char * __d_path( struct dentry *d *--end = '\0'; buflen--; +#ifdef CONFIG_FUMOUNT + if (unlikely(!dentry || !vfsmnt)) { + DEBUG_FUMOUNT; + buflen -= 6; + end -= 6; + memcpy(end, "(null)", 6); + retval = end; + return retval; + } +#endif + if (!IS_ROOT(dentry) && d_unhashed(dentry)) { buflen -= 10; end -= 10; @@ 
-1469,6 +1487,21 @@ asmlinkage long sys_getcwd(char __user * read_lock(¤t->fs->lock); pwdmnt = mntget(current->fs->pwdmnt); + +#ifdef CONFIG_FUMOUNT + if (unlikely(pwdmnt == NULL)) { + unsigned long len = 2; + char * root_dir = "/"; + DEBUG_FUMOUNT; + if (copy_to_user(buf, root_dir, len)) + error = -EFAULT; + else + error = len; + read_unlock(¤t->fs->lock); + goto out_freepage; + } +#endif + pwd = dget(current->fs->pwd); rootmnt = mntget(current->fs->rootmnt); root = dget(current->fs->root); @@ -1503,6 +1536,9 @@ out: mntput(pwdmnt); dput(root); mntput(rootmnt); +#ifdef CONFIG_FUMOUNT +out_freepage: +#endif free_page((unsigned long) page); return error; } diff -urNp vanilla/linux-2.6.11/fs/dnotify.c linux-2.6.11-fumount2/fs/dnotify.c --- vanilla/linux-2.6.11/fs/dnotify.c 2005-03-01 23:37:53.000000000 -0800 +++ linux-2.6.11-fumount2/fs/dnotify.c 2005-08-17 13:27:16.000000000 -0700 @@ -36,6 +36,37 @@ static void redo_inode_mask(struct inode inode->i_dnotify_mask = new_mask; } +#ifdef CONFIG_FUMOUNT +void fumount_dnotify_flush(struct file *filp) +{ + struct dnotify_struct *dn; + struct dnotify_struct **prev; + struct inode *inode; + + DEBUG_FUMOUNT; + + if (!filp || !filp->f_dentry || !filp->f_dentry->d_inode) + return; + + inode = filp->f_dentry->d_inode; + if (!S_ISDIR(inode->i_mode)) + return; + + spin_lock(&inode->i_lock); + prev = &inode->i_dnotify; + while ((dn = *prev) != NULL) { + if ( dn->dn_filp == filp ) { + *prev = dn->dn_next; + redo_inode_mask(inode); + kmem_cache_free(dn_cache, dn); + break; + } + prev = &dn->dn_next; + } + spin_unlock(&inode->i_lock); +} +#endif + void dnotify_flush(struct file *filp, fl_owner_t id) { struct dnotify_struct *dn; @@ -160,16 +191,19 @@ void dnotify_parent(struct dentry *dentr if (!dir_notify_enable) return; - spin_lock(&dentry->d_lock); - parent = dentry->d_parent; - if (parent->d_inode->i_dnotify_mask & event) { - dget(parent); - spin_unlock(&dentry->d_lock); - __inode_dir_notify(parent->d_inode, event); - dput(parent); 
- } else { - spin_unlock(&dentry->d_lock); - } + /* Skip this if compiled with forced unmount and the dentry is NULL */ + if (dentry) { + spin_lock(&dentry->d_lock); + parent = dentry->d_parent; + if (parent->d_inode->i_dnotify_mask & event) { + dget(parent); + spin_unlock(&dentry->d_lock); + __inode_dir_notify(parent->d_inode, event); + dput(parent); + } else { + spin_unlock(&dentry->d_lock); + } + } } EXPORT_SYMBOL_GPL(dnotify_parent); diff -urNp vanilla/linux-2.6.11/fs/exec.c linux-2.6.11-fumount2/fs/exec.c --- vanilla/linux-2.6.11/fs/exec.c 2005-03-01 23:38:06.000000000 -0800 +++ linux-2.6.11-fumount2/fs/exec.c 2005-08-17 13:27:16.000000000 -0700 @@ -39,6 +39,9 @@ #include #include #include +#ifdef CONFIG_FUMOUNT +#include +#endif /* CONFIG_FUMOUNT */ #include #include #include @@ -123,6 +126,9 @@ asmlinkage long sys_uselib(const char __ struct nameidata nd; int error; +#ifdef CONFIG_FUMOUNT + down_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ nd.intent.open.flags = FMODE_READ; error = __user_walk(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); if (error) @@ -162,6 +168,9 @@ asmlinkage long sys_uselib(const char __ } fput(file); out: +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ return error; exit: path_release(&nd); diff -urNp vanilla/linux-2.6.11/fs/fcntl.c linux-2.6.11-fumount2/fs/fcntl.c --- vanilla/linux-2.6.11/fs/fcntl.c 2005-03-01 23:38:13.000000000 -0800 +++ linux-2.6.11-fumount2/fs/fcntl.c 2005-08-17 13:27:16.000000000 -0700 @@ -122,10 +122,24 @@ asmlinkage long sys_dup2(unsigned int ol int err = -EBADF; struct file * file, *tofree; struct files_struct * files = current->files; +#ifdef CONFIG_FUMOUNT + struct vfsmount *mnt = NULL; + int sem_flag = 0; +#endif spin_lock(&files->file_lock); if (!(file = fcheck(oldfd))) goto out_unlock; + +#ifdef CONFIG_FUMOUNT + if (unlikely(file->f_mode & FMODE_FUMOUNT)) { + DEBUG_FUMOUNT; + /* allow no new references to this file */ + err = -ENXIO; + goto out_unlock; + } +#endif + 
err = newfd; if (newfd == oldfd) goto out_unlock; @@ -156,10 +170,23 @@ asmlinkage long sys_dup2(unsigned int ol FD_CLR(newfd, files->close_on_exec); spin_unlock(&files->file_lock); +#ifdef CONFIG_FUMOUNT + /* this is a backdoor to close, so we need the close semaphore */ + mnt = file->f_vfsmnt; + if(mnt) { + down_read(&mnt->mnt_close_sem); + sem_flag = 1; + } +#endif + if (tofree) filp_close(tofree, files); err = newfd; out: +#ifdef CONFIG_FUMOUNT + if(sem_flag) + up_read(&mnt->mnt_close_sem); +#endif return err; out_unlock: spin_unlock(&files->file_lock); diff -urNp vanilla/linux-2.6.11/fs/file_table.c linux-2.6.11-fumount2/fs/file_table.c --- vanilla/linux-2.6.11/fs/file_table.c 2005-03-01 23:37:47.000000000 -0800 +++ linux-2.6.11-fumount2/fs/file_table.c 2005-08-17 13:27:16.000000000 -0700 @@ -16,6 +16,15 @@ #include #include #include +#include + +#ifdef CONFIG_FUMOUNT +static LIST_HEAD(defunct_list); + +extern int remove_file_mappings(struct file *); +extern void remove_file_locks(struct file *); +static struct file * clone_filp(struct file * source_file); +#endif /* sysctl tunables... */ struct files_stat_struct files_stat = { @@ -146,6 +155,19 @@ void fastcall __fput(struct file *file) mntput(mnt); } +#ifdef CONFIG_FUMOUNT +void fastcall fumount_fput(struct file * file) +{ + DEBUG_FUMOUNT; + + /* fput has already been called on this file. */ + if (atomic_dec_and_test(&file->f_count)) { + file_free(file); + } + return; +} +#endif + struct file fastcall *fget(unsigned int fd) { struct file *file; @@ -153,14 +175,54 @@ struct file fastcall *fget(unsigned int spin_lock(&files->file_lock); file = fcheck_files(files, fd); - if (file) + if (file) { +#ifdef CONFIG_FUMOUNT + if(!(file->f_mode & FMODE_FUMOUNT)) { + get_file(file); + } + else { + DEBUG_FUMOUNT; + file = NULL; + } +#else get_file(file); +#endif + } spin_unlock(&files->file_lock); return file; } EXPORT_SYMBOL(fget); +#ifdef CONFIG_FUMOUNT +/* Find an unused file structure and clone the existing file. 
Returns NULL, if + * there are no more free file structures or we run out of memory. */ +static struct file * clone_filp(struct file * source_file) +{ + struct file * new_file; + + new_file = get_empty_filp(); + if (new_file) { + /* Copy all file stats, flags etc. */ + new_file->f_version = source_file->f_version; + new_file->f_dentry = source_file->f_dentry; + new_file->f_vfsmnt = source_file->f_vfsmnt; + new_file->f_op = source_file->f_op; + new_file->f_flags = source_file->f_flags; + new_file->f_mode = source_file->f_mode; + new_file->f_pos = source_file->f_pos; + memcpy(&new_file->f_ra, &source_file->f_ra, sizeof(struct file_ra_state)); + new_file->f_uid = source_file->f_uid; + new_file->f_gid = source_file->f_gid; + new_file->f_error = source_file->f_error; + new_file->private_data = source_file->private_data; + return new_file; + } + printk(KERN_WARNING "VFS FUMOUNT: filp allocation failed\n"); + return NULL; +} +#endif + /* * Lightweight file lookup - no refcnt increment if fd table isn't shared. * You can use this only if it is guranteed that the current task already @@ -176,12 +238,31 @@ struct file fastcall *fget_light(unsigne *fput_needed = 0; if (likely((atomic_read(&files->count) == 1))) { file = fcheck_files(files, fd); + +#ifdef CONFIG_FUMOUNT + if (unlikely(file && (file->f_mode & FMODE_FUMOUNT))) { + DEBUG_FUMOUNT; + file = NULL; + } +#endif + } else { spin_lock(&files->file_lock); file = fcheck_files(files, fd); if (file) { +#ifdef CONFIG_FUMOUNT + if(!(file->f_mode & FMODE_FUMOUNT)) { + get_file(file); + *fput_needed = 1; + } + else { + DEBUG_FUMOUNT; + file = NULL; + } +#else get_file(file); *fput_needed = 1; +#endif } spin_unlock(&files->file_lock); } @@ -207,6 +288,30 @@ void file_move(struct file *file, struct file_list_unlock(); } +#ifdef CONFIG_FUMOUNT +/* file_move_test is same as file_move, but is used to complete open + operations under the lock only if MNT_FUMOUNT is not set. 
+ This makes sure that additional file objects are not placed on the + sb open file list when a FORCED umount is pending. */ +int file_move_test(struct file *file, struct super_block *sb) +{ + int return_code; + struct list_head *list = &(sb->s_files); + + if (list) { + if (!(file->f_vfsmnt->mnt_flags & MNT_FUMOUNT)) { + file_move(file, list); + return_code = 0; + } else { + DEBUG_FUMOUNT; + return_code = -ENXIO; + } + } else + return_code = 0; + return return_code; +} +#endif + void file_kill(struct file *file) { if (!list_empty(&file->f_list)) { @@ -241,6 +346,179 @@ too_bad: return 0; } +#ifdef CONFIG_FUMOUNT +void fs_fumount_mark_files(struct vfsmount *mnt) +{ + struct list_head *p; + struct super_block *sb = mnt->mnt_sb; + struct file *file; + + DEBUG_FUMOUNT; + + /* get this lock - prevents problems with sys_flock */ + lock_kernel(); + /* Mark all files on the sb->s_files list for unmount if f_vfsmnt == mnt */ + list_for_each(p, &sb->s_files) { + file = list_entry(p, struct file, f_list); + if (file->f_vfsmnt == mnt) + file->f_mode |= FMODE_FUMOUNT; + + } + unlock_kernel(); + return; +} + +/* Forced Unmount code and comments originally from Monta Vista: +I've added a semaphore that will prevent the fumount code from colliding +with the normal syscall sys_close. This seems necessary, as I'm about to +clone the file object for open files and try to force a close - +that can be tricky, as the close code wants to run in the context of the +process that originally opened the file, and there may also be more than +one owner of the file object at any given time, due to the fork and dup calls. + +Before cloning the file, it is necessary to unmap any areas that have been +mmapped using this file descriptor. Each mmap against a file increments the +file object reference count. So find the inode and check for mappings before +the clone. FMODE_FUMOUNT has made the mapping unalterable by the actual owner, +as the sys calls have been walled off. 
+ +The only syscall that is allowed to succeed following the setting of +FMODE_FUMOUNT is the close call, and that is protected by the new close_sem +semaphore. + +In any event, I don't want to have a file object that I'm forcing close on to +suddenly disappear when the real owner gets around to closing it. So we clone +under the lock, moving the file resources into a cloned file object, and +leaving the previous owner with the husk only. Somewhere along the line, we +need to find any locks associated with the file object, and release them. + +After cloning the file object, release the lock and then close the cloned file +object however many times required to drive the use f_count to 0. I can't use +the syscall, but it looks like most of the routines are already there, just +needing some tweaking to take my arguments. The file locking seems to be the +only thing requiring the process context of the original owner(s). +*/ +int fs_fumount_clone_list(struct super_block *sb, struct vfsmount *mnt) +{ + struct list_head *p, *temp; + int return_code = 0; + struct file *cloned_file; + struct file *file; + + DEBUG_FUMOUNT; + + down_write(&mnt->mnt_close_sem); + + file_list_lock(); + /* go through all the open files for this superblock */ + list_for_each_safe(p, temp, &sb->s_files) { + + file = list_entry(p, struct file, f_list); + + if (!((file->f_mode & FMODE_FUMOUNT) + && (file->f_vfsmnt == mnt)) ) { + continue; + } + + file_list_unlock(); + + /* get reference count so file doesn't vanish */ + get_file(file); + /* drop lock to let sys_close progress + - I have the file reference to hold the + object until I'm done removing the + mmaps and locks */ + up_write(&mnt->mnt_close_sem); + + /* check for mmappings and undo, if any */ + return_code = remove_file_mappings(file); + + /* Similarly, remove the file locks associated with this file + object. 
+ */ + remove_file_locks(file); + + down_write(&mnt->mnt_close_sem); + if ( file_count(file) == 1) { + /* okay, fumount holds last reference, so file will go + * away when we fput the file, removing it from the sb + * list. We hold the close semaphore, so the next list + * item will still be valid if we get it before this + * file object is released. And, if we are terminating + * the use of this file object, then there is nothing + * else to do for this file, so no need to clone it. */ + cloned_file = file; + fput(file); + file_list_lock(); + continue; + } + fput(file); + cloned_file = clone_filp(file); /* clone the file */ + /* + * clone_filp() can sleep, because get_empty_filp() calls + * kmem_cache_alloc() with the GFP_KERNEL flag. Therefore we grab + * the lock only after that call. + */ + file_list_lock(); + if (!cloned_file) { + return_code = -ENOMEM; + break; + } + /* we now have a duplicated file object - change some of the + * fields to reflect that we stole the resources from the old + * file object */ + file->f_op = NULL; + file->f_dentry = (struct dentry *)NULL; + file->f_vfsmnt = (struct vfsmount *)NULL; + + /* Set defunct flag for cleanup with sys_close */ + file->f_mode |= FMODE_DEFUNCT; + + /* put the clone onto the sb list for further processing */ + cloned_file->f_mode &= ~FMODE_FUMOUNT; + cloned_file->f_mode |= FMODE_DEFUNCT; + + list_move(&cloned_file->f_list, &defunct_list); + } + file_list_unlock(); + up_write(&mnt->mnt_close_sem); + return return_code; +} + +void fs_fumount_close( void ) +{ + struct list_head *p,*n; + struct file *file; + + DEBUG_FUMOUNT; + + file_list_lock(); + + /* the for looks like it always starts over, but the first list entry + * is removed by the body of the loop if it is a fumount closable item. + * Otherwise, it is closed again, until it finally goes away. 
We hold the + * file list lock in case of the unlikely event of a concurrent fumount */ + + /* We are deleting entries underneath ourself, so list_for_each_safe */ + list_for_each_safe(p, n, &defunct_list) { + file = list_entry(p, struct file, f_list); + /* drop the file list lock, since we have the + only reachable reference(s) to this file object */ + file_list_unlock(); + + if (!(file->f_mode & FMODE_DEFUNCT) ) { + /* have run into an invalid list */ + BUG(); + } + + fumount_close( file ); + file_list_lock(); + } + file_list_unlock(); + return; +} +#endif /* CONFIG_FUMOUNT */ + void __init files_init(unsigned long mempages) { int n; diff -urNp vanilla/linux-2.6.11/fs/ioctl.c linux-2.6.11-fumount2/fs/ioctl.c --- vanilla/linux-2.6.11/fs/ioctl.c 2005-03-01 23:37:50.000000000 -0800 +++ linux-2.6.11-fumount2/fs/ioctl.c 2005-08-17 13:27:16.000000000 -0700 @@ -170,6 +170,15 @@ asmlinkage long sys_ioctl(unsigned int f if (error) goto out_fput; +#ifdef CONFIG_FUMOUNT + if (unlikely(filp->f_mode & FMODE_FUMOUNT)) { + DEBUG_FUMOUNT; + /* allow no new references to this file */ + error = -ENXIO; + goto out; + } +#endif + error = vfs_ioctl(filp, fd, cmd, arg); out_fput: fput_light(filp, fput_needed); diff -urNp vanilla/linux-2.6.11/fs/locks.c linux-2.6.11-fumount2/fs/locks.c --- vanilla/linux-2.6.11/fs/locks.c 2005-03-01 23:38:26.000000000 -0800 +++ linux-2.6.11-fumount2/fs/locks.c 2005-08-17 13:27:16.000000000 -0700 @@ -524,6 +524,19 @@ static void locks_wake_up_blocks(struct } } +#ifdef CONFIG_FUMOUNT +/* count the waiters bound to file_ptr */ +static void +locks_waiters_count(struct file_lock *fl, int *count, struct file *file_ptr) +{ + struct file_lock *waiter; + list_for_each_entry(waiter, &fl->fl_block, fl_block) { + if (waiter->fl_file == file_ptr) + (*count)++; + } +} +#endif + /* Insert file lock fl into an inode's lock list at the position indicated * by pos. At the same time add the lock to the global file lock list. 
*/ @@ -635,6 +648,12 @@ static int locks_block_on_timeout(struct int result; locks_insert_block(blocker, waiter); result = interruptible_sleep_on_locked(&waiter->fl_wait, time); +#ifdef CONFIG_FUMOUNT + if (unlikely(waiter->fl_file->f_mode & FMODE_FUMOUNT)) { + DEBUG_FUMOUNT; + result = -ENXIO; + } +#endif __locks_delete_block(waiter); return result; } @@ -754,6 +773,79 @@ out: return error; } +#ifdef CONFIG_FUMOUNT +/* + * This function is called to chase out lock waiters attached + * to a given file object. It waits to ensure the waiters + * attached to file_ptr have exited the kernel before continuing. + * Locks associated with file_ptr are removed from the inode list. + * Hold BKL before calling + */ +static void +locks_unblock_remove_file(struct inode *inode_ptr, struct file *file_ptr) +{ + int count; + int starting_f_count; + struct file_lock *fl; + struct file_lock **before; + struct file_lock **next_before; + + down_write(&file_ptr->f_vfsmnt->mnt_close_sem); + + if (!inode_ptr->i_flock) { + up_write(&file_ptr->f_vfsmnt->mnt_close_sem); + return; + } + + before = &inode_ptr->i_flock; + count = 0; + starting_f_count = file_count(file_ptr); + + while ((fl = *before) != NULL) { + locks_waiters_count(fl, &count, file_ptr); + next_before = &fl->fl_next; + if (fl->fl_file == file_ptr) { + locks_delete_lock(before); + } + else { + locks_wake_up_blocks(fl); + } + before = next_before; + } + up_write(&file_ptr->f_vfsmnt->mnt_close_sem); +} + +/* remove_file_locks is part of fumount. This routine acquires the BKL, and + * examines the inode for the file structure passed as the argument. For every + * fl_lock on the inode list, locks_wake_up_blocks is called with a wait = + * TRUE. This unblocks all of the waiters, causing them to check for fumount + * as they resume execution. The fumount check causes the lock to fail, + * generally with -ENXIO. 
+ * + * Once all of the waiters have been flushed from the syscalls, a version of + * locks_remove_* is called for all locks on the inode. This removes all of + * the outstanding file locks resulting from all file objects. At that point, + * the file is safe to clone for fumount closing. +*/ + +void +remove_file_locks( struct file *filp ) +{ + struct inode *inode_ptr; + struct dentry *dentry_ptr; + + lock_kernel(); + + if ( (dentry_ptr = filp->f_dentry) ) { + if ( (inode_ptr = dentry_ptr->d_inode) ) { + locks_unblock_remove_file( inode_ptr, filp ); + } + } + + unlock_kernel(); +} +#endif /* CONFIG_FUMOUNT */ + EXPORT_SYMBOL(posix_lock_file); static int __posix_lock_file(struct inode *inode, struct file_lock *request) @@ -1456,7 +1548,18 @@ int flock_lock_file_wait(struct file *fi int error; might_sleep(); for (;;) { +#ifdef CONFIG_FUMOUNT + if(unlikely(filp->f_mode & FMODE_FUMOUNT)) { + DEBUG_FUMOUNT; + error = -ENXIO; + break; + } + else { + error = flock_lock_file(filp, fl); + } +#else error = flock_lock_file(filp, fl); +#endif if ((error != -EAGAIN) || !(fl->fl_flags & FL_SLEEP)) break; error = wait_event_interruptible(fl->fl_wait, !fl->fl_next); diff -urNp vanilla/linux-2.6.11/fs/namei.c linux-2.6.11-fumount2/fs/namei.c --- vanilla/linux-2.6.11/fs/namei.c 2005-03-01 23:37:55.000000000 -0800 +++ linux-2.6.11-fumount2/fs/namei.c 2005-08-17 13:27:16.000000000 -0700 @@ -28,6 +28,9 @@ #include #include #include +#ifdef CONFIG_FUMOUNT +#include +#endif /* CONFIG_FUMOUNT */ #include #include @@ -510,6 +513,15 @@ static inline int __do_follow_link(struc static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd) { int err = -ELOOP; + +#ifdef CONFIG_FUMOUNT + if (unlikely(!nd->mnt) ) { + DEBUG_FUMOUNT; + err = -ENXIO; + return err; + } +#endif + if (current->link_count >= MAX_NESTED_LINKS) goto loop; if (current->total_link_count >= 40) @@ -559,8 +571,22 @@ static int follow_mount(struct vfsmount int res = 0; while (d_mountpoint(*dentry)) { struct 
vfsmount *mounted = lookup_mnt(*mnt, *dentry); - if (!mounted) + if (!mounted) { +#ifdef CONFIG_FUMOUNT + if(S_ISBLK((*dentry)->d_inode->i_mode)) { + DEBUG_FUMOUNT; + res = -ENXIO; + } +#endif + break; + } +#ifdef CONFIG_FUMOUNT + if (unlikely(mounted->mnt_flags & MNT_FUMOUNT)) { + DEBUG_FUMOUNT; + res = -ENXIO; break; + } +#endif mntput(*mnt); *mnt = mounted; dput(*dentry); @@ -579,11 +605,27 @@ static inline int __follow_down(struct v mounted = lookup_mnt(*mnt, *dentry); if (mounted) { +#ifdef CONFIG_FUMOUNT + struct dentry *old_dentry = *dentry; + DEBUG_FUMOUNT; + if( (*mnt = mntget(mounted)) ) + *dentry = dget(mounted->mnt_root); + else + *dentry = (struct dentry *) NULL; + dput(old_dentry); + mntput(mounted->mnt_parent); + if ( *mnt ) + return 1; + else + return 0; +#else mntput(*mnt); *mnt = mounted; dput(*dentry); *dentry = dget(mounted->mnt_root); return 1; + +#endif /* CONFIG_FUMOUNT */ } return 0; } @@ -645,6 +687,17 @@ static int do_lookup(struct nameidata *n { struct vfsmount *mnt = nd->mnt; struct dentry *dentry = __d_lookup(nd->dentry, name); +#ifdef CONFIG_FUMOUNT + int err; + + /* Uh oh. Walked into a pending FUMOUNT - follow_down + has released parent mnt and dentry, so just bail */ + if (unlikely(!nd->mnt)) { + DEBUG_FUMOUNT; + err = -ENXIO; + return err; + } +#endif if (!dentry) goto need_lookup; @@ -680,6 +733,18 @@ fail: * into the final dentry. * * We expect 'base' to be positive and a directory. + * FUMOUNT: + * - bad expectation, since the error returns from mntget and + * - path init are not always checked. Add check up front to + * - ensure that the main routine doesn't fall off of a NULL + * - mount or dentry. If nothing else, the FUMOUNT will cause + * - NULL mount pointers. The point is for FUMOUNT to not allow + * - a path lookup into a pending FUMOUNT file system. This + * - barrier prevents the reference counts from incrementing when + * - FUMOUNT is trying to clean everything up. 
I will also add + * - similar checks whenever this routine attempts to take another + * - mount structure reference. + * */ int fastcall link_path_walk(const char * name, struct nameidata *nd) { @@ -687,6 +752,13 @@ int fastcall link_path_walk(const char * struct inode *inode; int err; unsigned int lookup_flags = nd->flags; + +#ifdef CONFIG_FUMOUNT + if (unlikely(!nd->mnt || name == NULL)) { + DEBUG_FUMOUNT; + return -ENXIO; /* outa' here if bad init_path */ + } +#endif while (*name=='/') name++; @@ -741,6 +813,13 @@ int fastcall link_path_walk(const char * if (this.name[1] != '.') break; follow_dotdot(&nd->mnt, &nd->dentry); +#ifdef CONFIG_FUMOUNT + if (unlikely(!nd->mnt)) { + DEBUG_FUMOUNT; + err = -ENXIO; + goto return_err; + } +#endif inode = nd->dentry->d_inode; /* fallthrough */ case 1: @@ -761,8 +840,16 @@ int fastcall link_path_walk(const char * if (err) break; /* Check mountpoints.. */ +#ifdef CONFIG_FUMOUNT + if (unlikely(follow_mount(&next.mnt, &next.dentry) < 0)) { + DEBUG_FUMOUNT; + err = -ENXIO; + break; + } +#else follow_mount(&next.mnt, &next.dentry); - +#endif + err = -ENOENT; inode = next.dentry->d_inode; if (!inode) @@ -773,6 +860,13 @@ int fastcall link_path_walk(const char * if (inode->i_op->follow_link) { mntget(next.mnt); +#ifdef CONFIG_FUMOUNT + if (unlikely(next.mnt == NULL)) { + DEBUG_FUMOUNT; + err = -ENXIO; + goto return_err; + } +#endif err = do_follow_link(next.dentry, nd); dput(next.dentry); mntput(next.mnt); @@ -809,6 +903,13 @@ last_component: if (this.name[1] != '.') break; follow_dotdot(&nd->mnt, &nd->dentry); +#ifdef CONFIG_FUMOUNT + if (unlikely(!nd->mnt) ) { + DEBUG_FUMOUNT; + err = -ENXIO; + goto return_err; + } +#endif inode = nd->dentry->d_inode; /* fallthrough */ case 1: @@ -822,11 +923,28 @@ last_component: err = do_lookup(nd, &this, &next); if (err) break; + +#ifdef CONFIG_FUMOUNT + if (unlikely(follow_mount(&next.mnt, &next.dentry) < 0)) { + DEBUG_FUMOUNT; + err = -ENXIO; + break; + } +#else follow_mount(&next.mnt, 
&next.dentry); +#endif + inode = next.dentry->d_inode; if ((lookup_flags & LOOKUP_FOLLOW) && inode && inode->i_op && inode->i_op->follow_link) { mntget(next.mnt); +#ifdef CONFIG_FUMOUNT + if (unlikely(next.mnt == NULL)) { + DEBUG_FUMOUNT; + err = -ENXIO; + goto return_err; + } +#endif err = do_follow_link(next.dentry, nd); dput(next.dentry); mntput(next.mnt); @@ -924,6 +1042,8 @@ static int __emul_lookup_dentry(const ch return 1; } +/* Just release old altroot and associated mount and replace with new + values (NULL unless __emul_prefix is non-NULL) */ void set_fs_altroot(void) { char *emul = __emul_prefix(); @@ -1437,6 +1557,15 @@ do_last: if (flag & O_NOFOLLOW) goto exit_dput; while (__follow_down(&nd->mnt,&dentry) && d_mountpoint(dentry)); +#ifdef CONFIG_FUMOUNT + /* Uh oh. Walked into a pending FUMOUNT - follow_down + has released parent mnt and dentry, so just bail */ + if (unlikely(!nd->mnt)) { + DEBUG_FUMOUNT; + error = -ENXIO; + return error; + } +#endif } error = -ENOENT; if (!dentry->d_inode) @@ -1575,7 +1704,10 @@ asmlinkage long sys_mknod(const char __u tmp = getname(filename); if (IS_ERR(tmp)) return PTR_ERR(tmp); - + +#ifdef CONFIG_FUMOUNT + down_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ error = path_lookup(tmp, LOOKUP_PARENT, &nd); if (error) goto out; @@ -1607,6 +1739,9 @@ asmlinkage long sys_mknod(const char __u up(&nd.dentry->d_inode->i_sem); path_release(&nd); out: +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ putname(tmp); return error; @@ -1647,6 +1782,10 @@ asmlinkage long sys_mkdir(const char __u struct dentry *dentry; struct nameidata nd; +#ifdef CONFIG_FUMOUNT + down_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ + error = path_lookup(tmp, LOOKUP_PARENT, &nd); if (error) goto out; @@ -1661,6 +1800,9 @@ asmlinkage long sys_mkdir(const char __u up(&nd.dentry->d_inode->i_sem); path_release(&nd); out: +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ 
putname(tmp); } @@ -1744,6 +1886,9 @@ asmlinkage long sys_rmdir(const char __u if(IS_ERR(name)) return PTR_ERR(name); +#ifdef CONFIG_FUMOUNT + down_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ error = path_lookup(name, LOOKUP_PARENT, &nd); if (error) goto exit; @@ -1770,6 +1915,9 @@ asmlinkage long sys_rmdir(const char __u exit1: path_release(&nd); exit: +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ putname(name); return error; } @@ -1822,6 +1970,9 @@ asmlinkage long sys_unlink(const char __ if(IS_ERR(name)) return PTR_ERR(name); +#ifdef CONFIG_FUMOUNT + down_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ error = path_lookup(name, LOOKUP_PARENT, &nd); if (error) goto exit; @@ -1848,6 +1999,9 @@ asmlinkage long sys_unlink(const char __ exit1: path_release(&nd); exit: +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ putname(name); return error; @@ -1895,6 +2049,9 @@ asmlinkage long sys_symlink(const char _ struct dentry *dentry; struct nameidata nd; +#ifdef CONFIG_FUMOUNT + down_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ error = path_lookup(to, LOOKUP_PARENT, &nd); if (error) goto out; @@ -1907,6 +2064,9 @@ asmlinkage long sys_symlink(const char _ up(&nd.dentry->d_inode->i_sem); path_release(&nd); out: +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ putname(to); } putname(from); @@ -1973,6 +2133,9 @@ asmlinkage long sys_link(const char __us if (IS_ERR(to)) return PTR_ERR(to); +#ifdef CONFIG_FUMOUNT + down_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ error = __user_walk(oldname, 0, &old_nd); if (error) goto exit; @@ -1994,6 +2157,9 @@ out_release: out: path_release(&old_nd); exit: +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ putname(to); return error; @@ -2155,6 +2321,9 @@ static inline int do_rename(const char * struct dentry * trap; struct nameidata oldnd, newnd; +#ifdef CONFIG_FUMOUNT 
+ down_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ error = path_lookup(oldname, LOOKUP_PARENT, &oldnd); if (error) goto exit; @@ -2220,6 +2389,9 @@ exit2: exit1: path_release(&oldnd); exit: +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ return error; } diff -urNp vanilla/linux-2.6.11/fs/namespace.c linux-2.6.11-fumount2/fs/namespace.c --- vanilla/linux-2.6.11/fs/namespace.c 2005-03-01 23:38:13.000000000 -0800 +++ linux-2.6.11-fumount2/fs/namespace.c 2005-08-17 13:27:16.000000000 -0700 @@ -62,6 +62,9 @@ struct vfsmount *alloc_vfsmnt(const char INIT_LIST_HEAD(&mnt->mnt_mounts); INIT_LIST_HEAD(&mnt->mnt_list); INIT_LIST_HEAD(&mnt->mnt_fslink); +#ifdef CONFIG_FUMOUNT + init_rwsem(&mnt->mnt_close_sem); +#endif if (name) { int size = strlen(name)+1; char *newname = kmalloc(size, GFP_KERNEL); @@ -120,6 +123,9 @@ static void detach_mnt(struct vfsmount * list_del_init(&mnt->mnt_child); list_del_init(&mnt->mnt_hash); old_nd->dentry->d_mounted--; +#ifdef CONFIG_FUMOUNT + init_rwsem(&mnt->mnt_close_sem); +#endif } static void attach_mnt(struct vfsmount *mnt, struct nameidata *nd) @@ -175,6 +181,12 @@ clone_mnt(struct vfsmount *old, struct d void __mntput(struct vfsmount *mnt) { struct super_block *sb = mnt->mnt_sb; +#ifdef CONFIG_FUMOUNT + if(unlikely(mnt == NULL)) { + DEBUG_FUMOUNT; + return; + } +#endif dput(mnt->mnt_root); free_vfsmnt(mnt); deactivate_super(sb); @@ -367,6 +379,9 @@ void umount_tree(struct vfsmount *mnt) static int do_umount(struct vfsmount *mnt, int flags) { struct super_block * sb = mnt->mnt_sb; +#ifdef CONFIG_FUMOUNT + int wait_count; +#endif int retval; retval = security_sb_umount(mnt, flags); @@ -379,7 +394,7 @@ static int do_umount(struct vfsmount *mn * (1) the mark is already set (the mark is cleared by mntput()) * (2) the usage count == 1 [parent vfsmount] + 1 [sys_umount] */ - if (flags & MNT_EXPIRE) { + if ((flags & MNT_EXPIRE) && !(flags & MNT_FFORCE) ) { if (mnt == current->fs->rootmnt || flags & 
(MNT_FORCE | MNT_DETACH)) return -EINVAL; @@ -402,8 +417,13 @@ static int do_umount(struct vfsmount *mn */ lock_kernel(); - if( (flags&MNT_FORCE) && sb->s_op->umount_begin) +#ifdef CONFIG_FUMOUNT + if (unlikely((flags & (MNT_FORCE | MNT_FFORCE)) && sb->s_op->umount_begin)) { +#else + if ((flags & MNT_FORCE) && sb->s_op->umount_begin) { +#endif sb->s_op->umount_begin(sb); + } unlock_kernel(); /* @@ -434,6 +454,9 @@ static int do_umount(struct vfsmount *mn down_write(¤t->namespace->sem); spin_lock(&vfsmount_lock); +#ifdef CONFIG_FUMOUNT +umount_retry: +#endif if (atomic_read(&sb->s_active) == 1) { /* last instance - try to be smart */ spin_unlock(&vfsmount_lock); @@ -450,6 +473,145 @@ static int do_umount(struct vfsmount *mn umount_tree(mnt); retval = 0; } + +#ifdef CONFIG_FUMOUNT + /* Code and comments originally written by Monta Vista for the + * 2.4.x kernel. + + * Now for the dreaded FORCE unmount. The idea here is that if + * this isn't the root fs, and FUMOUNT is requested, and we + * aren't good to go with a normal unmount, and we haven't been + * through here before (you only go around once!), and there + * are no child mounts (if there are children, we expect the + * administrator to clean those up first, rather than trying to + * force the umount recursively - why - because this is an ugly + * thing to do to a running system, and I choose to make the + * admin know what they are doing!) then find the references + * that make the mount point busy and eliminate them. */ + if (unlikely(mnt != current->fs->rootmnt + && (flags & MNT_FFORCE) + && (retval != 0) + && !(mnt->mnt_flags & MNT_FUMOUNT) + && (list_empty(&mnt->mnt_mounts))) ) { + + DEBUG_FUMOUNT; + /* stop additional references to the mount by setting + * the MNT_FUMOUNT flag on the vfs mount context and modifying + * fget to fail if the flag is set. The syscalls that + * attack the file system via a name string generally + * end up returning -ENXIO. 
The alternative is to + * allow the mount reference count to fluctuate and + * check after the reference, but this was rejected, + * since the objective is to drive the ref count to the + * magic number to allow unmounting. Once the mount point has + * been marked, we can safely give up namespace semaphore until + * later, as any attempt to mount on or below the pending unmount + * will fail on the attempt to walk the path. + */ + mnt->mnt_flags |= MNT_FUMOUNT; + + /* mark the files as subject to a fumount - this + * prevents further syscalls from starting with the + * file - instead causing the sys_calls to return + * -ENXIO. Hopefully, the processes will get the + * message, and close the files after a brief wait - + * note that we hold onto the namespace semaphore - last + * thing we need is for something to mount on the + * subtree while trying to clean this up. + */ + fs_fumount_mark_files(mnt); + up_write(¤t->namespace->sem); + spin_unlock(&vfsmount_lock); + + /* wait a bit, in hopes that the processes will take + * their errors, close out their files (and hope + * against hope, satify any sleeps that have occurred + * in the vfs - that is, bd reads will complete, and + * locks will be released). It would also be nice if + * the processes would get out of related working + * directories, but I'm dreaming. If all that happens, + * then the forced cleanup is easy, and probably safe. + * NB - the really proper way to do this is to compute + * the correct magic number for each file object - that + * is, search the process table to find the number of + * opens associated with the file object and wait for + * the file object reference count to fall below this + * number - then everything is back out of the kernel + * sys_calls, deterministically. 
+ */ + printk(KERN_DEBUG "Mount reference count = %d\n", + atomic_read(&mnt->mnt_count) ); + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(5*HZ); + + printk(KERN_DEBUG "Back from delay, looking for open files\n"); + printk(KERN_DEBUG "Mount reference count = %d\n", + atomic_read(&mnt->mnt_count) ); + do { + /* clone the open list - this is in a loop, + * since we may run out of file objects, and + * the fs_mount_close() releases them back to + * the pool. */ + retval = fs_fumount_clone_list(sb, mnt); + fs_fumount_close(); + } while (retval); + printk(KERN_DEBUG "after clone mount reference count = %d\n", + atomic_read(&mnt->mnt_count) ); + + /* Having removed all the file objects from the mount, + * we can then, at our leisure, it seems, go through + * the task list and remove all cwdmnt references to + * the mount. This will leave process without a + * relative working directory, but it can recover by cd + * to a rooted path not on the mount. At that point + * the mount count should be at the magic number, and + * we will repeat the normal umount process. 
*/ + if ( atomic_read(&mnt->mnt_count) > 2 ) { + struct task_struct *task_ptr; + + read_lock( &tasklist_lock ); + for_each_process(task_ptr) { + if ( task_ptr->fs ) { + if ( task_ptr->fs->pwdmnt == mnt ) { + set_fs_pwd( task_ptr->fs, + (struct vfsmount *)NULL, + (struct dentry *)NULL ); + } + } + if ( atomic_read(&mnt->mnt_count) == 2 ) + break; + } + read_unlock( &tasklist_lock ); + } + wait_count = 100; + printk(KERN_DEBUG "Mount reference count = %d\n", + atomic_read(&mnt->mnt_count) ); + + while (atomic_read(&mnt->mnt_count) > 2 && wait_count >0 ) { + current->state = TASK_UNINTERRUPTIBLE; + schedule_timeout(5*HZ); + wait_count--; + printk(KERN_DEBUG "Mount reference count = %d wait counter = " + "%d\n", atomic_read(&mnt->mnt_count), wait_count ); + } + down_write(¤t->namespace->sem); + + if ( atomic_read(&mnt->mnt_count) > 2) + printk(KERN_WARNING "Losing resources!\n"); + while ( atomic_read(&mnt->mnt_count) > 2 ) { + /* Okay, can't find all of the references - + * just drive the count down. This may leave + * dangling resources, but too bad. We are + * going to fumount! 
*/ + mntput(mnt); + } + printk(KERN_DEBUG "Mount reference count = %d\n", + atomic_read(&mnt->mnt_count) ); + spin_lock(&vfsmount_lock); + goto umount_retry; + } +#endif /* CONFIG_FUMOUNT */ + spin_unlock(&vfsmount_lock); if (retval) security_sb_umount_busy(mnt); @@ -708,6 +870,11 @@ static int do_move_mount(struct nameidat down_write(¤t->namespace->sem); while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry)) ; +#ifdef CONFIG_FUMOUNT + err = -ENXIO; + if (unlikely(!nd->mnt)) + goto out; +#endif /*CONFIG_FUMOUNT*/ err = -EINVAL; if (!check_mnt(nd->mnt) || !check_mnt(old_nd.mnt)) goto out; @@ -792,6 +959,11 @@ int do_add_mount(struct vfsmount *newmnt /* Something was mounted here while we slept */ while(d_mountpoint(nd->dentry) && follow_down(&nd->mnt, &nd->dentry)) ; +#ifdef CONFIG_FUMOUNT + err = -ENXIO; + if (unlikely(!nd->mnt)) + goto unlock; +#endif /*CONFIG_FUMOUNT*/ err = -EINVAL; if (!check_mnt(nd->mnt)) goto unlock; @@ -887,7 +1059,9 @@ void mark_mounts_for_expiry(struct list_ list_del_init(&mnt->mnt_child); list_del_init(&mnt->mnt_hash); mnt->mnt_mountpoint->d_mounted--; - +#ifdef CONFIG_FUMOUNT + init_rwsem(&mnt->mnt_close_sem); +#endif xdentry = mnt->mnt_mountpoint; mnt->mnt_mountpoint = mnt->mnt_root; xdmnt = mnt->mnt_parent; @@ -1221,8 +1395,10 @@ void set_fs_pwd(struct fs_struct *fs, st write_lock(&fs->lock); old_pwd = fs->pwd; old_pwdmnt = fs->pwdmnt; - fs->pwdmnt = mntget(mnt); - fs->pwd = dget(dentry); + if ( (fs->pwdmnt = mntget(mnt)) ) + fs->pwd = dget(dentry); + else + fs->pwd = (struct dentry *)NULL; write_unlock(&fs->lock); if (old_pwd) { diff -urNp vanilla/linux-2.6.11/fs/open.c linux-2.6.11-fumount2/fs/open.c --- vanilla/linux-2.6.11/fs/open.c 2005-03-01 23:37:47.000000000 -0800 +++ linux-2.6.11-fumount2/fs/open.c 2005-08-17 13:27:16.000000000 -0700 @@ -15,6 +15,9 @@ #include #include #include +#ifdef CONFIG_FUMOUNT +#include +#endif /* CONFIG_FUMOUNT */ #include #include #include @@ -122,6 +125,9 @@ asmlinkage long 
sys_statfs(const char __ struct nameidata nd; int error; +#ifdef CONFIG_FUMOUNT + down_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ error = user_path_walk(path, &nd); if (!error) { struct statfs tmp; @@ -130,6 +136,9 @@ asmlinkage long sys_statfs(const char __ error = -EFAULT; path_release(&nd); } +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ return error; } @@ -220,6 +229,9 @@ static inline long do_sys_truncate(const if (length < 0) /* sorry, but loff_t says... */ goto out; +#ifdef CONFIG_FUMOUNT + down_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ error = user_path_walk(path, &nd); if (error) goto out; @@ -267,6 +279,9 @@ static inline long do_sys_truncate(const dput_and_out: path_release(&nd); out: +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ return error; } @@ -357,6 +372,9 @@ asmlinkage long sys_utime(char __user * struct inode * inode; struct iattr newattrs; +#ifdef CONFIG_FUMOUNT + down_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ error = user_path_walk(filename, &nd); if (error) goto out; @@ -397,6 +415,9 @@ asmlinkage long sys_utime(char __user * dput_and_out: path_release(&nd); out: +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ return error; } @@ -413,6 +434,9 @@ long do_utimes(char __user * filename, s struct inode * inode; struct iattr newattrs; +#ifdef CONFIG_FUMOUNT + down_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ error = user_path_walk(filename, &nd); if (error) @@ -450,6 +474,9 @@ long do_utimes(char __user * filename, s dput_and_out: path_release(&nd); out: +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ return error; } @@ -498,6 +525,9 @@ asmlinkage long sys_access(const char __ else current->cap_effective = current->cap_permitted; +#ifdef CONFIG_FUMOUNT + down_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ res = __user_walk(filename, 
LOOKUP_FOLLOW|LOOKUP_ACCESS, &nd); if (!res) { res = permission(nd.dentry->d_inode, mode, &nd); @@ -508,6 +538,9 @@ asmlinkage long sys_access(const char __ path_release(&nd); } +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ current->fsuid = old_fsuid; current->fsgid = old_fsgid; current->cap_effective = old_cap; @@ -634,6 +667,9 @@ asmlinkage long sys_chmod(const char __u int error; struct iattr newattrs; +#ifdef CONFIG_FUMOUNT + down_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ error = user_path_walk(filename, &nd); if (error) goto out; @@ -658,6 +694,9 @@ asmlinkage long sys_chmod(const char __u dput_and_out: path_release(&nd); out: +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ return error; } @@ -701,11 +740,18 @@ asmlinkage long sys_chown(const char __u struct nameidata nd; int error; +#ifdef CONFIG_FUMOUNT + down_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ error = user_path_walk(filename, &nd); if (!error) { error = chown_common(nd.dentry, user, group); path_release(&nd); } + +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ return error; } @@ -714,11 +760,17 @@ asmlinkage long sys_lchown(const char __ struct nameidata nd; int error; +#ifdef CONFIG_FUMOUNT + down_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ error = user_path_walk_link(filename, &nd); if (!error) { error = chown_common(nd.dentry, user, group); path_release(&nd); } +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ return error; } @@ -794,7 +846,19 @@ struct file *dentry_open(struct dentry * f->f_vfsmnt = mnt; f->f_pos = 0; f->f_op = fops_get(inode->i_fop); + +#ifdef CONFIG_FUMOUNT + error = file_move_test(f, inode->i_sb); + if (error) { + DEBUG_FUMOUNT; +#if 0 + printk(KERN_DEBUG "Disallowed file open due to pending unmount\n"); +#endif + goto cleanup_all; + } +#else file_move(f, &inode->i_sb->s_files); +#endif if (f->f_op && 
f->f_op->open) { error = f->f_op->open(inode,f); @@ -944,10 +1008,16 @@ asmlinkage long sys_open(const char __us fd = get_unused_fd(); if (fd >= 0) { struct file *f = filp_open(tmp, flags, mode); +#ifdef CONFIG_FUMOUNT + down_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ error = PTR_ERR(f); if (IS_ERR(f)) goto out_error; fd_install(fd, f); +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ } out: putname(tmp); @@ -955,6 +1025,9 @@ out: return fd; out_error: +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ put_unused_fd(fd); fd = error; goto out; @@ -998,14 +1071,56 @@ int filp_close(struct file *filp, fl_own retval = err; } +#ifdef CONFIG_FUMOUNT + if (!(filp->f_mode & FMODE_DEFUNCT) ) { + dnotify_flush(filp, id); + locks_remove_posix(filp, id); + fput(filp); + } + else + fumount_fput(filp); +#else dnotify_flush(filp, id); locks_remove_posix(filp, id); fput(filp); +#endif + return retval; } EXPORT_SYMBOL(filp_close); +#ifdef CONFIG_FUMOUNT +/* + * fumount_close is similar to filp_close. However, we don't call + * locks_remove_posix, since we have lost the files id. We have + * previously chased the locks out of the file object so + * we assume that the locks are not in effect. We also use a special + * version of dnotify_flush that doesn't care about matching the + * id of the caller - it just flushes everything associated with + * the filp. + */ +void fumount_close(struct file *filp) +{ + int retval; + + DEBUG_FUMOUNT; + + if (!file_count(filp)) { + printk(KERN_ERR "VFS: Close: file count is 0\n"); + return; + } + retval = 0; + if (filp->f_op && filp->f_op->flush) { + lock_kernel(); + retval = filp->f_op->flush(filp); + unlock_kernel(); + } + fumount_dnotify_flush(filp); + fput(filp); +} +#endif /* CONFIG_FUMOUNT */ + /* * Careful here! We test whether the file pointer is NULL before * releasing the fd. 
This ensures that one clone task can't release @@ -1013,8 +1128,13 @@ EXPORT_SYMBOL(filp_close); */ asmlinkage long sys_close(unsigned int fd) { - struct file * filp; + struct file * filp = NULL; struct files_struct *files = current->files; + int ret_code; +#ifdef CONFIG_FUMOUNT + int semaphore_flag = 0; + struct vfsmount *mnt = NULL; +#endif spin_lock(&files->file_lock); if (fd >= files->max_fds) @@ -1026,11 +1146,42 @@ asmlinkage long sys_close(unsigned int f FD_CLR(fd, files->close_on_exec); __put_unused_fd(files, fd); spin_unlock(&files->file_lock); - return filp_close(filp, files); + +#ifdef CONFIG_FUMOUNT + mnt = filp->f_vfsmnt; + if(mnt) { + down_read(&filp->f_vfsmnt->mnt_close_sem); + semaphore_flag = 1; + } + + if (!(filp->f_mode & FMODE_DEFUNCT) + && !(S_ISFUMOUNTABLE(filp->f_dentry->d_inode->i_mode))) { + /* Release semaphore so system doesn't pause behind non-fumountable + * file types. For example, a tape drive could take a long time + * to close if it does a rewind before completing the close + * action on the final fput + */ + if(semaphore_flag) { + semaphore_flag = 0; + up_read(&mnt->mnt_close_sem); + } + } +#endif + + ret_code = filp_close(filp, files); + +exit_sys_close: +#ifdef CONFIG_FUMOUNT + if (semaphore_flag) { + up_read(&mnt->mnt_close_sem); + } +#endif + return ret_code; out_unlock: spin_unlock(&files->file_lock); - return -EBADF; + ret_code = -EBADF; + goto exit_sys_close; } EXPORT_SYMBOL(sys_close); diff -urNp vanilla/linux-2.6.11/fs/stat.c linux-2.6.11-fumount2/fs/stat.c --- vanilla/linux-2.6.11/fs/stat.c 2005-03-01 23:37:49.000000000 -0800 +++ linux-2.6.11-fumount2/fs/stat.c 2005-08-17 13:27:16.000000000 -0700 @@ -13,6 +13,9 @@ #include #include #include +#ifdef CONFIG_FUMOUNT +#include +#endif #include #include @@ -68,11 +71,17 @@ int vfs_stat(char __user *name, struct k struct nameidata nd; int error; +#ifdef CONFIG_FUMOUNT + down_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ error = user_path_walk(name, &nd); if (!error) { 
error = vfs_getattr(nd.mnt, nd.dentry, stat); path_release(&nd); } +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ return error; } @@ -83,11 +92,17 @@ int vfs_lstat(char __user *name, struct struct nameidata nd; int error; +#ifdef CONFIG_FUMOUNT + down_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ error = user_path_walk_link(name, &nd); if (!error) { error = vfs_getattr(nd.mnt, nd.dentry, stat); path_release(&nd); } +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ return error; } @@ -268,6 +283,9 @@ asmlinkage long sys_readlink(const char if (bufsiz <= 0) return -EINVAL; +#ifdef CONFIG_FUMOUNT + down_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ error = user_path_walk_link(path, &nd); if (!error) { struct inode * inode = nd.dentry->d_inode; @@ -282,6 +300,9 @@ asmlinkage long sys_readlink(const char } path_release(&nd); } +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ return error; } diff -urNp vanilla/linux-2.6.11/fs/xattr.c linux-2.6.11-fumount2/fs/xattr.c --- vanilla/linux-2.6.11/fs/xattr.c 2005-03-01 23:38:07.000000000 -0800 +++ linux-2.6.11-fumount2/fs/xattr.c 2005-08-17 13:27:16.000000000 -0700 @@ -13,6 +13,9 @@ #include #include #include +#ifdef CONFIG_FUMOUNT +#include +#endif #include #include #include @@ -74,11 +77,17 @@ sys_setxattr(char __user *path, char __u struct nameidata nd; int error; +#ifdef CONFIG_FUMOUNT + down_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ error = user_path_walk(path, &nd); if (error) return error; error = setxattr(nd.dentry, name, value, size, flags); path_release(&nd); +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ return error; } @@ -89,11 +98,21 @@ sys_lsetxattr(char __user *path, char __ struct nameidata nd; int error; +#ifdef CONFIG_FUMOUNT + down_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ error = user_path_walk_link(path, &nd); - if (error) + 
if (error) { +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ return error; + } error = setxattr(nd.dentry, name, value, size, flags); path_release(&nd); +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ return error; } @@ -164,11 +183,21 @@ sys_getxattr(char __user *path, char __u struct nameidata nd; ssize_t error; +#ifdef CONFIG_FUMOUNT + down_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ error = user_path_walk(path, &nd); - if (error) + if (error) { +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ return error; + } error = getxattr(nd.dentry, name, value, size); path_release(&nd); +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ return error; } @@ -179,11 +208,21 @@ sys_lgetxattr(char __user *path, char __ struct nameidata nd; ssize_t error; +#ifdef CONFIG_FUMOUNT + down_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ error = user_path_walk_link(path, &nd); - if (error) + if (error) { +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ return error; + } error = getxattr(nd.dentry, name, value, size); path_release(&nd); +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ return error; } @@ -245,11 +284,21 @@ sys_listxattr(char __user *path, char __ struct nameidata nd; ssize_t error; +#ifdef CONFIG_FUMOUNT + down_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ error = user_path_walk(path, &nd); - if (error) + if (error) { +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ return error; + } error = listxattr(nd.dentry, list, size); path_release(&nd); +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ return error; } @@ -259,11 +308,21 @@ sys_llistxattr(char __user *path, char _ struct nameidata nd; ssize_t error; +#ifdef CONFIG_FUMOUNT + down_read(¤t->namespace->sem); +#endif /* 
CONFIG_FUMOUNT */ error = user_path_walk_link(path, &nd); - if (error) + if (error) { +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ return error; + } error = listxattr(nd.dentry, list, size); path_release(&nd); +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ return error; } @@ -315,11 +374,21 @@ sys_removexattr(char __user *path, char struct nameidata nd; int error; +#ifdef CONFIG_FUMOUNT + down_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ error = user_path_walk(path, &nd); - if (error) + if (error) { +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ return error; + } error = removexattr(nd.dentry, name); path_release(&nd); +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ return error; } @@ -329,11 +398,21 @@ sys_lremovexattr(char __user *path, char struct nameidata nd; int error; +#ifdef CONFIG_FUMOUNT + down_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ error = user_path_walk_link(path, &nd); - if (error) + if (error) { +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ return error; + } error = removexattr(nd.dentry, name); path_release(&nd); +#ifdef CONFIG_FUMOUNT + up_read(¤t->namespace->sem); +#endif /* CONFIG_FUMOUNT */ return error; } diff -urNp vanilla/linux-2.6.11/include/linux/dnotify.h linux-2.6.11-fumount2/include/linux/dnotify.h --- vanilla/linux-2.6.11/include/linux/dnotify.h 2005-03-01 23:37:31.000000000 -0800 +++ linux-2.6.11-fumount2/include/linux/dnotify.h 2005-08-17 13:27:16.000000000 -0700 @@ -22,6 +22,9 @@ struct dnotify_struct { #ifdef CONFIG_DNOTIFY +#ifdef CONFIG_FUMOUNT +extern void fumount_dnotify_flush(struct file *filp); +#endif extern void __inode_dir_notify(struct inode *, unsigned long); extern void dnotify_flush(struct file *, fl_owner_t); extern int fcntl_dirnotify(int, struct file *, unsigned long); diff -urNp 
vanilla/linux-2.6.11/include/linux/file.h linux-2.6.11-fumount2/include/linux/file.h --- vanilla/linux-2.6.11/include/linux/file.h 2005-03-01 23:38:34.000000000 -0800 +++ linux-2.6.11-fumount2/include/linux/file.h 2005-08-17 13:27:16.000000000 -0700 @@ -36,6 +36,9 @@ struct files_struct { extern void FASTCALL(__fput(struct file *)); extern void FASTCALL(fput(struct file *)); +#ifdef CONFIG_FUMOUNT +extern void FASTCALL(fumount_fput(struct file *)); +#endif static inline void fput_light(struct file *file, int fput_needed) { if (unlikely(fput_needed)) diff -urNp vanilla/linux-2.6.11/include/linux/fs.h linux-2.6.11-fumount2/include/linux/fs.h --- vanilla/linux-2.6.11/include/linux/fs.h 2005-03-01 23:37:50.000000000 -0800 +++ linux-2.6.11-fumount2/include/linux/fs.h 2005-08-17 13:27:16.000000000 -0700 @@ -10,6 +10,16 @@ #include #include +#ifdef CONFIG_FUMOUNT +#include +#undef FUDEBUG +#ifdef FUDEBUG +#define DEBUG_FUMOUNT printk("FUmount: (%s, %d), %s\n", __FILE__, __LINE__, __func__); +#else +#define DEBUG_FUMOUNT +#endif /* FUDEBUG */ +#endif /* CONFIG_FUMOUNT */ + /* * It's silly to have NR_OPEN bigger than NR_FILE, but you can change * the file limit at runtime and only root can increase the per-process @@ -63,6 +73,11 @@ extern int dir_notify_enable; #define FMODE_LSEEK 4 #define FMODE_PREAD 8 #define FMODE_PWRITE FMODE_PREAD /* These go hand in hand */ +/* next two mode flags are for forced umount */ +#define FMODE_FUMOUNT 16 /* fumount is forcing this file to fail */ +#define FMODE_DEFUNCT 32 /* fumount has taken the resources away from this file */ + + #define RW_MASK 1 #define RWA_MASK 2 @@ -608,6 +623,10 @@ extern spinlock_t files_lock; #define get_file(x) atomic_inc(&(x)->f_count) #define file_count(x) atomic_read(&(x)->f_count) +#ifdef CONFIG_FUMOUNT +extern void fumount_close(struct file *); +extern void fs_fumount_close( void ); +#endif #define MAX_NON_LFS ((1UL<<31) - 1) /* Page cache limit. 
The filesystems should put that into their s_maxbytes @@ -745,6 +764,7 @@ extern int send_sigurg(struct fown_struc #define MNT_FORCE 0x00000001 /* Attempt to forcibily umount */ #define MNT_DETACH 0x00000002 /* Just detach from the tree */ #define MNT_EXPIRE 0x00000004 /* Mark for expiry */ +#define MNT_FFORCE 0x00000008 /* Really forcibily umount - no prisoners */ extern struct list_head super_blocks; extern spinlock_t sb_lock; @@ -1329,6 +1349,10 @@ extern struct file_operations rdwr_pipe_ extern int fs_may_remount_ro(struct super_block *); +#ifdef CONFIG_FUMOUNT +extern int fs_fumount_clone_list(struct super_block *, struct vfsmount *); +#endif + /* * return READ, READA, or WRITE */ @@ -1444,6 +1468,9 @@ static inline void insert_inode_hash(str extern struct file * get_empty_filp(void); extern void file_move(struct file *f, struct list_head *list); +#ifdef CONFIG_FUMOUNT +extern int file_move_test(struct file *f, struct super_block *sb); +#endif extern void file_kill(struct file *f); struct bio; extern void submit_bio(int, struct bio *); diff -urNp vanilla/linux-2.6.11/include/linux/mount.h linux-2.6.11-fumount2/include/linux/mount.h --- vanilla/linux-2.6.11/include/linux/mount.h 2005-03-01 23:37:48.000000000 -0800 +++ linux-2.6.11-fumount2/include/linux/mount.h 2005-08-17 13:27:16.000000000 -0700 @@ -14,11 +14,15 @@ #include #include +#ifdef CONFIG_FUMOUNT +#include +#endif #include #define MNT_NOSUID 1 #define MNT_NODEV 2 #define MNT_NOEXEC 4 +#define MNT_FUMOUNT 1<<16 /* set when forcibly removing mount */ struct vfsmount { @@ -36,14 +40,39 @@ struct vfsmount struct list_head mnt_list; struct list_head mnt_fslink; /* link in fs-specific expiry list */ struct namespace *mnt_namespace; /* containing namespace */ +#ifdef CONFIG_FUMOUNT + struct rw_semaphore mnt_close_sem; +#endif }; +#ifdef CONFIG_FUMOUNT +extern void fs_fumount_mark_files(struct vfsmount *); + +/* mntget checks that the parameter is not NULL, and now checks to + see that the mount is not 
subject to a + pending forced unmount. If both checks pass, then the reference + count for the mount structure is atomically incremented and the + mount structure pointer is returned. Otherwise, NULL is returned. +*/ + +static inline struct vfsmount *mntget(struct vfsmount *mnt) +{ + if ( (mnt) && !( mnt->mnt_flags & MNT_FUMOUNT ) ){ + atomic_inc(&mnt->mnt_count); + } + else { + mnt = (struct vfsmount *) NULL; + } + return mnt; +} +#else static inline struct vfsmount *mntget(struct vfsmount *mnt) { if (mnt) atomic_inc(&mnt->mnt_count); return mnt; } +#endif /*CONFIG_FUMOUNT*/ extern void __mntput(struct vfsmount *mnt); diff -urNp vanilla/linux-2.6.11/include/linux/stat.h linux-2.6.11-fumount2/include/linux/stat.h --- vanilla/linux-2.6.11/include/linux/stat.h 2005-03-01 23:37:55.000000000 -0800 +++ linux-2.6.11-fumount2/include/linux/stat.h 2005-08-17 13:27:16.000000000 -0700 @@ -28,6 +28,10 @@ #define S_ISBLK(m) (((m) & S_IFMT) == S_IFBLK) #define S_ISFIFO(m) (((m) & S_IFMT) == S_IFIFO) #define S_ISSOCK(m) (((m) & S_IFMT) == S_IFSOCK) +#ifdef CONFIG_FUMOUNT +#define S_ISFUMOUNTABLE(m) ( (S_ISLNK(m)) || (S_ISREG(m)) || (S_ISDIR(m))\ + || (S_ISFIFO(m)) ) +#endif #define S_IRWXU 00700 #define S_IRUSR 00400 diff -urNp vanilla/linux-2.6.11/kernel/fork.c linux-2.6.11-fumount2/kernel/fork.c --- vanilla/linux-2.6.11/kernel/fork.c 2005-03-01 23:37:48.000000000 -0800 +++ linux-2.6.11-fumount2/kernel/fork.c 2005-08-17 13:27:16.000000000 -0700 @@ -505,8 +505,10 @@ static inline struct fs_struct *__copy_f read_lock(&old->lock); fs->rootmnt = mntget(old->rootmnt); fs->root = dget(old->root); - fs->pwdmnt = mntget(old->pwdmnt); - fs->pwd = dget(old->pwd); + if ( (fs->pwdmnt = mntget(old->pwdmnt))) + fs->pwd = dget(old->pwd); + else + fs->pwd = (struct dentry *)NULL; if (old->altroot) { fs->altrootmnt = mntget(old->altrootmnt); fs->altroot = dget(old->altroot); diff -urNp vanilla/linux-2.6.11/mm/mmap.c linux-2.6.11-fumount2/mm/mmap.c --- vanilla/linux-2.6.11/mm/mmap.c 2005-03-01 
23:38:12.000000000 -0800
+++ linux-2.6.11-fumount2/mm/mmap.c	2005-08-17 13:27:16.000000000 -0700
@@ -1878,6 +1878,219 @@ static inline void verify_mm_writelocked
 #endif
 }
 
+#ifdef CONFIG_FUMOUNT
+/* Each time a mapping is found that matches the file object, we get
+ * the mm_struct associated with the mapping, lock the mm_struct by
+ * incrementing the mm_count.  Then we take the mmap_sem semaphore.
+ * We search the vma list for the mm space, and remove all mappings
+ * associated with the file.
+ * This avoids having to search all of the process mms for file
+ * matches, while still appearing to be safe.  If the process
+ * terminates, then the vma list will be empty by the time I acquire
+ * the mm semaphore, since I added code in exit_mmap to take the
+ * semaphore before stealing all of the vmas.  It is held until all of
+ * the vmas are released, so finding an empty vma area means that the
+ * file references have been removed, which is the point of this whole
+ * exercise.  Once done, we drop the mmap_sem and mm_count and restart
+ * our search.  We are only done with the mappings for a given file
+ * when we traverse both the map lists without working on a mapping for
+ * a particular file object.
+ *
+ * Returns 0 when every vma of @mm_ptr backed by @file was unmapped (or
+ * @mm_ptr is NULL), otherwise the first non-zero do_munmap() result.
+ */
+static int remove_file_map(struct file *file, struct mm_struct *mm_ptr)
+{
+	int ret_code = 0;
+
+	if (mm_ptr) {
+		struct vm_area_struct *next_vma_ptr;
+		struct vm_area_struct *vma_ptr;
+
+		atomic_inc(&mm_ptr->mm_count);
+		down_write(&mm_ptr->mmap_sem);
+
+		for (vma_ptr = mm_ptr->mmap; vma_ptr; vma_ptr = next_vma_ptr) {
+			next_vma_ptr = vma_ptr->vm_next;
+			if (vma_ptr->vm_file == file) {
+				ret_code = do_munmap(mm_ptr, vma_ptr->vm_start,
+						(size_t)(vma_ptr->vm_end
+						- vma_ptr->vm_start));
+				/* Low memory condition.  Retry built into
+				 * the caller */
+				if (ret_code)
+					break;
+			}
+		}
+		up_write(&mm_ptr->mmap_sem);
+		/* Drop the mm_count reference with mmdrop() on the error
+		 * path too: a bare atomic_dec() would never free the mm
+		 * if ours turned out to be the last reference. */
+		mmdrop(mm_ptr);
+	}
+	return ret_code;
+}
+
+/* Unmap every linear shared mapping of @file found in the i_mmap tree of
+ * @addr_space_ptr.  Returns 0 once no vma in the tree refers to @file,
+ * or the error from remove_file_map(); i_mmap_lock is not held on return.
+ */
+static int remove_shared_file_mappings(struct file *file,
+	struct address_space *addr_space_ptr)
+{
+	struct mm_struct *mm_ptr;
+	struct vm_area_struct *vma_ptr;
+	struct prio_tree_iter iter;
+	int ret_code;
+
+	DEBUG_FUMOUNT;
+	if (prio_tree_empty(&addr_space_ptr->i_mmap))
+		return 0;
+	spin_lock(&addr_space_ptr->i_mmap_lock);
+
+	while(!prio_tree_empty(&addr_space_ptr->i_mmap)) {
+		/*
+		 * I think the r_index (aka begin) should be 0 and h_index
+		 * (aka end should be ULONG_MAX to search the entire tree.
+		 * Hopefully I'm right....
+		 */
+		vma_prio_tree_foreach(vma_ptr, &iter,
+			&addr_space_ptr->i_mmap, 0, ULONG_MAX) {
+			if(vma_ptr->vm_file == file)
+				break;
+		}
+
+		/* A full pass found no vma for this file.  Whatever is
+		 * still in the tree belongs to other file objects, so
+		 * stop here instead of rescanning forever under the lock. */
+		if (!vma_ptr)
+			break;
+
+		printk(KERN_DEBUG "found shared map\n");
+		mm_ptr = vma_ptr->vm_mm;
+		spin_unlock(&addr_space_ptr->i_mmap_lock);
+		ret_code = remove_file_map(file, mm_ptr);
+		/* i_mmap_lock is already released at this point, so return
+		 * directly rather than reaching the unlock below. */
+		if (ret_code)
+			return ret_code;
+
+		spin_lock(&addr_space_ptr->i_mmap_lock);
+	}
+
+	spin_unlock(&addr_space_ptr->i_mmap_lock);
+	return 0;
+}
+
+/* Unmap every nonlinear mapping of @file found on the i_mmap_nonlinear
+ * list of @addr_space_ptr.  Returns 0 once none is left, or the error
+ * from remove_file_map(); i_mmap_lock is not held on return.
+ */
+static int remove_nonlinear_mappings( struct file *file,
+	struct address_space *addr_space_ptr)
+{
+	struct vm_area_struct *vma_ptr;
+	struct vm_area_struct *found;
+	struct mm_struct *mm_ptr;
+	int ret_code;
+
+	DEBUG_FUMOUNT;
+
+	spin_lock(&addr_space_ptr->i_mmap_lock);
+
+	while (!list_empty(&addr_space_ptr->i_mmap_nonlinear)) {
+		/* Nonlinear vmas are linked on this list through
+		 * shared.vm_set.list (see vma_nonlinear_insert()), not
+		 * through anon_vma_node. */
+		found = NULL;
+		list_for_each_entry(vma_ptr, &addr_space_ptr->i_mmap_nonlinear,
+				shared.vm_set.list) {
+			if (vma_ptr->vm_file == file) {
+				found = vma_ptr;
+				break;
+			}
+		}
+
+		if (!found)
+			break;
+
+		printk(KERN_DEBUG "found anon map\n");
+		mm_ptr = found->vm_mm;
+
+		spin_unlock(&addr_space_ptr->i_mmap_lock);
+		ret_code = remove_file_map(file, mm_ptr);
+		if (ret_code)
+			return ret_code;
+		spin_lock(&addr_space_ptr->i_mmap_lock);
+	}
+
+	spin_unlock(&addr_space_ptr->i_mmap_lock);
+	return 0;
+}
+
+/* remove_file_mappings is a back door to do_munmap when the file object is
+ * known but the context may be different from the process context that created
+ * the mapping in the first place.  Used by fumount to remove the mappings and
+ * release the associated file reference prior to forcing the file object
+ * closed.
+ *
+ * Returns -EBADF if @file is NULL or lacks a dentry, inode or mapping,
+ * otherwise 0 once neither mapping list holds a vma for @file.  A failed
+ * removal pass is retried from the top.
+ */
+int remove_file_mappings(struct file *file)
+{
+	struct address_space *addr_space_ptr;
+	struct dentry *dentry_ptr;
+	struct inode *inode_ptr;
+
+	int ret_code;
+
+	DEBUG_FUMOUNT;
+	printk(KERN_DEBUG "Remove_file_mappings called.\n");
+	if (!file )
+		return -EBADF;
+
+	dentry_ptr = file->f_dentry;
+	if (!dentry_ptr)
+		return -EBADF;
+
+	inode_ptr = dentry_ptr->d_inode;
+	if (!inode_ptr)
+		return -EBADF;
+
+	addr_space_ptr = file->f_mapping;
+check_for_maps:
+	ret_code = 0;
+	if (!addr_space_ptr)
+		return -EBADF;
+
+	if (!prio_tree_empty(&addr_space_ptr->i_mmap)) {
+		ret_code = remove_shared_file_mappings(file, addr_space_ptr);
+		if (ret_code) {
+			printk(KERN_DEBUG "%s: incomplete shared map removal, retry\n", __func__);
+			goto check_for_maps;
+		}
+	}
+
+	if (!list_empty(&addr_space_ptr->i_mmap_nonlinear) ) {
+		ret_code = remove_nonlinear_mappings(file, addr_space_ptr);
+		if (ret_code) {
+			printk(KERN_DEBUG "%s: incomplete nonlinear map removal, retry\n", __func__);
+			goto check_for_maps;
+		}
+	}
+
+	/* Both helpers return 0 only after a scan that found no vma for
+	 * this file.  Do not insist on the lists being empty: other file
+	 * objects may still map the same address_space, and waiting for
+	 * them would never terminate. */
+	printk(KERN_DEBUG "%s: no more mappings\n", __func__);
+	return ret_code;
+}
+
+#endif /* CONFIG_FUMOUNT */
+
 /*
  *  this is really a simplified "do_mmap".  it only handles
  *  anonymous maps.
eventually we may be able to do some