G-safe-unlink.patch Fixes a long-standing problem in reiserfs where disk space is leaked if a crash occurs while some process still holds a reference to an unlinked file. diff -rup linux-2.4.6/fs/reiserfs/inode.c linux-2.4.6.cleanup/fs/reiserfs/inode.c --- linux-2.4.6/fs/reiserfs/inode.c Wed Jul 11 23:05:13 2001 +++ linux-2.4.6.cleanup/fs/reiserfs/inode.c Wed Jul 11 21:48:10 2001 @@ -44,14 +44,21 @@ void reiserfs_delete_inode (struct inode journal_end(&th, inode->i_sb, jbegin_count) ; up (&inode->i_sem); + + /* all items of file are deleted, so we can remove "save" link */ + remove_save_link (inode); + } else { /* no object items are in the tree */ ; } + clear_inode (inode); /* note this must go after the journal_end to prevent deadlock */ - unlock_kernel() ; + + unlock_kernel (); } + static void _make_cpu_key (struct cpu_key * key, int version, __u32 dirid, __u32 objectid, loff_t offset, int type, int length) { @@ -1664,11 +1671,21 @@ void reiserfs_truncate_file(struct inode ** (it will unmap bh if it packs). */ prevent_flush_page_lock(page, p_s_inode) ; - journal_begin(&th, p_s_inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 ) ; + + /* it is enough to reserve space in transaction for 2 balancings: one for + "save" link adding and another for the first cut_from_item. 
1 is for + update_sd */ + journal_begin(&th, p_s_inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1) ; windex = push_journal_writer("reiserfs_vfs_truncate_file") ; + + reiserfs_do_truncate (&th, p_s_inode, page, update_timestamps) ; pop_journal_writer(windex) ; - journal_end(&th, p_s_inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 ) ; + journal_end(&th, p_s_inode->i_sb, JOURNAL_PER_BALANCE_CNT * 2 + 1 ) ; + + if (update_timestamps) + remove_save_link (p_s_inode); + allow_flush_page_lock(page, p_s_inode) ; if (page) { diff -rup linux-2.4.6/fs/reiserfs/namei.c linux-2.4.6.cleanup/fs/reiserfs/namei.c --- linux-2.4.6/fs/reiserfs/namei.c Wed Jul 11 23:05:13 2001 +++ linux-2.4.6.cleanup/fs/reiserfs/namei.c Wed Jul 11 21:48:10 2001 @@ -711,11 +711,14 @@ int reiserfs_rmdir (struct inode * dir, struct inode * inode; int windex ; struct reiserfs_transaction_handle th ; - int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3; + int jbegin_count; INITIALIZE_PATH (path); struct reiserfs_dir_entry de; + /* we will be doing 2 balancings and update 2 stat data */ + jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + 2; + journal_begin(&th, dir->i_sb, jbegin_count) ; windex = push_journal_writer("reiserfs_rmdir") ; @@ -756,6 +759,9 @@ int reiserfs_rmdir (struct inode * dir, dir->i_blocks = ((dir->i_size + 511) >> 9); reiserfs_update_sd (&th, dir); + /* prevent empty directory from getting lost */ + add_save_link (&th, inode); + pop_journal_writer(windex) ; journal_end(&th, dir->i_sb, jbegin_count) ; reiserfs_check_path(&path) ; @@ -787,7 +793,16 @@ int reiserfs_unlink (struct inode * dir, INITIALIZE_PATH (path); int windex ; struct reiserfs_transaction_handle th ; - int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3; + int jbegin_count; + + inode = dentry->d_inode; + + /* in this transaction we will be doing at least one balancing and update + two stat datas */ + jbegin_count = JOURNAL_PER_BALANCE_CNT + 2; + if (inode->i_nlink < 2) + /* reserve space in the transaction for adding "save" link */ + jbegin_count += 
JOURNAL_PER_BALANCE_CNT; journal_begin(&th, dir->i_sb, jbegin_count) ; windex = push_journal_writer("reiserfs_unlink") ; @@ -797,7 +812,6 @@ int reiserfs_unlink (struct inode * dir, retval = -ENOENT; goto end_unlink; } - inode = dentry->d_inode; if (de.de_objectid != inode->i_ino) { // FIXME: compare key of an object and a key found in the @@ -825,6 +839,10 @@ int reiserfs_unlink (struct inode * dir, dir->i_ctime = dir->i_mtime = CURRENT_TIME; reiserfs_update_sd (&th, dir); + if (!inode->i_nlink) + /* prevent file from getting lost */ + add_save_link (&th, inode); + pop_journal_writer(windex) ; journal_end(&th, dir->i_sb, jbegin_count) ; reiserfs_check_path(&path) ; @@ -1027,9 +1045,14 @@ int reiserfs_rename (struct inode * old_ struct inode * old_inode, * new_inode; int windex ; struct reiserfs_transaction_handle th ; - int jbegin_count = JOURNAL_PER_BALANCE_CNT * 3; + int jbegin_count ; + /* two balancings: old name removal, new name insertion or "save" link, + stat data updates: old directory and new directory and maybe block + containing ".." of renamed directory */ + jbegin_count = JOURNAL_PER_BALANCE_CNT * 3 + 3; + old_inode = old_dentry->d_inode; new_inode = new_dentry->d_inode; @@ -1137,13 +1160,6 @@ int reiserfs_rename (struct inode * old_ reiserfs_restore_prepared_buffer (old_inode->i_sb, new_de.de_bh); if (S_ISDIR(old_inode->i_mode)) reiserfs_restore_prepared_buffer (old_inode->i_sb, dot_dot_de.de_bh); -#if 0 - // FIXME: do we need this? shouldn't we simply continue? 
- run_task_queue(&tq_disk); - current->policy |= SCHED_YIELD; - /*current->counter = 0;*/ - schedule(); -#endif continue; } @@ -1166,9 +1182,10 @@ int reiserfs_rename (struct inode * old_ new_dir->i_ctime = new_dir->i_mtime = CURRENT_TIME; if (new_inode) { - // adjust link number of the victim + /* if it is empty directory or file with link count == 1 - we have to + "save" link it to guarantee file body removal */ if (S_ISDIR(new_inode->i_mode)) { - DEC_DIR_INODE_NLINK(new_inode) + new_inode->i_nlink = 0; } else { new_inode->i_nlink--; } @@ -1176,21 +1193,17 @@ int reiserfs_rename (struct inode * old_ } if (S_ISDIR(old_inode->i_mode)) { - //if (dot_dot_de.de_bh) { - // adjust ".." of renamed directory + /* adjust ".." of renamed directory */ set_ino_in_dir_entry (&dot_dot_de, INODE_PKEY (new_dir)); journal_mark_dirty (&th, new_dir->i_sb, dot_dot_de.de_bh); - DEC_DIR_INODE_NLINK(old_dir) - if (new_inode) { - if (S_ISDIR(new_inode->i_mode)) { - DEC_DIR_INODE_NLINK(new_inode) - } else { - new_inode->i_nlink--; - } - } else { + if (!new_inode) + /* there (in new_dir) was no directory, so it got new link (".." + of renamed directory) */ INC_DIR_INODE_NLINK(new_dir) - } + + /* this is removal of ".." of the renamed dir */ + DEC_DIR_INODE_NLINK(old_dir); } // looks like in 2.3.99pre3 brelse is atomic. so we can use pathrelse @@ -1208,8 +1221,12 @@ int reiserfs_rename (struct inode * old_ reiserfs_update_sd (&th, old_dir); reiserfs_update_sd (&th, new_dir); - if (new_inode) + + if (new_inode) { + if (new_inode->i_nlink == 0) + add_save_link (&th, new_inode); reiserfs_update_sd (&th, new_inode); + } pop_journal_writer(windex) ; journal_end(&th, old_dir->i_sb, jbegin_count) ; diff -rup linux-2.4.6/fs/reiserfs/stree.c linux-2.4.6.cleanup/fs/reiserfs/stree.c --- linux-2.4.6/fs/reiserfs/stree.c Wed Jul 11 23:05:13 2001 +++ linux-2.4.6.cleanup/fs/reiserfs/stree.c Wed Jul 11 22:53:46 2001 @@ -727,8 +727,12 @@ int search_by_key (struct super_block * continue; } - RFALSE( ! 
key_in_buffer(p_s_search_path, p_s_key, p_s_sb), - "PAP-5130: key is not in the buffer"); + /* only check that key is in buffer if p_s_key is not equal to the + MAX_KEY. Latter case is only possible in "finish_unfinished()" + processing during mount. */ + RFALSE( COMP_KEYS( &MAX_KEY, p_s_key ) && + ! key_in_buffer(p_s_search_path, p_s_key, p_s_sb), + "PAP-5130: key is not in the buffer" ); #ifdef CONFIG_REISERFS_CHECK if ( cur_tb ) { print_cur_tb ("5140"); @@ -1401,7 +1405,7 @@ int reiserfs_delete_item (struct reiserf /* this deletes item which never gets split */ -static void reiserfs_delete_solid_item (struct reiserfs_transaction_handle *th, +void reiserfs_delete_solid_item (struct reiserfs_transaction_handle *th, struct key * key) { struct tree_balance tb; @@ -1413,16 +1417,17 @@ static void reiserfs_delete_solid_item ( le_key2cpu_key (&cpu_key, key); + while (1) { retval = search_item (th->t_super, &cpu_key, &path); if (retval == IO_ERROR) { - reiserfs_warning ("vs-: reiserfs_delete_solid_item: " + reiserfs_warning ("vs-5360: reiserfs_delete_solid_item: " "i/o failure occurred trying to delete %K\n", &cpu_key); break; } if (retval != ITEM_FOUND) { pathrelse (&path); - reiserfs_warning ("vs-: reiserfs_delete_solid_item: %k not found", + reiserfs_warning ("vs-5380: reiserfs_delete_solid_item: %k not found\n", key); break; } @@ -1442,7 +1447,7 @@ static void reiserfs_delete_solid_item ( } // IO_ERROR, NO_DISK_SPACE, etc - reiserfs_warning ("vs-: reiserfs_delete_solid_item: " + reiserfs_warning ("vs-5400: reiserfs_delete_solid_item: " "could not delete %K due to fix_nodes failure\n", &cpu_key); unfix_nodes (&tb); break; @@ -1783,6 +1788,15 @@ void reiserfs_do_truncate (struct reiser pathrelse(&s_search_path); return; } + + + if (update_timestamps) + /* we are doing real truncate: if the system crashes before the last + transaction of truncating gets committed - on reboot the file + either appears truncated properly or not truncated at all */ + add_save_link (th, 
p_s_inode); + + /* Update key to search for the last file item. */ set_cpu_key_k_offset (&s_item_key, n_file_size); @@ -1819,7 +1833,6 @@ void reiserfs_do_truncate (struct reiser if (update_timestamps) { p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME; - // FIXME: sd gets wrong size here } reiserfs_update_sd(th, p_s_inode) ; @@ -1838,6 +1851,10 @@ void reiserfs_do_truncate (struct reiser p_s_inode->i_mtime = p_s_inode->i_ctime = CURRENT_TIME; } reiserfs_update_sd (th, p_s_inode); + + + if (update_timestamps) + remove_save_link (p_s_inode); pathrelse(&s_search_path) ; } diff -rup linux-2.4.6/fs/reiserfs/super.c linux-2.4.6.cleanup/fs/reiserfs/super.c --- linux-2.4.6/fs/reiserfs/super.c Wed Jul 11 23:05:13 2001 +++ linux-2.4.6.cleanup/fs/reiserfs/super.c Wed Jul 11 21:57:58 2001 @@ -664,6 +664,85 @@ int function2code (hashf_t func) return 0; } +extern const struct key MAX_KEY; + +/* look for uncompleted unlinks and truncates and complete them */ +static void finish_unfinished (struct super_block * s) +{ + INITIALIZE_PATH (path); + struct cpu_key save_link_key, key; + int retval; + struct item_head * ih; + struct buffer_head * bh; + int item_pos; + char * item; + int done; + struct inode * inode; + + + /* compose key to look for "save" links */ + save_link_key.version = KEY_FORMAT_1; + save_link_key.on_disk_key = MAX_KEY; + save_link_key.key_length = 3; + + done = 0; + while (1) { + retval = search_item (s, &save_link_key, &path); + if (retval != ITEM_NOT_FOUND) { + reiserfs_warning ("vs-2140: finish_unfinished: search_by_key returned %d\n", + retval); + break; + } + + bh = get_bh (&path); + item_pos = get_item_pos (&path); + if (item_pos != B_NR_ITEMS (bh)) { + reiserfs_warning ("vs-2060: finish_unfinished: wrong position found\n"); + break; + } + item_pos --; + ih = B_N_PITEM_HEAD (bh, item_pos); + + if (le32_to_cpu (ih->ih_key.k_dir_id) != MAX_KEY_OBJECTID) + /* there are no "save" links anymore */ + break; + + /* reiserfs_iget needs k_dirid and k_objectid 
only */ + item = B_I_PITEM (bh, ih); + key.on_disk_key.k_dir_id = le32_to_cpu (*(__u32 *)item); + key.on_disk_key.k_objectid = le32_to_cpu (ih->ih_key.k_objectid); + pathrelse (&path); + + inode = reiserfs_iget (s, &key); + if (!inode) { + reiserfs_warning ("vs-2180: finish_unfinished: iget failed\n"); + break; + } + + if (inode->i_nlink) { + /* not completed truncate found. New size was committed together + with "save" link */ + reiserfs_warning ("Truncating %k to %Ld ..", + INODE_PKEY (inode), inode->i_size); + reiserfs_truncate_file (inode, 0/*don't update modification time*/); + remove_save_link (inode); + } else { + /* not completed unlink (rmdir) found */ + reiserfs_warning ("Removing %k..", INODE_PKEY (inode)); + /* removal gets completed in iput */ + } + + iput (inode); + reiserfs_warning ("done\n"); + done ++; + } + + pathrelse (&path); + if (done) + reiserfs_warning ("There were %d uncompleted unlinks/truncates. " + "Completed\n", done); +} + // // a portion of this function, particularly the VFS interface portion, // was derived from minix or ext2's analog and evolved as the @@ -803,11 +882,15 @@ struct super_block * reiserfs_read_super journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB (s)); journal_end(&th, s, 1) ; + + /* look for files which were to be removed in previous session */ + finish_unfinished (s); + s->s_dirt = 0; } else { struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s); - if (strncmp (rs->s_magic, REISER2FS_SUPER_MAGIC_STRING, - strlen ( REISER2FS_SUPER_MAGIC_STRING))) { + if (strncmp (rs->s_magic, reiser2fs_super_magic_string, + strlen ( reiser2fs_super_magic_string))) { reiserfs_warning("reiserfs: using 3.5.x disk format\n") ; } } @@ -844,6 +927,71 @@ struct super_block * reiserfs_read_super } +/* to protect file being unlinked from getting lost we "safe" link files + being unlinked. This link will be deleted in the same transaction with last + item of file. 
On mounting the filesystem we scan all these links and remove + files which almost got lost */ +void add_save_link (struct reiserfs_transaction_handle * th, + struct inode * inode) +{ + INITIALIZE_PATH (path); + int retval; + struct cpu_key key; + struct item_head ih; + __u32 link; + + + /* setup key of "save" link */ + key.version = KEY_FORMAT_1; + key.on_disk_key.k_dir_id = MAX_KEY_OBJECTID; + key.on_disk_key.k_objectid = inode->i_ino; + set_cpu_key_k_offset (&key, 1); + set_cpu_key_k_type (&key, TYPE_INDIRECT); + key.key_length = 3; + + /* look for its place in the tree */ + retval = search_item (inode->i_sb, &key, &path); + if (retval != ITEM_NOT_FOUND) { + reiserfs_warning ("vs-2100: add_save_link:" + "search_by_key returned %d\n", retval); + pathrelse (&path); + return; + } + + /* compose "save" link itself */ + make_le_item_head (&ih, &key, key.version, 1, TYPE_INDIRECT, + 4/*length*/, 0/*free space*/); + link = cpu_to_le32 (INODE_PKEY (inode)->k_dir_id); + + /* put "save" link into tree */ + retval = reiserfs_insert_item (th, &path, &key, &ih, (char *)&link); + if (retval) + reiserfs_warning ("vs-2120: add_save_link: insert_item returned %d\n", + retval); +} + + +/* this opens transaction unlike add_save_link */ +void remove_save_link (struct inode * inode) +{ + struct reiserfs_transaction_handle th; + struct key key; + + + /* we are going to do one balancing only */ + journal_begin (&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT); + + /* setup key of "save" link */ + key.k_dir_id = cpu_to_le32 (MAX_KEY_OBJECTID); + key.k_objectid = INODE_PKEY (inode)->k_objectid; + set_le_key_k_offset (KEY_FORMAT_1, &key, 1); + set_le_key_k_type (KEY_FORMAT_1, &key, TYPE_INDIRECT); + + reiserfs_delete_solid_item (&th, &key); + + journal_end (&th, inode->i_sb, JOURNAL_PER_BALANCE_CNT); +} + // // a portion of this function, particularly the VFS interface portion, // was derived from minix or ext2's analog and evolved as the @@ -885,6 +1033,7 @@ static void __exit 
exit_reiserfs_fs(void reiserfs_proc_info_global_done(); unregister_filesystem(&reiserfs_fs_type); } + module_init(init_reiserfs_fs) ; module_exit(exit_reiserfs_fs) ; diff -rup linux-2.4.6/include/linux/reiserfs_fs.h linux-2.4.6.cleanup/include/linux/reiserfs_fs.h --- linux-2.4.6/include/linux/reiserfs_fs.h Wed Jul 11 23:05:13 2001 +++ linux-2.4.6.cleanup/include/linux/reiserfs_fs.h Wed Jul 11 22:51:46 2001 @@ -252,6 +252,8 @@ struct unfm_nodeinfo { #define ITEM_VERSION_1 0 #define ITEM_VERSION_2 1 +#define KEY_FORMAT_1 0 +#define KEY_FORMAT_2 1 /* loff_t - long long */ @@ -1392,10 +1394,6 @@ extern struct item_operations * item_ops #define COMP_SHORT_KEYS comp_short_keys #define keys_of_same_object comp_short_keys -/*#define COMP_KEYS(p_s_key1, p_s_key2) comp_keys((unsigned long *)(p_s_key1), (unsigned long *)(p_s_key2)) -#define COMP_SHORT_KEYS(p_s_key1, p_s_key2) comp_short_keys((unsigned long *)(p_s_key1), (unsigned long *)(p_s_key2))*/ - - /* number of blocks pointed to by the indirect item */ #define I_UNFM_NUM(p_s_ih) ( (p_s_ih)->ih_item_len / UNFM_P_SIZE ) @@ -1691,15 +1689,13 @@ int reiserfs_delete_item (struct reiserf struct buffer_head * p_s_un_bh); +void reiserfs_delete_solid_item (struct reiserfs_transaction_handle *th, + struct key * key); void reiserfs_delete_object (struct reiserfs_transaction_handle *th, struct inode * p_s_inode); void reiserfs_do_truncate (struct reiserfs_transaction_handle *th, struct inode * p_s_inode, struct page *, int update_timestamps); -// -//void lock_inode_to_convert (struct inode * p_s_inode); -//void unlock_inode_after_convert (struct inode * p_s_inode); -//void increment_i_read_sync_counter (struct inode * p_s_inode); -//void decrement_i_read_sync_counter (struct inode * p_s_inode); + #define block_size(inode) ((inode)->i_sb->s_blocksize) @@ -1729,8 +1725,6 @@ void make_cpu_key (struct cpu_key * cpu_ void make_le_item_head (struct item_head * ih, const struct cpu_key * key, int version, loff_t offset, int type, int 
length, int entry_count); -/*void store_key (struct key * key); -void forget_key (struct key * key);*/ int reiserfs_get_block (struct inode * inode, long block, struct buffer_head * bh_result, int create); struct inode * reiserfs_iget (struct super_block * s, @@ -1738,7 +1732,7 @@ struct inode * reiserfs_iget (struct sup void reiserfs_read_inode (struct inode * inode) ; void reiserfs_read_inode2(struct inode * inode, void *p) ; void reiserfs_delete_inode (struct inode * inode); -extern int reiserfs_notify_change(struct dentry * dentry, struct iattr * attr); +int reiserfs_notify_change(struct dentry * dentry, struct iattr * attr); void reiserfs_write_inode (struct inode * inode, int) ; /* nfsd support functions */ @@ -1772,15 +1766,12 @@ int reiserfs_link (struct dentry * old_d int reiserfs_rename (struct inode * old_dir, struct dentry *old_dentry, struct inode * new_dir, struct dentry *new_dentry); /* super.c */ -inline void reiserfs_mark_buffer_dirty (struct buffer_head * bh, int flag); -inline void reiserfs_mark_buffer_clean (struct buffer_head * bh); void reiserfs_write_super (struct super_block * s); void reiserfs_put_super (struct super_block * s); int reiserfs_remount (struct super_block * s, int * flags, char * data); -/*int read_super_block (struct super_block * s, int size); -int read_bitmaps (struct super_block * s); -int read_old_bitmaps (struct super_block * s); -int read_old_super_block (struct super_block * s, int size);*/ +void add_save_link (struct reiserfs_transaction_handle * th, + struct inode * inode); +void remove_save_link (struct inode * inode); struct super_block * reiserfs_read_super (struct super_block * s, void * data, int silent); int reiserfs_statfs (struct super_block * s, struct statfs * buf); .