diff -urN v2.4.19p7/fs/buffer.c linux/fs/buffer.c --- v2.4.19p7/fs/buffer.c Thu Apr 18 16:30:13 2002 +++ linux/fs/buffer.c Wed May 1 03:54:13 2002 @@ -325,6 +325,8 @@ lock_super(sb); if (sb->s_dirt && sb->s_op && sb->s_op->write_super) sb->s_op->write_super(sb); + if (sb->s_op && sb->s_op->commit_super) + sb->s_op->commit_super(sb); unlock_super(sb); unlock_kernel(); @@ -344,7 +346,7 @@ lock_kernel(); sync_inodes(dev); DQUOT_SYNC(dev); - sync_supers(dev); + commit_supers(dev); unlock_kernel(); return sync_buffers(dev, 1); diff -urN v2.4.19p7/fs/reiserfs/bitmap.c linux/fs/reiserfs/bitmap.c --- v2.4.19p7/fs/reiserfs/bitmap.c Thu Apr 18 16:30:16 2002 +++ linux/fs/reiserfs/bitmap.c Wed May 1 03:54:13 2002 @@ -122,7 +122,6 @@ set_sb_free_blocks( rs, sb_free_blocks(rs) + 1 ); journal_mark_dirty (th, s, sbh); - s->s_dirt = 1; } void reiserfs_free_block (struct reiserfs_transaction_handle *th, @@ -433,7 +432,6 @@ /* update free block count in super block */ PUT_SB_FREE_BLOCKS( s, SB_FREE_BLOCKS(s) - init_amount_needed ); journal_mark_dirty (th, s, SB_BUFFER_WITH_SB (s)); - s->s_dirt = 1; return CARRY_ON; } diff -urN v2.4.19p7/fs/reiserfs/do_balan.c linux/fs/reiserfs/do_balan.c --- v2.4.19p7/fs/reiserfs/do_balan.c Mon Apr 1 13:24:15 2002 +++ linux/fs/reiserfs/do_balan.c Wed May 1 03:54:31 2002 @@ -437,24 +437,13 @@ ih_item_len( B_N_PITEM_HEAD(tb->L[0],n+item_pos-ret_val)), l_n,body, zeros_num > l_n ? l_n : zeros_num ); - +#if 0 RFALSE( l_n && is_indirect_le_ih(B_N_PITEM_HEAD (tb->L[0], n + item_pos - ret_val)), "PAP-12110: pasting more than 1 unformatted node pointer into indirect item"); - - /* 0-th item in S0 can be only of DIRECT type when l_n != 0*/ - { - int version; - - version = ih_version (B_N_PITEM_HEAD (tbS0, 0)); - set_le_key_k_offset (version, B_N_PKEY (tbS0, 0), - le_key_k_offset (version, B_N_PKEY (tbS0, 0)) + l_n); - version = ih_version (B_N_PITEM_HEAD(tb->CFL[0],tb->lkey[0])); - set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]), - le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0])) + l_n); - } +#endif /* Calculate new body, position in item and insert_size[0] */ if ( l_n > zeros_num ) { @@ -522,7 +511,7 @@ ); /* if appended item is indirect item, put unformatted node into un list */ if (is_indirect_le_ih (pasted)) - set_ih_free_space (pasted, ((struct unfm_nodeinfo*)body)->unfm_freespace); + set_ih_free_space (pasted, 0); tb->insert_size[0] = 0; zeros_num = 0; } @@ -550,7 +539,7 @@ { /* new item or its part falls to R[0] */ if ( item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1 ) { /* part of new item falls into R[0] */ - int old_key_comp, old_len, r_zeros_number; + loff_t old_key_comp, old_len, r_zeros_number; const char * r_body; int version; loff_t offset; @@ -692,12 +681,17 @@ { int version; + unsigned long temp_rem = n_rem; version = ih_version (B_N_PITEM_HEAD (tb->R[0],0)); + if (is_indirect_le_key(version,B_N_PKEY(tb->R[0],0))){ + temp_rem = (n_rem / UNFM_P_SIZE) * + tb->tb_sb->s_blocksize; + } set_le_key_k_offset (version, B_N_PKEY(tb->R[0],0), - le_key_k_offset (version, B_N_PKEY(tb->R[0],0)) + n_rem); + le_key_k_offset (version, B_N_PKEY(tb->R[0],0)) + temp_rem); set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0]), - le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) + n_rem); + le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) + temp_rem); } /* k_offset (B_N_PKEY(tb->R[0],0)) += n_rem; k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/ @@ -721,13 +715,12 @@ 
leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0] - n_rem, r_body, r_zeros_number); if (is_indirect_le_ih (B_N_PITEM_HEAD(tb->R[0],0))) { - +#if 0 RFALSE( n_rem, "PAP-12160: paste more than one unformatted node pointer"); - - set_ih_free_space (B_N_PITEM_HEAD(tb->R[0],0), ((struct unfm_nodeinfo*)body)->unfm_freespace); +#endif + set_ih_free_space (B_N_PITEM_HEAD(tb->R[0],0), 0); } - tb->insert_size[0] = n_rem; if ( ! n_rem ) pos_in_item ++; @@ -766,7 +759,7 @@ } if (is_indirect_le_ih (pasted)) - set_ih_free_space (pasted, ((struct unfm_nodeinfo*)body)->unfm_freespace); + set_ih_free_space (pasted, 0); zeros_num = tb->insert_size[0] = 0; } } @@ -995,11 +988,18 @@ tmp = B_N_PITEM_HEAD(S_new[i],0); if (is_indirect_le_ih (tmp)) { +#if 0 if (n_rem) reiserfs_panic (tb->tb_sb, "PAP-12230: balance_leaf: invalid action with indirect item"); - set_ih_free_space (tmp, ((struct unfm_nodeinfo*)body)->unfm_freespace); +#endif + set_ih_free_space (tmp, 0); + set_le_ih_k_offset( tmp, le_ih_k_offset(tmp) + + (n_rem / UNFM_P_SIZE) * + tb->tb_sb->s_blocksize); + } else { + set_le_ih_k_offset( tmp, le_ih_k_offset(tmp) + + n_rem ); } - set_le_ih_k_offset( tmp, le_ih_k_offset(tmp) + n_rem ); } tb->insert_size[0] = n_rem; @@ -1045,7 +1045,7 @@ /* if we paste to indirect item update ih_free_space */ if (is_indirect_le_ih (pasted)) - set_ih_free_space (pasted, ((struct unfm_nodeinfo*)body)->unfm_freespace); + set_ih_free_space (pasted, 0); zeros_num = tb->insert_size[0] = 0; } } @@ -1141,11 +1141,12 @@ leaf_paste_in_buffer (&bi, item_pos, pos_in_item, tb->insert_size[0], body, zeros_num); if (is_indirect_le_ih (pasted)) { - +#if 0 RFALSE( tb->insert_size[0] != UNFM_P_SIZE, "PAP-12280: insert_size for indirect item must be %d, not %d", UNFM_P_SIZE, tb->insert_size[0]); - set_ih_free_space (pasted, ((struct unfm_nodeinfo*)body)->unfm_freespace); +#endif + set_ih_free_space (pasted, 0); } tb->insert_size[0] = 0; } diff -urN v2.4.19p7/fs/reiserfs/ibalance.c linux/fs/reiserfs/ibalance.c --- v2.4.19p7/fs/reiserfs/ibalance.c Mon Apr 1 13:23:44 2002 +++ linux/fs/reiserfs/ibalance.c Wed May 1 03:54:13 2002 @@ -632,7 +632,6 @@ /* use check_internal if new root is an internal node */ check_internal (new_root); /*&&&&&&&&&&&&&&&&&&&&&&*/ - tb->tb_sb->s_dirt = 1; /* do what is needed for buffer thrown from tree */ reiserfs_invalidate_buffer(tb, tbSh); @@ -950,7 +949,6 @@ PUT_SB_ROOT_BLOCK( tb->tb_sb, tbSh->b_blocknr ); PUT_SB_TREE_HEIGHT( tb->tb_sb, SB_TREE_HEIGHT(tb->tb_sb) + 1 ); do_balance_mark_sb_dirty (tb, tb->tb_sb->u.reiserfs_sb.s_sbh, 1); - tb->tb_sb->s_dirt = 1; } if ( tb->blknum[h] == 2 ) { diff -urN v2.4.19p7/fs/reiserfs/inode.c linux/fs/reiserfs/inode.c --- v2.4.19p7/fs/reiserfs/inode.c Thu Apr 18 16:30:16 2002 +++ linux/fs/reiserfs/inode.c Wed May 1 04:06:54 2002 @@ -240,83 +240,280 @@ reiserfs_update_inode_transaction(inode) ; } -// it is called by get_block when create == 0. Returns block number -// for 'block'-th logical block of file. When it hits direct item it -// returns 0 (being called from bmap) or read direct item into piece -// of page (bh_result) +/* +** Get block number from the indirect item by position. +*/ +static inline long iitem_get_blocknr (struct path *path, int pos) +{ + struct buffer_head * bh = get_last_bh (path); + struct item_head * ih = get_ih (path); + __u32 * ind_item; -// Please improve the english/clarity in the comment above, as it is -// hard to understand. 
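A note on the offset arithmetic in the do_balan.c hunks above: item keys address file bytes, while n_rem counts bytes of unformatted-node pointers moved within an indirect item, so the key delta has to be rescaled by blocksize/UNFM_P_SIZE before it is added to the key. A worked example (user-space, illustrative only; the 4KB block size is an assumption):

    #include <stdio.h>
    #define UNFM_P_SIZE 4   /* an on-disk block pointer is a __u32 */
    int main(void)
    {
        unsigned long blocksize = 4096;
        unsigned long n_rem = 12;   /* 12 bytes = 3 pointers stay in the item */
        /* old code moved the key by 12 file bytes; the patch moves it by
           3 whole blocks, which is what the pointers actually cover */
        printf("%lu\n", (n_rem / UNFM_P_SIZE) * blocksize);    /* 12288 */
        return 0;
    }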
+ if (is_indirect_le_ih (ih)) { + ind_item = (__u32 *)B_I_PITEM (bh, ih); + return le32_to_cpu(ind_item [path->pos_in_item + pos]); + } -static int _get_block_create_0 (struct inode * inode, long block, - struct buffer_head * bh_result, - int args) + return 0; +} + +/* +** Get the indirect item size. +*/ +static inline int iitem_size (struct path *path) { - INITIALIZE_PATH (path); - struct cpu_key key; - struct buffer_head * bh; - struct item_head * ih, tmp_ih; - int fs_gen ; - int blocknr; - char * p = NULL; - int chars; - int ret ; - int done = 0 ; - unsigned long offset ; + struct item_head * ih = get_ih (path); + return (I_UNFM_NUM(ih) - (path->pos_in_item + 1)); +} - // prepare the key to look for the 'block'-th block of file - make_cpu_key (&key, inode, - (loff_t)block * inode->i_sb->s_blocksize + 1, TYPE_ANY, 3); +/* +** Return "1" if last position of the indirect item reached, +** "0" - otherwise. +*/ +static inline int last_pos_of_iitem (struct path *path, int pos) +{ + struct item_head * ih = get_ih (path); + return ((path->pos_in_item + 1 + pos) >= (I_UNFM_NUM(ih)) ? 1 : 0); +} -research: - if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND) { - pathrelse (&path); - if (p) - kunmap(bh_result->b_page) ; - // We do not return -ENOENT if there is a hole but page is uptodate, because it means - // That there is some MMAPED data associated with it that is yet to be written to disk. - if ((args & GET_BLOCK_NO_HOLE) && !Page_Uptodate(bh_result->b_page) ) { - return -ENOENT ; - } - return 0 ; +/* +** Get the number of contiguous blocks in the indirect item +** from given pos to the end of the item. +*/ +static inline int iitem_amount_contiguous (struct path *path, int pos) +{ + long curr = 0; + long next = 0; + int item_size = iitem_size(path); + int amount = 1; + + if (pos >= item_size) { + return 0; + } + curr = iitem_get_blocknr(path, pos++); + + if (curr==0) { + while (pos <= item_size) { + next = iitem_get_blocknr(path, pos++); + if (next != 0) break; + amount++; + } + return amount; + } + + while (pos <= item_size) { + next = iitem_get_blocknr(path, pos++); + if ((next - curr) != 1) break; + curr = next; + amount++; + } + + return amount; +} + +/* +** Return "1" if fs changed and item moved. +*/ +static inline int need_research (int fs_gen, struct super_block * sb, + struct item_head * ih, struct path * path ) +{ + return (fs_changed(fs_gen, sb) && item_moved(ih, path)); +} + +/* Fill indirect item cache. +** Put N block numbers from current indirect item. 
+*/
+static inline void iicache_fill (struct inode * inode, long block,
+                                 struct path * path, struct cpu_key * key)
+{
+    long blocknr=0, blk=block;
+    int pos=0;
+    int amount=0,i=0;
+    int asize = 8;
+    int iic_size = (sizeof(struct iicache)) * asize;
+    struct super_block * sb = inode->i_sb;
+    struct item_head * ih = get_ih (path);
+
+    if (inode->u.reiserfs_i.iic==NULL) {
+        inode->u.reiserfs_i.iic = (struct iicache *)kmalloc(iic_size, GFP_NOFS);
+        if (inode->u.reiserfs_i.iic==NULL) {
+            return;
+        }
+        iicache_set_asize(inode, asize);
+    }
+    iicache_clear(inode);
+
+    if (search_for_position_by_key (sb, key, path) != POSITION_FOUND) {
+        return;
+    }
+
[ the head of this loop was garbled in the original posting; the loop
  condition and the two assignments are a reconstruction from the helpers
  defined above ]
+    for (i=0; i<iicache_get_asize(inode); i++) {
+        blocknr = iitem_get_blocknr(path, pos);
+        amount = iitem_amount_contiguous(path, pos);
+        if ((amount>0) && (amount<=1012)) {
+            iicache_set (inode, amount, IICACHE_SIZE, i);
+            iicache_set (inode, blk, IICACHE_BLOCK, i);
+            iicache_set (inode, blocknr, IICACHE_BLOCKNR,i);
+        } else {
+            break;
+        }
-
-    //
-    bh = get_last_bh (&path);
-    ih = get_ih (&path);
-    if (is_indirect_le_ih (ih)) {
-        __u32 * ind_item = (__u32 *)B_I_PITEM (bh, ih);
+
+        pos += amount;
+        blk += amount;
+
+        if (pos <= last_pos_of_iitem(path, pos)) continue;
+
+        if((blk * sb->s_blocksize) < inode->i_size) {
+            if ((i+1) < iicache_get_asize(inode)) {
+                set_cpu_key_k_offset (key, cpu_key_k_offset(key) + pos * sb->s_blocksize);
+
+                if (search_for_position_by_key (sb, key, path) != POSITION_FOUND) {
+                    break;
+                }
+
+                ih = get_ih (path);
+                if (!is_indirect_le_ih(ih) ||
+                    (le_ih_k_offset(ih) + path->pos_in_item) > inode->i_size) {
+                    break ;
+                }
+                pos=0; amount=0;
-        /* FIXME: here we could cache indirect item or part of it in
-           the inode to avoid search_by_key in case of subsequent
-           access to file */
-        blocknr = get_block_num(ind_item, path.pos_in_item) ;
-        ret = 0 ;
-        if (blocknr) {
-            bh_result->b_dev = inode->i_dev;
-            bh_result->b_blocknr = blocknr;
-            bh_result->b_state |= (1UL << BH_Mapped);
-        } else
-            // We do not return -ENOENT if there is a hole but page is uptodate, because it means
-            // That there is some MMAPED data associated with it that is yet to be written to disk.
-            if ((args & GET_BLOCK_NO_HOLE) && !Page_Uptodate(bh_result->b_page) ) {
-                ret = -ENOENT ;
-            }
+            }
+        }
+    }
-
-    pathrelse (&path);
-    if (p)
-        kunmap(bh_result->b_page) ;
-    return ret ;
+
+    if (i < iicache_get_asize(inode)) {
+        iicache_clear_from_pos(inode, i);
+    }
+
+
+}
+
+/*
+** Truncate indirect item cache.
+*/ +static inline void iicache_truncate (struct inode * inode) +{ + long new_file_end = inode->i_size >> inode->i_blkbits; + long slot=0, amount=0, blk=0; + + if (inode->u.reiserfs_i.iic==NULL) return; + + if (new_file_end==0) { + iicache_clear(inode); + } + + slot = block_is_iicached(inode, new_file_end); + if (slot > 0) { + slot--; + blk = iicache_get (inode, IICACHE_BLOCK, slot); + amount = new_file_end - blk; + if (amount > 0) { + iicache_set (inode, amount, IICACHE_SIZE, slot); + } else { + iicache_set (inode, 0, IICACHE_SIZE, slot); + iicache_set (inode, 0, IICACHE_BLOCK, slot); + iicache_set (inode, 0, IICACHE_BLOCKNR, slot); } + } + +} + + +/* +** Helper function for _get_block_create_0 +*/ +static inline int iitem_map_indirect_block (struct path * path, struct inode * inode, + long block, struct buffer_head * bh_result, + int args, struct cpu_key * key) +{ + struct buffer_head * bh = get_last_bh (path); + struct item_head * ih = get_ih (path); + __u32 * ind_item = (__u32 *)B_I_PITEM (bh, ih); + int blocknr= get_block_num(ind_item, path->pos_in_item) ; + + // We do not return -ENOENT if there is a hole but page is uptodate, because it means + // That there is some MMAPED data associated with it that is yet to be written to disk. + if (!blocknr && (args & GET_BLOCK_NO_HOLE)&& !Page_Uptodate(bh_result->b_page)) { + return -ENOENT ; + } + + // map the found block + set_block_dev_mapped (bh_result, blocknr, inode); + + return 0; +} + + + +/* +** Helper function for _get_block_create_0 +*/ +static inline void path_relse_page_unmap (struct path * path, char * p, + struct page * page) { + pathrelse(path); + if (p) + kunmap(page); +} + +/* +** Handle Indirect Item case and simple direct case. +** "gbc0" stands for "get_block_create_0" +*/ +static inline int gbc0_indirect_case (char * p, struct path * path, + struct inode *inode, long block, + struct buffer_head * bh_result, + int args, struct cpu_key * key) +{ + struct super_block * sb = inode->i_sb; + struct page * page = bh_result->b_page; + struct item_head * ih = get_ih (path); + int ret=0; + + // requested data are in indirect item(s) + if (is_indirect_le_ih (ih)) { + + ret = iitem_map_indirect_block (path, inode, block, bh_result, args, key); + if (ret<0) { + path_relse_page_unmap (path, p, page); + return ret; + } + + if (p) + kunmap(page); + + /* + ** Here we fill indirect item cache or part of it + ** in the inode to avoid search_by_key in case of + ** subsequent access to file. + */ + iicache_fill (inode, block, path, key); + pathrelse(path); + return 0 ; + } + + return 1; +} + +/* +** Direct Item case start. +** "gbc0" stands for "get_block_create_0" +*/ +static inline int gbc0_direct_case_start (char * p, struct path * path, + struct inode *inode, + struct buffer_head * bh_result, + int args) +{ + struct page * page = bh_result->b_page; // requested data are in direct item(s) if (!(args & GET_BLOCK_READ_DIRECT)) { - // we are called by bmap. FIXME: we can not map block of file - // when it is stored in direct item(s) - pathrelse (&path); - if (p) - kunmap(bh_result->b_page) ; - return -ENOENT; + // we are called by bmap. 
FIXME: we can not map block of file + // when it is stored in direct item(s) + path_relse_page_unmap (path, p, page); + return -ENOENT; } /* if we've got a direct item, and the buffer was uptodate, @@ -324,89 +521,199 @@ ** end, where we map the buffer and return */ if (buffer_uptodate(bh_result)) { - goto finished ; - } else - /* - ** grab_tail_page can trigger calls to reiserfs_get_block on up to date - ** pages without any buffers. If the page is up to date, we don't want - ** read old data off disk. Set the up to date bit on the buffer instead - ** and jump to the end - */ - if (Page_Uptodate(bh_result->b_page)) { - mark_buffer_uptodate(bh_result, 1); - goto finished ; + set_block_dev_mapped (bh_result, 0, inode); + path_relse_page_unmap (path, p, page); + return 0; + } else { + /* + ** grab_tail_page can trigger calls to reiserfs_get_block on up to date + ** pages without any buffers. If the page is up to date, we don't want + ** read old data off disk. Set the up to date bit on the buffer instead + ** and jump to the end + */ + if (Page_Uptodate(bh_result->b_page)) { + mark_buffer_uptodate(bh_result, 1); + set_block_dev_mapped (bh_result, 0, inode); + path_relse_page_unmap (path, p, page); + return 0; + } } + return 1; +} - // read file tail into part of page - offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1) ; - fs_gen = get_generation(inode->i_sb) ; - copy_item_head (&tmp_ih, ih); - - /* we only want to kmap if we are reading the tail into the page. - ** this is not the common case, so we don't kmap until we are - ** sure we need to. But, this means the item might move if - ** kmap schedules +/* +** Handle Direct Item case. +** "gbc0" stands for "get_block_create_0" +*/ +static inline void gbc0_direct_case (char * p, struct path * path, + struct inode *inode, + struct cpu_key * key) +{ + struct buffer_head * bh; + struct super_block * sb = inode->i_sb; + struct item_head * ih = get_ih (path); + int chars=0, done=0; + + do { + if (!is_direct_le_ih (ih)) { + BUG (); + } + /* make sure we don't read more bytes than actually exist in + ** the file. This can happen in odd cases where i_size isn't + ** correct, and when direct item padding results in a few + ** extra bytes at the end of the direct item */ - if (!p) { - p = (char *)kmap(bh_result->b_page) ; - if (fs_changed (fs_gen, inode->i_sb) && item_moved (&tmp_ih, &path)) { - goto research; - } + if ((le_ih_k_offset(ih) + path->pos_in_item) > inode->i_size) + break ; + + if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) { + chars = inode->i_size - (le_ih_k_offset(ih) - 1) - path->pos_in_item; + done = 1 ; + } else { + chars = ih_item_len(ih) - path->pos_in_item; } - p += offset ; - memset (p, 0, inode->i_sb->s_blocksize); - do { - if (!is_direct_le_ih (ih)) { - BUG (); - } - /* make sure we don't read more bytes than actually exist in - ** the file. 
This can happen in odd cases where i_size isn't - ** correct, and when direct item padding results in a few - ** extra bytes at the end of the direct item - */ - if ((le_ih_k_offset(ih) + path.pos_in_item) > inode->i_size) - break ; - if ((le_ih_k_offset(ih) - 1 + ih_item_len(ih)) > inode->i_size) { - chars = inode->i_size - (le_ih_k_offset(ih) - 1) - path.pos_in_item; - done = 1 ; - } else { - chars = ih_item_len(ih) - path.pos_in_item; - } - memcpy (p, B_I_PITEM (bh, ih) + path.pos_in_item, chars); - if (done) - break ; + bh = get_last_bh (path); + memcpy (p, B_I_PITEM (bh, ih) + path->pos_in_item, chars); - p += chars; + if (done) + break ; - if (PATH_LAST_POSITION (&path) != (B_NR_ITEMS (bh) - 1)) - // we done, if read direct item is not the last item of - // node FIXME: we could try to check right delimiting key - // to see whether direct item continues in the right - // neighbor or rely on i_size - break; + p += chars; - // update key to look for the next piece - set_cpu_key_k_offset (&key, cpu_key_k_offset (&key) + chars); - if (search_for_position_by_key (inode->i_sb, &key, &path) != POSITION_FOUND) - // we read something from tail, even if now we got IO_ERROR - break; - bh = get_last_bh (&path); - ih = get_ih (&path); - } while (1); + if (PATH_LAST_POSITION (path) != (B_NR_ITEMS (bh) - 1)) + // we done, if read direct item is not the last item of + // node FIXME: we could try to check right delimiting key + // to see whether direct item continues in the right + // neighbor or rely on i_size + break; - flush_dcache_page(bh_result->b_page) ; - kunmap(bh_result->b_page) ; + // update key to look for the next piece + set_cpu_key_k_offset (key, cpu_key_k_offset(key) + chars); -finished: - pathrelse (&path); - bh_result->b_blocknr = 0 ; - bh_result->b_dev = inode->i_dev; - mark_buffer_uptodate (bh_result, 1); - bh_result->b_state |= (1UL << BH_Mapped); + if (search_for_position_by_key (sb, key, path) != POSITION_FOUND) + // we read something from tail, even if now we got IO_ERROR + break; + + bh = get_last_bh (path); + ih = get_ih (path); + + } while (1); + +} + + +/* +** Helper function for _get_block_create_0 +** Check iicache. +** If needed block is in iicache we map it and return "1". +*/ +static int check_iicache (struct inode * inode, long block, + struct buffer_head * bh_result, int w_flag) +{ + struct super_block * sb = inode->i_sb; + int n=0, block_nr=0; + + /* + ** Here we use the cache of indirect item. + ** Getting the unfm_block number from the cache + ** we are trying to avoid some of the search_by_key() calls. + */ + if (inode->u.reiserfs_i.iic==NULL) { return 0; + } + // Check iicache and get the iicache array number + 1 , + // where the needed block_nr corresponded given logical block + // could be found. + n = block_is_iicached(inode, block); + + // if the iicache is not empty for this file and + // the requested logical block of file is cached + // then we return corresponded block number. + if (n>0) { + block_nr = iicache_get_blocknr_by_block(inode, block, n-1); + + if (w_flag && block_nr==0) return 0; /* do not write to hole */ + + if ((block_nr > 0)) { + set_block_dev_mapped (bh_result, block_nr, inode); + return 1; + } + } + return 0; } +// +// It is called by reiserfs_get_block when create == 0. +// Returns disk block number by logical block number of file. 
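For readers following the new iicache path: each slot caches one extent of the file as a (first logical block, first disk block, length) triple, with disk block 0 standing for a cached hole. check_iicache() above only has to scan the slots, which replaces a search_by_key() tree descent on every repeated access. A user-space model of the lookup (all names here are local to this sketch, not part of the patch):

    #include <stdio.h>
    struct slot { long block, blocknr, size; };
    static long lookup(struct slot *s, int nslots, long blk)
    {
        int i;
        for (i = 0; i < nslots; i++)
            if (s[i].size && blk >= s[i].block &&
                blk < s[i].block + s[i].size)
                return s[i].blocknr ? s[i].blocknr + (blk - s[i].block)
                                    : 0;    /* 0: cached hole */
        return -1;                          /* not cached */
    }
    int main(void)
    {
        struct slot s[2] = { { 0, 5000, 8 }, { 8, 0, 4 } };
        printf("%ld %ld %ld\n",
               lookup(s, 2, 3), lookup(s, 2, 9), lookup(s, 2, 20));
        return 0;   /* prints: 5003 0 -1 */
    }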
+//
+// When it hits the direct item it returns 0 (when called from bmap)
+// or reads the direct item into a piece of the page (bh_result).
+//
+static int _get_block_create_0 (struct inode * inode, long block,
+                    struct buffer_head * bh_result,
+                    int args)
+{
+    INITIALIZE_PATH (path);
+    struct cpu_key key;
+    struct item_head * ih, tmp_ih;
+    struct super_block * sb = inode->i_sb;
+    struct page * page = bh_result->b_page;
+    char * p = NULL;
+    unsigned long offset ;
+    int fs_gen=0, ret=0, block_iicached=0;
+
+
+    block_iicached = check_iicache (inode, block, bh_result, 0);
+    if (block_iicached) {
+        return 0;
+    }
+
+    // prepare the key to look for the 'block'-th block of file
+    offset = block * sb->s_blocksize + 1;
+    make_cpu_key (&key, inode, (loff_t)offset, TYPE_ANY, 3);
+
+    do {
+
+        if (search_for_position_by_key (sb, &key, &path) != POSITION_FOUND) {
+            path_relse_page_unmap (&path, p, page);
+            // We do not return -ENOENT if there is a hole but page is uptodate, because it means
+            // that there is some MMAPED data associated with it that is yet to be written to disk.
+            return (((args & GET_BLOCK_NO_HOLE) && !Page_Uptodate(bh_result->b_page)) ? (-ENOENT) : 0 ) ;
+        }
+
+        // check and handle indirect case
+        ret = gbc0_indirect_case (p, &path, inode, block, bh_result, args, &key);
+        if (ret <= 0)
+            return ret;
+
+        // start the direct case
+        ret = gbc0_direct_case_start (p, &path, inode, bh_result, args);
+        if (ret <= 0)
+            return ret;
+
+        // we should read the file tail into part of page.
+        offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1) ;
+        fs_gen = get_generation(sb) ;
+        ih = get_ih (&path);
+        copy_item_head (&tmp_ih, ih);
+        if (!p)
+            p=(char *)kmap(page);
+
+    } while (need_research(fs_gen, sb, &tmp_ih, &path));
+
+    // ok, we have a direct item and a kmapped page,
+    // do the copy from direct item to page now.
+    p += offset;
+    memset (p, 0, sb->s_blocksize);
+    gbc0_direct_case (p, &path, inode, &key);
+
+    flush_dcache_page(page) ;
+    path_relse_page_unmap (&path, p, page);
+    set_block_dev_mapped (bh_result, 0, inode);
+    mark_buffer_uptodate (bh_result, 1);
+    return 0;
+}

 // this is called to create file map. So, _get_block_create_0 will not
 // read direct item
@@ -560,10 +867,13 @@
     struct cpu_key key;
     struct buffer_head * bh, * unbh = 0;
     struct item_head * ih, tmp_ih;
+    struct super_block * sb = inode->i_sb;
     __u32 * item;
     int done;
     int fs_gen;
     int windex ;
+    int block_iicached=0;
+
     struct reiserfs_transaction_handle th ;
     /* space reserved in transaction batch:
        . 3 balancings in direct->indirect conversion
@@ -590,6 +900,7 @@
        return -EFBIG;
     }
+
     /* if !create, we aren't changing the FS, so we don't need to
     ** log anything, so we don't need to start a transaction
     */
@@ -601,19 +912,30 @@
        unlock_kernel() ;
        return ret;
     }
+    /*
+    ** If the iicache has the needed disk block number and it is not a hole,
+    ** we return it from iicache.
+ */ + block_iicached = check_iicache (inode, block, bh_result, 1); + if (block_iicached) { + unlock_kernel() ; + return 0; + } inode->u.reiserfs_i.i_flags |= i_pack_on_close_mask; windex = push_journal_writer("reiserfs_get_block") ; - /* set the key of the first byte in the 'block'-th block of file */ - make_cpu_key (&key, inode, new_offset, - TYPE_ANY, 3/*key length*/); + /* set the key of the first byte + in the 'block'-th block of file */ + make_cpu_key (&key, inode, new_offset, TYPE_ANY, 3); + if ((new_offset + inode->i_sb->s_blocksize - 1) > inode->i_size) { journal_begin(&th, inode->i_sb, jbegin_count) ; reiserfs_update_inode_transaction(inode) ; transaction_started = 1 ; } + research: retval = search_for_position_by_key (inode->i_sb, &key, &path); @@ -683,14 +1005,20 @@ inode->i_blocks += (inode->i_sb->s_blocksize / 512) ; reiserfs_update_sd(&th, inode) ; } + + /* + ** do inserting of new block and block_nr + ** to iicache as well + */ + iicache_insert(inode, block, unfm_ptr); + set_block_dev_mapped(bh_result, unfm_ptr, inode); pathrelse (&path); pop_journal_writer(windex) ; if (transaction_started) journal_end(&th, inode->i_sb, jbegin_count) ; - unlock_kernel() ; - + /* the item was found, so new blocks were not added to the file ** there is no need to make sure the inode is updated with this ** transaction @@ -811,36 +1139,69 @@ pointer to 'block'-th block use block, which is already allocated */ struct cpu_key tmp_key; - struct unfm_nodeinfo un = {0, 0}; + unp_t unf_single=0; // We use this in case we need to allocate only + // one block which is a fastpath + unp_t *un; + __u64 max_to_insert=MAX_ITEM_LEN(inode->i_sb->s_blocksize)/UNFM_P_SIZE; + __u64 blocks_needed; RFALSE( pos_in_item != ih_item_len(ih) / UNFM_P_SIZE, "vs-804: invalid position for append"); + /* indirect item has to be appended, set up key of that position */ make_cpu_key (&tmp_key, inode, le_key_k_offset (version, &(ih->ih_key)) + op_bytes_number (ih, inode->i_sb->s_blocksize), //pos_in_item * inode->i_sb->s_blocksize, TYPE_INDIRECT, 3);// key type is unimportant - - if (cpu_key_k_offset (&tmp_key) == cpu_key_k_offset (&key)) { + + blocks_needed = 1 + ((cpu_key_k_offset (&key) - cpu_key_k_offset (&tmp_key)) >> inode->i_sb->s_blocksize_bits); + RFALSE( blocks_needed < 0, "green-805: invalid offset"); + + if ( blocks_needed == 1 ) { + un = &unf_single; + } else { + un=kmalloc( min(blocks_needed,max_to_insert)*UNFM_P_SIZE, + GFP_ATOMIC); // We need to avoid scheduling. + if ( !un) { + un = &unf_single; + blocks_needed = 1; + max_to_insert = 0; + } + memset(un, 0, UNFM_P_SIZE * min(blocks_needed,max_to_insert)); + } + if ( blocks_needed <= max_to_insert) { /* we are going to add target block to the file. 
Use allocated block for that */ - un.unfm_nodenum = cpu_to_le32 (allocated_block_nr); + un[blocks_needed-1] = cpu_to_le32 (allocated_block_nr); set_block_dev_mapped (bh_result, allocated_block_nr, inode); bh_result->b_state |= (1UL << BH_New); done = 1; } else { /* paste hole to the indirect item */ + // If kmalloc failed, max_to_insert becomes zero and it means we + // only have space for one block + blocks_needed=max_to_insert?max_to_insert:1; } - retval = reiserfs_paste_into_item (&th, &path, &tmp_key, (char *)&un, UNFM_P_SIZE); + retval = reiserfs_paste_into_item (&th, &path, &tmp_key, (char *)un, UNFM_P_SIZE * blocks_needed); + + if (blocks_needed != 1) + kfree(un); + if (retval) { reiserfs_free_block (&th, allocated_block_nr); goto failure; } - if (un.unfm_nodenum) + if (done) { inode->i_blocks += inode->i_sb->s_blocksize / 512; + } else { + // We need to mark new file size in case this function will be + // interrupted/aborted later on. And we may do this only for + // holes. + inode->i_size += inode->i_sb->s_blocksize * blocks_needed; + } //mark_tail_converted (inode); } - + if (done == 1) break; @@ -1531,6 +1892,8 @@ /* item head of new item */ ih.ih_key.k_dir_id = INODE_PKEY (dir)->k_objectid; ih.ih_key.k_objectid = cpu_to_le32 (reiserfs_get_unused_objectid (th)); + + if (!ih.ih_key.k_objectid) { iput(inode) ; *err = -ENOMEM; @@ -1757,6 +2120,7 @@ */ void reiserfs_truncate_file(struct inode *p_s_inode, int update_timestamps) { struct reiserfs_transaction_handle th ; + struct super_block * sb = p_s_inode->i_sb; int windex ; /* we want the offset for the first byte after the end of the file */ @@ -1778,6 +2142,11 @@ bh = NULL ; } } + + /* + ** Truncate iicache as well + */ + iicache_truncate (p_s_inode); /* so, if page != NULL, we have a buffer head for the offset at ** the end of the file. 
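The reworked hole code above is worth a number: instead of pasting one zeroed pointer per pass through the loop, it kmallocs an array and pastes up to MAX_ITEM_LEN(blocksize)/UNFM_P_SIZE pointers at once. Illustrative arithmetic, assuming 4KB blocks with 24-byte block and item heads (so MAX_ITEM_LEN(4096) == 4048, which would also explain the 1012-pointer cap seen in iicache_fill()):

    #include <stdio.h>
    int main(void)
    {
        unsigned long long max_to_insert = (4096 - 24 - 24) / 4;   /* 1012 */
        unsigned long long hole = (100ULL << 20) / 4096;   /* 100MB = 25600 blocks */
        printf("%llu pastes instead of %llu\n",
               (hole + max_to_insert - 1) / max_to_insert, hole);
        return 0;   /* 26 pastes instead of 25600 */
    }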
if the bh is mapped, and bh->b_blocknr != 0, diff -urN v2.4.19p7/fs/reiserfs/journal.c linux/fs/reiserfs/journal.c --- v2.4.19p7/fs/reiserfs/journal.c Thu Apr 18 16:30:16 2002 +++ linux/fs/reiserfs/journal.c Wed May 1 03:54:14 2002 @@ -64,12 +64,15 @@ */ static int reiserfs_mounted_fs_count = 0 ; +static struct list_head kreiserfsd_supers = LIST_HEAD_INIT(kreiserfsd_supers); + /* wake this up when you add something to the commit thread task queue */ DECLARE_WAIT_QUEUE_HEAD(reiserfs_commit_thread_wait) ; /* wait on this if you need to be sure you task queue entries have been run */ static DECLARE_WAIT_QUEUE_HEAD(reiserfs_commit_thread_done) ; DECLARE_TASK_QUEUE(reiserfs_commit_thread_tq) ; +DECLARE_MUTEX(kreiserfsd_sem) ; #define JOURNAL_TRANS_HALF 1018 /* must be correct to keep the desc and commit structs at 4k */ @@ -576,17 +579,12 @@ /* lock the current transaction */ inline static void lock_journal(struct super_block *p_s_sb) { PROC_INFO_INC( p_s_sb, journal.lock_journal ); - while(atomic_read(&(SB_JOURNAL(p_s_sb)->j_wlock)) > 0) { - PROC_INFO_INC( p_s_sb, journal.lock_journal_wait ); - sleep_on(&(SB_JOURNAL(p_s_sb)->j_wait)) ; - } - atomic_set(&(SB_JOURNAL(p_s_sb)->j_wlock), 1) ; + down(&SB_JOURNAL(p_s_sb)->j_lock); } /* unlock the current transaction */ inline static void unlock_journal(struct super_block *p_s_sb) { - atomic_dec(&(SB_JOURNAL(p_s_sb)->j_wlock)) ; - wake_up(&(SB_JOURNAL(p_s_sb)->j_wait)) ; + up(&SB_JOURNAL(p_s_sb)->j_lock); } /* @@ -756,7 +754,6 @@ atomic_set(&(jl->j_commit_flushing), 0) ; wake_up(&(jl->j_commit_wait)) ; - s->s_dirt = 1 ; return 0 ; } @@ -1220,7 +1217,6 @@ if (run++ == 0) { goto loop_start ; } - atomic_set(&(jl->j_flushing), 0) ; wake_up(&(jl->j_flush_wait)) ; return ret ; @@ -1250,7 +1246,7 @@ while(i != start) { jl = SB_JOURNAL_LIST(s) + i ; age = CURRENT_TIME - jl->j_timestamp ; - if (jl->j_len > 0 && // age >= (JOURNAL_MAX_COMMIT_AGE * 2) && + if (jl->j_len > 0 && age >= JOURNAL_MAX_COMMIT_AGE && atomic_read(&(jl->j_nonzerolen)) > 0 && atomic_read(&(jl->j_commit_left)) == 0) { @@ -1325,6 +1321,10 @@ static int do_journal_release(struct reiserfs_transaction_handle *th, struct super_block *p_s_sb, int error) { struct reiserfs_transaction_handle myth ; + down(&kreiserfsd_sem); + list_del(&p_s_sb->u.reiserfs_sb.s_reiserfs_supers); + up(&kreiserfsd_sem); + /* we only want to flush out transactions if we were called with error == 0 */ if (!error && !(p_s_sb->s_flags & MS_RDONLY)) { @@ -1811,10 +1811,6 @@ jl = SB_JOURNAL_LIST(ct->p_s_sb) + ct->jindex ; flush_commit_list(ct->p_s_sb, SB_JOURNAL_LIST(ct->p_s_sb) + ct->jindex, 1) ; - if (jl->j_len > 0 && atomic_read(&(jl->j_nonzerolen)) > 0 && - atomic_read(&(jl->j_commit_left)) == 0) { - kupdate_one_transaction(ct->p_s_sb, jl) ; - } reiserfs_kfree(ct->self, sizeof(struct reiserfs_journal_commit_task), ct->p_s_sb) ; } @@ -1864,6 +1860,9 @@ ** then run the per filesystem commit task queue when we wakeup. 
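The j_wait/j_wlock replacement in the journal.c hunks deserves a comment: the old open-coded lock read the flag, then slept, then set the flag, and none of those steps were atomic with each other, so it presumably relied on the big kernel lock to avoid lost wakeups and two sleepers "acquiring" at once. A semaphore initialized to 1 gives the same exclusion without those caveats. Sketch of the two patterns (struct jnl stands in for struct reiserfs_journal; fragment, not patch code):

    struct jnl { atomic_t j_wlock; wait_queue_head_t j_wait; struct semaphore j_lock; };

    static void lock_journal_old(struct jnl *j)
    {
        while (atomic_read(&j->j_wlock) > 0)
            sleep_on(&j->j_wait);   /* a wakeup between test and sleep is lost */
        atomic_set(&j->j_wlock, 1); /* two waiters can both reach this line */
    }
    static void lock_journal_new(struct jnl *j)
    {
        down(&j->j_lock);           /* sema_init(&j->j_lock, 1) at journal init */
    }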
*/ static int reiserfs_journal_commit_thread(void *nullp) { + struct list_head *entry, *safe ; + struct super_block *s; + time_t last_run = 0; daemonize() ; @@ -1879,6 +1878,18 @@ while(TQ_ACTIVE(reiserfs_commit_thread_tq)) { run_task_queue(&reiserfs_commit_thread_tq) ; } + if (CURRENT_TIME - last_run > 5) { + down(&kreiserfsd_sem); + list_for_each_safe(entry, safe, &kreiserfsd_supers) { + s = list_entry(entry, struct super_block, + u.reiserfs_sb.s_reiserfs_supers); + if (!(s->s_flags & MS_RDONLY)) { + reiserfs_flush_old_commits(s); + } + } + up(&kreiserfsd_sem); + last_run = CURRENT_TIME; + } /* if there aren't any more filesystems left, break */ if (reiserfs_mounted_fs_count <= 0) { @@ -1953,13 +1964,12 @@ SB_JOURNAL(p_s_sb)->j_last = NULL ; SB_JOURNAL(p_s_sb)->j_first = NULL ; init_waitqueue_head(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ; - init_waitqueue_head(&(SB_JOURNAL(p_s_sb)->j_wait)) ; + sema_init(&SB_JOURNAL(p_s_sb)->j_lock, 1); SB_JOURNAL(p_s_sb)->j_trans_id = 10 ; SB_JOURNAL(p_s_sb)->j_mount_id = 10 ; SB_JOURNAL(p_s_sb)->j_state = 0 ; atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 0) ; - atomic_set(&(SB_JOURNAL(p_s_sb)->j_wlock), 0) ; SB_JOURNAL(p_s_sb)->j_cnode_free_list = allocate_cnodes(num_cnodes) ; SB_JOURNAL(p_s_sb)->j_cnode_free_orig = SB_JOURNAL(p_s_sb)->j_cnode_free_list ; SB_JOURNAL(p_s_sb)->j_cnode_free = SB_JOURNAL(p_s_sb)->j_cnode_free_list ? num_cnodes : 0 ; @@ -1989,6 +1999,7 @@ kernel_thread((void *)(void *)reiserfs_journal_commit_thread, NULL, CLONE_FS | CLONE_FILES | CLONE_VM) ; } + list_add(&p_s_sb->u.reiserfs_sb.s_reiserfs_supers, &kreiserfsd_supers); return 0 ; } @@ -2117,7 +2128,6 @@ th->t_trans_id = SB_JOURNAL(p_s_sb)->j_trans_id ; th->t_caller = "Unknown" ; unlock_journal(p_s_sb) ; - p_s_sb->s_dirt = 1; return 0 ; } @@ -2159,7 +2169,7 @@ reiserfs_panic(th->t_super, "journal-1577: handle trans id %ld != current trans id %ld\n", th->t_trans_id, SB_JOURNAL(p_s_sb)->j_trans_id); } - p_s_sb->s_dirt = 1 ; + p_s_sb->s_dirt |= S_SUPER_DIRTY; prepared = test_and_clear_bit(BH_JPrepared, &bh->b_state) ; /* already in this transaction, we are done */ @@ -2407,12 +2417,8 @@ ** flushes any old transactions to disk ** ends the current transaction if it is too old ** -** also calls flush_journal_list with old_only == 1, which allows me to reclaim -** memory and such from the journal lists whose real blocks are all on disk. -** -** called by sync_dev_journal from buffer.c */ -int flush_old_commits(struct super_block *p_s_sb, int immediate) { +int reiserfs_flush_old_commits(struct super_block *p_s_sb) { int i ; int count = 0; int start ; @@ -2429,8 +2435,7 @@ /* starting with oldest, loop until we get to the start */ i = (SB_JOURNAL_LIST_INDEX(p_s_sb) + 1) % JOURNAL_LIST_COUNT ; while(i != start) { - if (SB_JOURNAL_LIST(p_s_sb)[i].j_len > 0 && ((now - SB_JOURNAL_LIST(p_s_sb)[i].j_timestamp) > JOURNAL_MAX_COMMIT_AGE || - immediate)) { + if (SB_JOURNAL_LIST(p_s_sb)[i].j_len > 0 && ((now - SB_JOURNAL_LIST(p_s_sb)[i].j_timestamp) > JOURNAL_MAX_COMMIT_AGE)) { /* we have to check again to be sure the current transaction did not change */ if (i != SB_JOURNAL_LIST_INDEX(p_s_sb)) { flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + i, 1) ; @@ -2439,26 +2444,26 @@ i = (i + 1) % JOURNAL_LIST_COUNT ; count++ ; } + /* now, check the current transaction. 
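The thread change above amounts to a register-and-walk pattern: journal_init() adds each mounted reiserfs super_block to the global kreiserfsd_supers list, do_journal_release() removes it, and kreiserfsd walks the list at most every 5 seconds. Note that kreiserfsd_sem is what actually excludes a concurrent umount's list_del(); list_for_each_safe() alone would not make the walk safe. The minimal shape of the pattern (the my_* names are invented for this sketch):

    static LIST_HEAD(my_supers);
    static DECLARE_MUTEX(my_sem);

    void my_register(struct super_block *s)
    {
        down(&my_sem);
        list_add(&s->u.reiserfs_sb.s_reiserfs_supers, &my_supers);
        up(&my_sem);
    }
    void my_walk(void)
    {
        struct list_head *entry, *safe;
        down(&my_sem);
        list_for_each_safe(entry, safe, &my_supers) {
            struct super_block *s = list_entry(entry, struct super_block,
                                               u.reiserfs_sb.s_reiserfs_supers);
            if (!(s->s_flags & MS_RDONLY))
                reiserfs_flush_old_commits(s);
        }
        up(&my_sem);
    }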
If there are no writers, and it is too old, finish it, and ** force the commit blocks to disk */ - if (!immediate && atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) <= 0 && + if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) <= 0 && SB_JOURNAL(p_s_sb)->j_trans_start_time > 0 && SB_JOURNAL(p_s_sb)->j_len > 0 && (now - SB_JOURNAL(p_s_sb)->j_trans_start_time) > JOURNAL_MAX_TRANS_AGE) { journal_join(&th, p_s_sb, 1) ; reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ; journal_mark_dirty(&th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ; - do_journal_end(&th, p_s_sb,1, COMMIT_NOW) ; - } else if (immediate) { /* belongs above, but I wanted this to be very explicit as a special case. If they say to - flush, we must be sure old transactions hit the disk too. */ - journal_join(&th, p_s_sb, 1) ; - reiserfs_prepare_for_journal(p_s_sb, SB_BUFFER_WITH_SB(p_s_sb), 1) ; - journal_mark_dirty(&th, p_s_sb, SB_BUFFER_WITH_SB(p_s_sb)) ; + + /* we're only being called from kreiserfsd, it makes no sense to do + ** an async commit so that kreiserfsd can do it later + */ do_journal_end(&th, p_s_sb,1, COMMIT_NOW | WAIT) ; - } - reiserfs_journal_kupdate(p_s_sb) ; - return 0 ; + } + reiserfs_journal_kupdate(p_s_sb) ; + + return S_SUPER_DIRTY_COMMIT; } /* @@ -2497,7 +2502,7 @@ if (SB_JOURNAL(p_s_sb)->j_len == 0) { int wcount = atomic_read(&(SB_JOURNAL(p_s_sb)->j_wcount)) ; unlock_journal(p_s_sb) ; - if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock)) > 0 && wcount <= 0) { + if (atomic_read(&(SB_JOURNAL(p_s_sb)->j_jlock)) > 0 && wcount <= 0) { atomic_dec(&(SB_JOURNAL(p_s_sb)->j_jlock)) ; wake_up(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ; } @@ -2768,6 +2773,7 @@ ** it tells us if we should continue with the journal_end, or just return */ if (!check_journal_end(th, p_s_sb, nblocks, flags)) { + p_s_sb->s_dirt |= S_SUPER_DIRTY; return 0 ; } @@ -2937,17 +2943,12 @@ /* write any buffers that must hit disk before this commit is done */ fsync_inode_buffers(&(SB_JOURNAL(p_s_sb)->j_dummy_inode)) ; - /* honor the flush and async wishes from the caller */ + /* honor the flush wishes from the caller, simple commits can + ** be done outside the journal lock, they are done below + */ if (flush) { - flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + orig_jindex, 1) ; flush_journal_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + orig_jindex , 1) ; - } else if (commit_now) { - if (wait_on_commit) { - flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + orig_jindex, 1) ; - } else { - commit_flush_async(p_s_sb, orig_jindex) ; - } } /* reset journal values for the next transaction */ @@ -3009,6 +3010,16 @@ atomic_set(&(SB_JOURNAL(p_s_sb)->j_jlock), 0) ; /* wake up any body waiting to join. */ wake_up(&(SB_JOURNAL(p_s_sb)->j_join_wait)) ; + + if (!flush && commit_now) { + if (current->need_resched) + schedule() ; + if (wait_on_commit) { + flush_commit_list(p_s_sb, SB_JOURNAL_LIST(p_s_sb) + orig_jindex, 1) ; + } else { + commit_flush_async(p_s_sb, orig_jindex) ; + } + } return 0 ; } diff -urN v2.4.19p7/fs/reiserfs/objectid.c linux/fs/reiserfs/objectid.c --- v2.4.19p7/fs/reiserfs/objectid.c Thu Apr 18 16:30:16 2002 +++ linux/fs/reiserfs/objectid.c Wed May 1 03:54:14 2002 @@ -87,7 +87,6 @@ } journal_mark_dirty(th, s, SB_BUFFER_WITH_SB (s)); - s->s_dirt = 1; return unused_objectid; } @@ -106,8 +105,6 @@ reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1) ; journal_mark_dirty(th, s, SB_BUFFER_WITH_SB (s)); - s->s_dirt = 1; - /* start at the beginning of the objectid map (i = 0) and go to the end of it (i = disk_sb->s_oid_cursize). 
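The do_journal_end() rework above is the interesting part of this hunk group: the commit is moved out from under the journal lock, so new writers can join a fresh transaction while the old commit blocks are still being written. The before/after ordering, as a fragment (not compilable on its own):

    /* before: commit I/O issued with the journal still locked */
    flush_commit_list(sb, jl, 1);
    unlock_journal(sb);

    /* after: let waiters join first, then do the slow I/O */
    atomic_set(&journal->j_jlock, 0);
    wake_up(&journal->j_join_wait);
    if (!flush && commit_now) {
        if (current->need_resched)
            schedule();
        if (wait_on_commit)
            flush_commit_list(sb, jl, 1);
        else
            commit_flush_async(sb, orig_jindex);
    }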
Linear search is diff -urN v2.4.19p7/fs/reiserfs/stree.c linux/fs/reiserfs/stree.c --- v2.4.19p7/fs/reiserfs/stree.c Thu Apr 18 16:30:16 2002 +++ linux/fs/reiserfs/stree.c Wed May 1 03:54:14 2002 @@ -598,26 +598,32 @@ -#ifdef SEARCH_BY_KEY_READA +#define SEARCH_BY_KEY_READA 32 /* The function is NOT SCHEDULE-SAFE! */ -static void search_by_key_reada (struct super_block * s, int blocknr) +static void search_by_key_reada (struct super_block * s, + struct buffer_head **bh, + unsigned long *b, int num) { - struct buffer_head * bh; + int i,j; - if (blocknr == 0) - return; - - bh = getblk (s->s_dev, blocknr, s->s_blocksize); - - if (!buffer_uptodate (bh)) { - ll_rw_block (READA, 1, &bh); + for (i = 0 ; i < num ; i++) { + bh[i] = sb_getblk (s, b[i]); + if (buffer_uptodate(bh[i])) { + brelse(bh[i]); + break; + } + touch_buffer(bh[i]); + } + if (i) { + ll_rw_block(READA, i, bh); + } + for(j = 0 ; j < i ; j++) { + if (bh[j]) + brelse(bh[j]); } - bh->b_count --; } -#endif - /************************************************************************** * Algorithm SearchByKey * * look for item in the Disk S+Tree by its key * @@ -660,6 +666,9 @@ int n_node_level, n_retval; int right_neighbor_of_leaf_node; int fs_gen; + struct buffer_head *reada_bh[SEARCH_BY_KEY_READA]; + unsigned long reada_blocks[SEARCH_BY_KEY_READA]; + int reada_count = 0; #ifdef CONFIG_REISERFS_CHECK int n_repeat_counter = 0; @@ -693,11 +702,11 @@ fs_gen = get_generation (p_s_sb); expected_level --; -#ifdef SEARCH_BY_KEY_READA - /* schedule read of right neighbor */ - search_by_key_reada (p_s_sb, right_neighbor_of_leaf_node); -#endif - + /* schedule read of right neighbors */ + if (reada_count) { + search_by_key_reada (p_s_sb, reada_bh, reada_blocks, reada_count); + reada_count = 0; + } /* Read the next tree node, and set the last element in the path to have a pointer to it. */ if ( ! (p_s_bh = p_s_last_element->pe_buffer = @@ -785,11 +794,20 @@ position in the node. */ n_block_number = B_N_CHILD_NUM(p_s_bh, p_s_last_element->pe_position); -#ifdef SEARCH_BY_KEY_READA - /* if we are going to read leaf node, then calculate its right neighbor if possible */ - if (n_node_level == DISK_LEAF_NODE_LEVEL + 1 && p_s_last_element->pe_position < B_NR_ITEMS (p_s_bh)) - right_neighbor_of_leaf_node = B_N_CHILD_NUM(p_s_bh, p_s_last_element->pe_position + 1); -#endif + /* if we are going to read leaf node, then try to find good leaves + ** for read ahead as well. Don't bother for stat data though + */ + if (reiserfs_test4(p_s_sb) && + n_node_level == DISK_LEAF_NODE_LEVEL + 1 && + p_s_last_element->pe_position < B_NR_ITEMS (p_s_bh) && + !is_statdata_cpu_key(p_s_key)) + { + int pos = p_s_last_element->pe_position; + int limit = B_NR_ITEMS(p_s_bh); + while(pos <= limit && reada_count < SEARCH_BY_KEY_READA) { + reada_blocks[reada_count++] = B_N_CHILD_NUM(p_s_bh, pos++); + } + } } } diff -urN v2.4.19p7/fs/reiserfs/super.c linux/fs/reiserfs/super.c --- v2.4.19p7/fs/reiserfs/super.c Thu Apr 18 16:30:16 2002 +++ linux/fs/reiserfs/super.c Wed May 1 04:11:09 2002 @@ -29,23 +29,22 @@ static int reiserfs_remount (struct super_block * s, int * flags, char * data); static int reiserfs_statfs (struct super_block * s, struct statfs * buf); -// -// a portion of this function, particularly the VFS interface portion, -// was derived from minix or ext2's analog and evolved as the -// prototype did. You should be able to tell which portion by looking -// at the ext2 code and comparing. It's subfunctions contain no code -// used as a template unless they are so labeled. 
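On the stree.c change above: the old readahead fetched a single right neighbor; the new search_by_key_reada() gathers up to SEARCH_BY_KEY_READA (32) child block numbers from the internal node directly above the leaf level and issues them as one batch, gated by the test4 mount option and skipped for stat-data lookups. Two details worth noticing: the gather loop runs pos <= limit because an internal node with N keys has N+1 children, and the submit loop stops at the first already-uptodate buffer, presumably on the theory that one cached buffer means the rest of the run is cached too. The shape of the batch submit, restated outside the diff for readability (2.4 buffer-cache calls):

    static void reada_batch(struct super_block *s, unsigned long *blk, int num)
    {
        struct buffer_head *bh[32];
        int i, j;

        for (i = 0; i < num; i++) {
            bh[i] = sb_getblk(s, blk[i]);   /* allocate, don't read yet */
            if (buffer_uptodate(bh[i])) {
                brelse(bh[i]);
                break;
            }
        }
        if (i)
            ll_rw_block(READA, i, bh);      /* one hinted batch */
        for (j = 0; j < i; j++)
            brelse(bh[j]);                  /* READA I/O holds its own refs */
    }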
-//
+/* kreiserfsd does all the periodic stuff for us */
 static void reiserfs_write_super (struct super_block * s)
 {
+    s->s_dirt = S_SUPER_DIRTY_COMMIT;
+}

-    int dirty = 0 ;
-    lock_kernel() ;
-    if (!(s->s_flags & MS_RDONLY)) {
-        dirty = flush_old_commits(s, 1) ;
-    }
-    s->s_dirt = dirty;
-    unlock_kernel() ;
+static void reiserfs_commit_super (struct super_block * s)
+{
+    struct reiserfs_transaction_handle th;
+    lock_kernel() ;
+    if (!(s->s_flags & MS_RDONLY)) {
+        journal_begin(&th, s, 1);
+        journal_end_sync(&th, s, 1);
+        s->s_dirt = 0;
+    }
+    unlock_kernel() ;
 }

 //
@@ -413,6 +412,7 @@
     put_super: reiserfs_put_super,
     write_super: reiserfs_write_super,
     write_super_lockfs: reiserfs_write_super_lockfs,
+    commit_super: reiserfs_commit_super,
     unlockfs: reiserfs_unlockfs,
     statfs: reiserfs_statfs,
     remount_fs: reiserfs_remount,
@@ -557,7 +557,7 @@
 #undef SET_OPT
     handle_attrs( s );
-
+
     if(blocks) {
         int rc = reiserfs_resize(s, blocks);
         if (rc != 0)
@@ -968,6 +968,7 @@
     memset (&s->u.reiserfs_sb, 0, sizeof (struct reiserfs_sb_info));
+    INIT_LIST_HEAD(&s->u.reiserfs_sb.s_reiserfs_supers);

     if (parse_options ((char *) data, &(s->u.reiserfs_sb.s_mount_opt), &blocks) == 0) {
         return NULL;
diff -urN v2.4.19p7/fs/reiserfs/tail_conversion.c linux/fs/reiserfs/tail_conversion.c
--- v2.4.19p7/fs/reiserfs/tail_conversion.c	Thu Apr 18 16:30:16 2002
+++ linux/fs/reiserfs/tail_conversion.c	Wed May  1 03:54:31 2002
@@ -30,7 +30,7 @@
        key of unfm pointer to be pasted */
     int n_blk_size, n_retval;       /* returned value for reiserfs_insert_item and clones */
-    struct unfm_nodeinfo unfm_ptr;  /* Handle on an unformatted node
+    unp_t unfm_ptr;                 /* Handle on an unformatted node
                                        that will be inserted in the tree. */
@@ -59,8 +59,7 @@
     p_le_ih = PATH_PITEM_HEAD (path);

-    unfm_ptr.unfm_nodenum = cpu_to_le32 (unbh->b_blocknr);
-    unfm_ptr.unfm_freespace = 0; // ???
+    unfm_ptr = cpu_to_le32 (unbh->b_blocknr);

     if ( is_statdata_le_ih (p_le_ih) )  {
         /* Insert new indirect item. */
diff -urN v2.4.19p7/fs/super.c linux/fs/super.c
--- v2.4.19p7/fs/super.c	Thu Apr 18 16:30:16 2002
+++ linux/fs/super.c	Wed May  1 03:54:14 2002
@@ -431,15 +431,68 @@
        put_super(sb);
 }

+/* since we've added the idea of commit_dirty vs regular dirty with
+ * commit_super operation, only use the S_SUPER_DIRTY mask if
+ * the FS has a commit_super op.
+ */ +static inline int super_dirty(struct super_block *sb) +{ + if (sb->s_op && sb->s_op->commit_super) { + return sb->s_dirt & S_SUPER_DIRTY; + } + return sb->s_dirt; +} + + static inline void write_super(struct super_block *sb) { lock_super(sb); - if (sb->s_root && sb->s_dirt) + if (sb->s_root && super_dirty(sb)) if (sb->s_op && sb->s_op->write_super) sb->s_op->write_super(sb); unlock_super(sb); } +static inline void commit_super(struct super_block *sb) +{ + lock_super(sb); + if (sb->s_root && sb->s_dirt) { + if (sb->s_op && sb->s_op->write_super) + sb->s_op->write_super(sb); + if (sb->s_op && sb->s_op->commit_super) + sb->s_op->commit_super(sb); + } + unlock_super(sb); +} + +void commit_supers(kdev_t dev) +{ + struct super_block * sb; + + if (dev) { + sb = get_super(dev); + if (sb) { + if (sb->s_dirt) + commit_super(sb); + drop_super(sb); + } + } +restart: + spin_lock(&sb_lock); + sb = sb_entry(super_blocks.next); + while (sb != sb_entry(&super_blocks)) + if (sb->s_dirt) { + sb->s_count++; + spin_unlock(&sb_lock); + down_read(&sb->s_umount); + commit_super(sb); + drop_super(sb); + goto restart; + } else + sb = sb_entry(sb->s_list.next); + spin_unlock(&sb_lock); +} + /* * Note: check the dirty flag before waiting, so we don't * hold up the sync while mounting a device. (The newly @@ -462,7 +515,7 @@ spin_lock(&sb_lock); sb = sb_entry(super_blocks.next); while (sb != sb_entry(&super_blocks)) - if (sb->s_dirt) { + if (super_dirty(sb)) { sb->s_count++; spin_unlock(&sb_lock); down_read(&sb->s_umount); diff -urN v2.4.19p7/include/linux/fs.h linux/include/linux/fs.h --- v2.4.19p7/include/linux/fs.h Thu Apr 18 16:30:26 2002 +++ linux/include/linux/fs.h Wed May 1 03:54:14 2002 @@ -706,6 +706,10 @@ #define sb_entry(list) list_entry((list), struct super_block, s_list) #define S_BIAS (1<<30) + +/* flags for the s_dirt field */ +#define S_SUPER_DIRTY 1 +#define S_SUPER_DIRTY_COMMIT 2 struct super_block { struct list_head s_list; /* Keep this first */ kdev_t s_dev; @@ -918,6 +922,7 @@ struct dentry * (*fh_to_dentry)(struct super_block *sb, __u32 *fh, int len, int fhtype, int parent); int (*dentry_to_fh)(struct dentry *, __u32 *fh, int *lenp, int need_parent); int (*show_options)(struct seq_file *, struct vfsmount *); + void (*commit_super) (struct super_block *); }; /* Inode state bits.. 
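To summarize the two-level dirty scheme the fs/super.c hunks above implement for filesystems providing ->commit_super (s_dirt stops being a boolean and becomes two bits with different consumers; flow reconstructed from this patch, reiserfs case):

    /*
     * journal_mark_dirty(), do_journal_end()
     *         -> s_dirt |= S_SUPER_DIRTY           (journal owes work)
     * kupdated: sync_supers() -> write_super()
     *         -> s_dirt = S_SUPER_DIRTY_COMMIT     (cheap: kreiserfsd will act)
     * sync/fsync: fsync_dev() -> commit_supers()
     *         -> write_super() + commit_super(): a synchronous empty
     *            transaction, then s_dirt = 0
     */

super_dirty() masks with S_SUPER_DIRTY only when a commit_super op exists, so kupdated does not keep rewriting a super that is merely waiting for its periodic commit.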
*/ @@ -1226,6 +1231,7 @@ extern int filemap_fdatasync(struct address_space *); extern int filemap_fdatawait(struct address_space *); extern void sync_supers(kdev_t); +extern void commit_supers(kdev_t); extern int bmap(struct inode *, int); extern int notify_change(struct dentry *, struct iattr *); extern int permission(struct inode *, int); diff -urN v2.4.19p7/include/linux/reiserfs_fs.h linux/include/linux/reiserfs_fs.h --- v2.4.19p7/include/linux/reiserfs_fs.h Thu Apr 18 16:30:28 2002 +++ linux/include/linux/reiserfs_fs.h Wed May 1 04:08:30 2002 @@ -55,7 +55,7 @@ #define USE_INODE_GENERATION_COUNTER #define REISERFS_PREALLOCATE -#define PREALLOCATION_SIZE 8 +#define PREALLOCATION_SIZE 128 /* n must be power of 2 */ #define _ROUND_UP(x,n) (((x)+(n)-1u) & ~((n)-1u)) @@ -197,7 +197,6 @@ ( (n_tail_size) >= (MAX_DIRECT_ITEM_LEN(n_block_size) * 3)/4) ) ) \ ) - /* * values for s_state field */ @@ -1533,6 +1532,7 @@ */ #define JOURNAL_BUFFER(j,n) ((j)->j_ap_blocks[((j)->j_start + (n)) % JOURNAL_BLOCK_COUNT]) +int reiserfs_flush_old_commits(struct super_block *); void reiserfs_commit_for_inode(struct inode *) ; void reiserfs_update_inode_transaction(struct inode *) ; void reiserfs_wait_on_write_block(struct super_block *s) ; @@ -1731,6 +1731,322 @@ struct dentry *dentry, struct inode *inode, int * err); int reiserfs_sync_inode (struct reiserfs_transaction_handle *th, struct inode * inode); void reiserfs_update_sd (struct reiserfs_transaction_handle *th, struct inode * inode); + +/* +** The indirect item cache - iicache. +** +** We put the indirect item or part of it to iicache and +** can avoid now a lot of search_by_key calls. +*/ + +#define IICACHE_BLOCKNR 1 +#define IICACHE_SIZE 2 +#define IICACHE_BLOCK 3 + +/* +** Get current iicache array size. +*/ +static inline int iicache_get_asize (struct inode * inode) +{ + if (inode->u.reiserfs_i.iic==NULL) return 0; + return (inode->u.reiserfs_i.iic_asize); +} + +/* +** Set current iicache array size. 
+*/
+static inline void iicache_set_asize (struct inode * inode, int asize)
+{
+    if (inode->u.reiserfs_i.iic==NULL) return;
+    inode->u.reiserfs_i.iic_asize = asize;
+}
+
+/*
+** Set parameter of given type to iicache
+*/
+static inline void iicache_set (struct inode * inode,
+                                long param, int type, int i)
+{
+    struct iicache * iic;
+
+    if (inode->u.reiserfs_i.iic==NULL) return;
+
+    iic = inode->u.reiserfs_i.iic;
+
+    if (i >= iicache_get_asize(inode)) return;
+
+    iic += i;
+
+    switch (type) {
+        case IICACHE_BLOCKNR : iic->i_cache_blocknr = param;
+                               break;
+        case IICACHE_SIZE    : iic->i_cache_size = param;
+                               break;
+        case IICACHE_BLOCK   : iic->i_cache_block = param;
+                               break;
+    }
+}
+
+/*
+** Get parameter of given type from iicache
+*/
+static inline long iicache_get (struct inode * inode, int type, int i)
+{
+    long val=0;
+    struct iicache * iic;
+
+    if (inode->u.reiserfs_i.iic==NULL) return 0;
+
+    iic = inode->u.reiserfs_i.iic;
+
+    if (i >= iicache_get_asize(inode)) return 0;
+
+    iic += i;
+
+    switch (type) {
+        case IICACHE_BLOCKNR : val=iic->i_cache_blocknr;
+                               break;
+        case IICACHE_SIZE    : val=iic->i_cache_size;
+                               break;
+        case IICACHE_BLOCK   : val=iic->i_cache_block;
+                               break;
+    }
+    return val;
+}
+
[ the bodies of several helpers below were garbled in the original posting;
  the loop bodies are reconstructed from their call sites, and one further
  helper between iicache_clear() and iicache_get_free_slot() could not be
  recovered at all ]
+/*
+** Clear the indirect item cache
+*/
+static inline void iicache_clear(struct inode * inode)
+{
+    int i;
+
+    if (inode->u.reiserfs_i.iic==NULL) return;
+
+    for (i=0; i<iicache_get_asize(inode); i++) {
+        iicache_set(inode, 0, IICACHE_SIZE, i);
+        iicache_set(inode, 0, IICACHE_BLOCK, i);
+        iicache_set(inode, 0, IICACHE_BLOCKNR, i);
+    }
+}
+
+/*
+** Get a free iicache slot, return -1 if there is none.
+*/
+static inline int iicache_get_free_slot(struct inode * inode)
+{
+    int i;
+
+    if (inode->u.reiserfs_i.iic==NULL) return (-1);
+
+    for (i=0; i<iicache_get_asize(inode); i++) {
+        if (iicache_get(inode, IICACHE_SIZE, i)==0)
+            return i;
+    }
+    return (-1);
+}
+
+/*
+** Clear the iicache slots from the given position on.
+*/
+static inline void iicache_clear_from_pos(struct inode * inode, int pos)
+{
+    int i;
+
+    if (inode->u.reiserfs_i.iic==NULL) return;
+
+    for (i=pos; i<iicache_get_asize(inode); i++) {
+        iicache_set(inode, 0, IICACHE_SIZE, i);
+        iicache_set(inode, 0, IICACHE_BLOCK, i);
+        iicache_set(inode, 0, IICACHE_BLOCKNR, i);
+    }
+}
+
+/*
+** Get the cached disk block number.
+*/
+static inline long iicache_get_blocknr(struct inode * inode, int i)
+{
+    if (inode->u.reiserfs_i.iic==NULL) return 0;
+    return (iicache_get(inode, IICACHE_BLOCKNR, i));
+}
+
+/*
+** Get the size of indirect item cache
+*/
+static inline long iicache_size(struct inode * inode, int i)
+{
+    if (inode->u.reiserfs_i.iic==NULL) return 0;
+    return (iicache_get(inode, IICACHE_SIZE, i));
+}
+
+/*
+** Get the first cached logical block of file
+*/
+static inline long iicache_first_cached(struct inode * inode, int i)
+{
+    if (inode->u.reiserfs_i.iic==NULL) return 0;
+    return (iicache_get(inode, IICACHE_BLOCK, i));
+}
+
+/*
+** Get the last cached logical block of file
+*/
+static inline long iicache_last_cached(struct inode * inode, int i)
+{
+    if (inode->u.reiserfs_i.iic==NULL) return 0;
+    return (iicache_first_cached(inode,i) + iicache_size(inode,i) - 1);
+}
+
+/*
+** Check the logical block of file: is it in iicache
+*/
+static inline int block_is_iicached(struct inode * inode, long block)
+{
+    int i;
+    if (inode->u.reiserfs_i.iic==NULL) return 0;
+
+    for (i=0; i<iicache_get_asize(inode); i++) {
+        if ( (block >= iicache_first_cached(inode, i)) &&
+             (block <= iicache_last_cached(inode, i)) )
+            return i+1;
+    }
+    return 0;
+}
+
+/*
+** Get the disk block number by the logical block number of file
+*/
+static inline long iicache_get_blocknr_by_block(struct inode * inode, long block, int i)
+{
+    long offset=0, block_nr=0;
+    if (inode->u.reiserfs_i.iic==NULL) return 0;
+
+    offset = block - iicache_first_cached(inode,i);
+    block_nr = iicache_get_blocknr(inode,i);
+
+    return ((block_nr==0) ? 0 : (block_nr + offset));
+}
+
+/*
+** Copy from one iicache slot to another.
+*/ +static inline void iicache_copy_slot(struct inode * inode, int from, int to) +{ + long amount=0, blk=0, blknr=0; + if (inode->u.reiserfs_i.iic==NULL) return; + + if (from < 0 || from > iicache_get_asize(inode)) return; + if (to < 0 || to > iicache_get_asize(inode)) return; + + amount = iicache_get (inode, IICACHE_SIZE, from); + blk = iicache_get (inode, IICACHE_BLOCK, from); + blknr = iicache_get (inode, IICACHE_BLOCKNR, from); + + iicache_set (inode, amount, IICACHE_SIZE, to); + iicache_set (inode, blk, IICACHE_BLOCK, to); + iicache_set (inode, blknr, IICACHE_BLOCKNR, to); +} + + +/* +** Insert block_nr to indirect item cache +*/ +static inline void iicache_insert(struct inode * inode, long block, long block_nr) +{ + int n=0, slot=0, from=0, to=0; + long amount=0, blk=0, blknr=0, cur_blk_nr=0; + + if (inode->u.reiserfs_i.iic==NULL) return; + + n = block_is_iicached(inode, block); + if (n>0) { + slot=n-1; + cur_blk_nr = iicache_get_blocknr_by_block(inode, block, slot); + if (cur_blk_nr == 0) { /* hole in file */ + + amount = iicache_get (inode, IICACHE_SIZE, slot); + blk = iicache_get (inode, IICACHE_BLOCK, slot); + blknr = iicache_get (inode, IICACHE_BLOCKNR, slot); + + if (block==iicache_first_cached(inode,slot)) { + iicache_set (inode, amount-1, IICACHE_SIZE, slot); + iicache_set (inode, blk+1, IICACHE_BLOCK, slot); + } + else if (block==iicache_last_cached(inode,slot)) { + iicache_set (inode, amount-1, IICACHE_SIZE, slot); + } + else { + from = slot; + to =iicache_get_free_slot(inode); + if ( to >= 0 ) { + iicache_copy_slot(inode, from, to); + iicache_set (inode, amount, IICACHE_SIZE, to); + iicache_set (inode, block+1, IICACHE_BLOCK, to); + iicache_set (inode, block_nr+1, IICACHE_BLOCK, slot); + } + amount -= block-blk; + if (amount > 0) { + iicache_set (inode, amount, IICACHE_SIZE, slot); + } + } + + slot=iicache_get_free_slot(inode); + if (slot >= 0) { + iicache_set (inode, 1, IICACHE_SIZE, slot); + iicache_set (inode, block, IICACHE_BLOCK, slot); + iicache_set (inode, block_nr, IICACHE_BLOCKNR, slot); + } + + } + return; + } + + // block is not in iicache + // check the previous block + n = block_is_iicached(inode, block-1); + if (n>0) { + slot = n-1; + if ((block-1) == iicache_last_cached(inode, slot)) { + amount = iicache_get (inode, IICACHE_SIZE, slot); + blknr = iicache_get (inode, IICACHE_BLOCKNR, slot); + if ((blknr + amount) == block_nr) { + iicache_set (inode, amount+1, IICACHE_SIZE, slot); + return; + } + } + } + + // block and previous block is not in iicache, + // then just fill a free iicache slot + slot=iicache_get_free_slot(inode); + if (slot >= 0) { + iicache_set (inode, 1, IICACHE_SIZE, slot); + iicache_set (inode, block, IICACHE_BLOCK, slot); + iicache_set (inode, block_nr, IICACHE_BLOCKNR, slot); + } + +} + + void sd_attrs_to_i_attrs( __u16 sd_attrs, struct inode *inode ); void i_attrs_to_sd_attrs( struct inode *inode, __u16 *sd_attrs ); diff -urN v2.4.19p7/include/linux/reiserfs_fs_i.h linux/include/linux/reiserfs_fs_i.h --- v2.4.19p7/include/linux/reiserfs_fs_i.h Thu Apr 18 16:30:28 2002 +++ linux/include/linux/reiserfs_fs_i.h Wed May 1 04:09:18 2002 @@ -2,6 +2,13 @@ #define _REISER_FS_I #include + +// The cache for indirect item (iicache). 
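iicache_insert() above has three cases; a worked example helps (slots written as {first file block, length, first disk block}): appending to a run turns {10, 3, 500} plus (block 13 -> 503) into {10, 4, 500}, since 500+3 == 503; filling a cached hole in the middle is meant to split {10, 5, 0} plus (block 12 -> 700) into {10, 2, 0} and {13, 2, 0}, with a new {12, 1, 700} taking a free slot; anything else just claims a free slot as {block, 1, blocknr}. The middle case as written looks suspicious, though: the right-hand copy keeps the pre-split SIZE, the left-hand SIZE ends up as the old length minus (block - blk) rather than the left half, and block_nr+1 is stored into the IICACHE_BLOCK field of the old slot, which mixes a disk block number into a file-block field. That bookkeeping deserves a second look before this goes in.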
+struct iicache {
+    long i_cache_blocknr;   /* the first disk block number of a set of
+                               contiguous blocks */
+    long i_cache_size ;     /* the number of blocks in that set */
+    long i_cache_block ;    /* the first cached logical block of the file */
+};

 /** bitmasks for i_flags field in reiserfs-specific part of inode */
 typedef enum {
@@ -46,6 +53,10 @@
     ** flushed */
     unsigned long i_trans_id ;
     unsigned long i_trans_index ;
+
+    // The cache for indirect item (iicache).
+    struct iicache * iic;
+    int iic_asize;          /* iicache array size */
 };

 #endif
diff -urN v2.4.19p7/include/linux/reiserfs_fs_sb.h linux/include/linux/reiserfs_fs_sb.h
--- v2.4.19p7/include/linux/reiserfs_fs_sb.h	Thu Apr 18 16:30:28 2002
+++ linux/include/linux/reiserfs_fs_sb.h	Wed May  1 04:10:15 2002
@@ -291,8 +291,7 @@
     */
     struct reiserfs_page_list *j_flush_pages ;
     time_t j_trans_start_time ;         /* time this transaction started */
-    wait_queue_head_t j_wait ;          /* wait journal_end to finish I/O */
-    atomic_t  j_wlock ;                 /* lock for j_wait */
+    struct semaphore j_lock ;
     wait_queue_head_t j_join_wait ;     /* wait for current transaction to finish before starting new one */
     atomic_t j_jlock ;                  /* lock for j_join_wait */
     int j_journal_list_index ;          /* journal list number of the current trans */
@@ -444,6 +443,7 @@
     int s_is_unlinked_ok;
     reiserfs_proc_info_data_t s_proc_info_data;
     struct proc_dir_entry *procdir;
+    struct list_head s_reiserfs_supers;
 };

 /* Definitions of reiserfs on-disk properties: */
@@ -499,6 +499,7 @@
 #define reiserfs_hashed_relocation(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_HASHED_RELOCATION))
 #define reiserfs_test4(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_TEST4))
+
 #define dont_have_tails(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << NOTAIL))
 #define replay_only(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REPLAYONLY))
 #define reiserfs_dont_log(s) ((s)->u.reiserfs_sb.s_mount_opt & (1 << REISERFS_NOLOG))
@@ -510,7 +511,6 @@
 void reiserfs_file_buffer (struct buffer_head * bh, int list);
 int reiserfs_is_super(struct super_block *s) ;
 int journal_mark_dirty(struct reiserfs_transaction_handle *, struct super_block *, struct buffer_head *bh) ;
-int flush_old_commits(struct super_block *s, int) ;
 int show_reiserfs_locks(void) ;
 int reiserfs_resize(struct super_block *, unsigned long) ;
diff -urN v2.4.19p7/mm/filemap.c linux/mm/filemap.c
--- v2.4.19p7/mm/filemap.c	Thu Apr 18 16:30:28 2002
+++ linux/mm/filemap.c	Wed May  1 03:54:14 2002
@@ -1306,6 +1306,7 @@
     /* Mark the page referenced, AFTER checking for previous usage.. */
     SetPageReferenced(page);
 }
+EXPORT_SYMBOL(mark_page_accessed);

 /*
  * This is a generic file read routine, and uses the
@@ -2897,6 +2898,14 @@
     }
 }

+static void update_inode_times(struct inode *inode)
+{
+    time_t now = CURRENT_TIME;
+    if (inode->i_ctime != now || inode->i_mtime != now) {
+        inode->i_ctime = inode->i_mtime = now;
+        mark_inode_dirty_sync(inode);
+    }
+}
 /*
  * Write to a file through the page cache.
  *
@@ -3026,8 +3035,7 @@
        goto out;

     remove_suid(inode);
-    inode->i_ctime = inode->i_mtime = CURRENT_TIME;
-    mark_inode_dirty_sync(inode);
+    update_inode_times(inode);

     if (file->f_flags & O_DIRECT)
        goto o_direct;
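Last, the mm/filemap.c hunk: update_inode_times() keeps generic_file_write() from calling mark_inode_dirty_sync() on every single write() when ctime/mtime (one-second resolution in 2.4) have not actually changed. A user-space model of the guard (illustrative only):

    #include <stdio.h>
    #include <time.h>
    static time_t i_mtime, i_ctime;
    static long dirtied;
    static void update_inode_times(void)
    {
        time_t now = time(NULL);
        if (i_ctime == now && i_mtime == now)
            return;             /* same second: skip the dirtying */
        i_ctime = i_mtime = now;
        dirtied++;
    }
    int main(void)
    {
        long i;
        for (i = 0; i < 1000000; i++)
            update_inode_times();
        printf("dirtied %ld time(s) for 1000000 writes\n", dirtied);
        return 0;
    }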