//-< POOLMGR.CXX >---------------------------------------------------*--------*
// GOODS                     Version 1.0         (c) 1997  GARRET    *     ?  *
// (Generic Object Oriented Database System)                         *   /\|  *
//                                                                   *  /  \  *
//                          Created:     18-Jan-97    K.A. Knizhnik  * / [] \ *
//                          Last update: 29-May-97    K.A. Knizhnik  * GARRET *
//-------------------------------------------------------------------*--------*
// Pool of pages used to access server file
//-------------------------------------------------------------------*--------*

#include "server.h"

//
// Map position of a page in the file to an index in the page hash table.
// The position shifted right by page_bits is combined (XOR) with the
// position shifted right by 32 and reduced modulo the hash table size.
//
inline unsigned page_pool_manager::hash_function(fposi_t pos)
{
    nat4 page_no = nat4(pos >> page_bits);
    nat4 upper = nat4(pos >> 32);
    return (page_no ^ upper) % PAGE_POOL_HASH_TABLE_SIZE;
}

//
// Write contents of the page to the file and clear its dirty flag.
// Must be called with the critical section "cs" entered: the critical
// section is left for the duration of the write and entered again before
// the page state is updated. On success the cached file size is extended
// if the page lies beyond the currently known end of file; on failure
// the error is reported through console::error.
//
inline void page_pool_manager::save_page(page_header* ph)
{
    cs.leave();
    file::iop_status rc =
        page_file->write(ph->page_pos, ph->page_data, page_size);
    cs.enter();

    ph->state &= ~page_header::dirty;

    if (rc == file::ok) {
        if (ph->page_pos + page_size > file_size) {
            file_size = ph->page_pos + page_size;
        }
        return;
    }
    msg_buf text;
    page_file->get_error_text(rc, text, sizeof text);
    console::error("Failed to write page to file: %s\n", text);
}

//
// Release page previously obtained by get(): clear the busy flag, wake up
// tasks waiting for the page and decrement the usage counter. When the
// counter drops to zero the page is linked into the LRU list, and
// lru_sem is signaled if the page became the last element of the list.
//
void page_pool_manager::release(page_header* ph)
{
    cs.enter();

    if (ph->state & page_header::wait) {
        // somebody is waiting until the page is no more busy
        ph->state &= ~(page_header::busy | page_header::wait);
        ph->busy_event.signal();
    } else {
        ph->state &= ~page_header::busy;
    }

    ph->used -= 1;
    if (ph->used == 0) {
        ph->link_after(&lru);
        if (lru.prev == ph) {
            lru_sem.signal();
        }
    }

    cs.leave();
}

//
// Obtain the pool page which corresponds to position "page_pos" in the file.
// If the page is not present in the pool, a pool page is taken from the free
// list or reclaimed from the LRU list (saving it first if it is dirty) and,
// unless mode is pg_write, its contents is read from the file.
//
// Parameters:
//   page_pos - position of the page in the file (used as the hash key)
//   mode     - pg_read counts as a read access; any other mode counts as
//              a write access and marks the page dirty
// Returns:
//   header of the page with usage counter incremented. A page which was
//   just placed in the pool is returned with the "busy" flag set; the flag
//   is cleared by release().
//
// The critical section "cs" is entered at the beginning and is left on
// every return path.
//
page_header* page_pool_manager::get(fposi_t page_pos, access_mode mode) 
{
    cs.enter();

    unsigned h = hash_function(page_pos);
    page_header* ph;

    // Update access statistics
    if (mode == pg_read) { 
	n_page_reads += 1;
    } else { 
	n_page_writes += 1;
    }

  search_page:
    // Look for the page in the collision chain of its hash bucket
    for (ph = hash_table[h]; ph != NULL; ph = ph->collision_chain) {  
	if (ph->page_pos == page_pos) { 
	    // Pin the page; a page which was not used by anybody
	    // is removed from the list it is linked in
	    if (ph->used++ == 0) { 
		ph->unlink();
	    }
	    if (ph->state & page_header::busy) {
		// Page is busy: set "wait" flag (the first waiter also
		// resets the event) and wait for the event outside
		// of the critical section
		if (!(ph->state & page_header::wait)) { 
		    ph->state |= page_header::wait;
		    ph->busy_event.reset();
		}
		cs.leave();
		ph->busy_event.wait(); 
		cs.enter();
		internal_assert(!(ph->state & page_header::busy));
	    }
	    if (mode != pg_read) { 
		ph->state |= page_header::dirty;
	    }
	    cs.leave();
	    return ph;
	}
    }
    //
    // Allocate new page
    //
    // NOTE(review): this counter is incremented again each time control
    // returns to "search_page" and the page is still not found - confirm
    // that this is intended.
    n_page_faults += 1;
    ph = free;
    if (ph == NULL) { 
	// No free pages: take a victim from the tail of the LRU list
	if ((ph = (page_header*)lru.prev) == &lru) { 
	    // LRU list is empty: wait on lru_sem and repeat the search
	    lru_sem.wait();
	    goto search_page;
	}
	internal_assert(ph->used == 0);
	ph->unlink();
	if (ph->state & page_header::dirty) { 
	    // Victim has to be saved first. Mark it busy and pin it, because
	    // save_page() leaves the critical section while writing.
	    ph->state |= page_header::busy;
	    ph->used = 1;
	    save_page(ph);
	    ph->state &= ~page_header::busy;
	    ph->used -= 1;
	    if (ph->state & page_header::wait) { 
		//
		// While we were saving the page, a new request for this page 
		// arrived, so try to choose another page for replacement...
		//
		internal_assert(ph->used != 0);
		ph->state &= ~page_header::wait;
		ph->busy_event.signal();
		goto search_page;
	    } 
	    internal_assert(ph->used == 0);
	    // Put the saved page into the free list, exclude it from its
	    // hash chain and repeat the search from the beginning
	    // (presumably because the pool could be changed while
	    // the critical section was left in save_page()).
	    ph->next = free;
	    free = ph;
	    page_header** php = &hash_table[hash_function(ph->page_pos)];
	    while (*php != ph) { 
		internal_assert(*php != NULL);
		php = &(*php)->collision_chain;
	    }
	    *php = ph->collision_chain;
	    goto search_page;
	}
	// Victim is not dirty: exclude it from its hash chain and reuse it
	page_header** php = &hash_table[hash_function(ph->page_pos)];
	while (*php != ph) { 
	    internal_assert(*php != NULL);
	    php = &(*php)->collision_chain;
	}
	*php = ph->collision_chain;
    } else {
	// Take the first page from the free list
	free = (page_header*)free->next;
    }

    // Insert the page in the hash table and mark it as busy,
    // so that other requests for this page will wait until release()
    ph->collision_chain = hash_table[h];
    hash_table[h] = ph;
    ph->page_pos = page_pos; 
    ph->state = (mode == pg_read) 
	? page_header::busy : (page_header::busy | page_header::dirty);
    ph->used = 1;

    // Page contents is loaded from the file unless the page is requested
    // in pg_write mode or is located beyond the known end of file
    if (mode != pg_write && ph->page_pos < file_size) {
        msg_buf buf;
	// Do not read beyond the known end of file
	size_t available_size = ph->page_pos + page_size <= file_size
	    ? page_size : size_t(file_size - ph->page_pos);
	// Reading is performed outside of the critical section
	cs.leave();
        file::iop_status status = page_file->read(ph->page_pos, ph->page_data, 
						  available_size);
        if (status == file::end_of_file) {
	    // Cached file size is greater than the real one:
	    // update it with the size reported by the file
	    cs.enter();
            status = page_file->get_size(file_size);
            if (status != file::ok) {
                page_file->get_error_text(status, buf, sizeof buf);
                console::error("Failed to read page beyond end of file: %s\n",
                               buf);
            }
	    cs.leave();
        } else {
            if (status != file::ok) {
                page_file->get_error_text(status, buf, sizeof buf);
                console::error("Failed to read page from file: %s\n", buf);
            }
        }
    } else { 
	cs.leave();
    }
    return ph;
}

//
// Copy "size" bytes starting from position "pos" of the file to the buffer
// "buf". Data is taken page by page through the pool: each page is
// obtained with get() in pg_read mode and released after copying.
//
void page_pool_manager::read(fposi_t pos, void* buf, size_t size)
{
    char* out = (char*)buf;
    unsigned offs = int(pos) & (page_size-1); // offset within first page
    fposi_t page_pos = pos - offs;

    for (;;) {
        page_header* page = get(page_pos, pg_read);
        size_t room = page_size - offs; // bytes up to the end of the page
        size_t chunk = room < size ? room : size;

        memcpy(out, page->page_data + offs, chunk);
        release(page);

        if (!(room < size)) {
            // request is completely satisfied by this page
            break;
        }
        out += room;
        size -= room;
        page_pos += page_size;
        offs = 0;
    }
}


//
// Copy "size" bytes from the buffer "buf" to the pool pages which cover
// the file region starting at position "pos". A page which is overwritten
// completely is requested in pg_write mode, a partially updated page
// in pg_modify mode.
//
void page_pool_manager::write(fposi_t pos, void const* buf, size_t size)
{
    const char* in = (const char*)buf;
    unsigned offs = int(pos) & (page_size-1); // offset within first page
    fposi_t page_pos = pos - offs;

    for (;;) {
        access_mode mode = (offs == 0 && size >= page_size)
            ? pg_write : pg_modify;
        size_t room = page_size - offs; // bytes up to the end of the page
        page_header* page = get(page_pos, mode);
        size_t chunk = room < size ? room : size;

        memcpy(page->page_data + offs, in, chunk);
        internal_assert(page->state & page_header::dirty);
        release(page);

        if (!(room < size)) {
            // the rest of data fits in this page
            break;
        }
        in += room;
        size -= room;
        page_pos += page_size;
        offs = 0;
    }
}

//
// Check whether the page containing position "pos" is present in the
// hash table of the pool. State of the page is not changed.
// NOTE(review): the hash table is inspected without entering "cs" -
// confirm that callers do not need synchronization here.
//
boolean page_pool_manager::in_cache(fposi_t pos)
{
    unsigned offs = int(pos) & (page_size-1);
    fposi_t page_pos = pos - offs;

    for (page_header* ph = hash_table[hash_function(page_pos)];
         ph != NULL;
         ph = ph->collision_chain)
    {
        if (ph->page_pos == page_pos) {
            return True;
        }
    }
    return False;
}

//
// Save all dirty pages of the pool to the file and then flush the file.
//
// Each dirty page is pinned (usage counter is incremented) while it is
// processed. If the page is busy, the task waits outside of the critical
// section until the page is no more busy. The page can be saved by another
// task during this wait, in which case it is not written once more, but
// the pin still has to be dropped: otherwise the usage counter never
// returns to zero and the page is never linked into the LRU list again.
//
void page_pool_manager::flush()
{
    page_header* ph = pages;

    cs.enter();
    for (int n = pool_size; --n >= 0; ph += 1) {
        if (!(ph->state & page_header::dirty)) {
            continue;
        }
        // Pin the page
        if (ph->used++ == 0) {
            ph->unlink();
        }
        if (ph->state & page_header::busy) {
            if (!(ph->state & page_header::wait)) {
                ph->state |= page_header::wait;
                ph->busy_event.reset();
            }
            cs.leave();
            ph->busy_event.wait();
            cs.enter();
            internal_assert(!(ph->state & page_header::busy));
        }
        // Page which was not busy is still dirty here, since the critical
        // section was not left. After waiting the page may be already saved.
        if (ph->state & page_header::dirty) {
            save_page(ph);
        }
        // Unpin the page in both cases (the same way as release() does)
        if (--ph->used == 0) {
            ph->link_after(&lru);
            if (lru.prev == ph) {
                lru_sem.signal();
            }
        }
    }
    cs.leave();

    if (page_file->flush() != file::ok) {
        console::error("Failed to flush page pool to disk\n");
    }
}

//
// Open the page file (creating it if it can not be opened as an existing
// file) and reset the pool: the hash table is cleared, all pages are placed
// in the free list, the LRU list is pruned and statistics counters are
// zeroed.
// Returns True if the file was successfully opened, False otherwise
// (the error is reported through console::message).
//
boolean page_pool_manager::open(dbs_server* server)
{
    opened = False;
    this->server = server;

    file::iop_status status = page_file->open(file::fa_readwrite,
                                              file::fo_random);
    if (status == file::ok) {
        // it is not so easy to calculate file size for raw partitions
        file_size = MAX_FSIZE;
    } else {
        // file can not be opened as existing one: try to create it
        status = page_file->open(file::fa_readwrite,
                                 file::fo_random|file::fo_create);
        file_size = 0;
    }

    if (status != file::ok) {
        msg_buf text;
        page_file->get_error_text(status, text, sizeof text);
        console::message(msg_error, "Failed to open database file: %s\n", text);
        return False;
    }

    memset(hash_table, 0, sizeof hash_table);

    // Chain all pages into the free list
    pages[0].state = 0;
    for (size_t i = 1; i < pool_size; i++) {
        pages[i].state = 0;
        pages[i-1].next = &pages[i];
    }
    pages[pool_size-1].next = NULL;
    free = pages;
    lru.prune();

    n_page_reads = 0;
    n_page_writes = 0;
    n_page_faults = 0;

    opened = True;
    return True;
}

void page_pool_manager::close()
{ 
    cs.enter();

    if (opened) { 
	backup_cs.enter(); // wait backup completion
	flush();
	file::iop_status status = page_file->close(); 
	if (status != file::ok) { 
	    msg_buf buf;
	    page_file->get_error_text(status, buf, sizeof buf);
	    console::error("Failed to close page file: %s\n", buf);
	}
	opened = False; 
	backup_cs.leave();
    } 
    cs.leave();
}

void page_pool_manager::dump(char*)
{
    fsize_t size;
    if (page_file->get_size(size) == file::ok && size != 0) { 
	console::output("Storage file size: %"INT8_FORMAT"u\n", size);
    }
    console::output("Page pool size: %lu\n", pool_size);		     
    console::output("Page pool access: %u reads, %u writes, %u page faults\n",
		    n_page_reads, n_page_writes, n_page_faults);
}

//
// Copy contents of the page file to the backup file. The packed 8-byte
// size of the storage is written first, followed by the file data
// in chunks of page_size bytes.
//
// backup_cs is held during the whole operation and is released on every
// return path. Backup is interrupted when server->backup_started becomes
// false.
//
// Returns True if the backup was completed, False if the manager is not
// opened, the backup was interrupted or an I/O error happened (I/O errors
// are reported through console::message).
//
boolean page_pool_manager::backup(file& backup_file)
{
    fsize_t size;
    msg_buf  err;
    file::iop_status status;

    backup_cs.enter(); // linger manager closing until backup completion
    if (!opened) {
        backup_cs.leave();
        return False;
    }

    if (page_file->get_size(size) != file::ok || (size == 0 && file_size != 0))
    {
        // file is located at raw partition
        size = server->mem_mgr->get_storage_size();
    }
    nat8 pksize;
    pack8((char*)&pksize, (char*)&size);
    status = backup_file.write(&pksize, sizeof pksize);
    if (status != file::ok) {
        // release the lock on this path as well, otherwise close()
        // and subsequent backups would be blocked forever
        backup_cs.leave();
        backup_file.get_error_text(status, err, sizeof err);
        console::message(msg_error|msg_time,
                         "Failed to write backup file: %s\n", err);
        return False;
    }
    char* buf = new char[page_size];
    for (fposi_t pos = 0; pos < size; pos += page_size) {
        if (!server->backup_started) {
            // backup was cancelled
            backup_cs.leave();
            delete[] buf;
            return False;
        }
        // the last chunk can be shorter than a page
        status = page_file->read(pos, buf, pos + page_size <= size
                                 ? page_size : size_t(size - pos));
        if (status != file::ok) {
            backup_cs.leave();
            delete[] buf;
            page_file->get_error_text(status, err, sizeof err);
            console::message(msg_error|msg_time,
                             "Failed to read database file: %s\n", err);
            return False;
        }
        // NOTE(review): the whole page is always written, even when
        // the last chunk read is shorter - confirm that the padding
        // is expected by restore().
        status = backup_file.write(buf, page_size);
        if (status != file::ok) {
            backup_file.get_error_text(status, err, sizeof err);
            console::message(msg_error|msg_time,
                             "Failed to write page to backup file: %s\n", err);
            backup_cs.leave();
            delete[] buf;
            return False;
        }
        task::reschedule();
    }
    backup_cs.leave();
    delete[] buf;
    return True;
}

//
// This manager performs no action when backup is stopped.
//
void page_pool_manager::stop_backup()
{
}

//
// Restore the page file from the backup file. The packed 8-byte size is
// read first, then the page file is created (truncated) and filled with
// the data read from the backup file page by page.
//
// Returns True on success; False if reading of the backup file, opening
// or writing of the page file failed (errors are reported through
// console::message).
//
boolean page_pool_manager::restore(file& backup_file)
{
    nat8 size, pksize;
    msg_buf buf;
    file::iop_status status = backup_file.read(&pksize, sizeof pksize);

    if (status != file::ok) {
        backup_file.get_error_text(status, buf, sizeof buf);
        console::message(msg_error,
                         "Failed to read from backup file: %s\n", buf);
        return False;
    }
    unpack8((char*)&size, (char*)&pksize);
    status = page_file->open(file::fa_write,file::fo_create|file::fo_truncate);
    if (status != file::ok) {
        page_file->get_error_text(status, buf, sizeof buf);
        console::message(msg_error,
                         "Failed to open storage data file: %s\n", buf);
        return False;
    }
    // The buffer is allocated with new[], so it has to be released
    // with delete[] on every path
    char* page = new char[page_size];
    for (fposi_t pos = 0; pos < size; pos += page_size) {
        // the last chunk can be shorter than a page
        status = backup_file.read(page, pos + page_size <= size
                                  ? page_size : size_t(size - pos));
        if (status != file::ok) {
            backup_file.get_error_text(status, buf, sizeof buf);
            console::message(msg_error,
                             "Failed to read page from backup file: %s\n",buf);
            page_file->close();
            delete[] page;
            return False;
        }
        status = page_file->write(page, page_size);
        if (status != file::ok) {
            page_file->get_error_text(status, buf, sizeof buf);
            console::message(msg_error,
                             "Failed to restore page from from backup file:"
                             " %s\n", buf);
            page_file->close();
            delete[] page;
            return False;
        }
    }
    page_file->close();
    delete[] page;
    return True;
}

//
// Return the value of page_bits (the shift count which is applied to
// a file position in hash_function()).
//
int page_pool_manager::get_page_bits()
{
    return page_bits;
}

//
// This manager performs no action on initialization.
//
void page_pool_manager::initialize()
{
}

//
// This manager performs no action on shutdown.
//
void page_pool_manager::shutdown()
{
}

//
// Construct the pool manager for the specified file.
//   page_file - file accessed through the pool (only its address is stored)
//   pool_size - number of pages in the pool
//   page_size - size of a page, must be a power of two; if zero, the disk
//               block size reported by os_file is used
// Page headers and a single buffer for the data of all pages are allocated
// here; the manager remains closed until open() is called.
//
page_pool_manager::page_pool_manager(file& page_file,
                                     size_t pool_size,
                                     int page_size)
: lru_sem(cs)
{
    opened = False;

    this->page_file = &page_file;
    this->pool_size = pool_size;

    if (page_size == 0) {
        page_size = os_file::get_disk_block_size();
    }
    assert(((page_size - 1) & page_size) == 0/* page_size is power of two*/);
    this->page_size = page_size;

    // Find the smallest shift count such that (1 << page_bits) >= page_size
    page_bits = 0;
    while ((1 << page_bits) < page_size) {
        page_bits += 1;
    }

    pages = new page_header[pool_size];
    char* data = (char*)os_file::allocate_disk_buffer(pool_size*page_size);
    assert(data != NULL);
    pages_data = data;

    // Assign each page its own slice of the common buffer
    for (size_t i = 0; i < pool_size; i++, data += page_size) {
        pages[i].page_data = data;
    }
}

//
// Release resources allocated by the constructor: the buffer with page
// data and the array of page headers. The LRU list is pruned before
// the headers are deleted.
//
page_pool_manager::~page_pool_manager()
{
    lru.prune();
    os_file::free_disk_buffer(pages_data);
    delete[] pages;
}

