susumu.yata
null+****@clear*****
Wed Feb 20 14:04:59 JST 2013
susumu.yata 2013-02-20 14:04:59 +0900 (Wed, 20 Feb 2013) New Revision: bea67c114f8bb543f97b8c6693ae2170c488e4ec https://github.com/groonga/grnxx/commit/bea67c114f8bb543f97b8c6693ae2170c488e4ec Log: Implement cross-defragment from basic::Trie to large::Trie. Modified files: lib/map/da/basic_trie.cpp lib/map/da/basic_trie.hpp lib/map/da/large_trie.cpp lib/map/da/large_trie.hpp test/test_map_da_basic_trie.cpp test/test_map_da_large_trie.cpp Modified: lib/map/da/basic_trie.cpp (+19 -14) =================================================================== --- lib/map/da/basic_trie.cpp 2013-02-20 11:51:10 +0900 (d7e9014) +++ lib/map/da/basic_trie.cpp 2013-02-20 14:04:59 +0900 (0a4d6ec) @@ -1,4 +1,5 @@ #include "basic_trie.hpp" +#include "large_trie.hpp" #include "../../lock.hpp" #include "../../logger.hpp" @@ -57,7 +58,7 @@ Trie::~Trie() { Trie *Trie::create(const TrieOptions &options, io::Pool pool) { std::unique_ptr<Trie> trie(new (std::nothrow) Trie); if (!trie) { - GRNXX_ERROR() << "new grnxx::map::Trie failed"; + GRNXX_ERROR() << "new grnxx::map::da::basic::Trie failed"; GRNXX_THROW(); } trie->create_trie(options, pool); @@ -67,7 +68,7 @@ Trie *Trie::create(const TrieOptions &options, io::Pool pool) { Trie *Trie::open(io::Pool pool, uint32_t block_id) { std::unique_ptr<Trie> trie(new (std::nothrow) Trie); if (!trie) { - GRNXX_ERROR() << "new grnxx::map::Trie failed"; + GRNXX_ERROR() << "new grnxx::map::da::basic::Trie failed"; GRNXX_THROW(); } trie->open_trie(pool, block_id); @@ -84,13 +85,17 @@ void Trie::unlink(io::Pool pool, uint32_t block_id) { pool.free_block(*trie->block_info_); } -Trie *Trie::defrag(const TrieOptions &options) { +da::Trie *Trie::defrag(const TrieOptions &options) { std::unique_ptr<Trie> trie(new (std::nothrow) Trie); if (!trie) { - GRNXX_ERROR() << "new grnxx::map::Trie failed"; + GRNXX_ERROR() << "new grnxx::map::da::basic::Trie failed"; GRNXX_THROW(); } - trie->defrag_trie(options, *this, pool_); + try { + trie->defrag_trie(options, *this, pool_); + } catch (const TrieException &) { + return large::Trie::defrag(options, *this, pool_); + } return trie.release(); } @@ -413,19 +418,19 @@ void Trie::defrag_trie(const TrieOptions &options, const Trie &trie, } if (nodes_size > MAX_NODES_SIZE) { - GRNXX_ERROR() << "too large request: nodes_size = " << nodes_size - << ", MAX_NODES_SIZE = " << MAX_NODES_SIZE; - GRNXX_THROW(); + GRNXX_NOTICE() << "too large request: nodes_size = " << nodes_size + << ", MAX_NODES_SIZE = " << MAX_NODES_SIZE; + throw TrieException(); } if (entries_size > MAX_ENTRIES_SIZE) { - GRNXX_ERROR() << "too large request: entries_size = " << entries_size - << ", MAX_ENTRIES_SIZE = " << MAX_ENTRIES_SIZE; - GRNXX_THROW(); + GRNXX_NOTICE() << "too large request: entries_size = " << entries_size + << ", MAX_ENTRIES_SIZE = " << MAX_ENTRIES_SIZE; + throw TrieException(); } if (keys_size > MAX_KEYS_SIZE) { - GRNXX_ERROR() << "too large request: keys_size = " << keys_size - << ", MAX_KEYS_SIZE = " << MAX_KEYS_SIZE; - GRNXX_THROW(); + GRNXX_NOTICE() << "too large request: keys_size = " << keys_size + << ", MAX_KEYS_SIZE = " << MAX_KEYS_SIZE; + throw TrieException(); } pool_ = pool; Modified: lib/map/da/basic_trie.hpp (+9 -1) =================================================================== --- lib/map/da/basic_trie.hpp 2013-02-20 11:51:10 +0900 (1b8e43d) +++ lib/map/da/basic_trie.hpp 2013-02-20 14:04:59 +0900 (7122baa) @@ -33,6 +33,12 @@ namespace grnxx { namespace map { namespace da { +namespace large { + +class Trie; + +} // namespace large + namespace basic { constexpr int32_t MIN_KEY_ID = 0; @@ -410,6 +416,8 @@ class Key { }; class Trie : public da::Trie { + friend class large::Trie; + public: ~Trie(); @@ -418,7 +426,7 @@ class Trie : public da::Trie { static void unlink(io::Pool pool, uint32_t block_id); - Trie *defrag(const TrieOptions &options); + da::Trie *defrag(const TrieOptions &options); uint32_t block_id() const; Modified: lib/map/da/large_trie.cpp (+149 -4) =================================================================== --- lib/map/da/large_trie.cpp 2013-02-20 11:51:10 +0900 (418a821) +++ lib/map/da/large_trie.cpp 2013-02-20 14:04:59 +0900 (2451cf5) @@ -61,7 +61,7 @@ Trie::~Trie() { Trie *Trie::create(const TrieOptions &options, io::Pool pool) { std::unique_ptr<Trie> trie(new (std::nothrow) Trie); if (!trie) { - GRNXX_ERROR() << "new grnxx::map::Trie failed"; + GRNXX_ERROR() << "new grnxx::map::da::large::Trie failed"; GRNXX_THROW(); } trie->create_trie(options, pool); @@ -71,7 +71,7 @@ Trie *Trie::create(const TrieOptions &options, io::Pool pool) { Trie *Trie::open(io::Pool pool, uint32_t block_id) { std::unique_ptr<Trie> trie(new (std::nothrow) Trie); if (!trie) { - GRNXX_ERROR() << "new grnxx::map::Trie failed"; + GRNXX_ERROR() << "new grnxx::map::da::large::Trie failed"; GRNXX_THROW(); } trie->open_trie(pool, block_id); @@ -89,10 +89,21 @@ void Trie::unlink(io::Pool pool, uint32_t block_id) { pool.free_block(*trie->block_info_); } -Trie *Trie::defrag(const TrieOptions &options) { +da::Trie *Trie::defrag(const TrieOptions &options, + const basic::Trie &basic_trie, io::Pool pool) { + std::unique_ptr<Trie> large_trie(new (std::nothrow) Trie); + if (!large_trie) { + GRNXX_ERROR() << "new grnxx::map::da::large::Trie failed"; + GRNXX_THROW(); + } + large_trie->defrag_trie(options, basic_trie, pool); + return large_trie.release(); +} + +da::Trie *Trie::defrag(const TrieOptions &options) { std::unique_ptr<Trie> trie(new (std::nothrow) Trie); if (!trie) { - GRNXX_ERROR() << "new grnxx::map::Trie failed"; + GRNXX_ERROR() << "new grnxx::map::da::large::Trie failed"; GRNXX_THROW(); } trie->defrag_trie(options, *this, pool_); @@ -403,6 +414,140 @@ void Trie::open_trie(io::Pool pool, uint32_t block_id) { pool_.get_block_address(header_->keys_block_id)); } +void Trie::defrag_trie(const TrieOptions &options, const basic::Trie &trie, + io::Pool pool) { + uint64_t nodes_size = options.nodes_size; + if (nodes_size == 0) { + nodes_size = trie.header_->num_chunks * CHUNK_SIZE; + nodes_size *= 2; + } + uint64_t entries_size = options.entries_size; + if (entries_size == 0) { + entries_size = trie.header_->max_key_id; + entries_size *= 2; + } + uint64_t keys_size = options.keys_size; + if (keys_size == 0) { + keys_size = trie.header_->next_key_pos; + keys_size *= 2; + } + + if (nodes_size > MAX_NODES_SIZE) { + GRNXX_ERROR() << "too large request: nodes_size = " << nodes_size + << ", MAX_NODES_SIZE = " << MAX_NODES_SIZE; + GRNXX_THROW(); + } + if (entries_size > MAX_ENTRIES_SIZE) { + GRNXX_ERROR() << "too large request: entries_size = " << entries_size + << ", MAX_ENTRIES_SIZE = " << MAX_ENTRIES_SIZE; + GRNXX_THROW(); + } + if (keys_size > MAX_KEYS_SIZE) { + GRNXX_ERROR() << "too large request: keys_size = " << keys_size + << ", MAX_KEYS_SIZE = " << MAX_KEYS_SIZE; + GRNXX_THROW(); + } + + pool_ = pool; + + block_info_ = pool_.create_block(sizeof(Header)); + + void * const block_address = pool_.get_block_address(*block_info_); + header_ = static_cast<Header *>(block_address); + *header_ = Header(); + + header_->nodes_size = static_cast<uint64_t>(nodes_size); + header_->nodes_size &= ~CHUNK_MASK; + if (header_->nodes_size == 0) { + header_->nodes_size = INITIAL_NODES_SIZE; + } + header_->chunks_size = header_->nodes_size / CHUNK_SIZE; + header_->entries_size = static_cast<uint64_t>(entries_size); + if (header_->entries_size == 0) { + header_->entries_size = INITIAL_ENTRIES_SIZE; + } + header_->keys_size = static_cast<uint64_t>(keys_size); + if (header_->keys_size == 0) { + header_->keys_size = INITIAL_KEYS_SIZE; + } + + create_arrays(); + + reserve_node(ROOT_NODE_ID); + nodes_[INVALID_OFFSET].set_is_origin(true); + + header_->total_key_length = trie.header_->total_key_length; + header_->num_keys = trie.header_->num_keys; + header_->max_key_id = trie.header_->max_key_id; + header_->next_key_id = trie.header_->next_key_id; + for (int64_t key_id = MIN_KEY_ID; key_id <= header_->max_key_id; ++key_id) { + // Valid entries will be filled in the next step. + if (!trie.entries_[key_id]) { + entries_[key_id] = Entry::invalid_entry(trie.entries_[key_id].next()); + } + } + + defrag_trie(trie, ROOT_NODE_ID, ROOT_NODE_ID); + + initialized_ = true; +} + +void Trie::defrag_trie(const basic::Trie &trie, uint64_t src, uint64_t dest) { + if (trie.nodes_[src].is_leaf()) { + const basic::Key &key = trie.get_key(trie.nodes_[src].key_pos()); + const uint64_t key_pos = header_->next_key_pos; + new (&keys_[key_pos]) Key(key.id(), key.slice()); + nodes_[dest].set_key(key_pos, key.size()); + entries_[key.id()] = Entry::valid_entry(key_pos, key.size()); + header_->next_key_pos += Key::estimate_size(key.size()); + return; + } + + const uint64_t src_offset = trie.nodes_[src].offset(); + uint64_t dest_offset; + { + uint16_t labels[MAX_LABEL + 1]; + uint16_t num_labels = 0; + + uint16_t label = trie.nodes_[src].child(); + while (label != INVALID_LABEL) { + GRNXX_DEBUG_THROW_IF(label > MAX_LABEL); + const uint64_t child = src_offset ^ label; + if (trie.nodes_[child].is_leaf() || + (trie.nodes_[child].child() != INVALID_LABEL)) { + labels[num_labels++] = label; + } + label = trie.nodes_[child].sibling(); + } + if (num_labels == 0) { + return; + } + + dest_offset = find_offset(labels, num_labels); + for (uint16_t i = 0; i < num_labels; ++i) { + const uint64_t child = dest_offset ^ labels[i]; + reserve_node(child); + nodes_[child].set_label(labels[i]); + if ((i + 1) < num_labels) { + nodes_[child].set_has_sibling(true); + siblings_[child] = labels[i + 1]; + } + } + + GRNXX_DEBUG_THROW_IF(nodes_[dest_offset].is_origin()); + nodes_[dest_offset].set_is_origin(true); + nodes_[dest].set_offset(dest_offset); + nodes_[dest].set_child(labels[0]); + } + + uint16_t label = nodes_[dest].child(); + while (label != INVALID_LABEL) { + defrag_trie(trie, src_offset ^ label, dest_offset ^ label); + label = nodes_[dest_offset ^ label].has_sibling() ? + siblings_[dest_offset ^ label] : INVALID_LABEL; + } +} + void Trie::defrag_trie(const TrieOptions &options, const Trie &trie, io::Pool pool) { uint64_t nodes_size = options.nodes_size; Modified: lib/map/da/large_trie.hpp (+9 -12) =================================================================== --- lib/map/da/large_trie.hpp 2013-02-20 11:51:10 +0900 (7e93129) +++ lib/map/da/large_trie.hpp 2013-02-20 14:04:59 +0900 (321a44d) @@ -18,17 +18,7 @@ #ifndef GRNXX_MAP_DA_LARGE_TRIE_HPP #define GRNXX_MAP_DA_LARGE_TRIE_HPP -#include "trie.hpp" - -#if 0 -# define GRNXX_DEBUG_THROW(msg)\ - ({ GRNXX_ERROR() << msg; GRNXX_THROW(); }) -# define GRNXX_DEBUG_THROW_IF(cond)\ - (void)((!(cond)) || (GRNXX_DEBUG_THROW(#cond), 0)) -#else -# define GRNXX_DEBUG_THROW(msg) -# define GRNXX_DEBUG_THROW_IF(cond) -#endif +#include "basic_trie.hpp" namespace grnxx { namespace map { @@ -428,7 +418,10 @@ class Trie : public da::Trie { static void unlink(io::Pool pool, uint32_t block_id); - Trie *defrag(const TrieOptions &options); + static da::Trie *defrag(const TrieOptions &options, + const basic::Trie &basic_trie, io::Pool pool); + + da::Trie *defrag(const TrieOptions &options); uint32_t block_id() const; @@ -463,6 +456,10 @@ class Trie : public da::Trie { void create_trie(const TrieOptions &options, io::Pool pool); void open_trie(io::Pool pool, uint32_t block_id); + void defrag_trie(const TrieOptions &options, const basic::Trie &trie, + io::Pool pool); + void defrag_trie(const basic::Trie &trie, uint64_t src, uint64_t dest); + void defrag_trie(const TrieOptions &options, const Trie &trie, io::Pool pool); void defrag_trie(const Trie &trie, uint64_t src, uint64_t dest); Modified: test/test_map_da_basic_trie.cpp (+7 -7) =================================================================== --- test/test_map_da_basic_trie.cpp 2013-02-20 11:51:10 +0900 (c069fc9) +++ test/test_map_da_basic_trie.cpp 2013-02-20 14:04:59 +0900 (66155b1) @@ -30,7 +30,7 @@ void test_basics() { pool.open(grnxx::io::POOL_TEMPORARY); grnxx::map::da::TrieOptions options; - std::unique_ptr<grnxx::map::da::basic::Trie> trie( + std::unique_ptr<grnxx::map::da::Trie> trie( grnxx::map::da::basic::Trie::create(options, pool)); std::vector<grnxx::Slice> keys; @@ -94,7 +94,7 @@ void test_lcp_search() { pool.open(grnxx::io::POOL_TEMPORARY); grnxx::map::da::TrieOptions options; - std::unique_ptr<grnxx::map::da::basic::Trie> trie( + std::unique_ptr<grnxx::map::da::Trie> trie( grnxx::map::da::basic::Trie::create(options, pool)); assert(trie->insert("AB")); @@ -165,7 +165,7 @@ void test_insert() { pool.open(grnxx::io::POOL_TEMPORARY); grnxx::map::da::TrieOptions options; - std::unique_ptr<grnxx::map::da::basic::Trie> trie( + std::unique_ptr<grnxx::map::da::Trie> trie( grnxx::map::da::basic::Trie::create(options, pool)); std::unordered_set<std::string> both_keys; @@ -206,7 +206,7 @@ void test_remove() { pool.open(grnxx::io::POOL_TEMPORARY); grnxx::map::da::TrieOptions options; - std::unique_ptr<grnxx::map::da::basic::Trie> trie( + std::unique_ptr<grnxx::map::da::Trie> trie( grnxx::map::da::basic::Trie::create(options, pool)); std::unordered_set<std::string> both_keys; @@ -262,7 +262,7 @@ void test_update() { pool.open(grnxx::io::POOL_TEMPORARY); grnxx::map::da::TrieOptions options; - std::unique_ptr<grnxx::map::da::basic::Trie> trie( + std::unique_ptr<grnxx::map::da::Trie> trie( grnxx::map::da::basic::Trie::create(options, pool)); std::unordered_set<std::string> both_keys; @@ -309,7 +309,7 @@ void test_defrag() { pool.open(grnxx::io::POOL_TEMPORARY); grnxx::map::da::TrieOptions options; - std::unique_ptr<grnxx::map::da::basic::Trie> trie( + std::unique_ptr<grnxx::map::da::Trie> trie( grnxx::map::da::basic::Trie::create(options, pool)); std::unordered_set<std::string> both_keys; @@ -327,7 +327,7 @@ void test_defrag() { options.nodes_size = grnxx::map::da::basic::INITIAL_NODES_SIZE; options.entries_size = grnxx::map::da::basic::INITIAL_ENTRIES_SIZE; options.keys_size = grnxx::map::da::basic::INITIAL_KEYS_SIZE; - std::unique_ptr<grnxx::map::da::basic::Trie> new_trie( + std::unique_ptr<grnxx::map::da::Trie> new_trie( trie->defrag(options)); for (std::size_t i = 0; i < NUM_KEYS; ++i) { Modified: test/test_map_da_large_trie.cpp (+55 -7) =================================================================== --- test/test_map_da_large_trie.cpp 2013-02-20 11:51:10 +0900 (dfbdf1c) +++ test/test_map_da_large_trie.cpp 2013-02-20 14:04:59 +0900 (586d177) @@ -30,7 +30,7 @@ void test_basics() { pool.open(grnxx::io::POOL_TEMPORARY); grnxx::map::da::TrieOptions options; - std::unique_ptr<grnxx::map::da::large::Trie> trie( + std::unique_ptr<grnxx::map::da::Trie> trie( grnxx::map::da::large::Trie::create(options, pool)); std::vector<grnxx::Slice> keys; @@ -94,7 +94,7 @@ void test_lcp_search() { pool.open(grnxx::io::POOL_TEMPORARY); grnxx::map::da::TrieOptions options; - std::unique_ptr<grnxx::map::da::large::Trie> trie( + std::unique_ptr<grnxx::map::da::Trie> trie( grnxx::map::da::large::Trie::create(options, pool)); assert(trie->insert("AB")); @@ -165,7 +165,7 @@ void test_insert() { pool.open(grnxx::io::POOL_TEMPORARY); grnxx::map::da::TrieOptions options; - std::unique_ptr<grnxx::map::da::large::Trie> trie( + std::unique_ptr<grnxx::map::da::Trie> trie( grnxx::map::da::large::Trie::create(options, pool)); std::unordered_set<std::string> both_keys; @@ -206,7 +206,7 @@ void test_remove() { pool.open(grnxx::io::POOL_TEMPORARY); grnxx::map::da::TrieOptions options; - std::unique_ptr<grnxx::map::da::large::Trie> trie( + std::unique_ptr<grnxx::map::da::Trie> trie( grnxx::map::da::large::Trie::create(options, pool)); std::unordered_set<std::string> both_keys; @@ -262,7 +262,7 @@ void test_update() { pool.open(grnxx::io::POOL_TEMPORARY); grnxx::map::da::TrieOptions options; - std::unique_ptr<grnxx::map::da::large::Trie> trie( + std::unique_ptr<grnxx::map::da::Trie> trie( grnxx::map::da::large::Trie::create(options, pool)); std::unordered_set<std::string> both_keys; @@ -309,7 +309,7 @@ void test_defrag() { pool.open(grnxx::io::POOL_TEMPORARY); grnxx::map::da::TrieOptions options; - std::unique_ptr<grnxx::map::da::large::Trie> trie( + std::unique_ptr<grnxx::map::da::Trie> trie( grnxx::map::da::large::Trie::create(options, pool)); std::unordered_set<std::string> both_keys; @@ -327,7 +327,7 @@ void test_defrag() { options.nodes_size = grnxx::map::da::large::INITIAL_NODES_SIZE; options.entries_size = grnxx::map::da::large::INITIAL_ENTRIES_SIZE; options.keys_size = grnxx::map::da::large::INITIAL_KEYS_SIZE; - std::unique_ptr<grnxx::map::da::large::Trie> new_trie( + std::unique_ptr<grnxx::map::da::Trie> new_trie( trie->defrag(options)); for (std::size_t i = 0; i < NUM_KEYS; ++i) { @@ -345,6 +345,53 @@ void test_defrag() { } } +void test_cross_defrag() { + constexpr std::size_t NUM_KEYS = 1 << 12; + constexpr std::size_t MIN_SIZE = 1; + constexpr std::size_t MAX_SIZE = 10; + + std::mt19937 random; + + grnxx::io::Pool pool; + pool.open(grnxx::io::POOL_TEMPORARY); + + grnxx::map::da::TrieOptions options; + std::unique_ptr<grnxx::map::da::basic::Trie> trie( + grnxx::map::da::basic::Trie::create(options, pool)); + + std::unordered_set<std::string> both_keys; + std::vector<grnxx::Slice> true_keys; + std::vector<grnxx::Slice> false_keys; + create_keys(NUM_KEYS, MIN_SIZE, MAX_SIZE, + &both_keys, &true_keys, &false_keys); + + for (std::size_t i = 0; i < NUM_KEYS; ++i) { + std::int64_t key_id; + assert(trie->insert(true_keys[i], &key_id)); + assert(key_id == static_cast<std::int64_t>(i)); + } + + options.nodes_size = grnxx::map::da::large::INITIAL_NODES_SIZE; + options.entries_size = grnxx::map::da::large::INITIAL_ENTRIES_SIZE; + options.keys_size = grnxx::map::da::large::INITIAL_KEYS_SIZE; + std::unique_ptr<grnxx::map::da::Trie> new_trie( + grnxx::map::da::large::Trie::defrag(options, *trie, pool)); + + for (std::size_t i = 0; i < NUM_KEYS; ++i) { + std::int64_t key_id; + assert(new_trie->search(true_keys[i], &key_id)); + assert(key_id == static_cast<std::int64_t>(i)); + + assert(!new_trie->search(false_keys[i], &key_id)); + } + + for (std::size_t i = 0; i < NUM_KEYS; ++i) { + std::int64_t key_id; + assert(new_trie->insert(false_keys[i], &key_id)); + assert(key_id == static_cast<std::int64_t>(NUM_KEYS + i)); + } +} + int main() { grnxx::Logger::set_flags(grnxx::LOGGER_WITH_ALL | grnxx::LOGGER_ENABLE_COUT); @@ -358,6 +405,7 @@ int main() { test_update(); test_defrag(); + test_cross_defrag(); return 0; } -------------- next part -------------- HTML����������������������������... Download