null+****@clear*****
null+****@clear*****
2011年 6月 27日 (月) 09:26:52 JST
Susumu Yata 2011-06-27 00:26:52 +0000 (Mon, 27 Jun 2011) New Revision: 69419b68ac100dd8629087eea943b8dc809cad6f Log: added grn::dat::KeyCursor. Added files: lib/dat/key-cursor.cpp lib/dat/key-cursor.hpp Modified files: lib/dat/Makefile.am lib/dat/common-prefix-search-cursor.cpp lib/dat/id-cursor.cpp lib/dat/predictive-search-cursor.cpp lib/dat/predictive-search-cursor.hpp Modified: lib/dat/Makefile.am (+2 -0) =================================================================== --- lib/dat/Makefile.am 2011-06-24 15:18:34 +0000 (f85cbb1) +++ lib/dat/Makefile.am 2011-06-27 00:26:52 +0000 (176feb0) @@ -5,6 +5,7 @@ DEFS += -D_REENTRANT $(GRN_DEFS) libgrndat_la_SOURCES = \ common-prefix-search-cursor.cpp \ id-cursor.cpp \ + key-cursor.cpp \ predictive-search-cursor.cpp \ trie.cpp @@ -17,6 +18,7 @@ noinst_HEADERS = \ dat.hpp \ header.hpp \ id-cursor.hpp \ + key-cursor.hpp \ key-info.hpp \ key.hpp \ node.hpp \ Modified: lib/dat/common-prefix-search-cursor.cpp (+14 -15) =================================================================== --- lib/dat/common-prefix-search-cursor.cpp 2011-06-24 15:18:34 +0000 (6a426f0) +++ lib/dat/common-prefix-search-cursor.cpp 2011-06-27 00:26:52 +0000 (85bf069) @@ -96,21 +96,6 @@ void CommonPrefixSearchCursor::init(const UInt8 *ptr, UInt32 node_id = ROOT_NODE_ID; UInt32 skip_count = 0; for (UInt32 i = 0; i < max_length; ++i) { - if (trie_->ith_node(node_id).child() == TERMINAL_LABEL) { - if (i >= min_length) { - if (skip_count < offset_) { - ++skip_count; - } else { - const UInt32 terminal = - trie_->ith_node(node_id).offset() ^ TERMINAL_LABEL; - buf_.push_back(trie_->ith_node(terminal).key_id()); - if (buf_.size() >= limit_) { - return; - } - } - } - } - const Base base = trie_->ith_node(node_id).base(); if (base.is_terminal()) { Key key; @@ -126,6 +111,20 @@ void CommonPrefixSearchCursor::init(const UInt8 *ptr, return; } + if (trie_->ith_node(node_id).child() == TERMINAL_LABEL) { + if (i >= min_length) { + if (skip_count < offset_) { + ++skip_count; + } else { + const UInt32 terminal = base.offset() ^ TERMINAL_LABEL; + buf_.push_back(trie_->ith_node(terminal).key_id()); + if (buf_.size() >= limit_) { + return; + } + } + } + } + node_id = base.offset() ^ ptr[i]; if (trie_->ith_node(node_id).label() != ptr[i]) { return; Modified: lib/dat/id-cursor.cpp (+2 -1) =================================================================== --- lib/dat/id-cursor.cpp 2011-06-24 15:18:34 +0000 (809ac42) +++ lib/dat/id-cursor.cpp 2011-06-27 00:26:52 +0000 (e0b7591) @@ -98,7 +98,8 @@ UInt32 IdCursor::fix_flags(UInt32 flags) const { return flags; } -IdCursor::IdCursor(const Trie &trie, UInt32 offset, UInt32 limit, UInt32 flags) +IdCursor::IdCursor(const Trie &trie, + UInt32 offset, UInt32 limit, UInt32 flags) : trie_(&trie), offset_(offset), limit_(limit), Added: lib/dat/key-cursor.cpp (+357 -0) 100644 =================================================================== --- /dev/null +++ lib/dat/key-cursor.cpp 2011-06-27 00:26:52 +0000 (90c8d72) @@ -0,0 +1,357 @@ +#include "key-cursor.hpp" + +#include <algorithm> +#include <cstring> + +namespace grn { +namespace dat { + +KeyCursor::KeyCursor() + : trie_(NULL), + offset_(0), + limit_(UINT32_MAX), + flags_(PREDICTIVE_CURSOR), + buf_(), + count_(0), + max_count_(0), + end_(false), + end_ptr_(NULL), + end_length_(0) {} + +KeyCursor::~KeyCursor() { + close(); +} + + +void KeyCursor::open(const Trie &trie, + const void *min_ptr, UInt32 min_length, + const void *max_ptr, UInt32 max_length, + UInt32 offset, + UInt32 limit, + UInt32 flags) { + GRN_DAT_PARAM_ERROR_IF((min_ptr == NULL) && (min_length != 0)); + GRN_DAT_PARAM_ERROR_IF((max_ptr == NULL) && (max_length != 0)); + + flags = fix_flags(flags); + KeyCursor new_cursor(trie, offset, limit, flags); + new_cursor.init(static_cast<const UInt8 *>(min_ptr), min_length, + static_cast<const UInt8 *>(max_ptr), max_length); + new_cursor.swap(this); +} + +void KeyCursor::close() { + trie_ = NULL; + offset_ = 0; + limit_ = UINT32_MAX; + flags_ = PREDICTIVE_CURSOR; + buf_.clear(); + count_ = 0; + max_count_ = 0; + end_ = false; + if ((end_ptr_ != NULL) && (end_length_ != 0)) { + delete [] end_ptr_; + } + end_ptr_ = NULL; + end_length_ = 0; +} + +bool KeyCursor::next(Key *key) { + if (end_ || (count_ >= max_count_)) { + return false; + } + + if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) { + return ascending_next(key); + } else { + return descending_next(key); + } +} + +UInt32 KeyCursor::fix_flags(UInt32 flags) const { + const UInt32 cursor_type = flags & CURSOR_TYPE_MASK; + GRN_DAT_PARAM_ERROR_IF((cursor_type != 0) && + (cursor_type != PREDICTIVE_CURSOR)); + flags |= PREDICTIVE_CURSOR; + + const UInt32 cursor_order = flags & CURSOR_ORDER_MASK; + GRN_DAT_PARAM_ERROR_IF((cursor_order != 0) && + (cursor_order != ASCENDING_CURSOR) && + (cursor_order != DESCENDING_CURSOR)); + if (cursor_order == 0) { + flags |= ASCENDING_CURSOR; + } + + const UInt32 cursor_options = flags & CURSOR_OPTIONS_MASK; + GRN_DAT_PARAM_ERROR_IF( + cursor_options & ~(EXCEPT_LOWER_BOUND | EXCEPT_UPPER_BOUND)); + + return flags; +} + +KeyCursor::KeyCursor(const Trie &trie, + UInt32 offset, UInt32 limit, UInt32 flags) + : trie_(&trie), + offset_(offset), + limit_(limit), + flags_(flags), + buf_(), + count_(0), + max_count_(0), + end_(false), + end_ptr_(NULL), + end_length_(0) {} + +void KeyCursor::init(const UInt8 *min_ptr, UInt32 min_length, + const UInt8 *max_ptr, UInt32 max_length) { + if (offset_ > (UINT32_MAX - limit_)) { + max_count_ = UINT32_MAX; + } else { + max_count_ = offset_ + limit_; + } + + if (limit_ == 0) { + return; + } + + if ((flags_ & ASCENDING_CURSOR) == ASCENDING_CURSOR) { + ascending_init(min_ptr, min_length, max_ptr, max_length); + } else { + descending_init(min_ptr, min_length, max_ptr, max_length); + } +} + +void KeyCursor::ascending_init(const UInt8 *min_ptr, UInt32 min_length, + const UInt8 *max_ptr, UInt32 max_length) { + if (max_ptr != NULL) { + if (max_length != 0) { + end_ptr_ = new UInt8[max_length]; + std::memcpy(end_ptr_, max_ptr, max_length); + } else { + end_ptr_ = reinterpret_cast<UInt8 *>(const_cast<char *>("")); + } + } + end_length_ = max_length; + + UInt32 node_id = ROOT_NODE_ID; + Node node = trie_->ith_node(node_id); + for (UInt32 i = 0; i < min_length; ++i) { + if (node.is_terminal()) { + Key key; + trie_->ith_key(node.key_id(), &key); + const int result = compare(key, min_ptr, min_length, i); + if ((result > 0) || ((result == 0) && + ((flags_ & EXCEPT_LOWER_BOUND) != EXCEPT_LOWER_BOUND))) { + buf_.push_back(node_id); + } + return; + } + + node_id = node.offset() ^ min_ptr[i]; + if (trie_->ith_node(node_id).label() != min_ptr[i]) { + UInt16 label = node.child(); + if (label == TERMINAL_LABEL) { + label = trie_->ith_node(node.offset() ^ label).sibling(); + } + while (label != INVALID_LABEL) { + if (label > min_ptr[i]) { + buf_.push_back(node.offset() ^ label); + break; + } + label = trie_->ith_node(node.offset() ^ label).sibling(); + } + return; + } + + node = trie_->ith_node(node_id); + if (node.sibling() != INVALID_LABEL) { + buf_.push_back(node_id ^ min_ptr[i] ^ node.sibling()); + } + } + + const Base base = trie_->ith_node(node_id).base(); + if (base.is_terminal()) { + Key key; + trie_->ith_key(base.key_id(), &key); + if ((key.length() != min_length) || + ((flags_ & EXCEPT_LOWER_BOUND) != EXCEPT_LOWER_BOUND)) { + buf_.push_back(node_id); + } + return; + } + + UInt16 label = trie_->ith_node(node_id).child(); + if ((label == TERMINAL_LABEL) && + ((flags_ & EXCEPT_LOWER_BOUND) == EXCEPT_LOWER_BOUND)) { + label = trie_->ith_node(base.offset() ^ label).sibling(); + } + if (label != INVALID_LABEL) { + buf_.push_back(base.offset() ^ label); + } +} + +void KeyCursor::descending_init(const UInt8 *min_ptr, UInt32 min_length, + const UInt8 *max_ptr, UInt32 max_length) { + if (min_ptr != NULL) { + if (min_length != 0) { + end_ptr_ = new UInt8[min_length]; + std::memcpy(end_ptr_, min_ptr, min_length); + } else { + end_ptr_ = reinterpret_cast<UInt8 *>(const_cast<char *>("")); + } + } + end_length_ = min_length; + + UInt32 node_id = ROOT_NODE_ID; + for (UInt32 i = 0; i < max_length; ++i) { + const Base base = trie_->ith_node(node_id).base(); + if (base.is_terminal()) { + Key key; + trie_->ith_key(base.key_id(), &key); + const int result = compare(key, max_ptr, max_length, i); + if ((result < 0) || ((result == 0) && + ((flags_ & EXCEPT_UPPER_BOUND) != EXCEPT_UPPER_BOUND))) { + buf_.push_back(node_id | POST_ORDER_FLAG); + } + return; + } + + UInt32 label = trie_->ith_node(node_id).child(); + if (label == TERMINAL_LABEL) { + node_id = base.offset() ^ label; + buf_.push_back(node_id | POST_ORDER_FLAG); + label = trie_->ith_node(node_id).sibling(); + } + while (label != INVALID_LABEL) { + node_id = base.offset() ^ label; + if (label < max_ptr[i]) { + buf_.push_back(node_id); + } else if (label > max_ptr[i]) { + return; + } else { + break; + } + label = trie_->ith_node(node_id).sibling(); + } + if (label == INVALID_LABEL) { + return; + } + } + + const Base base = trie_->ith_node(node_id).base(); + if (base.is_terminal()) { + Key key; + trie_->ith_key(base.key_id(), &key); + if ((key.length() == max_length) && + ((flags_ & EXCEPT_UPPER_BOUND) != EXCEPT_UPPER_BOUND)) { + buf_.push_back(node_id | POST_ORDER_FLAG); + } + return; + } + + UInt16 label = trie_->ith_node(node_id).child(); + if ((label == TERMINAL_LABEL) && + ((flags_ & EXCEPT_UPPER_BOUND) != EXCEPT_UPPER_BOUND)) { + buf_.push_back((base.offset() ^ label) | POST_ORDER_FLAG); + } +} + +void KeyCursor::swap(KeyCursor *cursor) { + std::swap(trie_, cursor->trie_); + std::swap(offset_, cursor->offset_); + std::swap(limit_, cursor->limit_); + std::swap(flags_, cursor->flags_); + buf_.swap(cursor->buf_); + std::swap(count_, cursor->count_); + std::swap(max_count_, cursor->max_count_); + std::swap(end_, cursor->end_); + std::swap(end_ptr_, cursor->end_ptr_); + std::swap(end_length_, cursor->end_length_); +} + +bool KeyCursor::ascending_next(Key *key) { + while (!buf_.empty()) { + const UInt32 node_id = buf_.back(); + buf_.pop_back(); + + const Node node = trie_->ith_node(node_id); + if (node.sibling() != INVALID_LABEL) { + buf_.push_back(node_id ^ node.label() ^ node.sibling()); + } + + if (node.child() != INVALID_LABEL) { + buf_.push_back(node.offset() ^ node.child()); + } + + if (node.is_terminal()) { + Key temp_key; + trie_->ith_key(node.key_id(), &temp_key); + if (end_ptr_ != NULL) { + const int result = compare(temp_key, end_ptr_, end_length_, 0); + if ((result > 0) || ((result == 0) && + ((flags_ & EXCEPT_UPPER_BOUND) == EXCEPT_UPPER_BOUND))) { + end_ = true; + return false; + } + } + if (count_++ >= offset_) { + *key = temp_key; + return true; + } + } + } + return false; +} + +bool KeyCursor::descending_next(Key *key) { + while (!buf_.empty()) { + const bool post_order = (buf_.back() & POST_ORDER_FLAG) == POST_ORDER_FLAG; + const UInt32 node_id = buf_.back() & ~POST_ORDER_FLAG; + + const Base base = trie_->ith_node(node_id).base(); + if (post_order) { + buf_.pop_back(); + if (base.is_terminal()) { + Key temp_key; + trie_->ith_key(base.key_id(), &temp_key); + if (end_ptr_ != NULL) { + const int result = compare(temp_key, end_ptr_, end_length_, 0); + if ((result < 0) || ((result == 0) && + ((flags_ & EXCEPT_LOWER_BOUND) == EXCEPT_LOWER_BOUND))) { + end_ = true; + return false; + } + } + if (count_++ >= offset_) { + trie_->ith_key(base.key_id(), key); + return true; + } + } + } else { + buf_.back() |= POST_ORDER_FLAG; + UInt16 label = trie_->ith_node(node_id).child(); + while (label != INVALID_LABEL) { + buf_.push_back(base.offset() ^ label); + label = trie_->ith_node(base.offset() ^ label).sibling(); + } + } + } + return false; +} + +int KeyCursor::compare(const Key &key, + const UInt8 *ptr, UInt32 length, + UInt32 offset) const { + const UInt32 min_length = (key.length() < length) ? key.length() : length; + for (UInt32 i = offset; i < min_length; ++i) { + if (static_cast<UInt8>(key[i]) != ptr[i]) { + return static_cast<UInt8>(key[i]) - ptr[i]; + } + } + if (key.length() < length) { + return -1; + } + return (key.length() > length) ? 1 : 0; +} + +} // namespace grn +} // namespace dat Added: lib/dat/key-cursor.hpp (+79 -0) 100644 =================================================================== --- /dev/null +++ lib/dat/key-cursor.hpp 2011-06-27 00:26:52 +0000 (6315d05) @@ -0,0 +1,79 @@ +#ifndef GRN_DAT_KEY_CURSOR_HPP_ +#define GRN_DAT_KEY_CURSOR_HPP_ + +#include "cursor.hpp" +#include "trie.hpp" + +#include <vector> + +namespace grn { +namespace dat { + +class KeyCursor : public Cursor { + public: + KeyCursor(); + ~KeyCursor(); + + void open(const Trie &trie, + const void *min_ptr, UInt32 min_length, + const void *max_ptr, UInt32 max_length, + UInt32 offset = 0, + UInt32 limit = UINT32_MAX, + UInt32 flags = 0); + + void close(); + + bool next(Key *key); + + UInt32 offset() const { + return offset_; + } + UInt32 limit() const { + return limit_; + } + UInt32 flags() const { + return flags_; + } + + private: + const Trie *trie_; + UInt32 offset_; + UInt32 limit_; + UInt32 flags_; + + std::vector<UInt32> buf_; + UInt32 count_; + UInt32 max_count_; + bool end_; + UInt8 *end_ptr_; + UInt32 end_length_; + + UInt32 fix_flags(UInt32 flags) const; + KeyCursor(const Trie &trie, + UInt32 offset, UInt32 limit, UInt32 flags); + void init(const UInt8 *min_ptr, UInt32 min_length, + const UInt8 *max_ptr, UInt32 max_length); + void ascending_init(const UInt8 *min_ptr, UInt32 min_length, + const UInt8 *max_ptr, UInt32 max_length); + void descending_init(const UInt8 *min_ptr, UInt32 min_length, + const UInt8 *max_ptr, UInt32 max_length); + void swap(KeyCursor *cursor); + + bool ascending_next(Key *key); + bool descending_next(Key *key); + + int compare(const Key &key, + const UInt8 *ptr, UInt32 length, + UInt32 offset) const; + + static const UInt32 POST_ORDER_FLAG = 0x80000000U; + + // Disallows copy and assignment. + KeyCursor(const KeyCursor &); + KeyCursor &operator=(const KeyCursor &); +}; + +} // namespace grn +} // namespace dat + +#endif // GRN_DAT_KEY_CURSOR_HPP_ Modified: lib/dat/predictive-search-cursor.cpp (+9 -17) =================================================================== --- lib/dat/predictive-search-cursor.cpp 2011-06-24 15:18:34 +0000 (a00db28) +++ lib/dat/predictive-search-cursor.cpp 2011-06-27 00:26:52 +0000 (6558128) @@ -139,18 +139,23 @@ bool PredictiveSearchCursor::ascending_next(Key *key) { while (!buf_.empty()) { const UInt32 node_id = buf_.back(); buf_.pop_back(); - push_next_nodes(node_id); - const Base base = trie_->ith_node(node_id).base(); - if (base.is_terminal()) { + const Node node = trie_->ith_node(node_id); + if (node.sibling() != INVALID_LABEL) { + buf_.push_back(node_id ^ node.label() ^ node.sibling()); + } + + if (node.is_terminal()) { Key temp_key; - trie_->ith_key(base.key_id(), &temp_key); + trie_->ith_key(node.key_id(), &temp_key); if (temp_key.length() >= min_length_) { if (cur_++ >= offset_) { *key = temp_key; return true; } } + } else if (node.child() != INVALID_LABEL) { + buf_.push_back(node.offset() ^ node.child()); } } return false; @@ -186,18 +191,5 @@ bool PredictiveSearchCursor::descending_next(Key *key) { return false; } - -void PredictiveSearchCursor::push_next_nodes(UInt32 node_id) { - if (trie_->ith_node(node_id).sibling() != INVALID_LABEL) { - buf_.push_back(node_id ^ trie_->ith_node(node_id).label() - ^ trie_->ith_node(node_id).sibling()); - } - - if (trie_->ith_node(node_id).child() != INVALID_LABEL) { - buf_.push_back(trie_->ith_node(node_id).offset() - ^ trie_->ith_node(node_id).child()); - } -} - } // namespace grn } // namespace dat Modified: lib/dat/predictive-search-cursor.hpp (+0 -1) =================================================================== --- lib/dat/predictive-search-cursor.hpp 2011-06-24 15:18:34 +0000 (0522560) +++ lib/dat/predictive-search-cursor.hpp 2011-06-27 00:26:52 +0000 (0f30357) @@ -54,7 +54,6 @@ class PredictiveSearchCursor : public Cursor { bool ascending_next(Key *key); bool descending_next(Key *key); - void push_next_nodes(UInt32 node_id); static const UInt32 POST_ORDER_FLAG = 0x80000000U;