susumu.yata
null+****@clear*****
Fri Nov 28 15:08:25 JST 2014
susumu.yata 2014-11-28 15:08:25 +0900 (Fri, 28 Nov 2014) New Revision: 61f75f5fd13c709d50ac5cc8c58a1e11efae7edb https://github.com/groonga/grnxx/commit/61f75f5fd13c709d50ac5cc8c58a1e11efae7edb Message: Implement Index for Int, Float, and Text. (#121) Modified files: include/grnxx/index.hpp lib/grnxx/impl/column/base.cpp lib/grnxx/impl/index.cpp lib/grnxx/impl/index.hpp Modified: include/grnxx/index.hpp (+4 -4) =================================================================== --- include/grnxx/index.hpp 2014-11-27 22:58:44 +0900 (b18df4a) +++ include/grnxx/index.hpp 2014-11-28 15:08:25 +0900 (c6bbf81) @@ -99,16 +99,16 @@ class Index { // Remove an entry. // // On failure, throws an exception. - virtual bool remove(Int row_id, const Datum &value) = 0; + virtual void remove(Int row_id, const Datum &value) = 0; // Return whether "value" is registered or not. - virtual bool contains(const Datum &value) const; + virtual bool contains(const Datum &value) const = 0; // Find "datum". // // If found, returns the row ID. // If not found, returns N/A. - virtual Int find_one(const Datum &value) const; + virtual Int find_one(const Datum &value) const = 0; // Create a cursor to get records. // @@ -116,7 +116,7 @@ class Index { // On failure, throws an exception. virtual std::unique_ptr<Cursor> find( const Datum &value, - const CursorOptions &options = CursorOptions()) const; + const CursorOptions &options = CursorOptions()) const = 0; protected: virtual ~Index() = default; Modified: lib/grnxx/impl/column/base.cpp (+26 -32) =================================================================== --- lib/grnxx/impl/column/base.cpp 2014-11-27 22:58:44 +0900 (667c17d) +++ lib/grnxx/impl/column/base.cpp 2014-11-28 15:08:25 +0900 (0e8ff66) @@ -33,44 +33,38 @@ Index *ColumnBase::create_index( const String &name, IndexType type, const IndexOptions &options) { - throw "Not supported yet"; // TODO - -// if (find_index(name)) { -// throw "Index already exists"; // TODO -// } -// indexes_.reserve(indexes_.size() + 1); -// std::unique_ptr<Index> new_index = Index::create(this, name, type, options); -// indexes_.push_back(std::move(new_index)); -// return indexes_.back().get(); + if (find_index(name)) { + throw "Index already exists"; // TODO + } + indexes_.reserve(indexes_.size() + 1); + std::unique_ptr<Index> new_index(Index::create(this, name, type, options)); + indexes_.push_back(std::move(new_index)); + return indexes_.back().get(); } void ColumnBase::remove_index(const String &name) { - throw "Not supported yet"; // TODO - -// size_t index_id; -// if (!find_index_with_id(name, &index_id)) { -// throw "Index not found"; // TODO -// } -// if (!indexes_[index_id]->is_removable()) { -// throw "Index not removable"; // TODO -// } -// indexes_.erase(index_id); + size_t index_id; + if (!find_index_with_id(name, &index_id)) { + throw "Index not found"; // TODO + } + if (!indexes_[index_id]->is_removable()) { + throw "Index not removable"; // TODO + } + indexes_.erase(index_id); } void ColumnBase::rename_index(const String &name, const String &new_name) { - throw "Not supported yet"; // TODO - -// size_t index_id; -// if (!find_index_with_id(name, &index_id)) { -// throw "Index not found"; // TODO -// } -// if (name == new_name) { -// return; -// } -// if (find_index(new_name)) { -// throw "Index already exists"; // TODO -// } -// indexes_[index_id]->rename(new_name); + size_t index_id; + if (!find_index_with_id(name, &index_id)) { + throw "Index not found"; // TODO + } + if (name == new_name) { + return; + } + if (find_index(new_name)) { + throw "Index already exists"; // TODO + } + indexes_[index_id]->rename(new_name); } void ColumnBase::reorder_index(const String &name, const String &prev_name) { Modified: lib/grnxx/impl/index.cpp (+487 -0) =================================================================== --- lib/grnxx/impl/index.cpp 2014-11-27 22:58:44 +0900 (a9c5b36) +++ lib/grnxx/impl/index.cpp 2014-11-28 15:08:25 +0900 (791df7e) @@ -1,7 +1,494 @@ #include "grnxx/impl/index.hpp" +#include <map> +#include <set> + +#include "grnxx/impl/column.hpp" +#include "grnxx/impl/cursor.hpp" + namespace grnxx { namespace impl { +namespace index { + +// TODO: These are test implementations. + +// -- IteratorCursor -- + +template <typename T> +class IteratorCursor : public Cursor { + public: + using Iterator = T; + + IteratorCursor(Iterator begin, Iterator end, size_t offset, size_t limit) + : Cursor(), + it_(begin), + end_(end), + offset_(offset), + limit_(limit) {} + ~IteratorCursor() = default; + + size_t read(ArrayRef<Record> records); + + private: + Iterator it_; + Iterator end_; + size_t offset_; + size_t limit_; +}; + +template <typename T> +size_t IteratorCursor<T>::read(ArrayRef<Record> records) { + size_t max_count = records.size(); + if (max_count > limit_) { + max_count = limit_; + } + if (max_count <= 0) { + return 0; + } + size_t count = 0; + while ((offset_ > 0) && (it_ != end_)) { + ++it_; + --offset_; + } + while ((count < max_count) && (it_ != end_)) { + records[count] = Record(*it_, Float(0.0)); + ++count; + ++it_; + } + limit_ -= count; + return count; +} + +// Helper function to create an iterator cursor. +template <typename T> +std::unique_ptr<Cursor> create_iterator_cursor(T begin, + T end, + size_t offset, + size_t limit) { + return std::unique_ptr<Cursor>( + new IteratorCursor<T>(begin, end, offset, limit)); +} + +// TODO: It's not clear that a reverse cursor should return row IDs in +// reverse order or not. +// +// Helper function to create a reverse iterator cursor. +template <typename T> +std::unique_ptr<Cursor> create_reverse_iterator_cursor(T begin, + T end, + size_t offset, + size_t limit) { + using ReverseIterator = std::reverse_iterator<T>; + return std::unique_ptr<Cursor>( + new IteratorCursor<ReverseIterator>(ReverseIterator(end), + ReverseIterator(begin), + offset, + limit)); +} + +template <typename T> class TreeIndex; + +template <> +class TreeIndex<Int> : public Index { + public: + struct Less { + bool operator()(Int lhs, Int rhs) const { + return lhs.raw() < rhs.raw(); + } + }; + + using Value = Int; + using Set = std::set<Int, Less>; + using Map = std::map<Value, Set, Less>; + + TreeIndex(ColumnBase *column, + const String &name, + const IndexOptions &options); + ~TreeIndex() = default; + + IndexType type() const { + return TREE_INDEX; + } + + void insert(Int row_id, const Datum &value); + void remove(Int row_id, const Datum &value); + + std::unique_ptr<Cursor> find(const Datum &datum, + const CursorOptions &options) const; + + private: + mutable Map map_; +}; + +TreeIndex<Int>::TreeIndex(ColumnBase *column, + const String &name, + const IndexOptions &options) + : Index(column, name), + map_() { + auto cursor = column->table()->create_cursor(); + auto typed_column = static_cast<Column<Int> *>(column); + Array<Record> records; + Array<Value> values; + for ( ; ; ) { + size_t count = cursor->read(1024, &records); + if (count == 0) { + break; + } + values.resize(records.size()); + typed_column->read(records, values.ref()); + for (size_t i = 0; i < count; ++i) { + insert(records[i].row_id, values[i]); + } + records.clear(); + } +} + +void TreeIndex<Int>::insert(Int row_id, const Datum &value) { + auto result = map_[value.as_int()].insert(row_id); + if (!result.second) { + throw "Entry already exists"; // TODO + } +} + +void TreeIndex<Int>::remove(Int row_id, const Datum &value) { + auto map_it = map_.find(value.as_int()); + if (map_it == map_.end()) { + throw "Entry not found"; // TODO + } + auto set_it = map_it->second.find(row_id); + if (set_it == map_it->second.end()) { + throw "Entry not found"; // TODO + } + map_it->second.erase(set_it); + if (map_it->second.size() == 0) { + map_.erase(map_it); + } +} + +std::unique_ptr<Cursor> TreeIndex<Int>::find( + const Datum &datum, + const CursorOptions &options) const { + if (datum.type() != INT_DATA) { + throw "Data type conflict"; // TODO + } + auto map_it = map_.find(datum.as_int()); + if (map_it == map_.end()) { + return std::unique_ptr<Cursor>(new EmptyCursor); + } else { + auto set_begin = map_it->second.begin(); + auto set_end = map_it->second.end(); + if (options.order_type == CURSOR_REGULAR_ORDER) { + return create_iterator_cursor(set_begin, + set_end, + options.offset, + options.limit); + } else { + return create_reverse_iterator_cursor(set_begin, + set_end, + options.offset, + options.limit); + } + } +} + +template <> +class TreeIndex<Float> : public Index { + public: + struct Less { + bool operator()(Int lhs, Int rhs) const { + return lhs.raw() < rhs.raw(); + } + bool operator()(Float lhs, Float rhs) const { + return lhs.raw() < rhs.raw(); + } + }; + + using Value = Float; + using Set = std::set<Int, Less>; + using Map = std::map<Value, Set, Less>; + + TreeIndex(ColumnBase *column, + const String &name, + const IndexOptions &options); + ~TreeIndex() = default; + + IndexType type() const { + return TREE_INDEX; + } + + void insert(Int row_id, const Datum &value); + void remove(Int row_id, const Datum &value); + + std::unique_ptr<Cursor> find(const Datum &datum, + const CursorOptions &options) const; + + private: + mutable Map map_; +}; + +TreeIndex<Float>::TreeIndex(ColumnBase *column, + const String &name, + const IndexOptions &options) + : Index(column, name), + map_() { + auto cursor = column->table()->create_cursor(); + auto typed_column = static_cast<Column<Float> *>(column); + Array<Record> records; + Array<Value> values; + for ( ; ; ) { + size_t count = cursor->read(1024, &records); + if (count == 0) { + break; + } + values.resize(records.size()); + typed_column->read(records, values.ref()); + for (size_t i = 0; i < count; ++i) { + insert(records[i].row_id, values[i]); + } + records.clear(); + } +} + +void TreeIndex<Float>::insert(Int row_id, const Datum &value) { + auto result = map_[value.as_float()].insert(row_id); + if (!result.second) { + throw "Entry already exists"; // TODO + } +} + +void TreeIndex<Float>::remove(Int row_id, const Datum &value) { + auto map_it = map_.find(value.as_float()); + if (map_it == map_.end()) { + throw "Entry not found"; // TODO + } + auto set_it = map_it->second.find(row_id); + if (set_it == map_it->second.end()) { + throw "Entry not found"; // TODO + } + map_it->second.erase(set_it); + if (map_it->second.size() == 0) { + map_.erase(map_it); + } +} + +std::unique_ptr<Cursor> TreeIndex<Float>::find( + const Datum &datum, + const CursorOptions &options) const { + if (datum.type() != FLOAT_DATA) { + throw "Data type conflict"; // TODO + } + auto map_it = map_.find(datum.as_float()); + if (map_it == map_.end()) { + return std::unique_ptr<Cursor>(new EmptyCursor); + } else { + auto set_begin = map_it->second.begin(); + auto set_end = map_it->second.end(); + if (options.order_type == CURSOR_REGULAR_ORDER) { + return create_iterator_cursor(set_begin, + set_end, + options.offset, + options.limit); + } else { + return create_reverse_iterator_cursor(set_begin, + set_end, + options.offset, + options.limit); + } + } +} + +template <> +class TreeIndex<Text> : public Index { + public: + struct Less { + bool operator()(Int lhs, Int rhs) const { + return lhs.raw() < rhs.raw(); + } + }; + + using Value = Text; + using Set = std::set<Int, Less>; + using Map = std::map<String, Set>; + + TreeIndex(ColumnBase *column, + const String &name, + const IndexOptions &options); + ~TreeIndex() = default; + + IndexType type() const { + return TREE_INDEX; + } + + void insert(Int row_id, const Datum &value); + void remove(Int row_id, const Datum &value); + + std::unique_ptr<Cursor> find(const Datum &datum, + const CursorOptions &options) const; + + private: + mutable Map map_; +}; + +TreeIndex<Text>::TreeIndex(ColumnBase *column, + const String &name, + const IndexOptions &options) + : Index(column, name), + map_() { + auto cursor = column->table()->create_cursor(); + auto typed_column = static_cast<Column<Text> *>(column); + Array<Record> records; + Array<Value> values; + for ( ; ; ) { + size_t count = cursor->read(1024, &records); + if (count == 0) { + break; + } + values.resize(records.size()); + typed_column->read(records, values.ref()); + for (size_t i = 0; i < count; ++i) { + insert(records[i].row_id, values[i]); + } + records.clear(); + } +} + +void TreeIndex<Text>::insert(Int row_id, const Datum &value) { + Text text = value.as_text(); + String string; + string.assign(text.raw_data(), text.raw_size()); + auto result = map_[std::move(string)].insert(row_id); + if (!result.second) { + throw "Entry already exists"; // TODO + } +} + +void TreeIndex<Text>::remove(Int row_id, const Datum &value) { + Text text = value.as_text(); + String string(text.raw_data(), text.raw_size()); + auto map_it = map_.find(string); + if (map_it == map_.end()) { + throw "Entry not found"; // TODO + } + auto set_it = map_it->second.find(row_id); + if (set_it == map_it->second.end()) { + throw "Entry not found"; // TODO + } + map_it->second.erase(set_it); + if (map_it->second.size() == 0) { + map_.erase(map_it); + } +} + +std::unique_ptr<Cursor> TreeIndex<Text>::find( + const Datum &datum, + const CursorOptions &options) const { + if (datum.type() != TEXT_DATA) { + throw "Data type conflict"; // TODO + } + Text text = datum.as_text(); + String string(text.raw_data(), text.raw_size()); + auto map_it = map_.find(string); + if (map_it == map_.end()) { + return std::unique_ptr<Cursor>(new EmptyCursor); + } else { + auto set_begin = map_it->second.begin(); + auto set_end = map_it->second.end(); + if (options.order_type == CURSOR_REGULAR_ORDER) { + return create_iterator_cursor(set_begin, + set_end, + options.offset, + options.limit); + } else { + return create_reverse_iterator_cursor(set_begin, + set_end, + options.offset, + options.limit); + } + } +} + +//std::unique_ptr<Cursor> TreeIndex<Text>::find( +// const Datum &datum, +// const CursorOptions &options) const; + +} // namespace index + +using namespace index; + +Index::Index(ColumnBase *column, const String &name) + : column_(column), + name_(name.clone()) {} + +Index::~Index() {} + +ColumnInterface *Index::column() const { + return column_; +} + +bool Index::contains(const Datum &value) const { + return !find_one(value).is_na(); +} + +Int Index::find_one(const Datum &value) const { + // TODO: This function should not fail, so Cursor should not be used. + auto cursor = find(value); + Array<Record> records; + auto count = cursor->read(1, &records); + if (count == 0) { + Int::na(); + } + return records[0].row_id; +} + +std::unique_ptr<Cursor> Index::find( + const Datum &value, + const CursorOptions &options) const { + throw "Not supported yet"; // TODO +} + +Index *Index::create(ColumnBase *column, + const String &name, + IndexType type, + const IndexOptions &options) try { + if (type != TREE_INDEX) { + throw "Not supoprted yet"; // TODO + } + switch (column->data_type()) { + case BOOL_DATA: { + throw "Not supported yet"; // TODO + } + case INT_DATA: { + return new TreeIndex<Int>(column, name, options); + } + case FLOAT_DATA: { + return new TreeIndex<Float>(column, name, options); + } + case GEO_POINT_DATA: { + throw "Not supported yet"; // TODO + } + case TEXT_DATA: { + return new TreeIndex<Text>(column, name, options); + } + case BOOL_VECTOR_DATA: + case INT_VECTOR_DATA: + case FLOAT_VECTOR_DATA: + case GEO_POINT_VECTOR_DATA: + case TEXT_VECTOR_DATA: + default: { + throw "Not supported yet"; // TODO + } + } +} catch (const std::bad_alloc &) { + throw "Memory allocation failed"; // TODO +} + +void Index::rename(const String &new_name) { + name_.assign(new_name); +} + +bool Index::is_removable() const { + return true; +} } // namespace impl } // namespace grnxx Modified: lib/grnxx/impl/index.hpp (+18 -10) =================================================================== --- lib/grnxx/impl/index.hpp 2014-11-27 22:58:44 +0900 (77edc0f) +++ lib/grnxx/impl/index.hpp 2014-11-28 15:08:25 +0900 (edc6cf4) @@ -12,34 +12,42 @@ namespace impl { using ColumnInterface = grnxx::Column; using IndexInterface = grnxx::Index; +class ColumnBase; + class Index : public IndexInterface { public: // -- Public API (grnxx/index.hpp) -- - Index(Column *column, const String &name); + Index(ColumnBase *column, const String &name); virtual ~Index(); - // Return the owner column. ColumnInterface *column() const; - // Return the name. String name() const { return name_; } + virtual void insert(Int row_id, const Datum &value) = 0; + virtual void remove(Int row_id, const Datum &value) = 0; + + virtual bool contains(const Datum &value) const; + virtual Int find_one(const Datum &value) const; + virtual std::unique_ptr<Cursor> find( + const Datum &value, + const CursorOptions &options = CursorOptions()) const; + // -- Internal API -- // Create a new index. // // On success, returns the column. // On failure, throws an exception. - static std::unique_ptr<Index> create( - Column *column, - const String &name, - IndexType type, - const IndexOptions &options); + static Index *create(ColumnBase *column, + const String &name, + IndexType type, + const IndexOptions &options); // Return the owner table. - Column *_column() const { + ColumnBase *_column() const { return column_; } @@ -52,7 +60,7 @@ class Index : public IndexInterface { bool is_removable() const; private: - Column *column_; + ColumnBase *column_; String name_; }; -------------- next part -------------- HTML����������������������������...Download