[Groonga-commit] groonga/grnxx at 306b6c3 [master] Add TreeIndex<Text>. (#52)

Zurück zum Archiv-Index

susumu.yata null+****@clear*****
Tue Sep 16 18:39:10 JST 2014


susumu.yata	2014-09-16 18:39:10 +0900 (Tue, 16 Sep 2014)

  New Revision: 306b6c336cef24f5397f8aaaf31a6696a2707067
  https://github.com/groonga/grnxx/commit/306b6c336cef24f5397f8aaaf31a6696a2707067

  Message:
    Add TreeIndex<Text>. (#52)

  Modified files:
    lib/grnxx/column.cpp
    lib/grnxx/index.cpp
    lib/grnxx/tree_index.hpp

  Modified: lib/grnxx/column.cpp (+45 -20)
===================================================================
--- lib/grnxx/column.cpp    2014-09-16 15:39:54 +0900 (d94736e)
+++ lib/grnxx/column.cpp    2014-09-16 18:39:10 +0900 (d0b3cca)
@@ -426,29 +426,42 @@ bool ColumnImpl<Text>::set(Error *error, Int row_id, const Datum &datum) {
   if (!table_->test_row(error, row_id)) {
     return false;
   }
-  Text value = datum.force_text();
-  if (value.size() == 0) {
-    headers_[row_id] = 0;
-    return true;
-  }
-  Int offset = bodies_.size();
-  if (value.size() < 0xFFFF) {
-    if (!bodies_.resize(error, offset + value.size())) {
-      return false;
+  Text old_value = get(row_id);
+  Text new_value = datum.force_text();
+  if (new_value != old_value) {
+    for (Int i = 0; i < num_indexes(); ++i) {
+      if (!indexes_[i]->insert(error, row_id, datum)) {
+        for (Int j = 0; j < i; ++i) {
+          indexes_[j]->remove(nullptr, row_id, datum);
+        }
+        return false;
+      }
     }
-    std::memcpy(&bodies_[offset], value.data(), value.size());
-    headers_[row_id] = (offset << 16) | value.size();
-  } else {
-    // The size of a long text is stored in front of the body.
-    if ((offset % sizeof(Int)) != 0) {
-      offset += sizeof(Int) - (offset % sizeof(Int));
+    Int offset = bodies_.size();
+    UInt new_header;
+    if (new_value.size() < 0xFFFF) {
+      if (!bodies_.resize(error, offset + new_value.size())) {
+        return false;
+      }
+      std::memcpy(&bodies_[offset], new_value.data(), new_value.size());
+      new_header = (offset << 16) | new_value.size();
+    } else {
+      // The size of a long text is stored in front of the body.
+      if ((offset % sizeof(Int)) != 0) {
+        offset += sizeof(Int) - (offset % sizeof(Int));
+      }
+      if (!bodies_.resize(error, offset + sizeof(Int) + new_value.size())) {
+        return false;
+      }
+      *reinterpret_cast<Int *>(&bodies_[offset]) = new_value.size();
+      std::memcpy(&bodies_[offset + sizeof(Int)],
+                  new_value.data(), new_value.size());
+      new_header = (offset << 16) | 0xFFFF;
     }
-    if (!bodies_.resize(error, offset + sizeof(Int) + value.size())) {
-      return false;
+    for (Int i = 0; i < num_indexes(); ++i) {
+      indexes_[i]->remove(nullptr, row_id, old_value);
     }
-    *reinterpret_cast<Int *>(&bodies_[offset]) = value.size();
-    std::memcpy(&bodies_[offset + sizeof(Int)], value.data(), value.size());
-    headers_[row_id] = (offset << 16) | 0xFFFF;
+    headers_[row_id] = new_header;
   }
   return true;
 }
@@ -488,11 +501,23 @@ bool ColumnImpl<Text>::set_default_value(Error *error, Int row_id) {
       return false;
     }
   }
+  Text value = TypeTraits<Text>::default_value();
+  for (Int i = 0; i < num_indexes(); ++i) {
+    if (!indexes_[i]->insert(error, row_id, value)) {
+      for (Int j = 0; j < i; ++j) {
+        indexes_[j]->remove(nullptr, row_id, value);
+      }
+      return false;
+    }
+  }
   headers_[row_id] = 0;
   return true;
 }
 
 void ColumnImpl<Text>::unset(Int row_id) {
+  for (Int i = 0; i < num_indexes(); ++i) {
+    indexes_[i]->remove(nullptr, row_id, get(row_id));
+  }
   headers_[row_id] = 0;
 }
 

  Modified: lib/grnxx/index.cpp (+170 -2)
===================================================================
--- lib/grnxx/index.cpp    2014-09-16 15:39:54 +0900 (6e6fdf0)
+++ lib/grnxx/index.cpp    2014-09-16 18:39:10 +0900 (4af5a41)
@@ -65,8 +65,13 @@ unique_ptr<Index> Index::create(Error *error,
     case FLOAT_DATA: {
       return TreeIndex<Float>::create(error, column, name, options);
     }
-    case GEO_POINT_DATA:
-    case TEXT_DATA:
+    case GEO_POINT_DATA: {
+      GRNXX_ERROR_SET(error, NOT_SUPPORTED_YET, "Not supoprted yet");
+      return nullptr;
+    }
+    case TEXT_DATA: {
+      return TreeIndex<Text>::create(error, column, name, options);
+    }
     case BOOL_VECTOR_DATA:
     case INT_VECTOR_DATA:
     case FLOAT_VECTOR_DATA:
@@ -832,4 +837,167 @@ bool TreeIndex<Float>::remove(Error *, Int row_id, const Datum &value) {
   return true;
 }
 
+// -- TreeIndex<Text> --
+
+unique_ptr<TreeIndex<Text>> TreeIndex<Text>::create(
+    Error *error,
+    Column *column,
+    String name,
+    const IndexOptions &options) {
+  unique_ptr<TreeIndex> index(new (nothrow) TreeIndex);
+  if (!index) {
+    GRNXX_ERROR_SET(error, NO_MEMORY, "Memory allocation failed");
+    return nullptr;
+  }
+  if (!index->initialize_base(error, column, name, TREE_INDEX, options)) {
+    return nullptr;
+  }
+  auto cursor = column->table()->create_cursor(error);
+  if (!cursor) {
+    return nullptr;
+  }
+  auto typed_column = static_cast<ColumnImpl<Text> *>(column);
+  Array<Record> records;
+  for ( ; ; ) {
+    Int count = cursor->read(error, 1024, &records);
+    if (count < 0) {
+      return nullptr;
+    } else if (count == 0) {
+      break;
+    }
+    for (Int i = 0; i < count; ++i) {
+      Int row_id = records.get_row_id(i);
+      if (!index->insert(error, row_id, typed_column->get(row_id))) {
+        return nullptr;
+      }
+    }
+    records.clear();
+  }
+  return index;
+}
+
+TreeIndex<Text>::~TreeIndex() {}
+
+unique_ptr<Cursor> TreeIndex<Text>::create_cursor(
+    Error *error,
+    const Datum &datum,
+    const CursorOptions &options) const {
+  if (datum.type() != TEXT_DATA) {
+    GRNXX_ERROR_SET(error, INVALID_ARGUMENT, "Data type conflict");
+    return nullptr;
+  }
+
+  Text text = datum.force_text();
+  std::string string(text.data(), text.size());
+  auto map_it = map_.find(string);
+  if (map_it == map_.end()) {
+    return create_empty_cursor(error, column_->table());
+  }
+  auto set_begin = map_it->second.begin();
+  auto set_end = map_it->second.end();
+  if (options.order_type == REGULAR_ORDER) {
+    return create_iterator_cursor(error,
+                                  column_->table(),
+                                  set_begin,
+                                  set_end,
+                                  options.offset,
+                                  options.limit);
+  } else {
+    return create_reverse_iterator_cursor(error,
+                                          column_->table(),
+                                          set_begin,
+                                          set_end,
+                                          options.offset,
+                                          options.limit);
+  }
+}
+
+unique_ptr<Cursor> TreeIndex<Text>::create_cursor(
+    Error *error,
+    const IndexRange &range,
+    const CursorOptions &options) const {
+  std::string lower_bound_value = "";
+  if (range.has_lower_bound()) {
+    const EndPoint &lower_bound = range.lower_bound();
+    if (lower_bound.value.type() != TEXT_DATA) {
+      GRNXX_ERROR_SET(error, INVALID_ARGUMENT, "Data type conflict");
+      return nullptr;
+    }
+    Text text = lower_bound.value.force_text();
+    lower_bound_value.assign(text.data(), text.size());
+    if (lower_bound.type == EXCLUSIVE_END_POINT) {
+      lower_bound_value += '\0';
+    }
+  }
+
+  std::string upper_bound_value = "";
+  if (range.has_upper_bound()) {
+    const EndPoint &upper_bound = range.upper_bound();
+    if (upper_bound.value.type() != TEXT_DATA) {
+      GRNXX_ERROR_SET(error, INVALID_ARGUMENT, "Data type conflict");
+      return nullptr;
+    }
+    Text text = upper_bound.value.force_text();
+    upper_bound_value.assign(text.data(), text.size());
+    if (upper_bound.type == INCLUSIVE_END_POINT) {
+      upper_bound_value += '\0';
+    }
+  }
+
+  if (range.has_upper_bound()) {
+    if (lower_bound_value >= upper_bound_value) {
+      // TODO: Invalid range.
+      return nullptr;
+    }
+  }
+
+  auto begin = map_.lower_bound(lower_bound_value);
+  auto end = range.has_upper_bound() ?
+      map_.lower_bound(upper_bound_value) : map_.end();
+  if (options.order_type == REGULAR_ORDER) {
+    return create_map_set_cursor(error,
+                                 column_->table(),
+                                 begin,
+                                 end,
+                                 options.offset,
+                                 options.limit);
+  } else {
+    return create_reverse_map_set_cursor(error,
+                                         column_->table(),
+                                         begin,
+                                         end,
+                                         options.offset,
+                                         options.limit);
+  }
+}
+
+bool TreeIndex<Text>::insert(Error *, Int row_id, const Datum &value) {
+  Text text = value.force_text();
+  std::string string(text.data(), text.size());
+  auto result = map_[string].insert(row_id);
+  if (!result.second) {
+    // Already exists.
+    return false;
+  }
+  return true;
+}
+
+bool TreeIndex<Text>::remove(Error *, Int row_id, const Datum &value) {
+  Text text = value.force_text();
+  std::string string(text.data(), text.size());
+  auto map_it = map_.find(string);
+  if (map_it == map_.end()) {
+    return false;
+  }
+  auto set_it = map_it->second.find(row_id);
+  if (set_it == map_it->second.end()) {
+    return false;
+  }
+  map_it->second.erase(set_it);
+  if (map_it->second.size() == 0) {
+    map_.erase(map_it);
+  }
+  return true;
+}
+
 }  // namespace grnxx

  Modified: lib/grnxx/tree_index.hpp (+36 -2)
===================================================================
--- lib/grnxx/tree_index.hpp    2014-09-16 15:39:54 +0900 (3f63270)
+++ lib/grnxx/tree_index.hpp    2014-09-16 18:39:10 +0900 (29923f4)
@@ -46,7 +46,7 @@ class TreeIndex<Bool> : public Index {
   TreeIndex() : Index(), map_() {}
 };
 
-// TODO: This is just a test implementation.
+// TODO: This is a test implementation.
 template <>
 class TreeIndex<Int> : public Index {
  public:
@@ -80,7 +80,7 @@ class TreeIndex<Int> : public Index {
   TreeIndex() : Index(), map_() {}
 };
 
-// TODO: This is just a test implementation.
+// TODO: This is a test implementation.
 template <>
 class TreeIndex<Float> : public Index {
  public:
@@ -126,6 +126,40 @@ class TreeIndex<Float> : public Index {
   TreeIndex() : Index(), map_() {}
 };
 
+// TODO: This is a test implementation.
+template <>
+class TreeIndex<Text> : public Index {
+ public:
+  using Value = Text;
+  using Set = std::set<Int>;
+  using Map = std::map<std::string, Set>;
+
+  static unique_ptr<TreeIndex> create(Error *error,
+                                      Column *column,
+                                      String name,
+                                      const IndexOptions &options);
+
+  ~TreeIndex();
+
+  unique_ptr<Cursor> create_cursor(
+      Error *error,
+      const Datum &datum,
+      const CursorOptions &options = CursorOptions()) const;
+
+  unique_ptr<Cursor> create_cursor(
+      Error *error,
+      const IndexRange &range,
+      const CursorOptions &options) const;
+
+  bool insert(Error *error, Int row_id, const Datum &value);
+  bool remove(Error *error, Int row_id, const Datum &value);
+
+ private:
+  mutable Map map_;
+
+  TreeIndex() : Index(), map_() {}
+};
+
 }  // namespace grnxx
 
 #endif  // GRNXX_TREE_INDEX_HPP
-------------- next part --------------
HTML����������������������������...
Download 



More information about the Groonga-commit mailing list
Zurück zum Archiv-Index