[Groonga-commit] groonga/grnxx at c9a36c9 [master] Update Column<Int> to change the internal data type. (#130)

Zurück zum Archiv-Index

susumu.yata null+****@clear*****
Mon Dec 22 16:09:58 JST 2014


susumu.yata	2014-12-22 16:09:58 +0900 (Mon, 22 Dec 2014)

  New Revision: c9a36c9134e6b2a7de93214943878b04f7693bd1
  https://github.com/groonga/grnxx/commit/c9a36c9134e6b2a7de93214943878b04f7693bd1

  Message:
    Update Column<Int> to change the internal data type. (#130)

  Modified files:
    lib/grnxx/impl/column/scalar/int.cpp
    lib/grnxx/impl/column/scalar/int.hpp

  Modified: lib/grnxx/impl/column/scalar/int.cpp (+369 -27)
===================================================================
--- lib/grnxx/impl/column/scalar/int.cpp    2014-12-18 16:36:33 +0900 (1428370)
+++ lib/grnxx/impl/column/scalar/int.cpp    2014-12-22 16:09:58 +0900 (fe28d0f)
@@ -13,7 +13,10 @@ Column<Int>::Column(Table *table,
                     const String &name,
                     const ColumnOptions &options)
     : ColumnBase(table, name, INT_DATA),
-      values_() {
+      value_size_(8),
+      buffer_(nullptr),
+      size_(0),
+      capacity_(0) {
   if (!options.reference_table_name.is_empty()) {
     reference_table_ = table->_db()->find_table(options.reference_table_name);
     if (!reference_table_) {
@@ -22,7 +25,9 @@ Column<Int>::Column(Table *table,
   }
 }
 
-Column<Int>::~Column() {}
+Column<Int>::~Column() {
+  std::free(buffer_);
+}
 
 void Column<Int>::set(Int row_id, const Datum &datum) {
   Int new_value = parse_datum(datum);
@@ -57,9 +62,7 @@ void Column<Int>::set(Int row_id, const Datum &datum) {
     }
   }
   size_t value_id = row_id.raw();
-  if (value_id >= values_.size()) {
-    values_.resize(value_id + 1, Int::na());
-  }
+  reserve(value_id + 1, new_value);
   // Insert the new value into indexes.
   for (size_t i = 0; i < num_indexes(); ++i) try {
     indexes_[i]->insert(row_id, datum);
@@ -69,16 +72,29 @@ void Column<Int>::set(Int row_id, const Datum &datum) {
     }
     throw;
   }
-  values_[value_id] = new_value;
+  switch (value_size_) {
+    case 8: {
+      values_8_[value_id] = static_cast<int8_t>(new_value.raw());
+      break;
+    }
+    case 16: {
+      values_16_[value_id] = static_cast<int16_t>(new_value.raw());
+      break;
+    }
+    case 32: {
+      values_32_[value_id] = static_cast<int32_t>(new_value.raw());
+      break;
+    }
+    default: {
+      values_64_[value_id] = new_value;
+      break;
+    }
+  }
 }
 
 void Column<Int>::get(Int row_id, Datum *datum) const {
   size_t value_id = row_id.raw();
-  if (value_id >= values_.size()) {
-    *datum = Int::na();
-  } else {
-    *datum = values_[value_id];
-  }
+  *datum = (value_id < size_) ? _get(value_id) : Int::na();
 }
 
 bool Column<Int>::contains(const Datum &datum) const {
@@ -90,13 +106,15 @@ bool Column<Int>::contains(const Datum &datum) const {
   size_t valid_size = get_valid_size();
   if (value.is_na()) {
     for (size_t i = 0; i < valid_size; ++i) {
-      if (values_[i].is_na() && table_->_test_row(i)) {
+      // TODO: Improve this loop.
+      if (_get(i).is_na() && table_->_test_row(i)) {
         return true;
       }
     }
   } else {
     for (size_t i = 0; i < valid_size; ++i) {
-      if (values_[i].match(value)) {
+      // TODO: Improve this loop.
+      if (_get(i).match(value)) {
         return true;
       }
     }
@@ -113,13 +131,17 @@ Int Column<Int>::find_one(const Datum &datum) const {
   size_t valid_size = get_valid_size();
   if (value.is_na()) {
     for (size_t i = 0; i < valid_size; ++i) {
-      if (values_[i].is_na() && table_->_test_row(i)) {
+      // TODO: Improve this loop.
+      Int stored_value = _get(i);
+      if (stored_value.is_na() && table_->_test_row(i)) {
         return Int(i);
       }
     }
   } else {
     for (size_t i = 0; i < valid_size; ++i) {
-      if (values_[i].match(value)) {
+      // TODO: Improve this loop.
+      Int stored_value = _get(i);
+      if (stored_value.match(value)) {
         return Int(i);
       }
     }
@@ -144,8 +166,10 @@ void Column<Int>::set_key_attribute() {
     std::unordered_set<int64_t> set;
     size_t valid_size = get_valid_size();
     for (size_t i = 0; i < valid_size; ++i) try {
-      if (!values_[i].is_na()) {
-        if (!set.insert(values_[i].raw()).second) {
+      // TODO: Improve this loop.
+      Int value = _get(i);
+      if (!value.is_na()) {
+        if (!set.insert(value.raw()).second) {
           throw "Key duplicate";  // TODO
         }
       }
@@ -171,10 +195,8 @@ void Column<Int>::set_key(Int row_id, const Datum &key) {
     throw "Key already exists";  // TODO
   }
   size_t value_id = row_id.raw();
-  if (value_id >= values_.size()) {
-    values_.resize(value_id + 1, Int::na());
-  }
   Int value = parse_datum(key);
+  reserve(value_id + 1, value);
   // Update indexes if exist.
   for (size_t i = 0; i < num_indexes(); ++i) try {
     indexes_[i]->insert(row_id, value);
@@ -184,7 +206,24 @@ void Column<Int>::set_key(Int row_id, const Datum &key) {
     }
     throw;
   }
-  values_[value_id] = value;
+  switch (value_size_) {
+    case 8: {
+      values_8_[value_id] = static_cast<int8_t>(value.raw());
+      break;
+    }
+    case 16: {
+      values_16_[value_id] = static_cast<int16_t>(value.raw());
+      break;
+    }
+    case 32: {
+      values_32_[value_id] = static_cast<int32_t>(value.raw());
+      break;
+    }
+    default: {
+      values_64_[value_id] = value;
+      break;
+    }
+  }
 }
 
 void Column<Int>::unset(Int row_id) {
@@ -194,7 +233,24 @@ void Column<Int>::unset(Int row_id) {
     for (size_t i = 0; i < num_indexes(); ++i) {
       indexes_[i]->remove(row_id, value);
     }
-    values_[row_id.raw()] = Int::na();
+    switch (value_size_) {
+      case 8: {
+        values_8_[row_id.raw()] = na_value_8();
+        break;
+      }
+      case 16: {
+        values_16_[row_id.raw()] = na_value_16();
+        break;
+      }
+      case 32: {
+        values_32_[row_id.raw()] = na_value_32();
+        break;
+      }
+      default: {
+        values_64_[row_id.raw()] = Int::na();
+        break;
+      }
+    }
   }
 }
 
@@ -202,8 +258,41 @@ void Column<Int>::read(ArrayCRef<Record> records, ArrayRef<Int> values) const {
   if (records.size() != values.size()) {
     throw "Data size conflict";  // TODO
   }
-  for (size_t i = 0; i < records.size(); ++i) {
-    values.set(i, get(records[i].row_id));
+  switch (value_size_) {
+    case 8: {
+      for (size_t i = 0; i < records.size(); ++i) {
+        size_t value_id = records[i].row_id.raw();
+        values[i] = ((value_id < size_) &&
+                     (values_8_[value_id] != na_value_8())) ?
+                    Int(values_8_[value_id]) : Int::na();
+      }
+      break;
+    }
+    case 16: {
+      for (size_t i = 0; i < records.size(); ++i) {
+        size_t value_id = records[i].row_id.raw();
+        values[i] = ((value_id < size_) &&
+                     (values_16_[value_id] != na_value_16())) ?
+                    Int(values_16_[value_id]) : Int::na();
+      }
+      break;
+    }
+    case 32: {
+      for (size_t i = 0; i < records.size(); ++i) {
+        size_t value_id = records[i].row_id.raw();
+        values[i] = ((value_id < size_) &&
+                     (values_32_[value_id] != na_value_32())) ?
+                    Int(values_32_[value_id]) : Int::na();
+      }
+      break;
+    }
+    default: {
+      for (size_t i = 0; i < records.size(); ++i) {
+        size_t value_id = records[i].row_id.raw();
+        values[i] = (value_id < size_) ? values_64_[value_id] : Int::na();
+      }
+      break;
+    }
   }
 }
 
@@ -259,10 +348,263 @@ size_t Column<Int>::get_valid_size() const {
     return 0;
   }
   size_t table_size = table_->max_row_id().raw() + 1;
-  if (table_size < values_.size()) {
-    return table_size;
+  return (table_size < size_) ? table_size : size_;
+}
+
+void Column<Int>::reserve(size_t size, Int value) {
+  if (!value.is_na()) {
+    int64_t raw = value.raw();
+    switch (value_size_) {
+      case 8: {
+        if ((raw >= min_value_8()) && (raw <= max_value_8())) {
+          // 8 -> 8.
+          if (size > capacity_) {
+            size_t new_capacity = (capacity_ != 0) ? capacity_ : 1;
+            while (new_capacity < size) {
+              new_capacity *= 2;
+            }
+            void *new_buffer =
+                std::realloc(buffer_, sizeof(int8_t) * new_capacity);
+            if (!new_buffer) {
+              throw "Memory allocation failed";
+            }
+            buffer_ = new_buffer;
+            capacity_ = new_capacity;
+          }
+          if (size > size_) {
+            for (size_t i = size_; i < size; ++i) {
+              values_8_[i] = na_value_8();
+            }
+            size_ = size;
+          }
+        } else if ((raw >= min_value_16()) && (raw <= max_value_16())) {
+          // 8 -> 16.
+          size_t new_capacity = (capacity_ != 0) ? capacity_ : 1;
+          while (new_capacity < size) {
+            new_capacity *= 2;
+          }
+          int16_t *new_values = static_cast<int16_t *>(
+              std::malloc(sizeof(int16_t) * new_capacity));
+          if (!new_values) {
+            throw "Memory allocation failed";
+          }
+          for (size_t i = 0; i < size_; ++i) {
+            new_values[i] = (values_8_[i] != na_value_8()) ?
+                            values_8_[i] : na_value_16();
+          }
+          for (size_t i = size_; i < size; ++i) {
+            new_values[i] = na_value_16();
+          }
+          std::free(buffer_);
+          buffer_ = new_values;
+          value_size_ = 16;
+          if (size > size_) {
+            size_ = size;
+          }
+          capacity_ = new_capacity;
+        } else if ((raw >= min_value_32()) && (raw <= max_value_32())) {
+          // 8 -> 32.
+          size_t new_capacity = (capacity_ != 0) ? capacity_ : 1;
+          while (new_capacity < size) {
+            new_capacity *= 2;
+          }
+          int32_t *new_values = static_cast<int32_t *>(
+              std::malloc(sizeof(int32_t) * new_capacity));
+          if (!new_values) {
+            throw "Memory allocation failed";
+          }
+          for (size_t i = 0; i < size_; ++i) {
+            new_values[i] = (values_8_[i] != na_value_8()) ?
+                            values_8_[i] : na_value_32();
+          }
+          for (size_t i = size_; i < size; ++i) {
+            new_values[i] = na_value_32();
+          }
+          std::free(buffer_);
+          buffer_ = new_values;
+          value_size_ = 32;
+          if (size > size_) {
+            size_ = size;
+          }
+          capacity_ = new_capacity;
+        } else {
+          // 8 -> 64.
+          size_t new_capacity = (capacity_ != 0) ? capacity_ : 1;
+          while (new_capacity < size) {
+            new_capacity *= 2;
+          }
+          Int *new_values = static_cast<Int *>(
+              std::malloc(sizeof(Int) * new_capacity));
+          if (!new_values) {
+            throw "Memory allocation failed";
+          }
+          for (size_t i = 0; i < size_; ++i) {
+            new_values[i] = (values_8_[i] != na_value_8()) ?
+                            Int(values_8_[i]) : Int::na();
+          }
+          for (size_t i = size_; i < size; ++i) {
+            new_values[i] = Int::na();
+          }
+          std::free(buffer_);
+          buffer_ = new_values;
+          value_size_ = 64;
+          if (size > size_) {
+            size_ = size;
+          }
+          capacity_ = new_capacity;
+        }
+        break;
+      }
+      case 16: {
+        if ((raw >= min_value_16()) && (raw <= max_value_16())) {
+          // 16 -> 16.
+          if (size > capacity_) {
+            size_t new_capacity = (capacity_ != 0) ? capacity_ : 1;
+            while (new_capacity < size) {
+              new_capacity *= 2;
+            }
+            void *new_buffer =
+                std::realloc(buffer_, sizeof(int16_t) * new_capacity);
+            if (!new_buffer) {
+              throw "Memory allocation failed";
+            }
+            buffer_ = new_buffer;
+            capacity_ = new_capacity;
+          }
+          if (size > size_) {
+            for (size_t i = size_; i < size; ++i) {
+              values_16_[i] = na_value_16();
+            }
+            size_ = size;
+          }
+        } else if ((raw >= min_value_32()) && (raw <= max_value_32())) {
+          // 16 -> 32.
+          size_t new_capacity = (capacity_ != 0) ? capacity_ : 1;
+          while (new_capacity < size) {
+            new_capacity *= 2;
+          }
+          int32_t *new_values = static_cast<int32_t *>(
+              std::malloc(sizeof(int32_t) * new_capacity));
+          if (!new_values) {
+            throw "Memory allocation failed";
+          }
+          for (size_t i = 0; i < size_; ++i) {
+            new_values[i] = (values_16_[i] != na_value_16()) ?
+                            values_16_[i] : na_value_32();
+          }
+          for (size_t i = size_; i < size; ++i) {
+            new_values[i] = na_value_32();
+          }
+          std::free(buffer_);
+          buffer_ = new_values;
+          value_size_ = 32;
+          if (size > size_) {
+            size_ = size;
+          }
+          capacity_ = new_capacity;
+        } else {
+          // 16 -> 64.
+          size_t new_capacity = (capacity_ != 0) ? capacity_ : 1;
+          while (new_capacity < size) {
+            new_capacity *= 2;
+          }
+          Int *new_values = static_cast<Int *>(
+              std::malloc(sizeof(Int) * new_capacity));
+          if (!new_values) {
+            throw "Memory allocation failed";
+          }
+          for (size_t i = 0; i < size_; ++i) {
+            new_values[i] = (values_16_[i] != na_value_16()) ?
+                            Int(values_16_[i]) : Int::na();
+          }
+          for (size_t i = size_; i < size; ++i) {
+            new_values[i] = Int::na();
+          }
+          std::free(buffer_);
+          buffer_ = new_values;
+          value_size_ = 64;
+          if (size > size_) {
+            size_ = size;
+          }
+          capacity_ = new_capacity;
+        }
+        break;
+      }
+      case 32: {
+        if ((raw >= min_value_32()) && (raw <= max_value_32())) {
+          // 32 -> 32.
+          if (size > capacity_) {
+            size_t new_capacity = (capacity_ != 0) ? capacity_ : 1;
+            while (new_capacity < size) {
+              new_capacity *= 2;
+            }
+            void *new_buffer =
+                std::realloc(buffer_, sizeof(int32_t) * new_capacity);
+            if (!new_buffer) {
+              throw "Memory allocation failed";
+            }
+            buffer_ = new_buffer;
+            capacity_ = new_capacity;
+          }
+          if (size > size_) {
+            for (size_t i = size_; i < size; ++i) {
+              values_32_[i] = na_value_32();
+            }
+            size_ = size;
+          }
+        } else {
+          // 32 -> 64.
+          size_t new_capacity = (capacity_ != 0) ? capacity_ : 1;
+          while (new_capacity < size) {
+            new_capacity *= 2;
+          }
+          Int *new_values = static_cast<Int *>(
+              std::malloc(sizeof(Int) * new_capacity));
+          if (!new_values) {
+            throw "Memory allocation failed";
+          }
+          for (size_t i = 0; i < size_; ++i) {
+            new_values[i] = (values_32_[i] != na_value_32()) ?
+                            Int(values_32_[i]) : Int::na();
+          }
+          for (size_t i = size_; i < size; ++i) {
+            new_values[i] = Int::na();
+          }
+          std::free(buffer_);
+          buffer_ = new_values;
+          value_size_ = 64;
+          if (size > size_) {
+            size_ = size;
+          }
+          capacity_ = new_capacity;
+        }
+        break;
+      }
+      default: {
+        // 64 -> 64.
+        if (size > capacity_) {
+          size_t new_capacity = (capacity_ != 0) ? capacity_ : 1;
+          while (new_capacity < size) {
+            new_capacity *= 2;
+          }
+          void *new_buffer =
+              std::realloc(buffer_, sizeof(Int) * new_capacity);
+          if (!new_buffer) {
+            throw "Memory allocation failed";
+          }
+          buffer_ = new_buffer;
+          capacity_ = new_capacity;
+        }
+        if (size > size_) {
+          for (size_t i = size_; i < size; ++i) {
+            values_64_[i] = Int::na();
+          }
+          size_ = size;
+        }
+        break;
+      }
+    }
   }
-  return values_.size();
 }
 
 Int Column<Int>::parse_datum(const Datum &datum) {

  Modified: lib/grnxx/impl/column/scalar/int.hpp (+66 -3)
===================================================================
--- lib/grnxx/impl/column/scalar/int.hpp    2014-12-18 16:36:33 +0900 (9e4a4ac)
+++ lib/grnxx/impl/column/scalar/int.hpp    2014-12-22 16:09:58 +0900 (a37be53)
@@ -39,10 +39,10 @@ class Column<Int> : public ColumnBase {
   // If "row_id" is invalid, returns N/A.
   Int get(Int row_id) const {
     size_t value_id = row_id.raw();
-    if (value_id >= values_.size()) {
+    if (value_id >= size_) {
       return Int::na();
     }
-    return values_[value_id];
+    return _get(value_id);
   }
   // Read values.
   //
@@ -50,16 +50,79 @@ class Column<Int> : public ColumnBase {
   void read(ArrayCRef<Record> records, ArrayRef<Int> values) const;
 
  private:
-  Array<Int> values_;
+  size_t value_size_;
+  union {
+    void *buffer_;
+    int8_t *values_8_;
+    int16_t *values_16_;
+    int32_t *values_32_;
+    Int *values_64_;
+  };
+  size_t size_;
+  size_t capacity_;
+
+  // Return the stored value.
+  Int _get(size_t i) const {
+    switch (value_size_) {
+      case 8: {
+        return (values_8_[i] != na_value_8()) ?
+               Int(values_8_[i]) : Int::na();
+      }
+      case 16: {
+        return (values_16_[i] != na_value_16()) ?
+               Int(values_16_[i]) : Int::na();
+      }
+      case 32: {
+        return (values_32_[i] != na_value_32()) ?
+               Int(values_32_[i]) : Int::na();
+      }
+      default: {
+        return values_64_[i];
+      }
+    }
+  }
 
   // Return the active column size.
   size_t get_valid_size() const;
 
+  // Reserve memory for a new value.
+  void reserve(size_t size, Int value);
+
   // Parse "datum" as Int.
   //
   // On success, returns the result.
   // On failure, throws an exception.
   static Int parse_datum(const Datum &datum);
+
+  static constexpr int8_t na_value_8() {
+    return std::numeric_limits<int8_t>::min();
+  }
+  static constexpr int8_t min_value_8() {
+    return std::numeric_limits<int8_t>::min() + 1;
+  }
+  static constexpr int8_t max_value_8() {
+    return std::numeric_limits<int8_t>::max();
+  }
+
+  static constexpr int16_t na_value_16() {
+    return std::numeric_limits<int16_t>::min();
+  }
+  static constexpr int16_t min_value_16() {
+    return std::numeric_limits<int16_t>::min() + 1;
+  }
+  static constexpr int16_t max_value_16() {
+    return std::numeric_limits<int16_t>::max();
+  }
+
+  static constexpr int32_t na_value_32() {
+    return std::numeric_limits<int32_t>::min();
+  }
+  static constexpr int32_t min_value_32() {
+    return std::numeric_limits<int32_t>::min() + 1;
+  }
+  static constexpr int32_t max_value_32() {
+    return std::numeric_limits<int32_t>::max();
+  }
 };
 
 }  // namespace impl
-------------- next part --------------
HTML����������������������������...
Download 



More information about the Groonga-commit mailing list
Zurück zum Archiv-Index