Move from dataset_dev branch to here

pull/1076/head
Jesse Lee 5 years ago
parent 2bc3fcb1c1
commit 3c643e7298

@ -18,6 +18,7 @@
#include <atomic>
#include <memory>
#include <utility>
#include <vector>
#include "dataset/util/btree.h"
@ -25,19 +26,20 @@
namespace mindspore {
namespace dataset {
// This is a B+ tree with generated uint64_t value as key.
// Use minKey() function to query the min key.
// Use maxKey() function to query the max key.
// @tparam T
template <typename T>
class AutoIndexObj : public BPlusTree<uint64_t, T> {
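/// This is a B+ tree with generated int64_t value as key.
/// Use min_key() function to query the min key.
/// Use max_key() function to query the max key.
/// @tparam T -- the type of value stored in the tree
/// @tparam A -- allocator, defaults to std::allocator<T>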
template <typename T, typename A = std::allocator<T>>
class AutoIndexObj : public BPlusTree<int64_t, T, A> {
public:
using my_tree = BPlusTree<uint64_t, T>;
using my_tree = BPlusTree<int64_t, T, A>;
using key_type = typename my_tree::key_type;
using value_type = typename my_tree::value_type;
explicit AutoIndexObj(const typename my_tree::value_allocator &alloc = Allocator<T>{std::make_shared<SystemPool>()})
: my_tree::BPlusTree(alloc), inx_(kMinKey) {}
AutoIndexObj() : my_tree::BPlusTree(), inx_(kMinKey) {}
explicit AutoIndexObj(const Allocator<T> &alloc) : my_tree::BPlusTree(alloc), inx_(kMinKey) {}
~AutoIndexObj() = default;
@ -52,6 +54,14 @@ class AutoIndexObj : public BPlusTree<uint64_t, T> {
return my_tree::DoInsert(my_inx, val);
}
/// Insert a value handed over by unique_ptr, keyed by the next auto-generated index.
/// @param val -- owning pointer to the value; forwarded with std::move to my_tree::DoInsert
/// @param key -- optional output; when non-null it receives the index assigned to this value
/// @return whatever Status my_tree::DoInsert returns
Status insert(std::unique_ptr<value_type> &&val, key_type *key = nullptr) {
  // Reserve the index first; fetch_add hands back the counter value from before the increment.
  const key_type assigned = inx_.fetch_add(1);
  if (key != nullptr) {
    *key = assigned;
  }
  return my_tree::DoInsert(assigned, std::move(val));
}
// Insert a vector of objects into the tree.
// @param v
// @return

@ -44,12 +44,14 @@ struct BPlusTreeTraits {
static constexpr bool kAppendMode = false;
};
// Implementation of B+ tree
// @tparam K
// @tparam V
// @tparam C
// @tparam T
template <typename K, typename V, typename C = std::less<K>, typename T = BPlusTreeTraits>
/// Implementation of B+ tree
/// @tparam K -- the type of key
/// @tparam V -- the type of value
/// @tparam A -- allocator
/// @tparam C -- comparison class
/// @tparam T -- trait
template <typename K, typename V, typename A = std::allocator<V>, typename C = std::less<K>,
typename T = BPlusTreeTraits>
class BPlusTree {
public:
enum class IndexRc : char {
@ -87,11 +89,13 @@ class BPlusTree {
using key_compare = C;
using slot_type = typename T::slot_type;
using traits = T;
using key_allocator = Allocator<key_type>;
using value_allocator = Allocator<value_type>;
using slot_allocator = Allocator<slot_type>;
using value_allocator = A;
using key_allocator = typename value_allocator::template rebind<key_type>::other;
using slot_allocator = typename value_allocator::template rebind<slot_type>::other;
explicit BPlusTree(const value_allocator &alloc);
BPlusTree();
explicit BPlusTree(const Allocator<V> &alloc);
~BPlusTree() noexcept;
@ -109,10 +113,15 @@ class BPlusTree {
bool empty() const { return (size() == 0); }
// @param key
// @param value
// @return
/// @param key
/// @param value
/// @return
Status DoInsert(const key_type &key, const value_type &value);
Status DoInsert(const key_type &key, std::unique_ptr<value_type> &&value);
// Update a new value for a given key.
std::unique_ptr<value_type> DoUpdate(const key_type &key, const value_type &new_value);
std::unique_ptr<value_type> DoUpdate(const key_type &key, std::unique_ptr<value_type> &&new_value);
void PopulateNumKeys();
@ -144,7 +153,7 @@ class BPlusTree {
virtual ~BaseNode() = default;
protected:
RWLock rw_lock_;
mutable RWLock rw_lock_;
value_allocator alloc_;
private:
@ -267,7 +276,7 @@ class BPlusTree {
// 50/50 split
IndexRc Split(LeafNode *to);
IndexRc InsertIntoSlot(LockPathCB *insCB, slot_type slot, const key_type &key, std::shared_ptr<value_type> value);
IndexRc InsertIntoSlot(LockPathCB *insCB, slot_type slot, const key_type &key, std::unique_ptr<value_type> &&value);
explicit LeafNode(const value_allocator &alloc) : BaseNode::BaseNode(alloc), slotuse_(0) {}
@ -275,11 +284,11 @@ class BPlusTree {
slot_type slot_dir_[traits::kLeafSlots];
key_type keys_[traits::kLeafSlots];
std::shared_ptr<value_type> data_[traits::kLeafSlots];
std::unique_ptr<value_type> data_[traits::kLeafSlots];
slot_type slotuse_;
};
RWLock rw_lock_;
mutable RWLock rw_lock_;
value_allocator alloc_;
// All the leaf nodes. Used by the iterator to traverse all the key/values.
List<LeafNode> leaf_nodes_;
@ -319,8 +328,8 @@ class BPlusTree {
return lo;
}
IndexRc LeafInsertKeyValue(LockPathCB *ins_cb, LeafNode *node, const key_type &key, std::shared_ptr<value_type> value,
key_type *split_key, LeafNode **split_node);
IndexRc LeafInsertKeyValue(LockPathCB *ins_cb, LeafNode *node, const key_type &key,
std::unique_ptr<value_type> &&value, key_type *split_key, LeafNode **split_node);
IndexRc InnerInsertKeyChild(InnerNode *node, const key_type &key, BaseNode *ptr, key_type *split_key,
InnerNode **split_node);
@ -335,10 +344,11 @@ class BPlusTree {
return child;
}
IndexRc InsertKeyValue(LockPathCB *ins_cb, BaseNode *n, const key_type &key, std::shared_ptr<value_type> value,
IndexRc InsertKeyValue(LockPathCB *ins_cb, BaseNode *n, const key_type &key, std::unique_ptr<value_type> &&value,
key_type *split_key, BaseNode **split_node);
IndexRc Locate(BaseNode *top, const key_type &key, LeafNode **ln, slot_type *s) const;
IndexRc Locate(RWLock *parent_lock, bool forUpdate, BaseNode *top, const key_type &key, LeafNode **ln,
slot_type *s) const;
public:
class Iterator : public std::iterator<std::bidirectional_iterator_tag, value_type> {
@ -346,19 +356,27 @@ class BPlusTree {
using reference = BPlusTree::value_type &;
using pointer = BPlusTree::value_type *;
explicit Iterator(BPlusTree *btree) : cur_(btree->leaf_nodes_.head), slot_(0) {}
explicit Iterator(BPlusTree *btree) : cur_(btree->leaf_nodes_.head), slot_(0), locked_(false) {}
Iterator(LeafNode *leaf, slot_type slot, bool locked = false) : cur_(leaf), slot_(slot), locked_(locked) {}
~Iterator();
explicit Iterator(const Iterator &);
Iterator &operator=(const Iterator &lhs);
Iterator(LeafNode *leaf, slot_type slot) : cur_(leaf), slot_(slot) {}
Iterator(Iterator &&);
~Iterator() = default;
Iterator &operator=(Iterator &&lhs);
pointer operator->() const { return cur_->data_[cur_->slot_dir_[slot_]].get(); }
reference operator*() const { return *(cur_->data_[cur_->slot_dir_[slot_]].get()); }
const key_type &key() { return cur_->keys_[cur_->slot_dir_[slot_]]; }
const key_type &key() const { return cur_->keys_[cur_->slot_dir_[slot_]]; }
const value_type &value() { return *(cur_->data_[cur_->slot_dir_[slot_]].get()); }
value_type &value() const { return *(cur_->data_[cur_->slot_dir_[slot_]].get()); }
// Prefix++
Iterator &operator++();
@ -379,6 +397,7 @@ class BPlusTree {
private:
typename BPlusTree::LeafNode *cur_;
slot_type slot_;
bool locked_;
};
class ConstIterator : public std::iterator<std::bidirectional_iterator_tag, value_type> {
@ -386,11 +405,20 @@ class BPlusTree {
using reference = BPlusTree::value_type &;
using pointer = BPlusTree::value_type *;
explicit ConstIterator(const BPlusTree *btree) : cur_(btree->leaf_nodes_.head), slot_(0) {}
explicit ConstIterator(const BPlusTree *btree) : cur_(btree->leaf_nodes_.head), slot_(0), locked_(false) {}
~ConstIterator();
ConstIterator(const LeafNode *leaf, slot_type slot, bool locked = false)
: cur_(leaf), slot_(slot), locked_(locked) {}
explicit ConstIterator(const ConstIterator &);
ConstIterator &operator=(const ConstIterator &lhs);
~ConstIterator() = default;
ConstIterator(ConstIterator &&);
ConstIterator(const LeafNode *leaf, slot_type slot) : cur_(leaf), slot_(slot) {}
ConstIterator &operator=(ConstIterator &&lhs);
pointer operator->() const { return cur_->data_[cur_->slot_dir_[slot_]].get(); }
@ -398,7 +426,7 @@ class BPlusTree {
const key_type &key() const { return cur_->keys_[cur_->slot_dir_[slot_]]; }
const value_type &value() const { return *(cur_->data_[cur_->slot_dir_[slot_]].get()); }
value_type &value() const { return *(cur_->data_[cur_->slot_dir_[slot_]].get()); }
// Prefix++
ConstIterator &operator++();
@ -419,6 +447,7 @@ class BPlusTree {
private:
const typename BPlusTree::LeafNode *cur_;
slot_type slot_;
bool locked_;
};
Iterator begin();
@ -435,6 +464,7 @@ class BPlusTree {
// Locate the entry with key
ConstIterator Search(const key_type &key) const;
Iterator Search(const key_type &key);
value_type operator[](key_type key);
};

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

@ -50,7 +50,7 @@ class MindDataTestBPlusTree : public UT::Common {
// Test serial insert.
TEST_F(MindDataTestBPlusTree, Test1) {
Allocator<std::string> alloc(std::make_shared<SystemPool>());
BPlusTree<uint64_t, std::string, std::less<uint64_t>, mytraits> btree(alloc);
BPlusTree<uint64_t, std::string, Allocator<std::string>, std::less<uint64_t>, mytraits> btree(alloc);
Status rc;
for (int i = 0; i < 100; i++) {
uint64_t key = 2 * i;
@ -92,16 +92,16 @@ TEST_F(MindDataTestBPlusTree, Test1) {
}
}
// Test nearch
// Test search
{
MS_LOG(INFO) << "Locate key " << 100 << " Expect found.";
auto it = btree.Search(100);
EXPECT_FALSE(it == btree.cend());
EXPECT_FALSE(it == btree.end());
EXPECT_EQ(it.key(), 100);
EXPECT_EQ(it.value(), "Hello World. I am 100");
MS_LOG(INFO) << "Locate key " << 300 << " Expect not found.";
it = btree.Search(300);
EXPECT_TRUE(it == btree.cend());
EXPECT_TRUE(it == btree.end());
}
// Test duplicate key
@ -114,7 +114,7 @@ TEST_F(MindDataTestBPlusTree, Test1) {
// Test concurrent insert.
TEST_F(MindDataTestBPlusTree, Test2) {
Allocator<std::string> alloc(std::make_shared<SystemPool>());
BPlusTree<uint64_t, std::string, std::less<uint64_t>, mytraits> btree(alloc);
BPlusTree<uint64_t, std::string, Allocator<std::string>, std::less<uint64_t>, mytraits> btree(alloc);
TaskGroup vg;
auto f = [&](int k) -> Status {
TaskManager::FindMe()->Post();
@ -127,10 +127,22 @@ TEST_F(MindDataTestBPlusTree, Test2) {
}
return Status::OK();
};
// Spawn two threads. One insert the odd numbers and the other insert the even numbers just like Test1
// Reader task: issue 1000 lookups with keys drawn from [0, 9999] against the shared tree.
// The lookup result is not inspected; the task only exercises Search() from tasks of the same
// TaskGroup that also runs the insert tasks.
// @param k -- task number supplied through std::bind; not used by the body
// @return Status::OK() once all lookups have been issued
// NOTE(review): rand() is called from several tasks at once and whether it is thread-safe is
// implementation-defined -- consider a per-task generator if this ever becomes flaky.
auto g = [&](int k) -> Status {
  TaskManager::FindMe()->Post();
  for (int i = 0; i < 1000; i++) {
    uint64_t key = rand() % 10000;
    // Keep the returned iterator alive until the end of the iteration, as the original did.
    auto it = btree.Search(key);
  }
  return Status::OK();
};
// Spawn multiple threads to do insert.
for (int k = 0; k < 100; k++) {
vg.CreateAsyncTask("Concurrent Insert", std::bind(f, k));
}
// Spawn a few threads to do random search.
for (int k = 0; k < 2; k++) {
vg.CreateAsyncTask("Concurrent search", std::bind(g, k));
}
vg.join_all();
EXPECT_EQ(btree.size(), 10000);
@ -158,7 +170,7 @@ TEST_F(MindDataTestBPlusTree, Test2) {
MS_LOG(INFO) << "Locating key from 0 to 9999. Expect found.";
for (int i = 0; i < 10000; i++) {
auto it = btree.Search(i);
bool eoS = (it == btree.cend());
bool eoS = (it == btree.end());
EXPECT_FALSE(eoS);
if (!eoS) {
EXPECT_EQ(it.key(), i);
@ -168,7 +180,7 @@ TEST_F(MindDataTestBPlusTree, Test2) {
}
MS_LOG(INFO) << "Locate key " << 10000 << ". Expect not found";
auto it = btree.Search(10000);
EXPECT_TRUE(it == btree.cend());
EXPECT_TRUE(it == btree.end());
}
// Test to retrieve key at certain position.
@ -182,11 +194,11 @@ TEST_F(MindDataTestBPlusTree, Test2) {
TEST_F(MindDataTestBPlusTree, Test3) {
Allocator<std::string> alloc(std::make_shared<SystemPool>());
AutoIndexObj<std::string> ai(alloc);
AutoIndexObj<std::string, Allocator<std::string>> ai(alloc);
Status rc;
rc = ai.insert("Hello World");
EXPECT_TRUE(rc.IsOk());
ai.insert({"a", "b", "c"});
rc = ai.insert({"a", "b", "c"});
EXPECT_TRUE(rc.IsOk());
uint64_t min = ai.min_key();
uint64_t max = ai.max_key();
@ -199,3 +211,30 @@ TEST_F(MindDataTestBPlusTree, Test3) {
MS_LOG(DEBUG) << ai[i] << std::endl;
}
}
// Insert 1000 integers through the unique_ptr overload of AutoIndexObj::insert, then walk the
// tree with an iterator and check that keys are strictly increasing and values come back in
// insertion order.
TEST_F(MindDataTestBPlusTree, Test4) {
  Allocator<int64_t> alloc(std::make_shared<SystemPool>());
  AutoIndexObj<int64_t, Allocator<int64_t>> ai(alloc);
  Status rc;
  for (int i = 0; i < 1000; i++) {
    rc = ai.insert(std::make_unique<int64_t>(i));
    EXPECT_TRUE(rc.IsOk());
  }
  // Test iterator
  {
    auto it = ai.begin();
    // The first entry only seeds the "previous key"; counting therefore starts at 1.
    uint64_t prev = it.key();
    int cnt = 1;
    for (++it; it != ai.end(); ++it, ++cnt) {
      const uint64_t cur = it.key();
      EXPECT_TRUE(prev < cur);
      // Values were inserted as 0..999, so the n-th visited entry must hold n.
      EXPECT_EQ(it.value(), cnt);
      prev = cur;
    }
    EXPECT_EQ(cnt, 1000);
  }
}

Loading…
Cancel
Save