C++跳表项目源码分析
什么是跳表skiplist
一种基于链表list改造的数据结构,以空间换时间的方式加速链表的搜索。
具体定义这里不赘述,具体可看传送门:漫画小灰之跳表
本文主要赏析github上一个跳表项目的实现
传送门:一个使用C++编程实现的基于跳表的轻量级键值型数据库
项目中跳表实现都在一个头文件skipList.h中,main.cpp为使用示例。
skiplist源码分析
节点类定义
节点为key-value型,由于跳表为多层结构,数组forward用于存放每层中该节点的下一节点
class Node {
public:
Node() {}
Node(K k, V v, int);
~Node();
K get_key() const;
V get_value() const;
void set_value(V);
// Linear array to hold pointers to next node of different level
//存放不同层中下一个节点的指针的线性表
Node<K, V> **forward;
int node_level;
private:
K key;
V value;
};
节点构造函数
level为该节点在跳表中有多少层,创建节点时随机分配
template<typename K, typename V>
Node<K, V>::Node(const K k, const V v, int level) {
this->key = k;
this->value = v;
this->node_level = level;
// level + 1, because array index is from 0 - level
this->forward = new Node<K, V>*[level+1];
// Fill forward array with 0(NULL)
memset(this->forward, 0, sizeof(Node<K, V>*)*(level+1));
};
跳表类定义
我们主要看创建节点、插入节点、搜索元素、删除元素这几个方法的实现。
// Class template for Skip list
template <typename K, typename V>
class SkipList {
public:
SkipList(int);
~SkipList();
int get_random_level();
Node<K, V>* create_node(K, V, int);
int insert_element(K, V);
void display_list();
bool search_element(K);
void delete_element(K);
void dump_file();
void load_file();
int size();
private:
void get_key_value_from_string(const std::string& str, std::string* key, std::string* value);
bool is_valid_string(const std::string& str);
private:
// Maximum level of the skip list
//最大层数
int _max_level;
// current level of skip list
//当前层数
int _skip_list_level;
// pointer to header node
//头节点
Node<K, V> *_header;
// file operator
std::ofstream _file_writer;
std::ifstream _file_reader;
// skiplist current element count
int _element_count;
};
构造函数
头节点的level为跳表的最大层数
项目中的析构函数应该是有内存泄漏的问题的,只detele了头节点
// construct skip list
template<typename K, typename V>
SkipList<K, V>::SkipList(int max_level) {
this->_max_level = max_level;
this->_skip_list_level = 0;
this->_element_count = 0;
// create header node and initialize key and value to null
K k;
V v;
this->_header = new Node<K, V>(k, v, _max_level);
};
创建节点
直接new一个node,返回指针
// create new node
template<typename K, typename V>
Node<K, V>* SkipList<K, V>::create_node(const K k, const V v, int level) {
Node<K, V> *n = new Node<K, V>(k, v, level);
return n;
}
插入节点
如在以下跳表中插入key为50的节点:
+------------+
| insert 50 |
+------------+
level 4 +-->1+ 100
|
| insert +----+
level 3 1+-------->10+---------------> | 50 | 70 100
| |
| |
level 2 1 10 30 | 50 | 70 100
| |
| |
level 1 1 4 10 30 | 50 | 70 100
| |
| |
level 0 1 4 9 10 30 40 | 50 | 60 70 100
需要在每层中找到插入的位置,即每层插入位置的上一个节点,该节点的key小于插入节点,下一节点的key大于插入节点。这里定义了一个数组update存放插入位置的上一个节点。
template<typename K, typename V>
int SkipList<K, V>::insert_element(const K key, const V value) {
mtx.lock();
Node<K, V> *current = this->_header;
// create update array and initialize it
// update is array which put node that the node->forward[i] should be operated later
//update[i]是第i层中key最后一个比插入key小的node*
Node<K, V> *update[_max_level+1];
memset(update, 0, sizeof(Node<K, V>*)*(_max_level+1));
// start form highest level of skip list
//从最高层搜索填补update
for(int i = _skip_list_level; i >= 0; i--) {
while(current->forward[i] != NULL && current->forward[i]->get_key() < key) {
current = current->forward[i];
}
update[i] = current;
}
//在上图示例中,如插入key 50, 在每层中,得到它的上一节点的key依次为40,30,30,10,1
// reached level 0 and forward pointer to right node, which is desired to insert key.
//第0层, current->forward[0]为应该插入的位置
current = current->forward[0];
// if current node have key equal to searched key, we get it
//该key已存在,解锁后直接返回
if (current != NULL && current->get_key() == key) {
std::cout << "key: " << key << ", exists" << std::endl;
mtx.unlock();
return 1;
}
// if current is NULL that means we have reached to end of the level
//current为空,表示到达了该层的末尾
// if current"s key is not equal to key that means we have to insert node between update[0] and current node
//不为空则要在update[0]和current之间插入
if (current == NULL || current->get_key() != key ) {
// Generate a random level for node
//随机层数
int random_level = get_random_level();
// If random level is greater thar skip list"s current level, initialize update value with pointer to header
//随机层数比当前的层数高时,比当前层高的层前一节点就是_header
if (random_level > _skip_list_level) {
for (int i = _skip_list_level+1; i < random_level+1; i++) {
update[i] = _header;
}
_skip_list_level = random_level;
}
// create new node with random level generated
//创建节点
Node<K, V>* inserted_node = create_node(key, value, random_level);
// insert node
// 插入节点
//1、对每一层,插入节点的下一节点为update[i]的下一节点
//2、update[i]的下一节点更新为插入节点
for (int i = 0; i <= random_level; i++) {
inserted_node->forward[i] = update[i]->forward[i];
update[i]->forward[i] = inserted_node;
}
std::cout << "Successfully inserted key:" << key << ", value:" << value << std::endl;
_element_count ++; //增加节点数
}
mtx.unlock();
return 0;
}
搜索节点
+------------+
| select 60 |
+------------+
level 4 +-->1+ 100
|
|
level 3 1+-------->10+------------------>50+ 70 100
|
|
level 2 1 10 30 50| 70 100
|
|
level 1 1 4 10 30 50| 70 100
|
|
level 0 1 4 9 10 30 40 50+-->60 70 100
从顶层的header开始,从上而下,从左到右,依次遍历每层的节点key,直到第0层。
template<typename K, typename V>
bool SkipList<K, V>::search_element(K key) {
std::cout << "search_element-----------------" << std::endl;
Node<K, V> *current = _header;
// start from highest level of skip list
for (int i = _skip_list_level; i >= 0; i--) {
while (current->forward[i] && current->forward[i]->get_key() < key) {
current = current->forward[i];
}
}
//reached level 0 and advance pointer to right node, which we search
current = current->forward[0];
// if current node have key equal to searched key, we get it
//key存在则找到目标元素
if (current and current->get_key() == key) {
std::cout << "Found key: " << key << ", value: " << current->get_value() << std::endl;
return true;
}
std::cout << "Not Found Key:" << key << std::endl;
return false;
}
删除节点
同理,先找到每层中该节点位置的前一节点
若某层中该key存在,前一节点的下一节点指向该元素的下一节点。
// Delete element from skip list
template<typename K, typename V>
void SkipList<K, V>::delete_element(K key) {
mtx.lock();
Node<K, V> *current = this->_header;
Node<K, V> *update[_max_level+1];
memset(update, 0, sizeof(Node<K, V>*)*(_max_level+1));
// start from highest level of skip list
for (int i = _skip_list_level; i >= 0; i--) {
while (current->forward[i] !=NULL && current->forward[i]->get_key() < key) {
current = current->forward[i];
}
update[i] = current;
}
current = current->forward[0];
//找到该节点
if (current != NULL && current->get_key() == key) {
// start for lowest level and delete the current node of each level
for (int i = 0; i <= _skip_list_level; i++) {
// if at level i, next node is not target node, break the loop.
//如果该层没有该节点,跳过
if (update[i]->forward[i] != current)
break;
update[i]->forward[i] = current->forward[i];
}
// Remove levels which have no elements
//如果删除节点后该层没有元素,则调整当前的层数
while (_skip_list_level > 0 && _header->forward[_skip_list_level] == 0) {
_skip_list_level --;
}
std::cout << "Successfully deleted key "<< key << std::endl;
_element_count --;
delete current; //这句我自己加的,源码中没有,难道节点只new不delete吗?
}
mtx.unlock();
return;
}