Redis source code analysis (1): skiplist

Time:2021-12-29

1 Preface

The skiplist was proposed by William Pugh in his paper “Skip Lists: A Probabilistic Alternative to Balanced Trees”. It is a data structure that can be used in place of a balanced tree. It relies on probabilistic balancing rather than strictly enforced balancing, so its insertion and deletion algorithms are much simpler and faster than the equivalent algorithms for balanced trees.

1.1 space overhead

The bottom layer of a skiplist is a singly linked list. The difference is that each node may hold several pointers to later nodes, one per layer. The number of layers of a node is chosen by a random-level algorithm governed by a constant $p$; therefore, the additional space overhead of a skiplist depends on $p$.

According to the random-level algorithm, the average number of layers per node is $1/(1-p)$, so the overall space overhead of a skiplist with $n$ nodes is about $n/(1-p)$ forward pointers.

1.2 time complexity

The expected lookup time complexity of a skiplist is $O(\log n)$.

2. Implementation in redis

In Redis, the skiplist is mainly used as the underlying data structure of the sorted set, and its implementation is almost a C translation of the original algorithm described by William Pugh in his paper, except for the following three aspects:

  • This implementation allows duplicate scores.
  • Comparison is performed not only on the key (i.e. the score) but also on the associated element.
  • There is a backward pointer, so the first layer is a doubly linked list, which allows traversal from tail to head.

2.1 data structure

/* A skiplist node. The trailing flexible array member `level` holds one
 * entry per level the node takes part in. */
typedef struct zskiplistNode {
    sds ele; // Element string carried by this node
    double score; // Sort key; nodes are ordered by score, ties broken by comparing ele
    struct zskiplistNode *backward; // Previous node on the lowest level; NULL for the first element
    struct zskiplistLevel {
        struct zskiplistNode *forward; // Next node on this level, or NULL at the end of the level
        unsigned long span; // Rank distance covered by this level's forward link
    } level[];
} zskiplistNode;

/* The skiplist itself. */
typedef struct zskiplist {
    struct zskiplistNode *header; // Dummy head node, created with ZSKIPLIST_MAXLEVEL levels
    struct zskiplistNode *tail; // Last node on the lowest level; NULL while the list is empty
    unsigned long length; // Number of elements stored (the dummy header is not counted)
    int level; // Highest level currently in use; 1 for a freshly created list
} zskiplist;

2.2 skiplist core API

2.2.1 Initialization

/*Create skiplist*/
zskiplist *zslCreate(void) {
    int j;
    zskiplist *zsl;

    zsl = zmalloc(sizeof(*zsl));
    zsl->level = 1; //  The maximum number of initialization layers is 1
    zsl->length = 0; //  Initialization length is 0
    
    /*The head node is a dummy node. Initialize max_ Level layer*/
    zsl->header = zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL);
    for (j = 0; j < ZSKIPLIST_MAXLEVEL; j++) {
        zsl->header->level[j].forward = NULL;
        zsl->header->level[j].span = 0;
    }
    zsl->header->backward = NULL;
    zsl->tail = NULL; //  The tail node is initialized to null
    return zsl;
}

2.2.2 Insertion

/* Pick a random level for a new node.
 * Starting from 1, the level keeps growing for as long as a 16-bit random
 * draw falls below ZSKIPLIST_P * 0xFFFF, so higher levels are progressively
 * less likely. The result is capped at ZSKIPLIST_MAXLEVEL. */
int zslRandomLevel(void) {
    int height;

    for (height = 1; (random()&0xFFFF) < (ZSKIPLIST_P * 0xFFFF); height++)
        ;
    if (height > ZSKIPLIST_MAXLEVEL)
        height = ZSKIPLIST_MAXLEVEL;
    return height;
}

/* Insert a new node with the given score and element and return it.
 *
 * The element is assumed not to be in the list already: duplicate scores
 * are allowed, so the caller of zslInsert() must check (in the hash table)
 * that the element is absent before calling. `score` must not be NaN. */
zskiplistNode *zslInsert(zskiplist *zsl, double score, sds ele) {
    zskiplistNode *update[ZSKIPLIST_MAXLEVEL]; /* update[i]: predecessor of the new node on level i */
    zskiplistNode *x;
    /* rank[i]: number of nodes passed to reach update[i]. It accumulates
     * `span` values, which are unsigned long, so it uses the same type to
     * avoid truncation where unsigned int is narrower than unsigned long. */
    unsigned long rank[ZSKIPLIST_MAXLEVEL];
    int i, level;

    serverAssert(!isnan(score));
    x = zsl->header;
    /* Walk from the top level down; moving to level i-1 continues from the
     * node reached on level i. */
    for (i = zsl->level-1; i >= 0; i--) {
        /* store rank that is crossed to reach the insert position */
        rank[i] = i == (zsl->level-1) ? 0 : rank[i+1];

        /* Advance while the next node sorts before (score, ele): it has a
         * lower score, or the same score and a smaller element. */
        while (x->level[i].forward &&
                (x->level[i].forward->score < score ||
                    (x->level[i].forward->score == score &&
                    sdscmp(x->level[i].forward->ele,ele) < 0)))
        {
            rank[i] += x->level[i].span;
            x = x->level[i].forward;
        }

        /* x is now the last node on this level that sorts before the new one. */
        update[i] = x;
    }

    level = zslRandomLevel();

    /* Levels that were not in use so far: the header is the predecessor and
     * its link on that level spans the whole list. */
    if (level > zsl->level) {
        for (i = zsl->level; i < level; i++) {
            rank[i] = 0;
            update[i] = zsl->header;
            update[i]->level[i].span = zsl->length;
        }
        zsl->level = level;
    }

    x = zslCreateNode(level,score,ele);
    for (i = 0; i < level; i++) {
        /* Splice x in right after update[i] on this level. */
        x->level[i].forward = update[i]->level[i].forward;
        update[i]->level[i].forward = x;

        /* update span covered by update[i] as x is inserted here:
         * rank[0] - rank[i] nodes lie between update[i] and x. */
        x->level[i].span = update[i]->level[i].span - (rank[0] - rank[i]);
        update[i]->level[i].span = (rank[0] - rank[i]) + 1;
    }

    /* increment span for untouched levels: those links now jump over x too */
    for (i = level; i < zsl->level; i++) {
        update[i]->level[i].span++;
    }

    /* Maintain the lowest-level backward links and the tail pointer. */
    x->backward = (update[0] == zsl->header) ? NULL : update[0];
    if (x->level[0].forward)
        x->level[0].forward->backward = x;
    else
        zsl->tail = x;

    zsl->length++;
    return x;
}

2.2.3 Deletion

/* Unlink node x from the skiplist; x itself is not freed here.
 * update[i] is expected to be the last node on level i that precedes x.
 * Internal function used by zslDelete, zslDeleteRangeByScore and
 * zslDeleteRangeByRank. */
void zslDeleteNode(zskiplist *zsl, zskiplistNode *x, zskiplistNode **update) {
    int lvl;

    for (lvl = 0; lvl < zsl->level; lvl++) {
        zskiplistNode *prev = update[lvl];

        if (prev->level[lvl].forward != x) {
            /* x is not linked on this level, so the predecessor's link
             * jumps over it: it simply covers one node less now. */
            prev->level[lvl].span -= 1;
            continue;
        }
        /* x is linked on this level: the predecessor takes over x's link
         * and the distance x used to cover, minus x itself. */
        prev->level[lvl].span += x->level[lvl].span - 1;
        prev->level[lvl].forward = x->level[lvl].forward;
    }

    /* Repair the lowest-level backward link, or the tail if x was last. */
    if (x->level[0].forward == NULL)
        zsl->tail = x->backward;
    else
        x->level[0].forward->backward = x->backward;

    /* Drop top levels that no longer contain any node. */
    while (zsl->level > 1 && !zsl->header->level[zsl->level-1].forward)
        zsl->level--;

    zsl->length--;
}

/* Remove the node matching both `score` and `ele` from the skiplist.
 * Returns 1 when such a node was found and unlinked, 0 otherwise.
 *
 * When 'node' is NULL the unlinked node is released with zslFreeNode().
 * Otherwise it is only unlinked and handed back through *node, so the
 * caller can reuse it (including the SDS string at node->ele). */
int zslDelete(zskiplist *zsl, double score, sds ele, zskiplistNode **node) {
    zskiplistNode *update[ZSKIPLIST_MAXLEVEL];
    zskiplistNode *cur = zsl->header;
    zskiplistNode *target;
    int lvl = zsl->level;

    /* Record, for each level, the last node that sorts before (score, ele). */
    while (lvl-- > 0) {
        for (;;) {
            zskiplistNode *nxt = cur->level[lvl].forward;

            if (nxt == NULL)
                break;
            if (!(nxt->score < score ||
                  (nxt->score == score && sdscmp(nxt->ele,ele) < 0)))
                break;
            cur = nxt;
        }
        update[lvl] = cur;
    }

    /* Several elements may share the score, so the candidate must match
     * on both score and element. */
    target = cur->level[0].forward;
    if (target == NULL || score != target->score ||
        sdscmp(target->ele,ele) != 0)
        return 0; /* not found */

    zslDeleteNode(zsl, target, update);
    if (node)
        *node = target;
    else
        zslFreeNode(target);
    return 1;
}

2.2.4 Range checks

/* Returns 1 if part of the zset may fall inside `range`, 0 otherwise.
 *
 * This is a cheap pre-check: it rejects ranges that are empty by
 * construction, then compares only the two ends of the list with the range
 * (the tail score against the minimum, the first score against the
 * maximum). A return value of 1 therefore does not by itself guarantee that
 * an element lies inside the range. */
int zslIsInRange(zskiplist *zsl, zrangespec *range) {
    zskiplistNode *x;

    /* Test for ranges that will always be empty. */
    if (range->min > range->max ||
            (range->min == range->max && (range->minex || range->maxex)))
        return 0;

    /* Empty list, or even the last element is below the minimum. */
    x = zsl->tail;
    if (x == NULL || !zslValueGteMin(x->score,range))
        return 0;

    /* Empty list, or even the first element is above the maximum. */
    x = zsl->header->level[0].forward;
    if (x == NULL || !zslValueLteMax(x->score,range))
        return 0;
    return 1;
}

/* Return the first node whose score lies inside `range`, or NULL when no
 * element is contained in the range. */
zskiplistNode *zslFirstInRange(zskiplist *zsl, zrangespec *range) {
    zskiplistNode *x;
    int lvl;

    /* Nothing can match: bail out before walking the list. */
    if (zslIsInRange(zsl,range) == 0)
        return NULL;

    /* On every level, skip the nodes that are still below the minimum. */
    x = zsl->header;
    for (lvl = zsl->level; lvl-- > 0; ) {
        while (x->level[lvl].forward != NULL &&
               !zslValueGteMin(x->level[lvl].forward->score,range))
        {
            x = x->level[lvl].forward;
        }
    }

    /* The pre-check passed, so a node at or above the minimum must exist. */
    x = x->level[0].forward;
    serverAssert(x != NULL);

    /* That node only counts if it does not exceed the maximum. */
    return zslValueLteMax(x->score,range) ? x : NULL;
}

/* Return the last node whose score lies inside `range`, or NULL when no
 * element is contained in the range. */
zskiplistNode *zslLastInRange(zskiplist *zsl, zrangespec *range) {
    zskiplistNode *x;
    int lvl;

    /* Nothing can match: bail out before walking the list. */
    if (zslIsInRange(zsl,range) == 0)
        return NULL;

    /* On every level, keep advancing while the next node is still at or
     * below the maximum. */
    x = zsl->header;
    for (lvl = zsl->level; lvl-- > 0; ) {
        while (x->level[lvl].forward != NULL &&
               zslValueLteMax(x->level[lvl].forward->score,range))
        {
            x = x->level[lvl].forward;
        }
    }

    /* The walk started at the header, so x is never NULL here. */
    serverAssert(x != NULL);

    /* The node reached only counts if it is not below the minimum. */
    return zslValueGteMin(x->score,range) ? x : NULL;
}

2.2.5 Range deletion

/* Delete every element whose score lies inside `range` and return how many
 * were removed. Each bound is inclusive unless the matching flag
 * (range->minex / range->maxex) marks it as exclusive, so an element is
 * deleted when its score satisfies both the minimum and the maximum bound.
 * The hash table view of the sorted set is passed in as `dict` so that the
 * elements are removed from it as well. */
unsigned long zslDeleteRangeByScore(zskiplist *zsl, zrangespec *range, dict *dict) {
    zskiplistNode *update[ZSKIPLIST_MAXLEVEL];
    zskiplistNode *cur = zsl->header;
    unsigned long removed = 0;
    int lvl;

    /* Skip everything below the minimum, remembering the predecessor on
     * each level for the unlinking that follows. */
    for (lvl = zsl->level-1; lvl >= 0; lvl--) {
        for (;;) {
            zskiplistNode *nxt = cur->level[lvl].forward;
            int below;

            if (nxt == NULL)
                break;
            if (range->minex)
                below = nxt->score <= range->min;
            else
                below = nxt->score < range->min;
            if (!below)
                break;
            cur = nxt;
        }
        update[lvl] = cur;
    }

    /* cur is the last node below the range; start from its successor. */
    cur = cur->level[0].forward;

    /* Remove nodes one by one until the maximum bound is exceeded. */
    while (cur != NULL) {
        zskiplistNode *next;

        if (range->maxex) {
            if (!(cur->score < range->max)) break;
        } else {
            if (!(cur->score <= range->max)) break;
        }

        next = cur->level[0].forward;
        zslDeleteNode(zsl,cur,update);
        dictDelete(dict,cur->ele);
        zslFreeNode(cur); /* Here is where cur->ele is actually released. */
        removed++;
        cur = next;
    }
    return removed;
}

/* Delete every element whose rank lies between `start` and `end` (both
 * inclusive, both 1-based) and return how many were removed. The elements
 * are removed from `dict` as well. */
unsigned long zslDeleteRangeByRank(zskiplist *zsl, unsigned int start, unsigned int end, dict *dict) {
    zskiplistNode *update[ZSKIPLIST_MAXLEVEL];
    zskiplistNode *cur = zsl->header;
    unsigned long passed = 0, removed = 0;
    int lvl;

    /* Stop just before rank `start`, remembering the predecessor on each
     * level for the unlinking that follows. */
    for (lvl = zsl->level; lvl-- > 0; ) {
        while (cur->level[lvl].forward != NULL &&
               (passed + cur->level[lvl].span) < start)
        {
            passed += cur->level[lvl].span;
            cur = cur->level[lvl].forward;
        }
        update[lvl] = cur;
    }

    /* `passed` now becomes the rank of the first candidate node. */
    passed++;
    cur = cur->level[0].forward;

    /* The predecessors in update[] stay valid while deleting, because every
     * removed node sits directly after them. */
    for (; cur != NULL && passed <= end; passed++, removed++) {
        zskiplistNode *next = cur->level[0].forward;

        zslDeleteNode(zsl,cur,update);
        dictDelete(dict,cur->ele);
        zslFreeNode(cur);
        cur = next;
    }
    return removed;
}

2.2.6 Rank

/* Find the rank for an element by both score and key.
 * Returns 0 when the element cannot be found, rank otherwise.
 * Note that the rank is 1-based due to the span of zsl->header to the
 * first element. */
unsigned long zslGetRank(zskiplist *zsl, double score, sds ele) {
    zskiplistNode *x;
    unsigned long rank = 0;
    int i;

    x = zsl->header;
    /* Walk from the top level down, adding up the spans that are crossed. */
    for (i = zsl->level-1; i >= 0; i--) {
        /* Advance while the next node sorts at or before (score, ele); the
         * `<= 0` lets the walk land on the matching node itself. */
        while (x->level[i].forward &&
            (x->level[i].forward->score < score ||
                (x->level[i].forward->score == score &&
                sdscmp(x->level[i].forward->ele,ele) <= 0))) {
            rank += x->level[i].span;
            x = x->level[i].forward;
        }

        /* x might be equal to zsl->header, so test if ele is non-NULL.
         * The score is compared as well: a node that has the same element
         * but a different score is not the node that was asked for. */
        if (x->ele && x->score == score && sdscmp(x->ele,ele) == 0) {
            return rank;
        }
    }
    return 0;
}

/* Return the node at the given 1-based rank, or NULL when the walk ends
 * without reaching that rank.
 * NOTE: as written, a rank of 0 matches before any step is taken and the
 * dummy header is returned. */
zskiplistNode* zslGetElementByRank(zskiplist *zsl, unsigned long rank) {
    zskiplistNode *cur = zsl->header;
    unsigned long passed = 0;
    int lvl = zsl->level - 1;

    while (lvl >= 0) {
        zskiplistNode *nxt = cur->level[lvl].forward;

        /* Take this level's link as long as it does not overshoot. */
        if (nxt != NULL && (passed + cur->level[lvl].span) <= rank) {
            passed += cur->level[lvl].span;
            cur = nxt;
            continue;
        }

        /* Cannot advance on this level any more: done, or go one level down. */
        if (passed == rank)
            return cur;
        lvl--;
    }
    return NULL;
}

3 references

[1] William Pugh, Skip Lists: A Probabilistic Alternative to Balanced Trees
https://15721.courses.cs.cmu….

[2] Redis-6.0.6 source codes
https://github.com/redis/redi…

Recommended Today

IOS memory partition

#Stack area It is a continuous data structure expanded from high address to low address, which is used to store the values of local variables and function parameters #Heap area Programmers need to manage and release the data structure domain expanded by low address like high address and discontinuous memory area #Global area Global variables […]