Manual golang basic data structure and algorithm heap sorting

Time:2021-10-25

origin

Recently read < < my first algorithm book > > ([Japan] Ishida Baohui; Miyazaki Xiuyi)
This series of notes is intended to use golang exercises

Heap sort

Heap sorting is characterized by the use of the heap in the data structure.
First, store all the data in the heap and build the heap in descending order.
In order to sort, you need to take the data out of the heap one by one.

At the beginning of heap sorting, N data need to be stored in the heap, and the time required is O (nlogn).
The time required to fetch the maximum data and reconstruct the heap in each round is O (logn).
Since there are n rounds in total, the sorting time after reconstruction is also o (NLog n).
Therefore, overall, the time complexity of heap sorting is O (NLog n).

In this way, the running time of heap sort is shorter than that of bubble sort, selection sort and insertion sort o (n ^ 2).

From < < my first algorithm book > > [Japan] Ishida Baohui; Miyazaki Xiuyi

target

  • Construct a heap and test the efficiency of heap sorting

Design

  • Iheap: defines the interface of the heap
  • tArrayHeap

    • Implementation of heap based on array
    • Heap is a special binary complete tree: the parent node is always smaller than any child node
    • There is a linear relationship between the indexes of the parent and child nodes of the heap. Take the subscript 0 as an example

      • parent.index = (node.index – 1) / 2
      • leftChild.index = node.index*2 + 1
      • rightChild.index = leftChild.index + 1
  • ISorter:

    • Define sorting interface
    • Define a value comparison function to be compatible with any value type
    • The reverse order output can be realized by adjusting the comparison function
  • tHeapSort

    • Heap sorting using tarrayheap
    • First push the entire array into the heap
    • Then pop them out one by one

unit testing

heap_ sort_ Test.go, the test process is similar to the previous bubbling, selection, insertion, etc., but the test array is expanded to 100000 elements

package sorting

import (
    "fmt"
    "learning/gooop/sorting"
    "learning/gooop/sorting/heap_sort"
    "math/rand"
    "testing"
    "time"
)

func Test_HeapSort(t *testing.T) {
    fnAssertTrue := func(b bool, msg string) {
        if !b {
            t.Fatal(msg)
        }
    }

    reversed := false
    fnCompare := func(a interface{}, b interface{}) sorting.CompareResult {
        i1 := a.(int)
        i2 := b.(int)

        if i1 < i2 {
            if reversed {
                return sorting.GREATER
            } else {
                return sorting.LESS
            }
        } else if i1 == i2 {
            return sorting.EQUAL
        } else {
            if reversed {
                return sorting.LESS
            } else {
                return sorting.GREATER
            }
        }
    }

    fnTestSorter := func(sorter sorting.ISorter) {
        reversed = false

        // test simple array
        samples := []interface{} { 2,3,1,5,4,7,6 }
        sorter.Sort(samples, fnCompare)
        fnAssertTrue(fmt.Sprintf("%v", samples) == "[1 2 3 4 5 6 7]",  "expecting 1,2,3,4,5,6,7")
        t.Log("pass sorting [2 3 1 5 4 7 6] >> [1 2 3 4 5 6 7]")

        // test 10000 items sorting
        rnd := rand.New(rand.NewSource(time.Now().UnixNano()))
        sampleCount := 100*1000
        t.Logf("prepare large array with %v items", sampleCount)
        samples = make([]interface{}, sampleCount)
        for i := 0;i < sampleCount;i++ {
            samples[i] = rnd.Intn(sampleCount*10)
        }

        t.Logf("sorting large array with %v items", sampleCount)
        t0 := time.Now().UnixNano()
        sorter.Sort(samples, fnCompare)
        cost := time.Now().UnixNano() - t0
        for i := 1;i < sampleCount;i++ {
            fnAssertTrue(fnCompare(samples[i-1], samples[i]) != sorting.GREATER, "expecting <=")
        }
        t.Logf("end sorting large array, cost = %v ms", cost / 1000000)

        // test 0-20
        sampleCount = 20
        t.Log("sorting 0-20")
        samples = make([]interface{}, sampleCount)
        for i := 0;i < sampleCount;i++ {
            for {
                p := rnd.Intn(sampleCount)
                if samples[p] == nil {
                    samples[p] = i
                    break
                }
            }
        }
        t.Logf("unsort = %v", samples)

        sorter.Sort(samples, fnCompare)
        t.Logf("sorted = %v", samples)
        fnAssertTrue(fmt.Sprintf("%v", samples) == "[0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19]", "expecting 0-20")
        t.Log("pass sorting 0-20")

        // test special
        samples = []interface{} {}
        sorter.Sort(samples, fnCompare)
        t.Log("pass sorting []")

        samples = []interface{} { 1 }
        sorter.Sort(samples, fnCompare)
        t.Log("pass sorting [1]")

        samples = []interface{} { 3,1 }
        sorter.Sort(samples, fnCompare)
        fnAssertTrue(fmt.Sprintf("%v", samples) == "[1 3]",  "expecting 1,3")
        t.Log("pass sorting [1 3]")

        reversed = true
        samples = []interface{} { 2, 3,1 }
        sorter.Sort(samples, fnCompare)
        fnAssertTrue(fmt.Sprintf("%v", samples) == "[3 2 1]",  "expecting 3,2,1")
        t.Log("pass sorting [3 2 1]")
    }

    t.Log("\ntesting HeapSort")
    fnTestSorter(heap_sort.HeapSort)
}

Test output

It only takes tens of milliseconds to sort 100000 elements,
It does have an exponential improvement over bubbling, selection, insertion and other sorting
The price is that it takes up an extra heap space

$ go test -v heap_sort_test.go 
=== RUN   Test_HeapSort
    heap_sort_test.go:109: 
        testing HeapSort
    heap_sort_test.go:48: pass sorting [2 3 1 5 4 7 6] >> [1 2 3 4 5 6 7]
    heap_sort_test.go:53: prepare large array with 100000 items
    heap_sort_test.go:59: sorting large array with 100000 items
    heap_sort_test.go:66: end sorting large array, cost = 67 ms
    heap_sort_test.go:70: sorting 0-20
    heap_sort_test.go:81: unsort = [4 15 1 13 19 14 11 12 9 8 3 7 16 18 2 10 17 6 0 5]
    heap_sort_test.go:84: sorted = [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19]
    heap_sort_test.go:86: pass sorting 0-20
    heap_sort_test.go:91: pass sorting []
    heap_sort_test.go:95: pass sorting [1]
    heap_sort_test.go:100: pass sorting [1 3]
    heap_sort_test.go:106: pass sorting [3 2 1]
--- PASS: Test_HeapSort (0.07s)
PASS
ok      command-line-arguments  0.076s

IHeap.go

Defines the interface of the heap

package heap_sort

type IHeap interface {
    Size() int
    IsEmpty() bool
    IsNotEmpty() bool

    Push(value interface{})
    Pop() (error, interface{})
}

tArrayHeap

  • Implementation of heap based on array
  • Heap is a special binary complete tree: the parent node is always smaller than any child node
  • There is a linear relationship between the indexes of the parent and child nodes of the heap. Take the subscript 0 as an example

    • parent.index = (node.index – 1) / 2
    • leftChild.index = node.index*2 + 1
    • rightChild.index = leftChild.index + 1
package heap_sort

import (
    "errors"
    "learning/gooop/sorting"
)

type tArrayHeap struct {
    comparator sorting.CompareFunction
    items []interface{}
    size int
    version int64
}

func newArrayHeap(comparator sorting.CompareFunction) IHeap {
    return &tArrayHeap{
        comparator: comparator,
        items: make([]interface{}, 0),
        size: 0,
        version: 0,
    }
}

func (me *tArrayHeap) Size() int {
    return me.size
}

func (me *tArrayHeap) IsEmpty() bool {
    return me.size <= 0
}

func (me *tArrayHeap) IsNotEmpty() bool {
    return !me.IsEmpty()
}

func (me *tArrayHeap) Push(value interface{}) {
    me.version++

    me.ensureSize(me.size + 1)
    me.items[me.size] = value
    me.size++

    me.shiftUp(me.size - 1)
    me.version++
}


func (me *tArrayHeap) ensureSize(size int) {
    for ;len(me.items) < size; {
        me.items = append(me.items, nil)
    }
}

func (me *tArrayHeap) parentOf(i int) int {
    return (i - 1) / 2
}

func (me *tArrayHeap) leftChildOf(i int) int {
    return i*2 + 1
}

func (me *tArrayHeap) rightChildOf(i int) int {
    return me.leftChildOf(i) + 1
}

func (me *tArrayHeap) last() (i int, v interface{}) {
    if me.IsEmpty() {
        return -1, nil
    }

    i = me.size - 1
    v = me.items[i]
    return i,v
}

func (me *tArrayHeap) shiftUp(i int) {
    if i <= 0 {
        return
    }
    v := me.items[i]

    pi := me.parentOf(i)
    pv := me.items[pi]

    if me.comparator(v, pv) == sorting.LESS {
        me.items[pi], me.items[i] = v, pv
        me.shiftUp(pi)
    }
}

func (me *tArrayHeap) Pop() (error, interface{}) {
    if me.IsEmpty() {
        return gNoMoreElementsError, nil
    }

    me.version++

    top := me.items[0]
    li, lv := me.last()
    me.items[0] = nil
    me.size--

    if me.IsEmpty() {
        return nil, top
    }

    me.items[0] = lv
    me.items[li] = nil

    me.shiftDown(0)
    me.version++

    return nil, top
}

func (me *tArrayHeap) shiftDown(i int) {
    pv := me.items[i]
    ok, ci, cv := me.minChildOf(i)
    if ok && me.comparator(cv, pv) == sorting.LESS {
        me.items[i], me.items[ci] = cv, pv
        me.shiftDown(ci)
    }
}

func (me *tArrayHeap) minChildOf(p int) (ok bool, i int, v interface{}) {
    li := me.leftChildOf(p)
    if li >= me.size {
        return false, 0, nil
    }
    lv := me.items[li]

    ri := me.rightChildOf(p)
    if ri >= me.size {
        return true, li, lv
    }
    rv := me.items[ri]

    if me.comparator(lv, rv) == sorting.LESS {
        return true, li, lv
    } else {
        return true, ri, rv
    }
}

var gNoMoreElementsError = errors.New("no more elements")

ISorter

  • Define sorting interface
  • Define a value comparison function to be compatible with any value type
  • The reverse order output can be realized by adjusting the comparison function
package sorting

type ISorter interface {
    Sort(data []interface{}, comparator CompareFunction) []interface{}
}

type CompareFunction func(a interface{}, b interface{}) CompareResult

type CompareResult int
const LESS CompareResult = -1
const EQUAL CompareResult = 0
const GREATER CompareResult = 1

tHeapSort

  • Heap sorter to implement the isorter interface
  • Heap sorting using tarrayheap
  • First push the entire array into the heap
  • Then pop them out one by one
package heap_sort

import "learning/gooop/sorting"

type tHeapSort struct {
}

func newHeapSort() sorting.ISorter {
    return &tHeapSort{}
}


func (me *tHeapSort) Sort(data []interface{}, comparator sorting.CompareFunction) []interface{} {
    heap := newArrayHeap(comparator)
    for _,it := range data {
        heap.Push(it)
    }

    for i,_ := range data {
        e,v := heap.Pop()
        if e == nil {
            data[i] = v
        } else {
            panic(e)
        }
    }

    return data
}


var HeapSort = newHeapSort()

(end)