Basic data structure and algorithm merging and sorting

Time:2021-10-22

origin

Recently read < < my first algorithm book > > ([Japan] Ishida Baohui; Miyazaki Xiuyi)
This series of notes is intended to use golang exercises

Merge sort

The merge sort algorithm divides the sequence into two subsequences with the same length,
When it is impossible to further divide (that is, there is only one data in each subsequence),
Merge subsequences.

Merging refers to merging two ordered subsequences into an ordered sequence.
This operation is repeated until all subsequences are merged into a whole.

The total run time is O (nlogn), which is the same as the heap sort mentioned earlier.

From < < my first algorithm book > > [Japan] Ishida Baohui; Miyazaki Xiuyi

technological process

  1. Given array to be sorted data [n]
  2. Create buffer [n]
  3. Set the initial merge step (length of sorted subsequence), span = 1, that is, treat each element as a sorted subsequence
  4. Merge all sorted subsequences into the buffer according to the current step size

    1. Set Q1 as the header subscript of subsequence 1, Q2 as the header subscript of subsequence 2, and P points to the corresponding area of the buffer
    2. Compare the value size of Q1 and Q2 positions
    3. If the value of Q1 is small, take the value of Q1, put it in P, and then add 1 to both Q1 and P
    4. If the value of Q2 is small, take the value of Q2, put it in P, and then add 1 to both Q2 and P
    5. If the subsequence has been fetched, copy the subsequence that has not been fetched to P
  5. Exchange pointers between data and buffer, treat data as buffer and buffer as data to be merged
  6. span = span*2
  7. Repeat steps 3-6 until span > n

Design

  • Isorter: defines the sorter interface. Defines the value comparison function to be compatible with any value type. Reverse sorting is realized by adjusting the comparison function
  • Tmergesort: merge sorter, which implements the iserter interface

unit testing

merge_ sort_ Test.go. Merging and sorting is relatively fast, so the test scale is set to 100000 elements

package sorting

import (
    "fmt"
    "learning/gooop/sorting"
    "learning/gooop/sorting/merge_sort"
    "math/rand"
    "testing"
    "time"
)

func Test_MergeSort(t *testing.T) {
    fnAssertTrue := func(b bool, msg string) {
        if !b {
            t.Fatal(msg)
        }
    }

    reversed := false
    fnCompare := func(a interface{}, b interface{}) sorting.CompareResult {
        i1 := a.(int)
        i2 := b.(int)

        if i1 < i2 {
            if reversed {
                return sorting.GREATER
            } else {
                return sorting.LESS
            }
        } else if i1 == i2 {
            return sorting.EQUAL
        } else {
            if reversed {
                return sorting.LESS
            } else {
                return sorting.GREATER
            }
        }
    }

    fnTestSorter := func(sorter sorting.ISorter) {
        reversed = false

        // test simple array
        samples := []interface{} { 2,3,1,5,4,7,6 }
        samples = sorter.Sort(samples, fnCompare)
        fnAssertTrue(fmt.Sprintf("%v", samples) == "[1 2 3 4 5 6 7]",  "expecting 1,2,3,4,5,6,7")
        t.Log("pass sorting [2 3 1 5 4 7 6] >> [1 2 3 4 5 6 7]")

        // test 10000 items sorting
        rnd := rand.New(rand.NewSource(time.Now().UnixNano()))
        for plus := 0;plus < 3;plus++ {
            sampleCount := 100 * 1000 + plus
            t.Logf("prepare large array with %v items", sampleCount)
            samples = make([]interface{}, sampleCount)
            for i := 0; i < sampleCount; i++ {
                samples[i] = rnd.Intn(sampleCount * 10)
            }

            t.Logf("sorting large array with %v items", sampleCount)
            t0 := time.Now().UnixNano()
            samples = sorter.Sort(samples, fnCompare)
            cost := time.Now().UnixNano() - t0
            for i := 1; i < sampleCount; i++ {
                fnAssertTrue(fnCompare(samples[i-1], samples[i]) != sorting.GREATER, "expecting <=")
            }
            t.Logf("end sorting large array, cost = %v ms", cost/1000000)
        }

        // test 0-20
        sampleCount := 20
        t.Log("sorting 0-20")
        samples = make([]interface{}, sampleCount)
        for i := 0;i < sampleCount;i++ {
            for {
                p := rnd.Intn(sampleCount)
                if samples[p] == nil {
                    samples[p] = i
                    break
                }
            }
        }
        t.Logf("unsort = %v", samples)

        samples = sorter.Sort(samples, fnCompare)
        t.Logf("sorted = %v", samples)
        fnAssertTrue(fmt.Sprintf("%v", samples) == "[0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19]", "expecting 0-20")
        t.Log("pass sorting 0-20")

        // test special
        samples = []interface{} {}
        samples = sorter.Sort(samples, fnCompare)
        t.Log("pass sorting []")

        samples = []interface{} { 1 }
        samples = sorter.Sort(samples, fnCompare)
        t.Log("pass sorting [1]")

        samples = []interface{} { 3,1 }
        samples = sorter.Sort(samples, fnCompare)
        fnAssertTrue(fmt.Sprintf("%v", samples) == "[1 3]",  "expecting 1,3")
        t.Log("pass sorting [1 3]")

        reversed = true
        samples = []interface{} { 2, 3,1 }
        samples = sorter.Sort(samples, fnCompare)
        fnAssertTrue(fmt.Sprintf("%v", samples) == "[3 2 1]",  "expecting 3,2,1")
        t.Log("pass sorting [3 2 1]")
    }

    t.Log("\ntesting MergeSort")
    fnTestSorter(merge_sort.MergeSort)
}

Test output

  • Merge sort is quite fast, about twice as fast as heap sort
  • Compared with bubbling, selection and insertion, it has exponential improvement, which is in line with theoretical analysis
  • The cost is consistent with the heap sorting. Additional space of size n needs to be allocated as the merge buffer
  • It is speculated that the main reason for faster than heap sorting is that batch push operation may cause multiple array expansion during heap sorting initialization
$ go test -v merge_sort_test.go 
=== RUN   Test_MergeSort
    merge_sort_test.go:111: 
        testing MergeSort
    merge_sort_test.go:48: pass sorting [2 3 1 5 4 7 6] >> [1 2 3 4 5 6 7]
    merge_sort_test.go:54: prepare large array with 100000 items
    merge_sort_test.go:60: sorting large array with 100000 items
    merge_sort_test.go:67: end sorting large array, cost = 35 ms
    merge_sort_test.go:54: prepare large array with 100001 items
    merge_sort_test.go:60: sorting large array with 100001 items
    merge_sort_test.go:67: end sorting large array, cost = 36 ms
    merge_sort_test.go:54: prepare large array with 100002 items
    merge_sort_test.go:60: sorting large array with 100002 items
    merge_sort_test.go:67: end sorting large array, cost = 33 ms
    merge_sort_test.go:72: sorting 0-20
    merge_sort_test.go:83: unsort = [6 10 8 9 14 1 12 4 19 7 11 16 15 17 0 2 18 3 5 13]
    merge_sort_test.go:86: sorted = [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19]
    merge_sort_test.go:88: pass sorting 0-20
    merge_sort_test.go:93: pass sorting []
    merge_sort_test.go:97: pass sorting [1]
    merge_sort_test.go:102: pass sorting [1 3]
    merge_sort_test.go:108: pass sorting [3 2 1]
--- PASS: Test_MergeSort (0.12s)
PASS
ok      command-line-arguments  0.127s

ISorter.go

Define the sorter interface. Define the value comparison function to be compatible with any value type, and realize reverse sorting by adjusting the comparison function

package sorting

type ISorter interface {
    Sort(data []interface{}, comparator CompareFunction) []interface{}
}

type CompareFunction func(a interface{}, b interface{}) CompareResult

type CompareResult int
const LESS CompareResult = -1
const EQUAL CompareResult = 0
const GREATER CompareResult = 1

tMergeSort.go

Merge sorter to implement the isorter interface

package merge_sort

import (
    "learning/gooop/sorting"
)

type tMergeSort struct {}

func newMergeSort() sorting.ISorter {
    return &tMergeSort{}
}

func (me *tMergeSort) Sort(data []interface{}, comparator sorting.CompareFunction) []interface{} {
    if data == nil {
        return nil
    }

    size := len(data)
    if size <= 1 {
        return data
    }

    var result []interface{} = nil
    buffer := make([]interface{}, size)
    for span := 1; span <= size;span *= 2 {
        for i := 0;i < size;i += span * 2 {
            merge(size, data, i, i + span, span, buffer, i, comparator)
        }

        result = buffer
        data, buffer = buffer, data
    }

    if result == nil {
        result = data
    }

    return result
}

//Merge two subsequences in the data array: [Q1: Q1 + span], [Q2: Q2 + span) to the offset position of the target array result
func merge(size int, data []interface{}, q1 int, q2 int, span int, result []interface{}, offset int, comparator sorting.CompareFunction) {
    e1 := min(q1 + span, size)
    e2 := min(q2 + span, size)
    j := -1
    k := -1

    for i := 0;i < span*2;i++ {
        if q1 >= e1 {
            j = q2
            k = e2

        } else if q2 >= e2 {
            j = q1
            k = e1
        }

        if j >= 0 {
            for p := j;p < k;p++ {
                result[offset] = data[p]
                offset++
            }
            break
        }

        v1 := data[q1]
        v2 := data[q2]

        if lessEqual(v1, v2, comparator) {
            result[offset] = v1
            q1++
        } else {
            result[offset] = v2
            q2++
        }
        offset++
    }
}

func lessEqual(v1 interface{}, v2 interface{}, comparator sorting.CompareFunction) bool {
    return comparator(v1, v2) != sorting.GREATER
}

func min(a,b int) int {
    if a <= b {
        return a
    } else {
        return b
    }
}

var MergeSort = newMergeSort()

(end)