...

Source file src/sort/sort.go

Documentation: sort

		 1  // Copyright 2009 The Go Authors. All rights reserved.
		 2  // Use of this source code is governed by a BSD-style
		 3  // license that can be found in the LICENSE file.
		 4  
		 5  //go:generate go run genzfunc.go
		 6  
		 7  // Package sort provides primitives for sorting slices and user-defined collections.
		 8  package sort
		 9  
		10  // An implementation of Interface can be sorted by the routines in this package.
		11  // The methods refer to elements of the underlying collection by integer index.
		12  type Interface interface {
		13  	// Len is the number of elements in the collection.
		14  	Len() int
		15  
		16  	// Less reports whether the element with index i
		17  	// must sort before the element with index j.
		18  	//
		19  	// If both Less(i, j) and Less(j, i) are false,
		20  	// then the elements at index i and j are considered equal.
		21  	// Sort may place equal elements in any order in the final result,
		22  	// while Stable preserves the original input order of equal elements.
		23  	//
		24  	// Less must describe a transitive ordering:
		25  	//	- if both Less(i, j) and Less(j, k) are true, then Less(i, k) must be true as well.
		26  	//	- if both Less(i, j) and Less(j, k) are false, then Less(i, k) must be false as well.
		27  	//
		28  	// Note that floating-point comparison (the < operator on float32 or float64 values)
		29  	// is not a transitive ordering when not-a-number (NaN) values are involved.
		30  	// See Float64Slice.Less for a correct implementation for floating-point values.
		31  	Less(i, j int) bool
		32  
		33  	// Swap swaps the elements with indexes i and j.
		34  	Swap(i, j int)
		35  }
		36  
		37  // insertionSort sorts data[a:b] using insertion sort.
		38  func insertionSort(data Interface, a, b int) {
		39  	for i := a + 1; i < b; i++ {
		40  		for j := i; j > a && data.Less(j, j-1); j-- {
		41  			data.Swap(j, j-1)
		42  		}
		43  	}
		44  }
		45  
		46  // siftDown implements the heap property on data[lo:hi].
		47  // first is an offset into the array where the root of the heap lies.
		48  func siftDown(data Interface, lo, hi, first int) {
		49  	root := lo
		50  	for {
		51  		child := 2*root + 1
		52  		if child >= hi {
		53  			break
		54  		}
		55  		if child+1 < hi && data.Less(first+child, first+child+1) {
		56  			child++
		57  		}
		58  		if !data.Less(first+root, first+child) {
		59  			return
		60  		}
		61  		data.Swap(first+root, first+child)
		62  		root = child
		63  	}
		64  }
		65  
		66  func heapSort(data Interface, a, b int) {
		67  	first := a
		68  	lo := 0
		69  	hi := b - a
		70  
		71  	// Build heap with greatest element at top.
		72  	for i := (hi - 1) / 2; i >= 0; i-- {
		73  		siftDown(data, i, hi, first)
		74  	}
		75  
		76  	// Pop elements, largest first, into end of data.
		77  	for i := hi - 1; i >= 0; i-- {
		78  		data.Swap(first, first+i)
		79  		siftDown(data, lo, i, first)
		80  	}
		81  }
		82  
		83  // Quicksort, loosely following Bentley and McIlroy,
		84  // ``Engineering a Sort Function,'' SP&E November 1993.
		85  
		86  // medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1].
		87  func medianOfThree(data Interface, m1, m0, m2 int) {
		88  	// sort 3 elements
		89  	if data.Less(m1, m0) {
		90  		data.Swap(m1, m0)
		91  	}
		92  	// data[m0] <= data[m1]
		93  	if data.Less(m2, m1) {
		94  		data.Swap(m2, m1)
		95  		// data[m0] <= data[m2] && data[m1] < data[m2]
		96  		if data.Less(m1, m0) {
		97  			data.Swap(m1, m0)
		98  		}
		99  	}
	 100  	// now data[m0] <= data[m1] <= data[m2]
	 101  }
	 102  
	 103  func swapRange(data Interface, a, b, n int) {
	 104  	for i := 0; i < n; i++ {
	 105  		data.Swap(a+i, b+i)
	 106  	}
	 107  }
	 108  
	 109  func doPivot(data Interface, lo, hi int) (midlo, midhi int) {
	 110  	m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow.
	 111  	if hi-lo > 40 {
	 112  		// Tukey's ``Ninther,'' median of three medians of three.
	 113  		s := (hi - lo) / 8
	 114  		medianOfThree(data, lo, lo+s, lo+2*s)
	 115  		medianOfThree(data, m, m-s, m+s)
	 116  		medianOfThree(data, hi-1, hi-1-s, hi-1-2*s)
	 117  	}
	 118  	medianOfThree(data, lo, m, hi-1)
	 119  
	 120  	// Invariants are:
	 121  	//	data[lo] = pivot (set up by ChoosePivot)
	 122  	//	data[lo < i < a] < pivot
	 123  	//	data[a <= i < b] <= pivot
	 124  	//	data[b <= i < c] unexamined
	 125  	//	data[c <= i < hi-1] > pivot
	 126  	//	data[hi-1] >= pivot
	 127  	pivot := lo
	 128  	a, c := lo+1, hi-1
	 129  
	 130  	for ; a < c && data.Less(a, pivot); a++ {
	 131  	}
	 132  	b := a
	 133  	for {
	 134  		for ; b < c && !data.Less(pivot, b); b++ { // data[b] <= pivot
	 135  		}
	 136  		for ; b < c && data.Less(pivot, c-1); c-- { // data[c-1] > pivot
	 137  		}
	 138  		if b >= c {
	 139  			break
	 140  		}
	 141  		// data[b] > pivot; data[c-1] <= pivot
	 142  		data.Swap(b, c-1)
	 143  		b++
	 144  		c--
	 145  	}
	 146  	// If hi-c<3 then there are duplicates (by property of median of nine).
	 147  	// Let's be a bit more conservative, and set border to 5.
	 148  	protect := hi-c < 5
	 149  	if !protect && hi-c < (hi-lo)/4 {
	 150  		// Lets test some points for equality to pivot
	 151  		dups := 0
	 152  		if !data.Less(pivot, hi-1) { // data[hi-1] = pivot
	 153  			data.Swap(c, hi-1)
	 154  			c++
	 155  			dups++
	 156  		}
	 157  		if !data.Less(b-1, pivot) { // data[b-1] = pivot
	 158  			b--
	 159  			dups++
	 160  		}
	 161  		// m-lo = (hi-lo)/2 > 6
	 162  		// b-lo > (hi-lo)*3/4-1 > 8
	 163  		// ==> m < b ==> data[m] <= pivot
	 164  		if !data.Less(m, pivot) { // data[m] = pivot
	 165  			data.Swap(m, b-1)
	 166  			b--
	 167  			dups++
	 168  		}
	 169  		// if at least 2 points are equal to pivot, assume skewed distribution
	 170  		protect = dups > 1
	 171  	}
	 172  	if protect {
	 173  		// Protect against a lot of duplicates
	 174  		// Add invariant:
	 175  		//	data[a <= i < b] unexamined
	 176  		//	data[b <= i < c] = pivot
	 177  		for {
	 178  			for ; a < b && !data.Less(b-1, pivot); b-- { // data[b] == pivot
	 179  			}
	 180  			for ; a < b && data.Less(a, pivot); a++ { // data[a] < pivot
	 181  			}
	 182  			if a >= b {
	 183  				break
	 184  			}
	 185  			// data[a] == pivot; data[b-1] < pivot
	 186  			data.Swap(a, b-1)
	 187  			a++
	 188  			b--
	 189  		}
	 190  	}
	 191  	// Swap pivot into middle
	 192  	data.Swap(pivot, b-1)
	 193  	return b - 1, c
	 194  }
	 195  
	 196  func quickSort(data Interface, a, b, maxDepth int) {
	 197  	for b-a > 12 { // Use ShellSort for slices <= 12 elements
	 198  		if maxDepth == 0 {
	 199  			heapSort(data, a, b)
	 200  			return
	 201  		}
	 202  		maxDepth--
	 203  		mlo, mhi := doPivot(data, a, b)
	 204  		// Avoiding recursion on the larger subproblem guarantees
	 205  		// a stack depth of at most lg(b-a).
	 206  		if mlo-a < b-mhi {
	 207  			quickSort(data, a, mlo, maxDepth)
	 208  			a = mhi // i.e., quickSort(data, mhi, b)
	 209  		} else {
	 210  			quickSort(data, mhi, b, maxDepth)
	 211  			b = mlo // i.e., quickSort(data, a, mlo)
	 212  		}
	 213  	}
	 214  	if b-a > 1 {
	 215  		// Do ShellSort pass with gap 6
	 216  		// It could be written in this simplified form cause b-a <= 12
	 217  		for i := a + 6; i < b; i++ {
	 218  			if data.Less(i, i-6) {
	 219  				data.Swap(i, i-6)
	 220  			}
	 221  		}
	 222  		insertionSort(data, a, b)
	 223  	}
	 224  }
	 225  
	 226  // Sort sorts data.
	 227  // It makes one call to data.Len to determine n and O(n*log(n)) calls to
	 228  // data.Less and data.Swap. The sort is not guaranteed to be stable.
	 229  func Sort(data Interface) {
	 230  	n := data.Len()
	 231  	quickSort(data, 0, n, maxDepth(n))
	 232  }
	 233  
	 234  // maxDepth returns a threshold at which quicksort should switch
	 235  // to heapsort. It returns 2*ceil(lg(n+1)).
	 236  func maxDepth(n int) int {
	 237  	var depth int
	 238  	for i := n; i > 0; i >>= 1 {
	 239  		depth++
	 240  	}
	 241  	return depth * 2
	 242  }
	 243  
	 244  // lessSwap is a pair of Less and Swap function for use with the
	 245  // auto-generated func-optimized variant of sort.go in
	 246  // zfuncversion.go.
	 247  type lessSwap struct {
	 248  	Less func(i, j int) bool
	 249  	Swap func(i, j int)
	 250  }
	 251  
	 252  type reverse struct {
	 253  	// This embedded Interface permits Reverse to use the methods of
	 254  	// another Interface implementation.
	 255  	Interface
	 256  }
	 257  
	 258  // Less returns the opposite of the embedded implementation's Less method.
	 259  func (r reverse) Less(i, j int) bool {
	 260  	return r.Interface.Less(j, i)
	 261  }
	 262  
	 263  // Reverse returns the reverse order for data.
	 264  func Reverse(data Interface) Interface {
	 265  	return &reverse{data}
	 266  }
	 267  
	 268  // IsSorted reports whether data is sorted.
	 269  func IsSorted(data Interface) bool {
	 270  	n := data.Len()
	 271  	for i := n - 1; i > 0; i-- {
	 272  		if data.Less(i, i-1) {
	 273  			return false
	 274  		}
	 275  	}
	 276  	return true
	 277  }
	 278  
	 279  // Convenience types for common cases
	 280  
	 281  // IntSlice attaches the methods of Interface to []int, sorting in increasing order.
	 282  type IntSlice []int
	 283  
	 284  func (x IntSlice) Len() int					 { return len(x) }
	 285  func (x IntSlice) Less(i, j int) bool { return x[i] < x[j] }
	 286  func (x IntSlice) Swap(i, j int)			{ x[i], x[j] = x[j], x[i] }
	 287  
	 288  // Sort is a convenience method: x.Sort() calls Sort(x).
	 289  func (x IntSlice) Sort() { Sort(x) }
	 290  
	 291  // Float64Slice implements Interface for a []float64, sorting in increasing order,
	 292  // with not-a-number (NaN) values ordered before other values.
	 293  type Float64Slice []float64
	 294  
	 295  func (x Float64Slice) Len() int { return len(x) }
	 296  
	 297  // Less reports whether x[i] should be ordered before x[j], as required by the sort Interface.
	 298  // Note that floating-point comparison by itself is not a transitive relation: it does not
	 299  // report a consistent ordering for not-a-number (NaN) values.
	 300  // This implementation of Less places NaN values before any others, by using:
	 301  //
	 302  //	x[i] < x[j] || (math.IsNaN(x[i]) && !math.IsNaN(x[j]))
	 303  //
	 304  func (x Float64Slice) Less(i, j int) bool { return x[i] < x[j] || (isNaN(x[i]) && !isNaN(x[j])) }
	 305  func (x Float64Slice) Swap(i, j int)			{ x[i], x[j] = x[j], x[i] }
	 306  
	 307  // isNaN is a copy of math.IsNaN to avoid a dependency on the math package.
	 308  func isNaN(f float64) bool {
	 309  	return f != f
	 310  }
	 311  
	 312  // Sort is a convenience method: x.Sort() calls Sort(x).
	 313  func (x Float64Slice) Sort() { Sort(x) }
	 314  
	 315  // StringSlice attaches the methods of Interface to []string, sorting in increasing order.
	 316  type StringSlice []string
	 317  
	 318  func (x StringSlice) Len() int					 { return len(x) }
	 319  func (x StringSlice) Less(i, j int) bool { return x[i] < x[j] }
	 320  func (x StringSlice) Swap(i, j int)			{ x[i], x[j] = x[j], x[i] }
	 321  
	 322  // Sort is a convenience method: x.Sort() calls Sort(x).
	 323  func (x StringSlice) Sort() { Sort(x) }
	 324  
	 325  // Convenience wrappers for common cases
	 326  
	 327  // Ints sorts a slice of ints in increasing order.
	 328  func Ints(x []int) { Sort(IntSlice(x)) }
	 329  
	 330  // Float64s sorts a slice of float64s in increasing order.
	 331  // Not-a-number (NaN) values are ordered before other values.
	 332  func Float64s(x []float64) { Sort(Float64Slice(x)) }
	 333  
	 334  // Strings sorts a slice of strings in increasing order.
	 335  func Strings(x []string) { Sort(StringSlice(x)) }
	 336  
	 337  // IntsAreSorted reports whether the slice x is sorted in increasing order.
	 338  func IntsAreSorted(x []int) bool { return IsSorted(IntSlice(x)) }
	 339  
	 340  // Float64sAreSorted reports whether the slice x is sorted in increasing order,
	 341  // with not-a-number (NaN) values before any other values.
	 342  func Float64sAreSorted(x []float64) bool { return IsSorted(Float64Slice(x)) }
	 343  
	 344  // StringsAreSorted reports whether the slice x is sorted in increasing order.
	 345  func StringsAreSorted(x []string) bool { return IsSorted(StringSlice(x)) }
	 346  
	 347  // Notes on stable sorting:
	 348  // The used algorithms are simple and provable correct on all input and use
	 349  // only logarithmic additional stack space. They perform well if compared
	 350  // experimentally to other stable in-place sorting algorithms.
	 351  //
	 352  // Remarks on other algorithms evaluated:
	 353  //	- GCC's 4.6.3 stable_sort with merge_without_buffer from libstdc++:
	 354  //		Not faster.
	 355  //	- GCC's __rotate for block rotations: Not faster.
	 356  //	- "Practical in-place mergesort" from	Jyrki Katajainen, Tomi A. Pasanen
	 357  //		and Jukka Teuhola; Nordic Journal of Computing 3,1 (1996), 27-40:
	 358  //		The given algorithms are in-place, number of Swap and Assignments
	 359  //		grow as n log n but the algorithm is not stable.
	 360  //	- "Fast Stable In-Place Sorting with O(n) Data Moves" J.I. Munro and
	 361  //		V. Raman in Algorithmica (1996) 16, 115-160:
	 362  //		This algorithm either needs additional 2n bits or works only if there
	 363  //		are enough different elements available to encode some permutations
	 364  //		which have to be undone later (so not stable on any input).
	 365  //	- All the optimal in-place sorting/merging algorithms I found are either
	 366  //		unstable or rely on enough different elements in each step to encode the
	 367  //		performed block rearrangements. See also "In-Place Merging Algorithms",
	 368  //		Denham Coates-Evely, Department of Computer Science, Kings College,
	 369  //		January 2004 and the references in there.
	 370  //	- Often "optimal" algorithms are optimal in the number of assignments
	 371  //		but Interface has only Swap as operation.
	 372  
	 373  // Stable sorts data while keeping the original order of equal elements.
	 374  //
	 375  // It makes one call to data.Len to determine n, O(n*log(n)) calls to
	 376  // data.Less and O(n*log(n)*log(n)) calls to data.Swap.
	 377  func Stable(data Interface) {
	 378  	stable(data, data.Len())
	 379  }
	 380  
	 381  func stable(data Interface, n int) {
	 382  	blockSize := 20 // must be > 0
	 383  	a, b := 0, blockSize
	 384  	for b <= n {
	 385  		insertionSort(data, a, b)
	 386  		a = b
	 387  		b += blockSize
	 388  	}
	 389  	insertionSort(data, a, n)
	 390  
	 391  	for blockSize < n {
	 392  		a, b = 0, 2*blockSize
	 393  		for b <= n {
	 394  			symMerge(data, a, a+blockSize, b)
	 395  			a = b
	 396  			b += 2 * blockSize
	 397  		}
	 398  		if m := a + blockSize; m < n {
	 399  			symMerge(data, a, m, n)
	 400  		}
	 401  		blockSize *= 2
	 402  	}
	 403  }
	 404  
	 405  // symMerge merges the two sorted subsequences data[a:m] and data[m:b] using
	 406  // the SymMerge algorithm from Pok-Son Kim and Arne Kutzner, "Stable Minimum
	 407  // Storage Merging by Symmetric Comparisons", in Susanne Albers and Tomasz
	 408  // Radzik, editors, Algorithms - ESA 2004, volume 3221 of Lecture Notes in
	 409  // Computer Science, pages 714-723. Springer, 2004.
	 410  //
	 411  // Let M = m-a and N = b-n. Wolog M < N.
	 412  // The recursion depth is bound by ceil(log(N+M)).
	 413  // The algorithm needs O(M*log(N/M + 1)) calls to data.Less.
	 414  // The algorithm needs O((M+N)*log(M)) calls to data.Swap.
	 415  //
	 416  // The paper gives O((M+N)*log(M)) as the number of assignments assuming a
	 417  // rotation algorithm which uses O(M+N+gcd(M+N)) assignments. The argumentation
	 418  // in the paper carries through for Swap operations, especially as the block
	 419  // swapping rotate uses only O(M+N) Swaps.
	 420  //
	 421  // symMerge assumes non-degenerate arguments: a < m && m < b.
	 422  // Having the caller check this condition eliminates many leaf recursion calls,
	 423  // which improves performance.
	 424  func symMerge(data Interface, a, m, b int) {
	 425  	// Avoid unnecessary recursions of symMerge
	 426  	// by direct insertion of data[a] into data[m:b]
	 427  	// if data[a:m] only contains one element.
	 428  	if m-a == 1 {
	 429  		// Use binary search to find the lowest index i
	 430  		// such that data[i] >= data[a] for m <= i < b.
	 431  		// Exit the search loop with i == b in case no such index exists.
	 432  		i := m
	 433  		j := b
	 434  		for i < j {
	 435  			h := int(uint(i+j) >> 1)
	 436  			if data.Less(h, a) {
	 437  				i = h + 1
	 438  			} else {
	 439  				j = h
	 440  			}
	 441  		}
	 442  		// Swap values until data[a] reaches the position before i.
	 443  		for k := a; k < i-1; k++ {
	 444  			data.Swap(k, k+1)
	 445  		}
	 446  		return
	 447  	}
	 448  
	 449  	// Avoid unnecessary recursions of symMerge
	 450  	// by direct insertion of data[m] into data[a:m]
	 451  	// if data[m:b] only contains one element.
	 452  	if b-m == 1 {
	 453  		// Use binary search to find the lowest index i
	 454  		// such that data[i] > data[m] for a <= i < m.
	 455  		// Exit the search loop with i == m in case no such index exists.
	 456  		i := a
	 457  		j := m
	 458  		for i < j {
	 459  			h := int(uint(i+j) >> 1)
	 460  			if !data.Less(m, h) {
	 461  				i = h + 1
	 462  			} else {
	 463  				j = h
	 464  			}
	 465  		}
	 466  		// Swap values until data[m] reaches the position i.
	 467  		for k := m; k > i; k-- {
	 468  			data.Swap(k, k-1)
	 469  		}
	 470  		return
	 471  	}
	 472  
	 473  	mid := int(uint(a+b) >> 1)
	 474  	n := mid + m
	 475  	var start, r int
	 476  	if m > mid {
	 477  		start = n - b
	 478  		r = mid
	 479  	} else {
	 480  		start = a
	 481  		r = m
	 482  	}
	 483  	p := n - 1
	 484  
	 485  	for start < r {
	 486  		c := int(uint(start+r) >> 1)
	 487  		if !data.Less(p-c, c) {
	 488  			start = c + 1
	 489  		} else {
	 490  			r = c
	 491  		}
	 492  	}
	 493  
	 494  	end := n - start
	 495  	if start < m && m < end {
	 496  		rotate(data, start, m, end)
	 497  	}
	 498  	if a < start && start < mid {
	 499  		symMerge(data, a, start, mid)
	 500  	}
	 501  	if mid < end && end < b {
	 502  		symMerge(data, mid, end, b)
	 503  	}
	 504  }
	 505  
	 506  // rotate rotates two consecutive blocks u = data[a:m] and v = data[m:b] in data:
	 507  // Data of the form 'x u v y' is changed to 'x v u y'.
	 508  // rotate performs at most b-a many calls to data.Swap,
	 509  // and it assumes non-degenerate arguments: a < m && m < b.
	 510  func rotate(data Interface, a, m, b int) {
	 511  	i := m - a
	 512  	j := b - m
	 513  
	 514  	for i != j {
	 515  		if i > j {
	 516  			swapRange(data, m-i, m, j)
	 517  			i -= j
	 518  		} else {
	 519  			swapRange(data, m-i, m+j-i, i)
	 520  			j -= i
	 521  		}
	 522  	}
	 523  	// i == j
	 524  	swapRange(data, m-i, m, i)
	 525  }
	 526  
	 527  /*
	 528  Complexity of Stable Sorting
	 529  
	 530  
	 531  Complexity of block swapping rotation
	 532  
	 533  Each Swap puts one new element into its correct, final position.
	 534  Elements which reach their final position are no longer moved.
	 535  Thus block swapping rotation needs |u|+|v| calls to Swaps.
	 536  This is best possible as each element might need a move.
	 537  
	 538  Pay attention when comparing to other optimal algorithms which
	 539  typically count the number of assignments instead of swaps:
	 540  E.g. the optimal algorithm of Dudzinski and Dydek for in-place
	 541  rotations uses O(u + v + gcd(u,v)) assignments which is
	 542  better than our O(3 * (u+v)) as gcd(u,v) <= u.
	 543  
	 544  
	 545  Stable sorting by SymMerge and BlockSwap rotations
	 546  
	 547  SymMerg complexity for same size input M = N:
	 548  Calls to Less:	O(M*log(N/M+1)) = O(N*log(2)) = O(N)
	 549  Calls to Swap:	O((M+N)*log(M)) = O(2*N*log(N)) = O(N*log(N))
	 550  
	 551  (The following argument does not fuzz over a missing -1 or
	 552  other stuff which does not impact the final result).
	 553  
	 554  Let n = data.Len(). Assume n = 2^k.
	 555  
	 556  Plain merge sort performs log(n) = k iterations.
	 557  On iteration i the algorithm merges 2^(k-i) blocks, each of size 2^i.
	 558  
	 559  Thus iteration i of merge sort performs:
	 560  Calls to Less	O(2^(k-i) * 2^i) = O(2^k) = O(2^log(n)) = O(n)
	 561  Calls to Swap	O(2^(k-i) * 2^i * log(2^i)) = O(2^k * i) = O(n*i)
	 562  
	 563  In total k = log(n) iterations are performed; so in total:
	 564  Calls to Less O(log(n) * n)
	 565  Calls to Swap O(n + 2*n + 3*n + ... + (k-1)*n + k*n)
	 566  	 = O((k/2) * k * n) = O(n * k^2) = O(n * log^2(n))
	 567  
	 568  
	 569  Above results should generalize to arbitrary n = 2^k + p
	 570  and should not be influenced by the initial insertion sort phase:
	 571  Insertion sort is O(n^2) on Swap and Less, thus O(bs^2) per block of
	 572  size bs at n/bs blocks:	O(bs*n) Swaps and Less during insertion sort.
	 573  Merge sort iterations start at i = log(bs). With t = log(bs) constant:
	 574  Calls to Less O((log(n)-t) * n + bs*n) = O(log(n)*n + (bs-t)*n)
	 575  	 = O(n * log(n))
	 576  Calls to Swap O(n * log^2(n) - (t^2+t)/2*n) = O(n * log^2(n))
	 577  
	 578  */
	 579  

View as plain text