Dual pivot quicksort
(2009-11-05追è¨ãheap sortã«ãã°ããã£ãã®ã§ã³ã¼ããå·®ãæ¿ããè¨æ¸¬ããªããã)
(2009-11-06追è¨ãheap sortã®ã³ã¼ãã¨çµæãåé¤ãã)
(2010-06-17追è¨ãquick sortã«èª¤ãããã£ãã®ã§çµæã¨ã³ã¼ããåé¤ãã)
å æ¥ãdual pivot quicksortã¨ããã½ã¼ãæ³ãããã¨ãããã¨ãè³ã«ããã®ã§ã空ãæéã«ä»ã®ã½ã¼ãæ³ã®å¾©ç¿ãå ¼ãã¦æ¸ãã¦ã¿ãããªããdual pivot quicksortã«ã¤ãã¦ã¯DualPivotQuicksort.pdf(PDF)ããä»ã®ã½ã¼ãæ³ã«ã¤ãã¦ã¯Wikipediaã®ã½ã¼ãã¢ã«ã´ãªãºã ã®é ãåç §ããããã¹ããä¸ååãªnaiveãªå®è£ ã§ãããã¨ã«ã¤ãã¦ã¯ã容赦ããã ãããã
Cã®intã®é åã対象ã¨ããé åã®é·ããå¤ãã¦åææ³ã®å®è¡æéãè¨æ¸¬ããã対象ã¨ããé åã¯ã©ã³ãã ã«åæåããã·ã£ããã«ãã¦100åã½ã¼ãããã以ä¸ã¯ãã®100åã®ã½ã¼ãã«è¦ããåè¨æéï¼åä½ã¯ç§ï¼ã§ãããã³ã³ãã¤ã«ã¯GCC 4.23ï¼Ubuntu 8.04ï¼ã§-O3ãªãã·ã§ã³ãç¨ããPentium4ã®3GHzãããã®ãã·ã³ä¸ã§å®è¡ããã
algorithm | 10^2 | 10^3 | 10^4 | 10^5 | 10^6 | 10^7 |
---|---|---|---|---|---|---|
comb | 0.00080 | 0.01187 | 0.13390 | 1.77721 | 22.96319 | 275.91818 |
merge | 0.00065 | 0.00870 | 0.09282 | 1.14258 | 14.43083 | 181.81334 |
dual pivot quick | 0.00055 | 0.00789 | 0.07945 | 0.99821 | 11.98756 | 139.99590 |
stdlib | 0.00174 | 0.02288 | 0.20753 | 2.47627 | 29.83281 | 348.54542 |
ã©ã³ãã ãªinté åã«å¯¾ãã¦ã¯dual pivot quicksortã¯ãªããªãè¯ããããªçµæãåºããstdlibã®sortãæ¯ãããªãããæããç§ã®ç¨ãæ¹ãæªãã®ã ããã
以ä¸ã³ã¼ããC++ã§ãã³ãã¬ã¼ãã¨ã¤ãã¬ã¼ã¿ã使ã£ã¦æ¸ãããªãããã¨ãæã£ãããä»åã¯è¦éã£ãã
#include <stdio.h> //fprintf #include <stdlib.h> //malloc, srand, rand, qsort #include <string.h> //memcpy #include <sys/time.h> //gettimeofday void *my_malloc(int size) { void *ptr = malloc(size); if (ptr == NULL) { fprintf(stderr, "malloc error\n"); } return ptr; } double get_time(){ struct timeval tv; gettimeofday(&tv, NULL); return tv.tv_sec + (double)tv.tv_usec*1.0E-6; } inline void swap_int_value(int *a, int *b) { int tmp = *a; *a = *b; *b = tmp; } void show_int_array(int *start, int *end) { int i = 0; int *pos = start; for (; pos != end; ++pos) { fprintf(stdout, "%d", *pos); if (pos == end-1) { fprintf(stdout,"\n"); } else { fprintf(stdout,", "); } } } void shuffle_int_array(int *start, int *end, const unsigned int seed) { int *pos1 = start; int *pos2 = NULL; srand(seed); for (; pos1 != end; ++pos1) { pos2 = pos1 + rand()%(end-pos1); swap_int_value(pos1, pos2); } } void fill_int_array_random(int *start, int *end, const unsigned int seed) { int *pos = start; srand(seed); for (; pos != end; ++pos) { *pos = rand(); } } /* insertion sort */ void isort_int_array(int *start, int *end) { int *pos1 = start+1; int *pos2 = pos1; if (start < end) { for (; pos1 != end; ++pos1) { pos2 = pos1; for (; pos2 != start && *pos2 < *(pos2-1); --pos2) { swap_int_value(pos2, pos2-1); } } } } /* comb sort */ void csort_int_array(int *start, int *end) { int *pos1 = start; int length = end - start; double shrink_factor = 1.25; int swap_count = 0; int i = (int)(length/shrink_factor); while (1) { //true swap_count = 0; for (pos1 = start; pos1+i != end; ++pos1) { if (*pos1 > *(pos1+i)){ swap_int_value(pos1, pos1+i); swap_count++; } } if (i == 1) { if (swap_count == 0) { break; } } else { i /= shrink_factor; } if (i == 9 || i == 10) { i = 11; } //comb sort 11? } } /* merge sort(work space required) */ void merge_int_array(int *start, int *end, int *med, int *work) { int *start_w = work; int *med_w = start_w + (med - start); int *end_w = start_w + (end - start); int *pos1 = work; int *pos2 = med_w; int *pos3 = start; memcpy(work, start, sizeof(int)*(end-start)); for (; pos1 != med_w && pos2 != end_w; ++pos3) { if (*pos1 < *pos2) { *pos3 = *pos1; ++pos1; } else { *pos3 = *pos2; ++pos2; } } if (pos1 == med_w) { memcpy(pos3, pos2, sizeof(int)*(end_w-pos2)); } else if (pos2 == end_w) { memcpy(pos3, pos1, sizeof(int)*(med_w-pos1)); } } void msort_int_array_sub(int *start, int *end, int *work) { int length = end - start; int *med = start + (length / 2); const int threshold = 17; if (length < threshold) { isort_int_array(start, end); } else if (length > 1) { msort_int_array_sub(start, med, work); msort_int_array_sub(med, end, work); merge_int_array(start, end, med, work); } } void msort_int_array(int *start, int *end) { int length = end - start; int *work = (int *)my_malloc(sizeof(int)*length); msort_int_array_sub(start, end, work); free(work); work = NULL; } /* quick sort */ /* my quick sort is wrong, sorry... */ /* dual pivot quick sort */ void dp_qsort_int_array(int *start, int *end) { int *pos1 = start+1; int *pos2 = start+1; int *pos3 = end-1; int pivot1 = *start; int pivot2 = *(end-1); const int threshold = 17; if (end - start < threshold) { isort_int_array(start, end); } else { if (pivot1 > pivot2) { swap_int_value(&pivot1, &pivot2); *start = pivot1; *(end-1) = pivot2; } while (pos2 != pos3) { if (*pos2 > pivot2) { --pos3; swap_int_value(pos3, pos2); } else { if (*pos2 < pivot1) { swap_int_value(pos2, pos1); ++pos1; } ++pos2; } } --pos1; swap_int_value(start, pos1); swap_int_value(end-1, pos3); ++pos3; dp_qsort_int_array(start, pos1); dp_qsort_int_array((pos1+1), pos2); dp_qsort_int_array(pos3, end); } } /* standard quick sort */ int comp_int(const int *a, const int *b) { return *a - *b; } void std_qsort_int_array(int *start, int *end) { int num = end - start; int size = sizeof(int); qsort(start, num, size, (int (*)(const void*, const void*))comp_int); } void benchmark(const char *name, void (*sort_func)(int*, int*), const int length, const int restart) { const unsigned int seed = 1000; int i = 0; double time = 0.0; double t1 = 0.0; double t2 = 0.0; int *array = (int *)my_malloc(sizeof(int)*length); srand(seed); fill_int_array_random(array, array+length, rand()); for (i=0; i<restart; i++) { shuffle_int_array(array, array+length, rand()); t1 = get_time(); sort_func(array, array+length); t2 = get_time(); time += t2 - t1; } fprintf(stdout, "%s%.10f\n", name, time); free(array); array = NULL; } void test(void (*sort_func)(int*, int*)) { const unsigned int seed = 1000; const int length = 10; int array[] = {0,1,2,3,4,5,6,7,8,9}; shuffle_int_array(array, array+length, seed); show_int_array(array, array+length); sort_func(array, array+length); show_int_array(array, array+length); } int main(int argc, char **argv) { const int length = 1E7; const int restart = 1E2; fprintf(stdout, "array length = %d, restart = %d\n", length, restart); benchmark("hsort ", hsort_int_array, length, restart); //benchmark("csort ", csort_int_array, length, restart); //benchmark("msort ", msort_int_array, length, restart); //benchmark("dpqsort ", dp_qsort_int_array, length, restart); //benchmark("stdqsort ", std_qsort_int_array, length, restart); return 0; }