summaryrefslogtreecommitdiff
path: root/samples/sorting.cppR
blob: f15380cad29e44567aaed478c22a0c6ab549e7b8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
/**
 * @title Sorting Numeric Vectors in C++ and R
 * @author Ross Bennett
 * @license GPL (>= 2)
 * @tags stl benchmark
 * @summary Illustrates the comparison of different sorting algorithms with R
 *     and the C++ STL.
 */

/**
 * Consider the problem to sort all elements of the given vector in ascending
 * order. We can simply use the function `std::sort` from the C++ STL.
 */

#include <Rcpp.h>
using namespace Rcpp;

// [[Rcpp::export]]
NumericVector stl_sort(NumericVector x) {
   NumericVector y = clone(x);
   std::sort(y.begin(), y.end());
   return y;
} 
 
   /*** R
   library(rbenchmark)
   set.seed(123)
   z <- rnorm(100000)
   x <- rnorm(100)

   ## check that stl_sort is the same as sort
   stopifnot(all.equal(stl_sort(x), sort(x)))
   
   ## benchmark stl_sort and sort
   benchmark(stl_sort(z), sort(z), order="relative")[,1:4]
   */

/**
 * Consider the problem of sorting the first `n` elements of a given vector.
 * The function `std::partial_sort` from the C++ STL does just this. 
 */

// [[Rcpp::export]]
NumericVector stl_partial_sort(NumericVector x, int n) {
   NumericVector y = clone(x);
   std::partial_sort(y.begin(), y.begin()+n, y.end());
   return y;
}

/**
 * An alternate implementation of a partial sort algorithm is to use 
 * `std::nth_element` to partition the given vector at the nth sorted
 * element and then use `std::sort`, both from the STL,  to sort the vector
 * from the beginning to the nth element.
 * 
 * For an equivalent implementation in R, we can use the `sort` function by
 * specifying a vector of `1:n` for the partial argument (i.e. `partial=1:n`).
 */

// [[Rcpp::export]]
NumericVector nth_partial_sort(NumericVector x, int nth) {
   NumericVector y = clone(x);
   std::nth_element(y.begin(), y.begin()+nth, y.end());
   std::sort(y.begin(), y.begin()+nth);
   return y;
}

/*** R
n <- 25000

# check that stl_partial_sort is equal to nth_partial_sort
stopifnot(all.equal(stl_partial_sort(x, 50)[1:50], 
                    nth_partial_sort(x, 50)[1:50]))

# benchmark stl_partial_sort, nth_element_sort, and sort
benchmark(stl_partial_sort(z, n),
          nth_partial_sort(z, n),
          sort(z, partial=1:n),
          order="relative")[,1:4]
*/

/**
 * An interesting result to note is the gain in speed of 
 * `nth_partial_sort` over `stl_partial_sort`. In this case, for the given
 * data, it is faster to use the combination of`std::nth_element` and 
 * `std::sort` rather than `std::partial_sort` to sort the first `n` elements 
 * of a vector.
 */

// [[Rcpp::export]]
NumericVector stl_nth_element(NumericVector x, int n) {
   NumericVector y = clone(x);
   std::nth_element(y.begin(), y.begin()+n, y.end());
   return y;
}

/**
 * Finally, consider a problem where you only need a single element of a
 * sorted vector. The function `std::nth_element` from the C++ STL does just 
 * this. An example of this type of problem is computing the median of a given
 * vector.
 * 
 * For an equivalent implementation in R, we can use the `sort` function by
 * specifying a scalar value for the argument partial (i.e. `partial=n`).
 */

/*** R
# check that the nth sorted elements of the vectors are equal
stopifnot(all.equal(stl_nth_element(x, 43)[43], sort(x, partial=43)[43]))

# benchmark nth_element and sort
benchmark(stl_nth_element(z, n),
         sort(z, partial=n),
         order="relative")[,1:4]
*/


/**
 * While these are not huge speed improvements over the base R sort function, 
 * this post demonstrates how to easily access sorting functions in the C++
 * STL and is a good exercise to better understand the differences and 
 * performance of the sorting algorithms available in C++ and R.
 */