选出第i个最小元素
第i个最小元素
实现
//Generate a pseudo-random number in the closed interval [left, right].
//
//The generator is seeded from the clock on the first call only. Reseeding
//on every call would make all calls issued within the same second return
//the same value, because time() has one-second resolution.
//
//If right <= left the interval holds at most one value and left is
//returned; this also avoids a modulo by zero when right == left - 1.
//The result is built from rand() with a modulo, so it is not perfectly
//uniform and cannot span more than RAND_MAX + 1 distinct values.
int random(int left, int right)
{
    static int seeded = 0;

    if (right <= left)
        return left;
    if (!seeded){
        srand(time(NULL));
        seeded = 1;
    }
    return rand() % (right - left + 1) + left;
}
//Exchange the two integers that left and right point to.
//Passing the same address for both leaves the value unchanged.
void swap(int *left, int *right)
{
    const int held = *right;
    *right = *left;
    *left = held;
}
//Partition data[left..right] (both bounds inclusive) around a randomly
//chosen pivot and return the pivot's final index.
//
//After the call every element to the left of the returned index is less
//than or equal to the pivot, and every element to its right is greater
//than the pivot.
//
//The range must not be empty (left <= right).
int random_partition(int *data, int left, int right){
    int boundary;   //last index of the "<= pivot" region built so far
    int scan;
    const int pivot_index = random(left, right);

    //Park the pivot in the last slot so the scan never has to move it.
    swap(&data[right], &data[pivot_index]);
    boundary = left - 1;
    for (scan = left; scan < right; scan++){
        if (data[scan] <= data[right]){
            boundary++;
            swap(&data[boundary], &data[scan]);
        }
    }
    //Drop the pivot between the two regions.
    swap(&data[boundary + 1], &data[right]);
    return boundary + 1;
}
//Select the ith smallest element (ith is 1-based) of data[left..right],
//both bounds inclusive. The elements of that range are reordered in place.
//
//The range must not be empty (left <= right). A rank below 1 is treated
//as 1 and a rank above the range length is treated as the range length,
//so the search can never leave the range.
//
//The start of the caller's range is kept in a local variable rather than
//in function-level static state, so the function can be called any number
//of times, on different ranges, with correct results.
int select_ith(int *data, int left, int right, int ith)
{
    //Index of the first element of the range handed in by the caller;
    //ranks are always measured from here, even after the search window
    //[left, right] has been narrowed.
    const int smallest_index = left;
    const int count = right - left + 1;
    int p;

    if (ith < 1)
        ith = 1;
    else if (ith > count)
        ith = count;

    //Each pass puts one pivot in its final sorted position p and then
    //keeps only the side that still contains the requested rank.
    while (left < right){
        p = random_partition(data, left, right);
        if (ith == p - smallest_index + 1)
            return data[p];
        else if (ith < p - smallest_index + 1)
            right = p - 1;
        else
            left = p + 1;
    }
    //The window has shrunk to the single slot holding the requested rank.
    return data[left];
}
复杂度
//Generate a pseudo-random number in the closed interval [left, right].
//
//The generator is seeded from the clock on the first call only. Reseeding
//on every call would make all calls issued within the same second return
//the same value, because time() has one-second resolution.
//
//If right <= left the interval holds at most one value and left is
//returned; this also avoids a modulo by zero when right == left - 1.
//The result is built from rand() with a modulo, so it is not perfectly
//uniform and cannot span more than RAND_MAX + 1 distinct values.
int random(int left, int right)
{
    static int seeded = 0;

    if (right <= left)
        return left;
    if (!seeded){
        srand(time(NULL));
        seeded = 1;
    }
    return rand() % (right - left + 1) + left;
}
//Exchange the two integers that left and right point to.
//Passing the same address for both leaves the value unchanged.
void swap(int *left, int *right)
{
    const int held = *right;
    *right = *left;
    *left = held;
}
//Partition data[left..right] (both bounds inclusive) around a randomly
//chosen pivot and return the pivot's final index.
//
//After the call every element to the left of the returned index is less
//than or equal to the pivot, and every element to its right is greater
//than the pivot.
//
//The range must not be empty (left <= right).
int random_partition(int *data, int left, int right){
    int boundary;   //last index of the "<= pivot" region built so far
    int scan;
    const int pivot_index = random(left, right);

    //Park the pivot in the last slot so the scan never has to move it.
    swap(&data[right], &data[pivot_index]);
    boundary = left - 1;
    for (scan = left; scan < right; scan++){
        if (data[scan] <= data[right]){
            boundary++;
            swap(&data[boundary], &data[scan]);
        }
    }
    //Drop the pivot between the two regions.
    swap(&data[boundary + 1], &data[right]);
    return boundary + 1;
}
//Select the ith smallest element (ith is 1-based) of data[left..right],
//both bounds inclusive. The elements of that range are reordered in place.
//
//The range must not be empty (left <= right). A rank below 1 is treated
//as 1 and a rank above the range length is treated as the range length,
//so the search can never leave the range.
//
//The start of the caller's range is kept in a local variable rather than
//in function-level static state, so the function can be called any number
//of times, on different ranges, with correct results.
int select_ith(int *data, int left, int right, int ith)
{
    //Index of the first element of the range handed in by the caller;
    //ranks are always measured from here, even after the search window
    //[left, right] has been narrowed.
    const int smallest_index = left;
    const int count = right - left + 1;
    int p;

    if (ith < 1)
        ith = 1;
    else if (ith > count)
        ith = count;

    //Each pass puts one pivot in its final sorted position p and then
    //keeps only the side that still contains the requested rank.
    while (left < right){
        p = random_partition(data, left, right);
        if (ith == p - smallest_index + 1)
            return data[p];
        else if (ith < p - smallest_index + 1)
            right = p - 1;
        else
            left = p + 1;
    }
    //The window has shrunk to the single slot holding the requested rank.
    return data[left];
}
算法在最坏情况下的复杂度为O(N*N),如果每次我们都选择最大元素作为pivot进行划分,那么每次划分的复杂度为O(N),如果i是随机数的
话,需要O(N/2)次这样的划分,这样就会使得总的复杂度成为O(N*N)。
但是注意到这里采用随机划分,几乎没有什么特别的输入使得最坏情况发生。算法在一般情况下表现良好,第i个最小元素可以在线性时间内给
出,具体的复杂度证明参见《算法导论》9.3节。
前i个最小元素
假如我们把条件
if (ith == p - smallest_index + 1)
return data[p];
改成 return data[smallest_index..ith], 那么就可以通过该函数得到前i个最小元素。
另一种算法
实现
// Returns the n smallest elements of base using a divide-and-combine scheme:
// each half yields its own n smallest candidates, the two candidate lists
// are concatenated and sorted, and only the first n are kept.
//
// If base holds n or fewer elements it is returned unchanged (in its
// original order); otherwise the result is sorted ascending.
// T must be copyable and comparable with operator<.
template<typename T>
std::vector<T> topn(const std::vector<T>& base, unsigned n)
{
    // A non-empty input with n == 0 would never reach the size-based base
    // case: the second half of a one-element vector is the whole vector
    // again, so the recursion would not terminate.
    if (n == 0)
        return std::vector<T>();
    if (base.size() <= n)
        return base;

    const typename std::vector<T>::size_type mid = base.size() / 2;
    std::vector<T> merged = topn(std::vector<T>(base.begin(), base.begin() + mid), n);
    const std::vector<T> upper = topn(std::vector<T>(base.begin() + mid, base.end()), n);
    merged.insert(merged.end(), upper.begin(), upper.end());
    std::sort(merged.begin(), merged.end());
    // The two candidate lists together always hold at least n elements
    // here, so trimming to n is in range.
    merged.erase(merged.begin() + n, merged.end());
    return merged;
}
复杂度
// Returns the n smallest elements of base using a divide-and-combine scheme:
// each half yields its own n smallest candidates, the two candidate lists
// are concatenated and sorted, and only the first n are kept.
//
// If base holds n or fewer elements it is returned unchanged (in its
// original order); otherwise the result is sorted ascending.
// T must be copyable and comparable with operator<.
template<typename T>
std::vector<T> topn(const std::vector<T>& base, unsigned n)
{
    // A non-empty input with n == 0 would never reach the size-based base
    // case: the second half of a one-element vector is the whole vector
    // again, so the recursion would not terminate.
    if (n == 0)
        return std::vector<T>();
    if (base.size() <= n)
        return base;

    const typename std::vector<T>::size_type mid = base.size() / 2;
    std::vector<T> merged = topn(std::vector<T>(base.begin(), base.begin() + mid), n);
    const std::vector<T> upper = topn(std::vector<T>(base.begin() + mid, base.end()), n);
    merged.insert(merged.end(), upper.begin(), upper.end());
    std::sort(merged.begin(), merged.end());
    // The two candidate lists together always hold at least n elements
    // here, so trimming to n is in range.
    merged.erase(merged.begin() + n, merged.end());
    return merged;
}
算法采用“divide-combine”的方案,复杂度T(N)可以描述为:
$$T(N) = 2\,T(N/2) + T_c$$
其中 $T_c$ 为“combine”的时间,对固定的 n 为常数
Python实现
from random import random
# Sample input: 10000 pseudo-random floats drawn with random().
base = []
for _ in range(10000):
    base.append(random())
def topn(base, n):
    """Return the n smallest items of the list ``base``.

    Divide-and-combine: each half of the list yields its own n smallest
    candidates, the two candidate lists are concatenated and sorted, and
    the first n are kept.

    If ``base`` has n or fewer items, a copy of it is returned in its
    original order; otherwise the result is sorted ascending. ``base``
    itself is never modified. A non-positive ``n`` yields an empty list.
    """
    # Without this guard a non-positive n never satisfies the size-based
    # base case, and the second half of a one-item list is the whole list
    # again, so the recursion would not terminate.
    if n <= 0:
        return []
    if len(base) <= n:
        # Copy so the caller never receives an alias of its own list.
        return list(base)
    # Floor division keeps the slice index an int on Python 3 as well.
    mid = len(base) // 2
    candidates = topn(base[:mid], n) + topn(base[mid:], n)
    candidates.sort()
    return candidates[:n]
# Demo entry point: runs only when the file is executed directly.
# The ten smallest sample values are computed and bound to top10.
if __name__ == '__main__':
    top10 = topn(base, n=10)
浙公网安备 33010602011771号