Leetcode 数据流中的中位数:双堆

先看一道经典题,用两个堆维护中位数

LC 295. 数据流的中位数

题目:元素逐渐增加,求当前数据的中位数
方法:用一个大根堆维护左边,用一个小根堆维护右边,插入元素时保持两者差不超过1。查找时只需要考虑两个顶点。
其实一种直观的思路是维护有序列表,保持插入和查找都是logn,比如跳表,但是很难写啊

/**
 * Running median of a stream of integers, maintained with two heaps.
 *
 * leftQ is a max-heap holding the smaller half of the values and rightQ is a
 * min-heap holding the larger half. addNum keeps the two sizes within one of
 * each other, so the median is always determined by the two heap tops.
 */
class MedianFinder {
public:
    /** initialize your data structure here. */
    std::priority_queue<int> leftQ;  // max-heap: smaller half of the stream
    std::priority_queue<int, std::vector<int>, std::greater<int>> rightQ;  // min-heap: larger half
    MedianFinder() {

    }

    /**
     * Inserts num into the stream.
     *
     * The value goes to the half it belongs to; if that half then holds two
     * more elements than the other one, its top is moved across.
     */
    void addNum(int num) {
        if (leftQ.empty() || num <= leftQ.top()) {
            leftQ.push(num);
            // Compare with an addition instead of subtracting the unsigned
            // sizes, so the check cannot wrap around.
            if (leftQ.size() > rightQ.size() + 1) {
                const int moved = leftQ.top();
                leftQ.pop();
                rightQ.push(moved);
            }
        } else {
            rightQ.push(num);
            if (rightQ.size() > leftQ.size() + 1) {
                const int moved = rightQ.top();
                rightQ.pop();
                leftQ.push(moved);
            }
        }
    }

    /**
     * Returns the median of all values added so far.
     *
     * With an odd count this is the top of the larger heap; with an even
     * count it is the mean of both tops. Returns 0.0 if nothing was added.
     */
    double findMedian() {
        if (leftQ.size() > rightQ.size()) return leftQ.top();
        if (leftQ.size() < rightQ.size()) return rightQ.top();
        if (leftQ.empty()) return 0.0;  // both heaps empty: no median exists
        // Sum in 64 bits: adding two large ints would overflow.
        const long long sum = static_cast<long long>(leftQ.top()) + rightQ.top();
        return sum / 2.0;
    }
};

/**
 * Your MedianFinder object will be instantiated and called as such:
 * MedianFinder* obj = new MedianFinder();
 * obj->addNum(num);
 * double param_2 = obj->findMedian();
 */

一道进阶版的,除了添加还有删除操作

LC 480. 滑动窗口中位数

题目:求长度为k的窗口的中位数

方法一:双堆+延迟删除

插入是一样的,更新堆和个数,删除只做记录和更新个数,不更新堆。不管插入和删除都需要调整平衡。

/**
 * Sliding-window median using two heaps with lazy deletion.
 *
 * leftQ (max-heap) holds the smaller half of the window and rightQ (min-heap)
 * the larger half. Removing an element only records it in mp and updates the
 * logical counters; the physical element is discarded later, once it reaches
 * the top of a heap (see pure()).
 */
class Solution {
public:
    std::priority_queue<int> leftQ;                                        // max-heap: smaller half
    std::priority_queue<int, std::vector<int>, std::greater<int>> rightQ;  // min-heap: larger half
    std::unordered_map<int, int> mp;  // value -> number of deletions still pending
    int leftCnt = 0, rightCnt = 0;    // live (not yet deleted) elements per heap

    /** Moves one heap top across when the live counts differ by more than one. */
    void adjust() {
        if (leftCnt - rightCnt > 1) {
            const int moved = leftQ.top();
            leftQ.pop();
            leftCnt--;
            rightQ.push(moved);
            rightCnt++;
        }
        if (rightCnt - leftCnt > 1) {
            const int moved = rightQ.top();
            rightQ.pop();
            rightCnt--;
            leftQ.push(moved);
            leftCnt++;
        }
    }

    /** Adds num to the window and rebalances. */
    void addItem(int num) {
        // With k == 1 the left heap can be empty while the right one is not;
        // move an element over so the comparison below has a pivot.
        if (leftQ.empty() && !rightQ.empty()) {
            const int moved = rightQ.top();
            rightQ.pop();
            rightCnt--;
            leftQ.push(moved);
            leftCnt++;
        }
        if (leftQ.empty() || num < leftQ.top()) {
            leftQ.push(num);
            leftCnt++;
        } else {
            rightQ.push(num);
            rightCnt++;
        }
        adjust();
    }

    /**
     * Logically removes num from the window and rebalances.
     * num is expected to be a value currently in the window.
     */
    void deleteItem(int num) {
        mp[num]++;  // remember the pending deletion
        // "<=" rather than "<": the element to delete may be the left top itself.
        if (!leftQ.empty() && num <= leftQ.top()) leftCnt--;
        else rightCnt--;
        adjust();
    }

    /** Pops heap tops that have a pending deletion recorded in mp. */
    void pure() {
        // find() instead of operator[] so probing never inserts empty entries.
        while (!leftQ.empty()) {
            auto it = mp.find(leftQ.top());
            if (it == mp.end() || it->second == 0) break;
            --it->second;
            leftQ.pop();
        }
        while (!rightQ.empty()) {
            auto it = mp.find(rightQ.top());
            if (it == mp.end() || it->second == 0) break;
            --it->second;
            rightQ.pop();
        }
    }

    /**
     * Returns the median of the current window.
     * The window must contain at least one element.
     */
    double getMedian() {
        pure();  // make sure both tops are live elements
        if (leftCnt > rightCnt) return leftQ.top();
        if (leftCnt < rightCnt) return rightQ.top();
        // Sum in 64 bits to avoid int overflow.
        return (static_cast<long long>(leftQ.top()) + static_cast<long long>(rightQ.top())) * 1.0 / 2;
    }

    /**
     * Computes the median of every length-k window of nums, left to right.
     *
     * @param nums input values
     * @param k    window length
     * @return one median per window; empty if k <= 0 or k > nums.size()
     */
    std::vector<double> medianSlidingWindow(std::vector<int>& nums, int k) {
        // Start from a clean state so the object can be reused across calls.
        leftQ = decltype(leftQ)();
        rightQ = decltype(rightQ)();
        mp.clear();
        leftCnt = 0;
        rightCnt = 0;

        std::vector<double> ans;
        const int n = static_cast<int>(nums.size());
        if (k <= 0 || k > n) return ans;
        ans.reserve(n - k + 1);

        for (int j = 0; j < k; ++j) addItem(nums[j]);
        ans.push_back(getMedian());

        for (int j = k; j < n; ++j) {
            addItem(nums[j]);         // element entering the window
            deleteItem(nums[j - k]);  // element leaving the window
            ans.push_back(getMedian());
        }
        return ans;
    }
};

方法二:pbds tree

前面讲了,我们可以用跳表做,其实只要是能在有序集合中以 O(log n) 查找第k个元素的数据结构都行,pbds库就提供了很多tree,都是平衡树。但是有一个问题,它们都不支持重复元素,在这里我们采用pair类型(值, 下标)来区分相同的值就可以了。

#include <ext/pb_ds/tree_policy.hpp>
#include <ext/pb_ds/assoc_container.hpp>
using namespace __gnu_pbds;

// Alias template for a GNU pbds tree: keys of type T ordered by less<T>,
// no mapped value (null_type), red-black tree storage (rb_tree_tag), with the
// tree_order_statistics_node_update policy that the code below uses through
// find_by_order().
template <typename T>
using orderd_set = tree<T, null_type, less<T>, rb_tree_tag, tree_order_statistics_node_update>;

class Solution {
public:
    tree<pair<int,int>, null_type, less<pair<int, int>>, rb_tree_tag, tree_order_statistics_node_update> order_set;
    vector<double> medianSlidingWindow(vector<int>& nums, int k) {
        vector<double>ans;
        orderd_set<pair<int, int>>myset;
        for(int i = 0;i < nums.size();i++) {
            if(i < k)  myset.insert({nums[i], i});
            else {
                if(k&1)  ans.push_back((*myset.find_by_order(k/2)).first);
                else ans.push_back(((long long)(*myset.find_by_order(k/2)).first + (long long)(*myset.find_by_order(k/2-1)).first)*1.0/2);
                myset.insert({nums[i], i});
                myset.erase({nums[i-k], i-k});
            }
        }
        if(k&1)  ans.push_back((*myset.find_by_order(k/2)).first);
        else ans.push_back(((long long)(*myset.find_by_order(k/2)).first + (long long)(*myset.find_by_order(k/2-1)).first)*1.0/2);
        return ans;
    }
};

方法三:multiset+advance

multiset支持重复元素,advance可以从begin()前进到第K个元素(注意multiset的迭代器是双向迭代器,advance是线性时间,每个窗口需要O(k))

/**
 * Sliding-window median using a multiset and std::advance.
 *
 * The multiset keeps the window sorted (duplicates allowed); the median is
 * read by stepping k/2 positions from begin(), which costs O(k) per window.
 */
class Solution {
public:
    /**
     * Computes the median of every length-k window of nums, left to right.
     *
     * @param nums input values
     * @param k    window length
     * @return one median per window; empty if k <= 0 or k > nums.size()
     */
    std::vector<double> medianSlidingWindow(std::vector<int>& nums, int k) {
        std::vector<double> ans;
        const int n = static_cast<int>(nums.size());
        // Without this guard the iterator would be advanced past end().
        if (k <= 0 || k > n) return ans;
        ans.reserve(n - k + 1);

        std::multiset<int> window;
        // Median of the k elements currently in the multiset.
        auto median = [&]() -> double {
            auto upper = window.begin();
            std::advance(upper, k / 2);
            if (k & 1) return *upper;
            // Even k: mean of the two middle elements, summed in 64 bits.
            return (static_cast<long long>(*upper) + *std::prev(upper)) * 1.0 / 2;
        };

        for (int i = 0; i < n; ++i) {
            window.insert(nums[i]);
            // erase(find(...)) removes a single occurrence, not every equal value.
            if (i >= k) window.erase(window.find(nums[i - k]));
            if (i >= k - 1) ans.push_back(median());
        }
        return ans;
    }
};

参考链接

  1. 《风 险 对 冲》:双堆对顶,大堆小堆同时维护,44ms
  2. C++福利!仅23行代码!!!O(nlogk) 我觉得我的题解能冲到前三吧!!!
  3. 【微扰理论】直接基于红黑树解决
  4. ACM_template/常用技巧/pbds库用法.md
posted @ 2022-01-13 18:02  Rogn  阅读(17)  评论(0)  编辑  收藏  举报