project1

I extendible_hash_table

1 First, an introduction to extendible hashing

Introduction to extendible hashing

A translated version of the introduction is here:
Translation link

2 Implementation analysis

The extendible hash table in the 2022 Fall Project 1 has only a single level: you only need to implement directory expansion, not shrinking (the 2023 Fall Project 1 uses a three-level design and does include shrinking).

1 Initial state

2 Insertion

Only insertion is somewhat tricky, so the analysis below focuses entirely on the insertion implementation:

1 Insertion flow

Note: this is a loop, and the loop condition is whether the target bucket is still full (recursion also works).
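To make the flow concrete, here is a minimal sketch of that loop, written against the members declared in the header further below (dir_, IndexOf, global_depth_, num_buckets_, RedistributeBucket). It is only an illustration, not the implementation submitted for the project, and it assumes that RedistributeBucket increments the bucket's local depth, creates the sibling bucket, and re-points/rehashes as described in the next two subsections.

template <typename K, typename V>
void ExtendibleHashTable<K, V>::Insert(const K &key, const V &value) {
  std::scoped_lock<std::mutex> lock(latch_);
  // Keep splitting until the bucket the key hashes to has room (the loop mentioned above).
  while (dir_[IndexOf(key)]->IsFull()) {
    size_t index = IndexOf(key);
    auto bucket = dir_[index];
    if (bucket->GetDepth() == global_depth_) {
      // Local depth == global depth: double the directory first.
      global_depth_++;
      size_t old_size = dir_.size();
      dir_.resize(old_size * 2);
      for (size_t i = 0; i < old_size; i++) {
        dir_[i + old_size] = dir_[i];  // new entries alias the existing buckets
      }
    }
    // Split the full bucket: bump its local depth, create the sibling bucket,
    // re-point the affected directory entries and rehash its pairs
    // (all assumed to happen inside RedistributeBucket).
    RedistributeBucket(bucket, index);
    num_buckets_++;
  }
  dir_[IndexOf(key)]->Insert(key, value);  // Bucket::Insert also handles updating an existing key
}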

2 How do we tell the original bucket from the newly split bucket?

Use bit operations. For example, suppose the original bucket's local depth = 2 before the split (i.e. its depth has not been incremented yet), so mask = (1 << local_depth) = 100 in binary.

AND the directory index with the mask and look at that bit (the bit that becomes significant once the bucket splits): 1 → the entry belongs to the newly split bucket; 0 → it stays in the original bucket.

auto mask = (1 << local_depth);  // local_depth is the bucket's depth before the split
if ((index & mask) != 0) {
    bucket1->Insert(it->first, it->second);  // bucket1 is the newly split bucket
} else {
    bucket0->Insert(it->first, it->second);  // bucket0 is the original bucket
}

3 A special case and its handling

Special case: the original bucket is full, but after the split all of its elements still hash to the original bucket, so it is still full and another split is needed. This is exactly why the insertion logic has to be a loop.


Consider the situation in Figure 3 → Figure 4: multiple directory entries point to the same bucket. Once that bucket splits, we have to re-point several directory entries, which a loop handles (e.g. with old local depth 2 and mask = 100, indices 010 and 110 both pointed to the same bucket and must now be re-pointed separately).

for (size_t i = dir_index & (mask - 1); i < dir_.size(); i += mask) {
    if ((i & mask) != 0) {
        dir_[i] = bucket1;
    } else {
        dir_[i] = bucket0;
    }
}
// Loop start: i = dir_index & (mask - 1). "-" binds tighter than "&", so the original
// `dir_index & mask - 1` already meant this; the parentheses just make it explicit.
// With the pre-split local depth = 2, mask - 1 = 011, so only the low two bits are kept,
// i.e. the loop starts from the smallest directory index that mapped to the original bucket.
// i += mask: as Figure 5 shows, after the directory doubles, the indices that used to share
// the original bucket differ by exactly 100 (binary), i.e. by mask.

4 Number of buckets != directory size

Directory expansion → the directory doubles in size:

global_depth_++;
dir_.reserve(1 << global_depth_);

Only a bucket split increases the bucket count:

num_buckets_++;

5 Distinguishing reserve(n) from resize(n)

container.reserve(n): pre-allocates space for n elements, but only increases the capacity; the size stays the same.

container.resize(n) / container.resize(n, default_value): allocates space for n elements and increases both the capacity and the size.
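A tiny, self-contained illustration of the difference (a throwaway std::vector, not project code):

#include <vector>

int main() {
  std::vector<int> v;
  v.reserve(8);      // capacity() >= 8, but size() is still 0; v[0] would be out of bounds
  v.resize(8);       // size() == 8, new elements value-initialized to 0; v[0] is now valid
  v.resize(16, -1);  // size() == 16, the 8 newly added elements are initialized to -1
  return 0;
}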

3 The code:

extendible_hash_table.h

//===----------------------------------------------------------------------===//
//
//                         BusTub
//
// extendible_hash_table.h
//
// Identification: src/include/container/hash/extendible_hash_table.h
//
// Copyright (c) 2015-2021, Carnegie Mellon University Database Group
//
//===----------------------------------------------------------------------===//
/**
 * extendible_hash_table.h
 *
 * Implementation of in-memory hash table using extendible hashing
 */

#pragma once

#include <list>
#include <memory>
#include <mutex>  // NOLINT
#include <utility>
#include <vector>

#include "container/hash/hash_table.h"

namespace bustub {

/**
 * ExtendibleHashTable implements a hash table using the extendible hashing algorithm.
 * @tparam K key type
 * @tparam V value type
 */
template <typename K, typename V>
class ExtendibleHashTable : public HashTable<K, V> {
 public:
  /**
   *
   * TODO(P1): Add implementation
   *
   * @brief Create a new ExtendibleHashTable.
   * @param bucket_size: fixed size for each bucket
   */
  explicit ExtendibleHashTable(size_t bucket_size);

  /**
   * @brief Get the global depth of the directory.
   * @return The global depth of the directory.
   */
  auto GetGlobalDepth() const -> int;

  /**
   * @brief Get the local depth of the bucket that the given directory index points to.
   * @param dir_index The index in the directory.
   * @return The local depth of the bucket.
   */
  auto GetLocalDepth(int dir_index) const -> int;

  /**
   * @brief Get the number of buckets in the directory.
   * @return The number of buckets in the directory.
   */
  auto GetNumBuckets() const -> int;

  /**
   *
   * TODO(P1): Add implementation
   *
   * @brief Find the value associated with the given key.
   *
   * Use IndexOf(key) to find the directory index the key hashes to.
   *
   * @param key The key to be searched.
   * @param[out] value The value associated with the key.
   * @return True if the key is found, false otherwise.
   */
  auto Find(const K &key, V &value) -> bool override;

  /**
   *
   * TODO(P1): Add implementation
   *
   * @brief Insert the given key-value pair into the hash table.
   * If a key already exists, the value should be updated.
   * If the bucket is full and can't be inserted, do the following steps before retrying:
   *    1. If the local depth of the bucket is equal to the global depth,
   *        increment the global depth and double the size of the directory.
   *    2. Increment the local depth of the bucket.
   *    3. Split the bucket and redistribute directory pointers & the kv pairs in the bucket.
   *
   * @param key The key to be inserted.
   * @param value The value to be inserted.
   */
  void Insert(const K &key, const V &value) override;

  /**
   *
   * TODO(P1): Add implementation
   *
   * @brief Given the key, remove the corresponding key-value pair in the hash table.
   * Shrink & Combination is not required for this project
   * @param key The key to be deleted.
   * @return True if the key exists, false otherwise.
   */
  auto Remove(const K &key) -> bool override;

  /**
   * Bucket class for each hash table bucket that the directory points to.
   */
  class Bucket {
   public:
    explicit Bucket(size_t size, int depth = 0);

    /** @brief Check if a bucket is full. */
    inline auto IsFull() const -> bool { return list_.size() == size_; }

    /** @brief Get the local depth of the bucket. */
    inline auto GetDepth() const -> int { return depth_; }

    /** @brief Increment the local depth of a bucket. */
    inline void IncrementDepth() { depth_++; }

    inline auto GetItems() -> std::list<std::pair<K, V>> & { return list_; }

    /**
     *
     * TODO(P1): Add implementation
     *
     * @brief Find the value associated with the given key in the bucket.
     * @param key The key to be searched.
     * @param[out] value The value associated with the key.
     * @return True if the key is found, false otherwise.
     */
    auto Find(const K &key, V &value) -> bool;

    /**
     *
     * TODO(P1): Add implementation
     *
     * @brief Given the key, remove the corresponding key-value pair in the bucket.
     * @param key The key to be deleted.
     * @return True if the key exists, false otherwise.
     */
    auto Remove(const K &key) -> bool;

    /**
     *
     * TODO(P1): Add implementation
     *
     * @brief Insert the given key-value pair into the bucket.
     *      1. If a key already exists, the value should be updated.
     *      2. If the bucket is full, do nothing and return false.
     * @param key The key to be inserted.
     * @param value The value to be inserted.
     * @return True if the key-value pair is inserted, false otherwise.
     */
    auto Insert(const K &key, const V &value) -> bool;

   private:
    // TODO(student): You may add additional private members and helper functions
    size_t size_;
    int depth_;
    std::list<std::pair<K, V>> list_;
  };

 private:
  // TODO(student): You may add additional private members and helper functions and remove the ones
  // you don't need.

  int global_depth_{0};  // The global depth of the directory
  size_t bucket_size_;   // The size of a bucket
  int num_buckets_{1};   // The number of buckets in the hash table
  mutable std::mutex latch_;
  std::vector<std::shared_ptr<Bucket>> dir_;  // The directory of the hash table

  // The following functions are completely optional, you can delete them if you have your own ideas.

  /**
   * @brief Redistribute the kv pairs in a full bucket.
   * @param bucket The bucket to be redistributed.
   */
  auto RedistributeBucket(std::shared_ptr<Bucket> full_bucket, size_t expand_bucket_dir) -> void;

  /*****************************************************************
   * Must acquire latch_ first before calling the below functions. *
   *****************************************************************/

  /**
   * @brief For the given key, return the entry index in the directory where the key hashes to.
   * @param key The key to be hashed.
   * @return The entry index in the directory.
   */
  auto IndexOf(const K &key) -> size_t;

  auto GetGlobalDepthInternal() const -> int;
  auto GetLocalDepthInternal(int dir_index) const -> int;
  auto GetNumBucketsInternal() const -> int;
};

}  // namespace bustub

extendible_hash_table.cpp
The implementation code is not shown here.

Tests

The online autograder just would not work for me, which was frustrating; I don't know why, and I tried every workaround I could find.

So I pulled the online test cases down and ran them locally.

The test cases are as follows:

/**
 * extendible_hash_test.cpp
 */

#include <random>
#include <thread>  // NOLINT

#include "container/hash/extendible_hash_table.h"
#include "gtest/gtest.h"

namespace bustub {

TEST(ExtendibleHashTableTest, InsertSplit) {
  auto table = std::make_unique<ExtendibleHashTable<int, std::string>>(2);

  ASSERT_EQ(1, table->GetNumBuckets());
  ASSERT_EQ(0, table->GetLocalDepth(0));
  ASSERT_EQ(0, table->GetGlobalDepth());

  table->Insert(1, "a");
  table->Insert(2, "b");
  ASSERT_EQ(1, table->GetNumBuckets());
  ASSERT_EQ(0, table->GetLocalDepth(0));
  ASSERT_EQ(0, table->GetGlobalDepth());

  table->Insert(3, "c");  // first split
  ASSERT_EQ(2, table->GetNumBuckets());
  ASSERT_EQ(1, table->GetLocalDepth(0));
  ASSERT_EQ(1, table->GetLocalDepth(1));
  ASSERT_EQ(1, table->GetGlobalDepth());
  table->Insert(4, "d");

  table->Insert(5, "e");  // second split
  ASSERT_EQ(3, table->GetNumBuckets());
  ASSERT_EQ(1, table->GetLocalDepth(0));
  ASSERT_EQ(2, table->GetLocalDepth(1));
  ASSERT_EQ(1, table->GetLocalDepth(2));
  ASSERT_EQ(2, table->GetLocalDepth(3));
  ASSERT_EQ(2, table->GetGlobalDepth());

  table->Insert(6, "f");  // third split (global depth doesn't increase)
  ASSERT_EQ(4, table->GetNumBuckets());
  ASSERT_EQ(2, table->GetLocalDepth(0));
  ASSERT_EQ(2, table->GetLocalDepth(1));
  ASSERT_EQ(2, table->GetLocalDepth(2));
  ASSERT_EQ(2, table->GetLocalDepth(3));
  ASSERT_EQ(2, table->GetGlobalDepth());

  table->Insert(7, "g");
  table->Insert(8, "h");
  table->Insert(9, "i");
  ASSERT_EQ(5, table->GetNumBuckets());
  ASSERT_EQ(2, table->GetLocalDepth(0));
  ASSERT_EQ(3, table->GetLocalDepth(1));
  ASSERT_EQ(2, table->GetLocalDepth(2));
  ASSERT_EQ(2, table->GetLocalDepth(3));
  ASSERT_EQ(2, table->GetLocalDepth(0));
  ASSERT_EQ(3, table->GetLocalDepth(1));
  ASSERT_EQ(2, table->GetLocalDepth(2));
  ASSERT_EQ(2, table->GetLocalDepth(3));
  ASSERT_EQ(3, table->GetGlobalDepth());

  // find table
  std::string result;
  table->Find(9, result);
  ASSERT_EQ("i", result);
  table->Find(8, result);
  ASSERT_EQ("h", result);
  table->Find(2, result);
  ASSERT_EQ("b", result);
  ASSERT_EQ(false, table->Find(10, result));

  // delete table
  ASSERT_EQ(true, table->Remove(8));
  ASSERT_EQ(true, table->Remove(4));
  ASSERT_EQ(true, table->Remove(1));
  ASSERT_EQ(false, table->Remove(20));
}

TEST(ExtendibleHashTableTest, InsertMultipleSplit) {
  {
    auto table = std::make_unique<ExtendibleHashTable<int, std::string>>(2);

    table->Insert(0, "0");
    table->Insert(1024, "1024");
    table->Insert(4, "4");  // this causes 3 splits

    ASSERT_EQ(4, table->GetNumBuckets());
    ASSERT_EQ(3, table->GetGlobalDepth());
    ASSERT_EQ(3, table->GetLocalDepth(0));
    ASSERT_EQ(1, table->GetLocalDepth(1));
    ASSERT_EQ(2, table->GetLocalDepth(2));
    ASSERT_EQ(1, table->GetLocalDepth(3));
    ASSERT_EQ(3, table->GetLocalDepth(4));
    ASSERT_EQ(1, table->GetLocalDepth(5));
    ASSERT_EQ(2, table->GetLocalDepth(6));
    ASSERT_EQ(1, table->GetLocalDepth(7));
  }
  {
    auto table = std::make_unique<ExtendibleHashTable<int, std::string>>(2);

    table->Insert(0, "0");
    table->Insert(1024, "1024");
    table->Insert(16, "16");  // this causes 5 splits

    ASSERT_EQ(6, table->GetNumBuckets());
    ASSERT_EQ(5, table->GetGlobalDepth());
  }
}

TEST(ExtendibleHashTableTest, ConcurrentInsertFind) {
  const int num_runs = 50;
  const int num_threads = 5;

  // Run concurrent test multiple times to guarantee correctness.
  for (int run = 0; run < num_runs; run++) {
    auto table = std::make_unique<ExtendibleHashTable<int, std::string>>(2);
    std::vector<std::thread> threads;
    threads.reserve(num_threads);
    for (int tid = 0; tid < num_threads; tid++) {
      threads.emplace_back([tid, &table]() {
        // for random number generation
        std::random_device rd;
        std::mt19937 gen(rd());
        std::uniform_int_distribution<> dis(0, num_threads * 10);

        for (int i = 0; i < 10; i++) {
          table->Insert(tid * 10 + i, std::to_string(tid * 10 + i));

          // Run Find on random keys to let Thread Sanitizer check for race conditions
          std::string val;
          table->Find(dis(gen), val);
        }
      });
    }
    for (int i = 0; i < num_threads; i++) {
      threads[i].join();
    }

    for (int i = 0; i < num_threads * 10; i++) {
      std::string val;
      ASSERT_TRUE(table->Find(i, val));
      ASSERT_EQ(std::to_string(i), val);
    }
  }
}

TEST(ExtendibleHashTableTest, ConcurrentRemoveInsert) {
  const int num_threads = 5;
  const int num_runs = 50;

  for (int run = 0; run < num_runs; run++) {
    auto table = std::make_unique<ExtendibleHashTable<int, std::string>>(2);
    std::vector<std::thread> threads;
    std::vector<std::string> values;
    values.reserve(100);
    for (int i = 0; i < 100; i++) {
      values.push_back(std::to_string(i));
    }
    for (unsigned int i = 0; i < values.size(); i++) {
      table->Insert(i, values[i]);
    }

    threads.reserve(num_threads);
    for (int tid = 0; tid < num_threads; tid++) {
      threads.emplace_back([tid, &table]() {
        for (int i = tid * 20; i < tid * 20 + 20; i++) {
          table->Remove(i);
          table->Insert(i + 400, std::to_string(i + 400));
        }
      });
    }
    for (int i = 0; i < num_threads; i++) {
      threads[i].join();
    }

    std::string val;
    for (int i = 0; i < 100; i++) {
      ASSERT_FALSE(table->Find(i, val));
    }
    for (int i = 400; i < 500; i++) {
      ASSERT_TRUE(table->Find(i, val));
      ASSERT_EQ(std::to_string(i), val);
    }
  }
}

TEST(ExtendibleHashTableTest, InitiallyEmpty) {
  auto table = std::make_unique<ExtendibleHashTable<int, std::string>>(2);

  ASSERT_EQ(0, table->GetGlobalDepth());
  ASSERT_EQ(0, table->GetLocalDepth(0));

  std::string result;
  ASSERT_FALSE(table->Find(1, result));
  ASSERT_FALSE(table->Find(0, result));
  ASSERT_FALSE(table->Find(-1, result));
}

TEST(ExtendibleHashTableTest, InsertAndFind) {
  auto table = std::make_unique<ExtendibleHashTable<int, std::string>>(4);

  std::vector<std::string> val;
  for (int i = 0; i <= 100; i++) {
    val.push_back(std::to_string(i));
  }
  table->Insert(4, val[4]);
  table->Insert(12, val[12]);
  table->Insert(16, val[16]);
  table->Insert(64, val[64]);
  table->Insert(5, val[5]);
  table->Insert(10, val[10]);
  table->Insert(51, val[51]);
  table->Insert(15, val[15]);
  table->Insert(18, val[18]);
  table->Insert(20, val[20]);
  table->Insert(7, val[7]);
  table->Insert(21, val[21]);
  table->Insert(11, val[11]);
  table->Insert(19, val[19]);

  std::string result;
  ASSERT_TRUE(table->Find(4, result));
  ASSERT_EQ(val[4], result);
  ASSERT_TRUE(table->Find(12, result));
  ASSERT_EQ(val[12], result);
  ASSERT_TRUE(table->Find(16, result));
  ASSERT_EQ(val[16], result);
  ASSERT_TRUE(table->Find(64, result));
  ASSERT_EQ(val[64], result);
  ASSERT_TRUE(table->Find(5, result));
  ASSERT_EQ(val[5], result);
  ASSERT_TRUE(table->Find(10, result));
  ASSERT_EQ(val[10], result);
  ASSERT_TRUE(table->Find(51, result));
  ASSERT_EQ(val[51], result);
  ASSERT_TRUE(table->Find(15, result));
  ASSERT_EQ(val[15], result);
  ASSERT_TRUE(table->Find(18, result));
  ASSERT_EQ(val[18], result);
  ASSERT_TRUE(table->Find(20, result));
  ASSERT_EQ(val[20], result);
  ASSERT_TRUE(table->Find(7, result));
  ASSERT_EQ(val[7], result);
  ASSERT_TRUE(table->Find(21, result));
  ASSERT_EQ(val[21], result);
  ASSERT_TRUE(table->Find(11, result));
  ASSERT_EQ(val[11], result);
  ASSERT_TRUE(table->Find(19, result));
  ASSERT_EQ(val[19], result);

  ASSERT_FALSE(table->Find(0, result));
  ASSERT_FALSE(table->Find(1, result));
  ASSERT_FALSE(table->Find(-1, result));
  ASSERT_FALSE(table->Find(2, result));
  ASSERT_FALSE(table->Find(3, result));
  for (int i = 65; i < 1000; i++) {
    ASSERT_FALSE(table->Find(i, result));
  }
}

TEST(ExtendibleHashTableTest, GlobalDepth) {
  auto table = std::make_unique<ExtendibleHashTable<int, std::string>>(4);

  std::vector<std::string> val;
  for (int i = 0; i <= 100; i++) {
    val.push_back(std::to_string(i));
  }

  // Inserting 4 keys belong to the same bucket
  table->Insert(4, val[4]);
  table->Insert(12, val[12]);
  table->Insert(16, val[16]);
  table->Insert(64, val[64]);
  ASSERT_EQ(0, table->GetGlobalDepth());

  // Inserting into another bucket
  table->Insert(5, val[5]);
  ASSERT_EQ(1, table->GetGlobalDepth());

  // Inserting into filled bucket 0
  table->Insert(10, val[10]);
  ASSERT_EQ(2, table->GetGlobalDepth());

  // Inserting 3 keys into buckets with space
  table->Insert(51, val[51]);
  table->Insert(15, val[15]);
  table->Insert(18, val[18]);
  ASSERT_EQ(2, table->GetGlobalDepth());

  // Inserting into filled buckets with local depth = global depth
  table->Insert(20, val[20]);
  ASSERT_EQ(3, table->GetGlobalDepth());

  // Inserting 2 keys into filled buckets with local depth < global depth
  table->Insert(7, val[7]);
  table->Insert(21, val[21]);
  ASSERT_EQ(3, table->GetGlobalDepth());

  // More Insertions(2 keys)
  table->Insert(11, val[11]);
  table->Insert(19, val[19]);
  ASSERT_EQ(3, table->GetGlobalDepth());
}

TEST(ExtendibleHashTableTest, LocalDepth) {
  auto table = std::make_unique<ExtendibleHashTable<int, std::string>>(4);

  std::vector<std::string> val;
  for (int i = 0; i <= 100; i++) {
    val.push_back(std::to_string(i));
  }

  // Inserting 4 keys belong to the same bucket
  table->Insert(4, val[4]);
  table->Insert(12, val[12]);
  table->Insert(16, val[16]);
  table->Insert(64, val[64]);
  ASSERT_EQ(0, table->GetLocalDepth(0));

  // Inserting into another bucket
  table->Insert(5, val[5]);
  ASSERT_EQ(1, table->GetLocalDepth(0));
  ASSERT_EQ(1, table->GetLocalDepth(1));

  // Inserting into filled bucket 0
  table->Insert(10, val[10]);
  ASSERT_EQ(2, table->GetLocalDepth(0));
  ASSERT_EQ(1, table->GetLocalDepth(1));
  ASSERT_EQ(2, table->GetLocalDepth(2));
  ASSERT_EQ(1, table->GetLocalDepth(3));

  // Inserting 3 keys into buckets with space
  table->Insert(51, val[51]);
  table->Insert(15, val[15]);
  table->Insert(18, val[18]);
  ASSERT_EQ(2, table->GetLocalDepth(0));
  ASSERT_EQ(1, table->GetLocalDepth(1));
  ASSERT_EQ(2, table->GetLocalDepth(2));
  ASSERT_EQ(1, table->GetLocalDepth(3));

  // Inserting into filled buckets with local depth = global depth
  table->Insert(20, val[20]);
  ASSERT_EQ(3, table->GetLocalDepth(0));
  ASSERT_EQ(1, table->GetLocalDepth(1));
  ASSERT_EQ(2, table->GetLocalDepth(2));
  ASSERT_EQ(1, table->GetLocalDepth(3));
  ASSERT_EQ(3, table->GetLocalDepth(4));
  ASSERT_EQ(1, table->GetLocalDepth(5));
  ASSERT_EQ(2, table->GetLocalDepth(6));
  ASSERT_EQ(1, table->GetLocalDepth(7));

  // Inserting 2 keys into filled buckets with local depth < global depth
  table->Insert(7, val[7]);
  table->Insert(21, val[21]);
  ASSERT_EQ(3, table->GetLocalDepth(0));
  ASSERT_EQ(2, table->GetLocalDepth(1));
  ASSERT_EQ(2, table->GetLocalDepth(2));
  ASSERT_EQ(2, table->GetLocalDepth(3));
  ASSERT_EQ(3, table->GetLocalDepth(4));
  ASSERT_EQ(2, table->GetLocalDepth(5));
  ASSERT_EQ(2, table->GetLocalDepth(6));
  ASSERT_EQ(2, table->GetLocalDepth(7));

  // More Insertions(2 keys)
  table->Insert(11, val[11]);
  table->Insert(19, val[19]);
  ASSERT_EQ(3, table->GetLocalDepth(0));
  ASSERT_EQ(2, table->GetLocalDepth(1));
  ASSERT_EQ(2, table->GetLocalDepth(2));
  ASSERT_EQ(3, table->GetLocalDepth(3));
  ASSERT_EQ(3, table->GetLocalDepth(4));
  ASSERT_EQ(2, table->GetLocalDepth(5));
  ASSERT_EQ(2, table->GetLocalDepth(6));
  ASSERT_EQ(3, table->GetLocalDepth(7));
}

TEST(ExtendibleHashTableTest, InsertAndReplace) {
  auto table = std::make_unique<ExtendibleHashTable<int, std::string>>(4);

  std::vector<std::string> val;
  std::vector<std::string> newval;
  for (int i = 0; i <= 100; i++) {
    val.push_back(std::to_string(i));
    newval.push_back(std::to_string(i + 1));
  }

  std::string result;

  table->Insert(4, val[4]);
  table->Insert(12, val[12]);
  table->Insert(16, val[16]);
  table->Insert(64, val[64]);
  table->Insert(5, val[5]);
  table->Insert(10, val[10]);
  table->Insert(51, val[51]);
  table->Insert(15, val[15]);
  table->Insert(18, val[18]);
  table->Insert(20, val[20]);
  table->Insert(7, val[7]);
  table->Insert(21, val[21]);
  table->Insert(11, val[11]);
  table->Insert(19, val[19]);
  table->Insert(4, newval[4]);
  table->Insert(12, newval[12]);
  table->Insert(16, newval[16]);
  table->Insert(64, newval[64]);
  table->Insert(5, newval[5]);
  table->Insert(10, newval[10]);
  table->Insert(51, newval[51]);
  table->Insert(15, newval[15]);
  table->Insert(18, newval[18]);
  table->Insert(20, newval[20]);
  table->Insert(7, newval[7]);
  table->Insert(21, newval[21]);
  table->Insert(11, newval[11]);
  table->Insert(19, newval[19]);

  ASSERT_TRUE(table->Find(4, result));
  ASSERT_EQ(newval[4], result);
  ASSERT_TRUE(table->Find(12, result));
  ASSERT_EQ(newval[12], result);
  ASSERT_TRUE(table->Find(16, result));
  ASSERT_EQ(newval[16], result);
  ASSERT_TRUE(table->Find(64, result));
  ASSERT_EQ(newval[64], result);
  ASSERT_TRUE(table->Find(5, result));
  ASSERT_EQ(newval[5], result);
  ASSERT_TRUE(table->Find(10, result));
  ASSERT_EQ(newval[10], result);
  ASSERT_TRUE(table->Find(51, result));
  ASSERT_EQ(newval[51], result);
  ASSERT_TRUE(table->Find(15, result));
  ASSERT_EQ(newval[15], result);
  ASSERT_TRUE(table->Find(18, result));
  ASSERT_EQ(newval[18], result);
  ASSERT_TRUE(table->Find(20, result));
  ASSERT_EQ(newval[20], result);
  ASSERT_TRUE(table->Find(7, result));
  ASSERT_EQ(newval[7], result);
  ASSERT_TRUE(table->Find(21, result));
  ASSERT_EQ(newval[21], result);
  ASSERT_TRUE(table->Find(11, result));
  ASSERT_EQ(newval[11], result);
  ASSERT_TRUE(table->Find(19, result));
  ASSERT_EQ(newval[19], result);
}

TEST(ExtendibleHashTableTest, Remove) {
  auto table = std::make_unique<ExtendibleHashTable<int, std::string>>(4);

  std::vector<std::string> val;
  for (int i = 0; i <= 100; i++) {
    val.push_back(std::to_string(i));
  }

  std::string result;

  table->Insert(4, val[4]);
  table->Insert(12, val[12]);
  table->Insert(16, val[16]);
  table->Insert(64, val[64]);
  table->Insert(5, val[5]);
  table->Insert(10, val[10]);
  table->Insert(51, val[51]);
  table->Insert(15, val[15]);
  table->Insert(18, val[18]);
  table->Insert(20, val[20]);
  table->Insert(7, val[7]);
  table->Insert(21, val[21]);
  table->Insert(11, val[11]);
  table->Insert(19, val[19]);

  ASSERT_TRUE(table->Remove(4));
  ASSERT_TRUE(table->Remove(12));
  ASSERT_TRUE(table->Remove(16));
  ASSERT_TRUE(table->Remove(64));
  ASSERT_TRUE(table->Remove(5));
  ASSERT_TRUE(table->Remove(10));

  ASSERT_FALSE(table->Find(4, result));
  ASSERT_FALSE(table->Find(12, result));
  ASSERT_FALSE(table->Find(16, result));
  ASSERT_FALSE(table->Find(64, result));
  ASSERT_FALSE(table->Find(5, result));
  ASSERT_FALSE(table->Find(10, result));
  ASSERT_TRUE(table->Find(51, result));
  ASSERT_EQ(val[51], result);
  ASSERT_TRUE(table->Find(15, result));
  ASSERT_EQ(val[15], result);
  ASSERT_TRUE(table->Find(18, result));
  ASSERT_EQ(val[18], result);
  ASSERT_TRUE(table->Find(20, result));
  ASSERT_EQ(val[20], result);
  ASSERT_TRUE(table->Find(7, result));
  ASSERT_EQ(val[7], result);
  ASSERT_TRUE(table->Find(21, result));
  ASSERT_EQ(val[21], result);
  ASSERT_TRUE(table->Find(11, result));
  ASSERT_EQ(val[11], result);
  ASSERT_TRUE(table->Find(19, result));
  ASSERT_EQ(val[19], result);

  ASSERT_TRUE(table->Remove(51));
  ASSERT_TRUE(table->Remove(15));
  ASSERT_TRUE(table->Remove(18));

  ASSERT_FALSE(table->Remove(5));
  ASSERT_FALSE(table->Remove(10));
  ASSERT_FALSE(table->Remove(51));
  ASSERT_FALSE(table->Remove(15));
  ASSERT_FALSE(table->Remove(18));

  ASSERT_TRUE(table->Remove(20));
  ASSERT_TRUE(table->Remove(7));
  ASSERT_TRUE(table->Remove(21));
  ASSERT_TRUE(table->Remove(11));
  ASSERT_TRUE(table->Remove(19));

  for (int i = 0; i < 1000; i++) {
    ASSERT_FALSE(table->Find(i, result));
  }

  table->Insert(4, val[4]);
  table->Insert(12, val[12]);
  table->Insert(16, val[16]);
  table->Insert(64, val[64]);
  table->Insert(5, val[5]);
  table->Insert(10, val[10]);
  table->Insert(51, val[51]);
  table->Insert(15, val[15]);
  table->Insert(18, val[18]);
  table->Insert(20, val[20]);
  table->Insert(7, val[7]);
  table->Insert(21, val[21]);
  table->Insert(11, val[11]);
  table->Insert(19, val[19]);

  ASSERT_TRUE(table->Find(4, result));
  ASSERT_EQ(val[4], result);
  ASSERT_TRUE(table->Find(12, result));
  ASSERT_EQ(val[12], result);
  ASSERT_TRUE(table->Find(16, result));
  ASSERT_EQ(val[16], result);
  ASSERT_TRUE(table->Find(64, result));
  ASSERT_EQ(val[64], result);
  ASSERT_TRUE(table->Find(5, result));
  ASSERT_EQ(val[5], result);
  ASSERT_TRUE(table->Find(10, result));
  ASSERT_EQ(val[10], result);
  ASSERT_TRUE(table->Find(51, result));
  ASSERT_EQ(val[51], result);
  ASSERT_TRUE(table->Find(15, result));
  ASSERT_EQ(val[15], result);
  ASSERT_TRUE(table->Find(18, result));
  ASSERT_EQ(val[18], result);
  ASSERT_TRUE(table->Find(20, result));
  ASSERT_EQ(val[20], result);
  ASSERT_TRUE(table->Find(7, result));
  ASSERT_EQ(val[7], result);
  ASSERT_TRUE(table->Find(21, result));
  ASSERT_EQ(val[21], result);
  ASSERT_TRUE(table->Find(11, result));
  ASSERT_EQ(val[11], result);
  ASSERT_TRUE(table->Find(19, result));
  ASSERT_EQ(val[19], result);

  ASSERT_EQ(3, table->GetLocalDepth(0));
  ASSERT_EQ(2, table->GetLocalDepth(1));
  ASSERT_EQ(2, table->GetLocalDepth(2));
  ASSERT_EQ(3, table->GetLocalDepth(3));
  ASSERT_EQ(3, table->GetLocalDepth(4));
  ASSERT_EQ(2, table->GetLocalDepth(5));
  ASSERT_EQ(2, table->GetLocalDepth(6));
  ASSERT_EQ(3, table->GetLocalDepth(7));
  ASSERT_EQ(3, table->GetGlobalDepth());
}

TEST(ExtendibleHashTableTest, GetNumBuckets) {
  auto table = std::make_unique<ExtendibleHashTable<int, int>>(4);

  std::vector<int> val;
  for (int i = 0; i <= 100; i++) {
    val.push_back(i);
  }

  // Inserting 4 keys belong to the same bucket
  table->Insert(4, val[4]);
  table->Insert(12, val[12]);
  table->Insert(16, val[16]);
  table->Insert(64, val[64]);
  ASSERT_EQ(1, table->GetNumBuckets());
  // Inserting into another bucket

  table->Insert(31, val[31]);
  ASSERT_EQ(2, table->GetNumBuckets());

  // Inserting into filled bucket 0
  table->Insert(10, val[10]);
  ASSERT_EQ(3, table->GetNumBuckets());

  // Inserting 3 keys into buckets with space
  table->Insert(51, val[51]);
  table->Insert(15, val[15]);
  table->Insert(18, val[18]);
  ASSERT_EQ(3, table->GetNumBuckets());

  // Inserting into filled buckets with local depth = global depth
  table->Insert(20, val[20]);
  ASSERT_EQ(4, table->GetNumBuckets());

  // Inserting 2 keys into filled buckets with local depth < global depth
  // Adding a new bucket and inserting will still be full so
  // will test if they add another bucket again.
  table->Insert(7, val[7]);
  table->Insert(23, val[21]);
  ASSERT_EQ(6, table->GetNumBuckets());

  // More Insertions(2 keys)
  table->Insert(11, val[11]);
  table->Insert(19, val[19]);
  ASSERT_EQ(6, table->GetNumBuckets());
}

TEST(ExtendibleHashTableTest, IntegratedTest) {
  auto table = std::make_unique<ExtendibleHashTable<int, std::string>>(7);

  std::vector<std::string> val;

  for (int i = 0; i <= 2000; i++) {
    val.push_back(std::to_string(i));
  }

  for (int i = 1; i <= 1000; i++) {
    table->Insert(i, val[i]);
  }

  int global_depth = table->GetGlobalDepth();
  ASSERT_EQ(8, global_depth);

  for (int i = 1; i <= 1000; i++) {
    std::string result;
    ASSERT_TRUE(table->Find(i, result));
    ASSERT_EQ(val[i], result);
  }

  for (int i = 1; i <= 500; i++) {
    ASSERT_TRUE(table->Remove(i));
  }

  for (int i = 1; i <= 500; i++) {
    std::string result;
    ASSERT_FALSE(table->Find(i, result));
    ASSERT_FALSE(table->Remove(i));
  }

  for (int i = 501; i <= 1000; i++) {
    std::string result;
    ASSERT_TRUE(table->Find(i, result));
    ASSERT_EQ(val[i], result);
  }

  for (int i = 1; i <= 2000; i++) {
    table->Insert(i, val[i]);
  }

  global_depth = table->GetGlobalDepth();
  ASSERT_EQ(9, global_depth);

  for (int i = 1; i <= 2000; i++) {
    std::string result;
    ASSERT_TRUE(table->Find(i, result));
    ASSERT_EQ(val[i], result);
  }

  for (int i = 1; i <= 2000; i++) {
    ASSERT_TRUE(table->Remove(i));
  }

  for (int i = 1; i <= 2000; i++) {
    std::string result;
    ASSERT_FALSE(table->Find(i, result));
    ASSERT_FALSE(table->Remove(i));
  }
}
}  // namespace bustub

Results:

II lru_k_replacer

1 Requirements:

When evicting, evict in this order:

  • Frames with fewer than k recorded accesses first (if there are several such frames, evict the one with the earliest overall timestamp).
  • Then frames with at least k accesses. There is no need to compute the difference between the current timestamp and the k-th most recent access: the current time is the same for every candidate, so the frame with the largest backward k-distance is simply the one whose k-th most recent timestamp is smallest.

2 Implementation approach:

1 Access history

  // <frame_id, <list of timestamps>>; the list uses head insertion, i.e. the newest timestamp sits at the front, and at most k timestamps are kept
  std::shared_ptr<std::unordered_map<frame_id_t, std::list<size_t>>> history_map_;
  // A shared_ptr means no manual memory management (everything is released when the replacer is destroyed)
  // std::unordered_map is used so that lookups by frame_id are fast

Two containers are used, one for each case:

  • Frames with at least K accesses

    // <k-th most recent timestamp, frame_id>; only evictable frames are stored
      // std::set keeps the pairs sorted field by field (first by timestamp, then by frame_id when timestamps are equal)
      using fid_time_pair = std::pair<size_t, frame_id_t>;
      std::shared_ptr<std::set<fid_time_pair>> ge_k_set_;  // ordered by timestamp, smallest first
      std::shared_ptr<std::unordered_map<frame_id_t, std::set<fid_time_pair>::iterator>> ge_k_map_iter_;  // stores iterators so that a frame_id can locate its std::pair<size_t, frame_id_t> inside ge_k_set_
      // Note: auto it = ge_k_set_->insert() / ge_k_set_->emplace() returns
      // std::pair<iterator, bool>, so the iterator is it.first
    
  • Frames with fewer than K accesses

// <earliest timestamp among its (fewer than k) accesses, frame_id>; only evictable frames are stored
  std::shared_ptr<std::set<fid_time_pair>> le_k_set_;
  std::shared_ptr<std::unordered_map<frame_id_t, std::set<fid_time_pair>::iterator>> le_k_map_iter_;

The implementation branches on each frame's access count:

std::shared_ptr<std::unordered_map<frame_id_t, size_t>> count_map_;
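For example, RecordAccess() can branch on that count roughly as follows. This is a minimal sketch built on the members declared in the header below (history_map_, count_map_, evictable_map_ and the two set/iterator pairs), not the removed implementation:

void LRUKReplacer::RecordAccess(frame_id_t frame_id) {
  std::scoped_lock<std::mutex> lock(latch_);
  BUSTUB_ASSERT(static_cast<size_t>(frame_id) < replacer_size_, "invalid frame id");
  size_t now = current_timestamp_++;
  auto &hist = (*history_map_)[frame_id];
  hist.push_front(now);                // newest timestamp at the front
  if (hist.size() > k_) {
    hist.pop_back();                   // keep at most k timestamps
  }
  size_t cnt = ++(*count_map_)[frame_id];
  if (!(*evictable_map_)[frame_id]) {  // the ordered sets only hold evictable frames
    return;
  }
  if (cnt == k_) {
    // Crossed the threshold: move the frame from the "<k" set to the ">=k" set,
    // keyed by its k-th most recent timestamp (the back of the list).
    if (auto it = le_k_map_iter_->find(frame_id); it != le_k_map_iter_->end()) {
      le_k_set_->erase(it->second);
      le_k_map_iter_->erase(it);
    }
    (*ge_k_map_iter_)[frame_id] = ge_k_set_->emplace(hist.back(), frame_id).first;
  } else if (cnt > k_) {
    // Already had >= k accesses: the k-th most recent timestamp changed, so re-key the entry.
    ge_k_set_->erase(ge_k_map_iter_->at(frame_id));
    (*ge_k_map_iter_)[frame_id] = ge_k_set_->emplace(hist.back(), frame_id).first;
  }
  // cnt < k: the "<k" set is keyed by the earliest recorded timestamp, which does not change here.
}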

The following three maps drop a frame_id's record only when the frame is evicted or removed; do not touch them in SetEvictable.

std::shared_ptr<std::unordered_map<frame_id_t, std::list<size_t>>> history_map_;
std::shared_ptr<std::unordered_map<frame_id_t, bool>> evictable_map_;
std::shared_ptr<std::unordered_map<frame_id_t, size_t>> count_map_;
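And a matching sketch of Evict(), again only an illustration of how the two ordered sets and the bookkeeping maps interact, not the removed implementation:

auto LRUKReplacer::Evict(frame_id_t *frame_id) -> bool {
  std::scoped_lock<std::mutex> lock(latch_);
  // Pop the frame with the smallest key from an ordered set and erase all of its records.
  auto evict_from = [&](std::set<fid_time_pair> &s,
                        std::unordered_map<frame_id_t, std::set<fid_time_pair>::iterator> &iters) {
    *frame_id = s.begin()->second;   // smallest timestamp first
    s.erase(s.begin());
    iters.erase(*frame_id);
    history_map_->erase(*frame_id);  // eviction is the point where the access history ...
    evictable_map_->erase(*frame_id);
    count_map_->erase(*frame_id);    // ... and the other per-frame records are dropped
    curr_size_--;
    return true;
  };
  if (!le_k_set_->empty()) {  // frames with fewer than k accesses go first (earliest access wins)
    return evict_from(*le_k_set_, *le_k_map_iter_);
  }
  if (!ge_k_set_->empty()) {  // then the frame with the smallest k-th most recent timestamp
    return evict_from(*ge_k_set_, *ge_k_map_iter_);
  }
  return false;               // nothing is evictable
}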

3 The code:

lru_k_replacer.h

//===----------------------------------------------------------------------===//
//
//                         BusTub
//
// lru_k_replacer.h
//
// Identification: src/include/buffer/lru_k_replacer.h
//
// Copyright (c) 2015-2022, Carnegie Mellon University Database Group
//
//===----------------------------------------------------------------------===//

#pragma once

#include <cstddef>
#include <list>
#include <memory>
#include <mutex>  // NOLINT
#include <set>
#include <unordered_map>
#include <utility>

#include "common/config.h"
#include "common/macros.h"

namespace bustub {

/**
 * LRUKReplacer implements the LRU-k replacement policy.
 *
 * The LRU-k algorithm evicts a frame whose backward k-distance is maximum
 * of all frames. Backward k-distance is computed as the difference in time between
 * current timestamp and the timestamp of kth previous access.
 *
 * A frame with less than k historical references is given
 * +inf as its backward k-distance. When multiple frames have +inf backward k-distance,
 * classical LRU algorithm is used to choose victim.
 */
class LRUKReplacer {
 public:
  /**
   *
   * TODO(P1): Add implementation
   *
   * @brief a new LRUKReplacer.
   * @param num_frames the maximum number of frames the LRUReplacer will be required to store
   */
  explicit LRUKReplacer(size_t num_frames, size_t k);

  DISALLOW_COPY_AND_MOVE(LRUKReplacer);

  /**
   * TODO(P1): Add implementation
   *
   * @brief Destroys the LRUReplacer.
   */
  ~LRUKReplacer() = default;

  /**
   * TODO(P1): Add implementation
   *
   * @brief Find the frame with largest backward k-distance and evict that frame. Only frames
   * that are marked as 'evictable' are candidates for eviction.
   *
   * A frame with less than k historical references is given +inf as its backward k-distance.
   * If multiple frames have inf backward k-distance, then evict the frame with the earliest
   * timestamp overall.
   *
   * Successful eviction of a frame should decrement the size of replacer and remove the frame's
   * access history.
   *
   * @param[out] frame_id id of frame that is evicted.
   * @return true if a frame is evicted successfully, false if no frames can be evicted.
   */
  auto Evict(frame_id_t *frame_id) -> bool;

  /**
   * TODO(P1): Add implementation
   *
   * @brief Record the event that the given frame id is accessed at current timestamp.
   * Create a new entry for access history if frame id has not been seen before.
   *
   * If frame id is invalid (ie. larger than replacer_size_), throw an exception. You can
   * also use BUSTUB_ASSERT to abort the process if frame id is invalid.
   *
   * @param frame_id id of frame that received a new access.
   */
  void RecordAccess(frame_id_t frame_id);

  /**
   * TODO(P1): Add implementation
   *
   * @brief Toggle whether a frame is evictable or non-evictable. This function also
   * controls replacer's size. Note that size is equal to number of evictable entries.
   *
   * If a frame was previously evictable and is to be set to non-evictable, then size should
   * decrement. If a frame was previously non-evictable and is to be set to evictable,
   * then size should increment.
   *
   * If frame id is invalid, throw an exception or abort the process.
   *
   * For other scenarios, this function should terminate without modifying anything.
   *
   * @param frame_id id of frame whose 'evictable' status will be modified
   * @param set_evictable whether the given frame is evictable or not
   */
  void SetEvictable(frame_id_t frame_id, bool set_evictable);

  /**
   * TODO(P1): Add implementation
   *
   * @brief Remove an evictable frame from replacer, along with its access history.
   * This function should also decrement replacer's size if removal is successful.
   *
   * Note that this is different from evicting a frame, which always remove the frame
   * with largest backward k-distance. This function removes specified frame id,
   * no matter what its backward k-distance is.
   *
   * If Remove is called on a non-evictable frame, throw an exception or abort the
   * process.
   *
   * If specified frame is not found, directly return from this function.
   *
   * @param frame_id id of frame to be removed
   */
  void Remove(frame_id_t frame_id);

  /**
   * TODO(P1): Add implementation
   *
   * @brief Return replacer's size, which tracks the number of evictable frames.
   *
   * @return size_t
   */
  auto Size() -> size_t;

 private:
  // TODO(student): implement me! You can replace these member variables as you like.
  // Remove maybe_unused if you start using them.
  size_t current_timestamp_{0};
  size_t curr_size_{0};
  size_t replacer_size_;
  size_t k_;
  std::mutex latch_;

  // <k-th most recent timestamp, frame_id>; only evictable frames are stored
  // std::set keeps the pairs sorted field by field (first by timestamp, then by frame_id when timestamps are equal)
  using fid_time_pair = std::pair<size_t, frame_id_t>;
  std::shared_ptr<std::set<fid_time_pair>> ge_k_set_;
  std::shared_ptr<std::unordered_map<frame_id_t, std::set<fid_time_pair>::iterator>> ge_k_map_iter_;
  // <earliest timestamp among its (fewer than k) accesses, frame_id>; only evictable frames are stored
  std::shared_ptr<std::set<fid_time_pair>> le_k_set_;
  std::shared_ptr<std::unordered_map<frame_id_t, std::set<fid_time_pair>::iterator>> le_k_map_iter_;
  // <frame_id, <list of timestamps>>; head insertion, newest timestamp at the front, at most k kept
  std::shared_ptr<std::unordered_map<frame_id_t, std::list<size_t>>> history_map_;

  std::shared_ptr<std::unordered_map<frame_id_t, bool>> evictable_map_;
  std::shared_ptr<std::unordered_map<frame_id_t, size_t>> count_map_;
};

}  // namespace bustub

lru_k_replacer.cpp
The implementation code has been removed and is not shown here.

4 Tests:

Again, I pulled the online test cases down and ran them locally.

The test cases are as follows:

/**
 * lru_k_replacer_test.cpp
 */

#include <algorithm>
#include <cstdio>
#include <memory>
#include <random>
#include <set>
#include <thread>  // NOLINT
#include <vector>

#include "buffer/lru_k_replacer.h"
#include "gtest/gtest.h"

namespace bustub {

TEST(LRUKReplacerTest, SampleTest) {
  LRUKReplacer lru_replacer(7, 2);

  // Scenario: add six elements to the replacer. We have [1,2,3,4,5]. Frame 6 is non-evictable.
  lru_replacer.RecordAccess(1);
  lru_replacer.RecordAccess(2);
  lru_replacer.RecordAccess(3);
  lru_replacer.RecordAccess(4);
  lru_replacer.RecordAccess(5);
  lru_replacer.RecordAccess(6);
  lru_replacer.SetEvictable(1, true);
  lru_replacer.SetEvictable(2, true);
  lru_replacer.SetEvictable(3, true);
  lru_replacer.SetEvictable(4, true);
  lru_replacer.SetEvictable(5, true);
  lru_replacer.SetEvictable(6, false);
  ASSERT_EQ(5, lru_replacer.Size());

  // Scenario: Insert access history for frame 1. Now frame 1 has two access histories.
  // All other frames have max backward k-dist. The order of eviction is [2,3,4,5,1].
  lru_replacer.RecordAccess(1);

  // Scenario: Evict three pages from the replacer. Elements with max k-distance should be popped
  // first based on LRU.
  int value;
  lru_replacer.Evict(&value);
  ASSERT_EQ(2, value);
  lru_replacer.Evict(&value);
  ASSERT_EQ(3, value);
  lru_replacer.Evict(&value);
  ASSERT_EQ(4, value);
  ASSERT_EQ(2, lru_replacer.Size());

  // Scenario: Now replacer has frames [5,1].
  // Insert new frames 3, 4, and update access history for 5. We should end with [3,1,5,4]
  lru_replacer.RecordAccess(3);
  lru_replacer.RecordAccess(4);
  lru_replacer.RecordAccess(5);
  lru_replacer.RecordAccess(4);
  lru_replacer.SetEvictable(3, true);
  lru_replacer.SetEvictable(4, true);
  ASSERT_EQ(4, lru_replacer.Size());

  // Scenario: continue looking for victims. We expect 3 to be evicted next.
  lru_replacer.Evict(&value);
  ASSERT_EQ(3, value);
  ASSERT_EQ(3, lru_replacer.Size());

  // Set 6 to be evictable. 6 Should be evicted next since it has max backward k-dist.
  lru_replacer.SetEvictable(6, true);
  ASSERT_EQ(4, lru_replacer.Size());
  lru_replacer.Evict(&value);
  ASSERT_EQ(6, value);
  ASSERT_EQ(3, lru_replacer.Size());

  // Now we have [1,5,4]. Continue looking for victims.
  lru_replacer.SetEvictable(1, false);
  ASSERT_EQ(2, lru_replacer.Size());
  ASSERT_EQ(true, lru_replacer.Evict(&value));
  ASSERT_EQ(5, value);
  ASSERT_EQ(1, lru_replacer.Size());

  // Update access history for 1. Now we have [4,1]. Next victim is 4.
  lru_replacer.RecordAccess(1);
  lru_replacer.RecordAccess(1);
  lru_replacer.SetEvictable(1, true);
  ASSERT_EQ(2, lru_replacer.Size());
  ASSERT_EQ(true, lru_replacer.Evict(&value));
  ASSERT_EQ(value, 4);

  ASSERT_EQ(1, lru_replacer.Size());
  lru_replacer.Evict(&value);
  ASSERT_EQ(value, 1);
  ASSERT_EQ(0, lru_replacer.Size());

  // These operations should not modify size
  ASSERT_EQ(false, lru_replacer.Evict(&value));
  ASSERT_EQ(0, lru_replacer.Size());
  lru_replacer.Remove(1);
  ASSERT_EQ(0, lru_replacer.Size());
}

TEST(LRUKReplacerTest, Evict) {
  {
    // Empty and try removing
    LRUKReplacer lru_replacer(10, 2);
    int result;
    auto success = lru_replacer.Evict(&result);
    ASSERT_EQ(success, false) << "Check your return value behavior for LRUKReplacer::Evict";
  }

  {
    // Can only evict element if evictable=true
    int result;
    LRUKReplacer lru_replacer(10, 2);
    lru_replacer.RecordAccess(2);
    lru_replacer.SetEvictable(2, false);
    ASSERT_EQ(false, lru_replacer.Evict(&result)) << "Check your return value behavior for LRUKReplacer::Evict";
    lru_replacer.SetEvictable(2, true);
    ASSERT_EQ(true, lru_replacer.Evict(&result)) << "Check your return value behavior for LRUKReplacer::Evict";
    ASSERT_EQ(2, result) << "Check your return value behavior for LRUKReplacer::Evict";
  }

  {
    // Elements with less than k history should have max backward k-dist and get evicted first based on LRU
    LRUKReplacer lru_replacer(10, 3);
    int result;
    // 1 has three access histories, where as 2 has two access histories
    lru_replacer.RecordAccess(1);
    lru_replacer.RecordAccess(1);
    lru_replacer.RecordAccess(2);
    lru_replacer.RecordAccess(1);
    lru_replacer.SetEvictable(2, true);
    lru_replacer.SetEvictable(1, true);

    ASSERT_EQ(true, lru_replacer.Evict(&result)) << "Check your return value behavior for LRUKReplacer::Evict";
    ASSERT_EQ(2, result) << "Check your return value behavior for LRUKReplacer::Evict";
    ASSERT_EQ(true, lru_replacer.Evict(&result)) << "Check your return value behavior for LRUKReplacer::Evict";
    ASSERT_EQ(1, result) << "Check your return value behavior for LRUKReplacer::Evict";
  }

  {
    // Select element with largest backward k-dist to evict
    // Evicted page should not maintain previous history
    LRUKReplacer lru_replacer(10, 3);
    int result;
    lru_replacer.RecordAccess(1);
    lru_replacer.RecordAccess(2);
    lru_replacer.RecordAccess(3);
    lru_replacer.RecordAccess(3);
    lru_replacer.RecordAccess(3);
    lru_replacer.RecordAccess(2);
    lru_replacer.RecordAccess(2);
    lru_replacer.RecordAccess(1);
    lru_replacer.RecordAccess(1);
    lru_replacer.RecordAccess(3);
    lru_replacer.RecordAccess(2);
    lru_replacer.RecordAccess(1);
    lru_replacer.SetEvictable(2, true);
    lru_replacer.SetEvictable(1, true);
    lru_replacer.SetEvictable(3, true);

    // Should evict in this order
    ASSERT_EQ(true, lru_replacer.Evict(&result)) << "Check your return value behavior for LRUKReplacer::Evict";
    ASSERT_EQ(3, result) << "Check your return value behavior for LRUKReplacer::Evict";
    ASSERT_EQ(true, lru_replacer.Evict(&result)) << "Check your return value behavior for LRUKReplacer::Evict";
    ASSERT_EQ(2, result) << "Check your return value behavior for LRUKReplacer::Evict";
    ASSERT_EQ(true, lru_replacer.Evict(&result)) << "Check your return value behavior for LRUKReplacer::Evict";
    ASSERT_EQ(1, result) << "Check your return value behavior for LRUKReplacer::Evict";
  }

  {
    // Evicted page should not maintain previous history
    LRUKReplacer lru_replacer(10, 3);
    int result;
    lru_replacer.RecordAccess(2);
    lru_replacer.RecordAccess(2);
    lru_replacer.RecordAccess(2);
    lru_replacer.RecordAccess(1);
    lru_replacer.RecordAccess(1);
    lru_replacer.SetEvictable(2, true);
    lru_replacer.SetEvictable(1, true);

    // At this point, page 1 should be evicted since it has higher backward k distance
    ASSERT_EQ(true, lru_replacer.Evict(&result)) << "Check your return value behavior for LRUKReplacer::Evict";
    ASSERT_EQ(1, result) << "Check your return value behavior for LRUKReplacer::Evict";

    lru_replacer.RecordAccess(1);
    lru_replacer.SetEvictable(1, true);

    // 1 should still be evicted since it has max backward k distance
    ASSERT_EQ(true, lru_replacer.Evict(&result)) << "Check your return value behavior for LRUKReplacer::Evict";
    ASSERT_EQ(1, result) << "Check your return value behavior for LRUKReplacer::Evict";
  }

  {
    LRUKReplacer lru_replacer(10, 3);
    int result;
    lru_replacer.RecordAccess(1);  // ts=0
    lru_replacer.RecordAccess(2);  // ts=1
    lru_replacer.RecordAccess(3);  // ts=2
    lru_replacer.RecordAccess(4);  // ts=3
    lru_replacer.RecordAccess(1);  // ts=4
    lru_replacer.RecordAccess(2);  // ts=5
    lru_replacer.RecordAccess(3);  // ts=6
    lru_replacer.RecordAccess(1);  // ts=7
    lru_replacer.RecordAccess(2);  // ts=8
    lru_replacer.SetEvictable(1, true);
    lru_replacer.SetEvictable(2, true);
    lru_replacer.SetEvictable(3, true);
    lru_replacer.SetEvictable(4, true);

    // Max backward k distance follow lru
    ASSERT_EQ(true, lru_replacer.Evict(&result)) << "Check your return value behavior for LRUKReplacer::Evict";
    ASSERT_EQ(3, result) << "Check your return value behavior for LRUKReplacer::Evict";
    lru_replacer.RecordAccess(4);  // ts=9
    lru_replacer.RecordAccess(4);  // ts=10

    // Now 1 has largest backward k distance, followed by 2 and 4
    ASSERT_EQ(true, lru_replacer.Evict(&result)) << "Check your return value behavior for LRUKReplacer::Evict";
    ASSERT_EQ(1, result) << "Check your return value behavior for LRUKReplacer::Evict";
    ASSERT_EQ(true, lru_replacer.Evict(&result)) << "Check your return value behavior for LRUKReplacer::Evict";
    ASSERT_EQ(2, result) << "Check your return value behavior for LRUKReplacer::Evict";
    ASSERT_EQ(true, lru_replacer.Evict(&result)) << "Check your return value behavior for LRUKReplacer::Evict";
    ASSERT_EQ(4, result) << "Check your return value behavior for LRUKReplacer::Evict";
  }

  {
    // New unused page with max backward k-dist should be evicted first
    LRUKReplacer lru_replacer(10, 2);
    int result;
    lru_replacer.RecordAccess(1);  // ts=0
    lru_replacer.RecordAccess(2);  // ts=1
    lru_replacer.RecordAccess(3);  // ts=2
    lru_replacer.RecordAccess(4);  // ts=3
    lru_replacer.RecordAccess(1);  // ts=4
    lru_replacer.RecordAccess(2);  // ts=5
    lru_replacer.RecordAccess(3);  // ts=6
    lru_replacer.RecordAccess(4);  // ts=7

    lru_replacer.SetEvictable(2, true);
    lru_replacer.SetEvictable(1, true);

    ASSERT_EQ(true, lru_replacer.Evict(&result)) << "Check your return value behavior for LRUKReplacer::Evict";
    ASSERT_EQ(1, result) << "Check your return value behavior for LRUKReplacer::Evict";

    lru_replacer.RecordAccess(5);  // ts=9
    lru_replacer.SetEvictable(5, true);
    ASSERT_EQ(true, lru_replacer.Evict(&result)) << "Check your return value behavior for LRUKReplacer::Evict";
    ASSERT_EQ(5, result) << "Check your return value behavior for LRUKReplacer::Evict";
  }

  {
    // 1/4 page has one access history, 1/4 has two accesses, 1/4 has three, and 1/4 has four
    LRUKReplacer lru_replacer(1000, 3);
    int result;
    for (int j = 0; j < 4; ++j) {
      for (int i = j * 250; i < 1000; ++i) {
        lru_replacer.RecordAccess(i);
        lru_replacer.SetEvictable(i, true);
      }
    }
    ASSERT_EQ(1000, lru_replacer.Size());

    // Set second 1/4 to be non-evictable
    for (int i = 250; i < 500; ++i) {
      lru_replacer.SetEvictable(i, false);
    }
    ASSERT_EQ(750, lru_replacer.Size());

    // Remove first 100 elements
    for (int i = 0; i < 100; ++i) {
      lru_replacer.Remove(i);
    }
    ASSERT_EQ(650, lru_replacer.Size());

    // Try to evict some elements
    for (int i = 100; i < 600; ++i) {
      if (i < 250 || i >= 500) {
        ASSERT_EQ(true, lru_replacer.Evict(&result)) << "Check your return value behavior for LRUKReplacer::Evict";
        ASSERT_EQ(i, result) << "Check your return value behavior for LRUKReplacer::Evict";
      }
    }
    ASSERT_EQ(400, lru_replacer.Size());

    // Add second 1/4 elements back and modify access history for the last 150 elements of third 1/4 elements.
    for (int i = 250; i < 500; ++i) {
      lru_replacer.SetEvictable(i, true);
    }
    ASSERT_EQ(650, lru_replacer.Size());
    for (int i = 600; i < 750; ++i) {
      lru_replacer.RecordAccess(i);
      lru_replacer.RecordAccess(i);
    }
    ASSERT_EQ(650, lru_replacer.Size());

    // We expect the following eviction pattern
    for (int i = 250; i < 500; ++i) {
      ASSERT_EQ(true, lru_replacer.Evict(&result)) << "Check your return value behavior for LRUKReplacer::Evict";
      ASSERT_EQ(i, result) << "Check your return value behavior for LRUKReplacer::Evict";
    }
    ASSERT_EQ(400, lru_replacer.Size());
    for (int i = 750; i < 1000; ++i) {
      ASSERT_EQ(true, lru_replacer.Evict(&result)) << "Check your return value behavior for LRUKReplacer::Evict";
      ASSERT_EQ(i, result) << "Check your return value behavior for LRUKReplacer::Evict";
    }
    ASSERT_EQ(150, lru_replacer.Size());
    for (int i = 600; i < 750; ++i) {
      ASSERT_EQ(true, lru_replacer.Evict(&result)) << "Check your return value behavior for LRUKReplacer::Evict";
      ASSERT_EQ(i, result) << "Check your return value behavior for LRUKReplacer::Evict";
    }
    ASSERT_EQ(0, lru_replacer.Size());
  }
}

TEST(LRUKReplacerTest, Size) {
  {
    // Size is increased/decreased if SetEvictable's argument is different from node state
    LRUKReplacer lru_replacer(10, 2);
    lru_replacer.RecordAccess(1);
    lru_replacer.SetEvictable(1, true);
    ASSERT_EQ(1, lru_replacer.Size()) << "Check your return value for LRUKReplacer::Size";
    lru_replacer.SetEvictable(1, true);
    ASSERT_EQ(1, lru_replacer.Size()) << "Check your return value for LRUKReplacer::Size";
    lru_replacer.SetEvictable(1, false);
    ASSERT_EQ(0, lru_replacer.Size()) << "Check your return value for LRUKReplacer::Size";
    lru_replacer.SetEvictable(1, false);
    ASSERT_EQ(0, lru_replacer.Size()) << "Check your return value for LRUKReplacer::Size";
  }

  {
    // Insert new history. Calling SetEvictable = false should not modify Size.
    // Calling SetEvictable = true should increase Size.
    // Size should only be called when SetEvictable is called for every inserted node.
    LRUKReplacer lru_replacer(10, 2);
    lru_replacer.RecordAccess(1);
    lru_replacer.RecordAccess(2);
    lru_replacer.RecordAccess(3);
    lru_replacer.SetEvictable(1, false);
    lru_replacer.SetEvictable(2, false);
    lru_replacer.SetEvictable(3, false);
    ASSERT_EQ(0, lru_replacer.Size()) << "Check your return value for LRUKReplacer::Size";

    LRUKReplacer lru_replacer2(10, 2);
    lru_replacer2.RecordAccess(1);
    lru_replacer2.RecordAccess(2);
    lru_replacer2.RecordAccess(3);
    lru_replacer2.SetEvictable(1, true);
    lru_replacer2.SetEvictable(2, true);
    lru_replacer2.SetEvictable(3, true);
    ASSERT_EQ(3, lru_replacer2.Size()) << "Check your return value for LRUKReplacer::Size";
  }

  // Size depends on how many nodes have evictable=true
  {
    LRUKReplacer lru_replacer(10, 2);
    lru_replacer.RecordAccess(1);
    lru_replacer.RecordAccess(2);
    lru_replacer.RecordAccess(3);
    lru_replacer.RecordAccess(4);
    lru_replacer.RecordAccess(1);
    lru_replacer.RecordAccess(2);
    lru_replacer.RecordAccess(3);
    lru_replacer.RecordAccess(4);
    lru_replacer.RecordAccess(1);
    lru_replacer.RecordAccess(2);
    lru_replacer.RecordAccess(3);
    lru_replacer.RecordAccess(4);
    lru_replacer.RecordAccess(1);
    lru_replacer.RecordAccess(2);
    lru_replacer.RecordAccess(3);
    lru_replacer.RecordAccess(4);
    lru_replacer.SetEvictable(1, false);
    lru_replacer.SetEvictable(2, false);
    lru_replacer.SetEvictable(3, false);
    lru_replacer.SetEvictable(4, false);
    ASSERT_EQ(0, lru_replacer.Size()) << "Check your return value for LRUKReplacer::Size";
    lru_replacer.RecordAccess(1);
    lru_replacer.RecordAccess(2);
    lru_replacer.RecordAccess(3);
    lru_replacer.RecordAccess(4);
    lru_replacer.RecordAccess(1);
    lru_replacer.RecordAccess(2);
    lru_replacer.RecordAccess(3);
    lru_replacer.RecordAccess(4);
    lru_replacer.SetEvictable(1, true);
    lru_replacer.SetEvictable(2, true);
    lru_replacer.SetEvictable(1, true);
    lru_replacer.SetEvictable(2, true);
    ASSERT_EQ(2, lru_replacer.Size()) << "Check your return value for LRUKReplacer::Size";
    // Evicting a page should decrement Size
    lru_replacer.RecordAccess(4);
  }

  {
    // Remove a page to decrement its size
    LRUKReplacer lru_replacer(10, 2);
    lru_replacer.RecordAccess(1);
    lru_replacer.SetEvictable(1, true);
    lru_replacer.RecordAccess(2);
    lru_replacer.SetEvictable(2, true);
    lru_replacer.RecordAccess(3);
    lru_replacer.SetEvictable(3, true);
    ASSERT_EQ(3, lru_replacer.Size()) << "Check your return value for LRUKReplacer::Size";
    lru_replacer.Remove(1);
    ASSERT_EQ(2, lru_replacer.Size()) << "Check your return value for LRUKReplacer::Size";
    lru_replacer.Remove(2);
    ASSERT_EQ(1, lru_replacer.Size()) << "Check your return value for LRUKReplacer::Size";
  }

  {
    // Victiming a page should decrement its size
    LRUKReplacer lru_replacer(10, 3);
    int result;
    // 1 has three access histories, where as 2 only has two access histories
    lru_replacer.RecordAccess(1);
    lru_replacer.RecordAccess(1);
    lru_replacer.RecordAccess(2);
    lru_replacer.RecordAccess(1);
    lru_replacer.SetEvictable(2, true);
    lru_replacer.SetEvictable(1, true);
    ASSERT_EQ(2, lru_replacer.Size()) << "Check your return value for LRUKReplacer::Size";

    ASSERT_EQ(true, lru_replacer.Evict(&result));
    ASSERT_EQ(2, result);
    ASSERT_EQ(1, lru_replacer.Size()) << "Check your return value for LRUKReplacer::Size";
    ASSERT_EQ(true, lru_replacer.Evict(&result));
    ASSERT_EQ(1, result);
    ASSERT_EQ(0, lru_replacer.Size()) << "Check your return value for LRUKReplacer::Size";
  }

  {
    LRUKReplacer lru_replacer(10, 2);
    lru_replacer.RecordAccess(1);
    lru_replacer.SetEvictable(1, true);
    lru_replacer.RecordAccess(2);
    lru_replacer.SetEvictable(2, true);
    lru_replacer.RecordAccess(3);
    lru_replacer.SetEvictable(3, true);
    ASSERT_EQ(3, lru_replacer.Size());
    lru_replacer.Remove(1);
    ASSERT_EQ(2, lru_replacer.Size());

    lru_replacer.SetEvictable(1, true);
    lru_replacer.SetEvictable(2, true);
    lru_replacer.SetEvictable(3, true);
    lru_replacer.Remove(2);
    ASSERT_EQ(1, lru_replacer.Size());

    // Delete non existent page should do nothing
    lru_replacer.Remove(1);
    lru_replacer.Remove(4);
    ASSERT_EQ(1, lru_replacer.Size());
  }
}

TEST(LRUKReplacerTest, ConcurrencyTest) {  // NOLINT
  // 1/4 page has one access history, 1/4 has two accesses, 1/4 has three, and 1/4 has four
  LRUKReplacer lru_replacer(1000, 3);
  std::vector<std::thread> threads;

  auto record_access_task = [&](int i, bool if_set_evict, bool evictable) {
    lru_replacer.RecordAccess(i);
    if (if_set_evict) {
      lru_replacer.SetEvictable(i, evictable);
    }
  };
  auto remove_task = [&](int i) { lru_replacer.Remove(i); };
  auto set_evictable_task = [&](int i, bool evictable) { lru_replacer.SetEvictable(i, evictable); };

  auto record_access_thread = [&](int from_i, int to_i, bool if_set_evict, bool evictable) {
    for (auto i = from_i; i < to_i; i++) {
      record_access_task(i, if_set_evict, evictable);
    }
  };

  auto remove_task_thread = [&](int from_i, int to_i) {
    for (auto i = from_i; i < to_i; i++) {
      remove_task(i);
    }
  };

  auto set_evictable_thread = [&](int from_i, int to_i, bool evictable) {
    for (auto i = from_i; i < to_i; i++) {
      set_evictable_task(i, evictable);
    }
  };

  // Record first 1000 accesses. Set all frames to be evictable.
  for (int i = 0; i < 1000; i += 100) {
    threads.emplace_back(std::thread{record_access_thread, i, i + 100, true, true});
  }
  for (auto &thread : threads) {
    thread.join();
  }
  threads.clear();
  ASSERT_EQ(1000, lru_replacer.Size());

  // Remove frame id 250-500, set some keys to be non-evictable, and insert accesses concurrently
  threads.emplace_back(std::thread{record_access_thread, 250, 1000, true, true});
  threads.emplace_back(std::thread{record_access_thread, 500, 1000, true, true});
  threads.emplace_back(std::thread{record_access_thread, 750, 1000, true, true});
  threads.emplace_back(std::thread{remove_task_thread, 250, 400});
  threads.emplace_back(std::thread{remove_task_thread, 400, 500});
  threads.emplace_back(std::thread{set_evictable_thread, 0, 150, false});
  threads.emplace_back(std::thread{set_evictable_thread, 150, 250, false});

  for (auto &thread : threads) {
    thread.join();
  }
  threads.clear();

  // Call remove again to ensure all items between 250, 500 have been removed.
  threads.emplace_back(std::thread{remove_task_thread, 250, 400});
  threads.emplace_back(std::thread{remove_task_thread, 400, 500});

  for (auto &thread : threads) {
    thread.join();
  }
  threads.clear();

  ASSERT_EQ(500, lru_replacer.Size());

  std::mutex mutex;
  std::vector<int> evicted_elements;

  auto evict_task = [&](bool success) -> int {
    int evicted_value;
    BUSTUB_ASSERT(success == lru_replacer.Evict(&evicted_value), "evict not successful!");
    return evicted_value;
  };

  auto evict_task_thread = [&](int from_i, int to_i, bool success) {
    std::vector<int> local_evicted_elements;

    for (auto i = from_i; i < to_i; i++) {
      local_evicted_elements.push_back(evict_task(success));
    }
    {
      std::scoped_lock lock(mutex);
      for (const auto &evicted_value : local_evicted_elements) {
        evicted_elements.push_back(evicted_value);
      }
    }
  };

  // Remove elements, append history, and evict concurrently
  // Some of these frames are non-evictable and should not be removed.
  threads.emplace_back(std::thread{remove_task_thread, 250, 400});
  threads.emplace_back(std::thread{remove_task_thread, 400, 500});
  threads.emplace_back(std::thread{record_access_thread, 500, 700, false, true});
  threads.emplace_back(std::thread{record_access_thread, 700, 1000, false, true});
  threads.emplace_back(std::thread{evict_task_thread, 500, 600, true});
  threads.emplace_back(std::thread{evict_task_thread, 600, 800, true});
  threads.emplace_back(std::thread{evict_task_thread, 800, 1000, true});

  for (auto &thread : threads) {
    thread.join();
  }
  threads.clear();
  ASSERT_EQ(0, lru_replacer.Size());
  ASSERT_EQ(evicted_elements.size(), 500);

  std::sort(evicted_elements.begin(), evicted_elements.end());
  for (int i = 500; i < 1000; ++i) {
    ASSERT_EQ(i, evicted_elements[i - 500]);
  }
}
}  // namespace bustub

Test results:

三 buffer_pool_manager_instance

1 Understanding

The difficulty is mainly in understanding how the pieces fit together.

The figure below shows three ways to go from a physical page_id: first obtain the frame_id, then obtain the pointer to the concrete Page in memory.

The buffer pool essentially implements the mapping from an on-disk page_id to a frame in the in-memory pool:

  • Given an on-disk page_id, you first need to find its slot in the buffer pool (its frame_id) -> via the page table (which avoids an inefficient linear scan of the array); see the lookup sketch below
 /** Page table for keeping track of buffer pool pages. */
  ExtendibleHashTable<page_id_t, frame_id_t> *page_table_; // the extendible hash table from the first task implements this mapping
  • When the pool is full, you need to pick a frame and evict the page it currently holds
/** Replacer to find unpinned pages for replacement. */
  LRUKReplacer *replacer_; // tracks frame_ids
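
A minimal sketch of this lookup path (my own illustration, assuming it runs inside a BufferPoolManagerInstance member function with latch_ held, and that page_id is the page being requested):

frame_id_t frame_id;
if (page_table_->Find(page_id, frame_id)) {
  // Hit: the page is already resident; frame_id is its index into the pages_ array.
  Page *page = &pages_[frame_id];
  // ... use page->GetData(), bump its pin count, etc. ...
} else {
  // Miss: take a frame from free_list_ if one is available,
  // otherwise ask replacer_->Evict(&frame_id) for a victim frame.
}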

1 frame vs page

frame: just a term; it does not exist as a separate object. Think of it as the slot (array index) in the buffer pool where a physical page is stored.

The buffer pool is, at its core, nothing more than an in-memory array of pages:

/** Array of buffer pool pages. */
  Page *pages_; // frame_id is simply an index into this array
  
  // Evict, implemented in lru_k_replacer:
  //   auto LRUKReplacer::Evict(frame_id_t *frame_id) -> bool
  // It does not actually evict anything by itself; it only yields a frame_id whose page may be
  // evicted, i.e. an index into pages_. Through pages_[frame_id] you reach the concrete page and
  // update it, which is the real eviction (write the page contents back to disk, then reset them).
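
A minimal sketch of that "real" eviction step (my own illustration, not code from the project; it assumes latch_ is held and that fid is a frame_id just returned by replacer_->Evict(&fid)):

Page &victim = pages_[fid];
if (victim.IsDirty()) {
  // The page in this frame differs from its on-disk copy, so write it back first.
  disk_manager_->WritePage(victim.GetPageId(), victim.GetData());
}
// Drop the old page_id -> frame_id mapping; the frame can now be reused for another page.
page_table_->Remove(victim.GetPageId());
// Finally, reset the frame's data and metadata before installing the new page.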

2 Page class member variables:

private:
  /** Zeroes out the data that is held within the page. */
  inline void ResetMemory() { memset(data_, OFFSET_PAGE_START, BUSTUB_PAGE_SIZE); }

  /** The actual data that is stored within a page. */
  char data_[BUSTUB_PAGE_SIZE]{};
  /** The ID of this page. */
  page_id_t page_id_ = INVALID_PAGE_ID;
  /** The pin count of this page. */
  int pin_count_ = 0;
  /** True if the page is dirty, i.e. it is different from its corresponding page on disk. */
  bool is_dirty_ = false;
  /** Page latch. */
  ReaderWriterLatch rwlatch_;

3 buffer_pool_manager class members:

Member variables:

/** Number of pages in the buffer pool. */
  const size_t pool_size_;
  /** The next page id to be allocated  */
  std::atomic<page_id_t> next_page_id_ = 0;
  /** Bucket size for the extendible hash table */
  const size_t bucket_size_ = 4;

  /** Array of buffer pool pages. */
  Page *pages_;  // the in-memory page array (frame_id indexes into it)
  /** Pointer to the disk manager. */
  DiskManager *disk_manager_ __attribute__((__unused__));
  /** Pointer to the log manager. Please ignore this for P1. */
  LogManager *log_manager_ __attribute__((__unused__));
  /** Page table for keeping track of buffer pool pages. */
  ExtendibleHashTable<page_id_t, frame_id_t> *page_table_;  // page table: page_id -> frame_id
  /** Replacer to find unpinned pages for replacement. */
  LRUKReplacer *replacer_; // tracks frames for eviction
  /** List of free frames that don't have any pages on them. */
  std::list<frame_id_t> free_list_;  // holds the free frame_ids
  /** This latch protects shared data structures. We recommend updating this comment to describe what it protects. */
  std::mutex latch_;

Member functions:

auto BufferPoolManagerInstance::AllocatePage() -> page_id_t { return next_page_id_++; }
// Used in NewPgImp(): after an evictable frame_id (an index into pages_) has been obtained, the old
// page in that frame is discarded and the frame is reinitialized for a new page.
// This function supplies the page_id for that new page.
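
Putting these pieces together, here is a minimal sketch of how NewPgImp could use AllocatePage (my own illustration of the flow, not the deleted implementation; it assumes the GetFrameId helper declared in the header below hands back a usable frame, with any dirty victim already flushed, and that Page lets the buffer pool manager reset its private metadata, as BusTub's Page class permits):

auto BufferPoolManagerInstance::NewPgImp(page_id_t *page_id) -> Page * {
  std::scoped_lock lock(latch_);
  frame_id_t frame_id;
  if (!GetFrameId(&frame_id)) {      // no free frame and nothing evictable -> give up
    return nullptr;
  }
  *page_id = AllocatePage();         // id of the page that will live in this frame
  Page *page = &pages_[frame_id];
  page->page_id_ = *page_id;         // reset the frame's metadata for the new page
  page->pin_count_ = 1;
  page->is_dirty_ = false;
  page->ResetMemory();
  page_table_->Insert(*page_id, frame_id);   // register the new mapping
  replacer_->RecordAccess(frame_id);         // record the access for LRU-K
  replacer_->SetEvictable(frame_id, false);  // "pin" the frame until it is unpinned
  return page;
}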

2 Code

buffer_pool_manager_instance.h

//===----------------------------------------------------------------------===//
//
//                         BusTub
//
// buffer_pool_manager_instance.h
//
// Identification: src/include/buffer/buffer_pool_manager.h
//
// Copyright (c) 2015-2021, Carnegie Mellon University Database Group
//
//===----------------------------------------------------------------------===//

#pragma once

#include <list>
#include <mutex>  // NOLINT
#include <unordered_map>

#include "buffer/buffer_pool_manager.h"
#include "buffer/lru_k_replacer.h"
#include "common/config.h"
#include "container/hash/extendible_hash_table.h"
#include "recovery/log_manager.h"
#include "storage/disk/disk_manager.h"
#include "storage/page/page.h"

namespace bustub {

/**
 * BufferPoolManager reads disk pages to and from its internal buffer pool.
 */
class BufferPoolManagerInstance : public BufferPoolManager {
 public:
  /**
   * @brief Creates a new BufferPoolManagerInstance.
   * @param pool_size the size of the buffer pool
   * @param disk_manager the disk manager
   * @param replacer_k the lookback constant k for the LRU-K replacer
   * @param log_manager the log manager (for testing only: nullptr = disable logging). Please ignore this for P1.
   */
  BufferPoolManagerInstance(size_t pool_size, DiskManager *disk_manager, size_t replacer_k = LRUK_REPLACER_K,
                            LogManager *log_manager = nullptr);

  /**
   * @brief Destroy an existing BufferPoolManagerInstance.
   */
  ~BufferPoolManagerInstance() override;

  /** @brief Return the size (number of frames) of the buffer pool. */
  auto GetPoolSize() -> size_t override { return pool_size_; }

  /** @brief Return the pointer to all the pages in the buffer pool. */
  auto GetPages() -> Page * { return pages_; }

 protected:
  auto GetFrameId(frame_id_t *frame_id) -> bool;
  /**
   * TODO(P1): Add implementation
   *
   * @brief Create a new page in the buffer pool. Set page_id to the new page's id, or nullptr if all frames
   * are currently in use and not evictable (in another word, pinned).
   *
   * You should pick the replacement frame from either the free list or the replacer (always find from the free list
   * first), and then call the AllocatePage() method to get a new page id. If the replacement frame has a dirty page,
   * you should write it back to the disk first. You also need to reset the memory and metadata for the new page.
   *
   * Remember to "Pin" the frame by calling replacer.SetEvictable(frame_id, false)
   * so that the replacer wouldn't evict the frame before the buffer pool manager "Unpin"s it.
   * Also, remember to record the access history of the frame in the replacer for the lru-k algorithm to work.
   *
   * @param[out] page_id id of created page
   * @return nullptr if no new pages could be created, otherwise pointer to new page
   */
  auto NewPgImp(page_id_t *page_id) -> Page * override;

  /**
   * TODO(P1): Add implementation
   *
   * @brief Fetch the requested page from the buffer pool. Return nullptr if page_id needs to be fetched from the disk
   * but all frames are currently in use and not evictable (in another word, pinned).
   *
   * First search for page_id in the buffer pool. If not found, pick a replacement frame from either the free list or
   * the replacer (always find from the free list first), read the page from disk by calling disk_manager_->ReadPage(),
   * and replace the old page in the frame. Similar to NewPgImp(), if the old page is dirty, you need to write it back
   * to disk and update the metadata of the new page
   *
   * In addition, remember to disable eviction and record the access history of the frame like you did for NewPgImp().
   *
   * @param page_id id of page to be fetched
   * @return nullptr if page_id cannot be fetched, otherwise pointer to the requested page
   */
  auto FetchPgImp(page_id_t page_id) -> Page * override;

  /**
   * TODO(P1): Add implementation
   *
   * @brief Unpin the target page from the buffer pool. If page_id is not in the buffer pool or its pin count is already
   * 0, return false.
   *
   * Decrement the pin count of a page. If the pin count reaches 0, the frame should be evictable by the replacer.
   * Also, set the dirty flag on the page to indicate if the page was modified.
   *
   * @param page_id id of page to be unpinned
   * @param is_dirty true if the page should be marked as dirty, false otherwise
   * @return false if the page is not in the page table or its pin count is <= 0 before this call, true otherwise
   */
  auto UnpinPgImp(page_id_t page_id, bool is_dirty) -> bool override;

  /**
   * TODO(P1): Add implementation
   *
   * @brief Flush the target page to disk.
   *
   * Use the DiskManager::WritePage() method to flush a page to disk, REGARDLESS of the dirty flag.
   * Unset the dirty flag of the page after flushing.
   *
   * @param page_id id of page to be flushed, cannot be INVALID_PAGE_ID
   * @return false if the page could not be found in the page table, true otherwise
   */
  auto FlushPgImp(page_id_t page_id) -> bool override;

  /**
   * TODO(P1): Add implementation
   *
   * @brief Flush all the pages in the buffer pool to disk.
   */
  void FlushAllPgsImp() override;

  /**
   * TODO(P1): Add implementation
   *
   * @brief Delete a page from the buffer pool. If page_id is not in the buffer pool, do nothing and return true. If the
   * page is pinned and cannot be deleted, return false immediately.
   *
   * After deleting the page from the page table, stop tracking the frame in the replacer and add the frame
   * back to the free list. Also, reset the page's memory and metadata. Finally, you should call DeallocatePage() to
   * imitate freeing the page on the disk.
   *
   * @param page_id id of page to be deleted
   * @return false if the page exists but could not be deleted, true if the page didn't exist or deletion succeeded
   */
  auto DeletePgImp(page_id_t page_id) -> bool override;

  /** Number of pages in the buffer pool. */
  const size_t pool_size_;
  /** The next page id to be allocated  */
  std::atomic<page_id_t> next_page_id_ = 0;
  /** Bucket size for the extendible hash table */
  const size_t bucket_size_ = 4;

  /** Array of buffer pool pages. */
  Page *pages_;
  /** Pointer to the disk manager. */
  DiskManager *disk_manager_ __attribute__((__unused__));
  /** Pointer to the log manager. Please ignore this for P1. */
  LogManager *log_manager_ __attribute__((__unused__));
  /** Page table for keeping track of buffer pool pages. */
  ExtendibleHashTable<page_id_t, frame_id_t> *page_table_;
  /** Replacer to find unpinned pages for replacement. */
  LRUKReplacer *replacer_;
  /** List of free frames that don't have any pages on them. */
  std::list<frame_id_t> free_list_;
  /** This latch protects shared data structures. We recommend updating this comment to describe what it protects. */
  std::mutex latch_;

  /**
   * @brief Allocate a page on disk. Caller should acquire the latch before calling this function.
   * @return the id of the allocated page
   */
  auto AllocatePage() -> page_id_t;

  /**
   * @brief Deallocate a page on disk. Caller should acquire the latch before calling this function.
   * @param page_id id of the page to deallocate
   */
  void DeallocatePage(__attribute__((unused)) page_id_t page_id) {
    // This is a no-op right now without a more complex data structure to track deallocated pages
  }

  // TODO(student): You may add additional private members and helper functions
};
}  // namespace bustub

buffer_pool_manager_instance.cpp
The implementation code has been removed and is not shown here; a sketch of one of its functions follows below.
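
To keep the walkthrough concrete, below is a minimal sketch of one of the removed functions, UnpinPgImp, written directly from the header's doc comment (my own illustration, not the deleted code; it again assumes the buffer pool manager may modify Page's private pin_count_ and is_dirty_):

auto BufferPoolManagerInstance::UnpinPgImp(page_id_t page_id, bool is_dirty) -> bool {
  std::scoped_lock lock(latch_);
  frame_id_t frame_id;
  if (!page_table_->Find(page_id, frame_id)) {
    return false;                              // page is not resident in the pool
  }
  Page *page = &pages_[frame_id];
  if (page->GetPinCount() <= 0) {
    return false;                              // already fully unpinned
  }
  if (is_dirty) {
    page->is_dirty_ = true;                    // only set the flag here, never clear it
  }
  if (--page->pin_count_ == 0) {
    replacer_->SetEvictable(frame_id, true);   // nobody holds the page, frame may be evicted
  }
  return true;
}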

3 Testing

The test cases are as follows:

//===----------------------------------------------------------------------===//
//
//                         BusTub
//
// buffer_pool_manager_instance_test.cpp
//
// Identification: test/buffer/buffer_pool_manager_instance_test.cpp
//
// Copyright (c) 2015-2019, Carnegie Mellon University Database Group
//
//===----------------------------------------------------------------------===//

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <random>
#include <string>
#include <thread>  // NOLINT
#include <vector>

#include "buffer/buffer_pool_manager_instance.h"
#include "mock_buffer_pool_manager.h"  // NOLINT

namespace bustub {

#define BufferPoolManager MockBufferPoolManager

// NOLINTNEXTLINE
TEST(BufferPoolManagerInstanceTest, SampleTest) {
  const std::string db_name = "test.db";
  const size_t buffer_pool_size = 10;
  const size_t k = 5;

  auto *disk_manager = new DiskManager(db_name);
  auto *bpm = new BufferPoolManagerInstance(buffer_pool_size, disk_manager, k);

  page_id_t page_id_temp;
  auto *page0 = bpm->NewPage(&page_id_temp);

  // Scenario: The buffer pool is empty. We should be able to create a new page.
  ASSERT_NE(nullptr, page0);
  ASSERT_EQ(0, page_id_temp);

  // Scenario: Once we have a page, we should be able to read and write content.
  snprintf(page0->GetData(), BUSTUB_PAGE_SIZE, "Hello");
  ASSERT_EQ(0, strcmp(page0->GetData(), "Hello"));

  // Scenario: We should be able to create new pages until we fill up the buffer pool.
  for (size_t i = 1; i < buffer_pool_size; ++i) {
    ASSERT_NE(nullptr, bpm->NewPage(&page_id_temp));
  }

  // Scenario: Once the buffer pool is full, we should not be able to create any new pages.
  for (size_t i = buffer_pool_size; i < buffer_pool_size * 2; ++i) {
    ASSERT_EQ(nullptr, bpm->NewPage(&page_id_temp));
  }

  // Scenario: After unpinning pages {0, 1, 2, 3, 4} and pinning another 4 new pages,
  // there would still be one cache frame left for reading page 0.
  for (int i = 0; i < 5; ++i) {
    ASSERT_EQ(true, bpm->UnpinPage(i, true));
  }
  for (int i = 0; i < 4; ++i) {
    ASSERT_NE(nullptr, bpm->NewPage(&page_id_temp));
  }

  // Scenario: We should be able to fetch the data we wrote a while ago.
  page0 = bpm->FetchPage(0);
  ASSERT_EQ(0, strcmp(page0->GetData(), "Hello"));
  ASSERT_EQ(true, bpm->UnpinPage(0, true));
  // NewPage again, and now all buffers are pinned. Page 0 would be failed to fetch.
  ASSERT_NE(nullptr, bpm->NewPage(&page_id_temp));
  ASSERT_EQ(nullptr, bpm->FetchPage(0));

  // Shutdown the disk manager and remove the temporary file we created.
  disk_manager->ShutDown();
  remove("test.db");

  delete bpm;
  delete disk_manager;
}

TEST(BufferPoolManagerInstanceTest, BinaryDataTest) {  // NOLINT
  const std::string db_name = "test.db";
  const size_t buffer_pool_size = 10;
  const size_t k = 5;

  auto *disk_manager = new DiskManager(db_name);
  auto *bpm = new BufferPoolManagerInstance(buffer_pool_size, disk_manager, k);

  page_id_t page_id_temp;
  auto *page0 = bpm->NewPage(&page_id_temp);

  // Scenario: The buffer pool is empty. We should be able to create a new page.
  ASSERT_NE(nullptr, page0);
  ASSERT_EQ(0, page_id_temp);
  constexpr int PAGE_SIZE = 4096;
  char random_binary_data[PAGE_SIZE];
  unsigned int seed = 15645;
  for (char &i : random_binary_data) {
    i = static_cast<char>(rand_r(&seed) % 256);
  }

  random_binary_data[PAGE_SIZE / 2] = '\0';
  random_binary_data[PAGE_SIZE - 1] = '\0';

  // Scenario: Once we have a page, we should be able to read and write content.
  std::strncpy(page0->GetData(), random_binary_data, PAGE_SIZE);
  ASSERT_EQ(0, std::strcmp(page0->GetData(), random_binary_data));

  // Scenario: We should be able to create new pages until we fill up the buffer pool.
  for (size_t i = 1; i < buffer_pool_size; ++i) {
    ASSERT_NE(nullptr, bpm->NewPage(&page_id_temp));
  }

  // Scenario: Once the buffer pool is full, we should not be able to create any new pages.
  for (size_t i = buffer_pool_size; i < buffer_pool_size * 2; ++i) {
    ASSERT_EQ(nullptr, bpm->NewPage(&page_id_temp));
  }

  // Scenario: After unpinning pages {0, 1, 2, 3, 4} and pinning another 4 new pages,
  // there would still be one cache frame left for reading page 0.
  for (int i = 0; i < 5; ++i) {
    ASSERT_EQ(true, bpm->UnpinPage(i, true));
    bpm->FlushPage(i);
  }
  for (int i = 0; i < 5; ++i) {
    ASSERT_NE(nullptr, bpm->NewPage(&page_id_temp));
    bpm->UnpinPage(page_id_temp, false);
  }
  // Scenario: We should be able to fetch the data we wrote a while ago.
  page0 = bpm->FetchPage(0);
  ASSERT_EQ(0, strcmp(page0->GetData(), random_binary_data));
  ASSERT_EQ(true, bpm->UnpinPage(0, true));

  // Shutdown the disk manager and remove the temporary file we created.
  disk_manager->ShutDown();
  remove("test.db");

  delete bpm;
  delete disk_manager;
}

TEST(BufferPoolManagerInstanceTest, NewPage) {  // NOLINT
  page_id_t temp_page_id;
  auto *disk_manager = new DiskManager("test.db");
  auto *bpm = new BufferPoolManagerInstance(10, disk_manager, 5);

  std::vector<page_id_t> page_ids;

  for (int i = 0; i < 10; ++i) {
    auto *new_page = bpm->NewPage(&temp_page_id);
    ASSERT_NE(nullptr, new_page);
    strcpy(new_page->GetData(), std::to_string(i).c_str());  // NOLINT
    page_ids.push_back(temp_page_id);
  }

  // all the pages are pinned, the buffer pool is full
  for (int i = 0; i < 100; ++i) {
    auto *new_page = bpm->NewPage(&temp_page_id);
    ASSERT_EQ(nullptr, new_page);
  }

  // unpin the first five pages, add them to LRU list, set as dirty
  for (int i = 0; i < 5; ++i) {
    ASSERT_EQ(true, bpm->UnpinPage(page_ids[i], true));
  }

  // we have 5 empty slots in LRU list, evict page zero out of buffer pool
  for (int i = 0; i < 5; ++i) {
    auto *new_page = bpm->NewPage(&temp_page_id);
    ASSERT_NE(nullptr, new_page);
    page_ids[i] = temp_page_id;
  }

  // all the pages are pinned, the buffer pool is full
  for (int i = 0; i < 100; ++i) {
    auto *new_page = bpm->NewPage(&temp_page_id);
    ASSERT_EQ(nullptr, new_page);
  }

  // unpin the first five pages, add them to LRU list
  for (int i = 0; i < 5; ++i) {
    ASSERT_EQ(true, bpm->UnpinPage(page_ids[i], false));
  }

  // we have 5 empty slots in LRU list, evict page zero out of buffer pool
  for (int i = 0; i < 5; ++i) {
    ASSERT_NE(nullptr, bpm->NewPage(&temp_page_id));
  }

  // all the pages are pinned, the buffer pool is full
  for (int i = 0; i < 100; ++i) {
    auto *new_page = bpm->NewPage(&temp_page_id);
    ASSERT_EQ(nullptr, new_page);
  }

  remove("test.db");
  remove("test.log");
  delete bpm;
  delete disk_manager;
}

TEST(BufferPoolManagerInstanceTest, UnpinPage) {  // NOLINT
  auto *disk_manager = new DiskManager("test.db");
  auto *bpm = new BufferPoolManagerInstance(2, disk_manager, 5);

  page_id_t pageid0;
  auto *page0 = bpm->NewPage(&pageid0);
  ASSERT_NE(nullptr, page0);
  strcpy(page0->GetData(), "page0");  // NOLINT

  page_id_t pageid1;
  auto *page1 = bpm->NewPage(&pageid1);
  ASSERT_NE(nullptr, page1);
  strcpy(page1->GetData(), "page1");  // NOLINT

  ASSERT_EQ(1, bpm->UnpinPage(pageid0, true));
  ASSERT_EQ(1, bpm->UnpinPage(pageid1, true));

  for (int i = 0; i < 2; i++) {
    page_id_t temp_page_id;
    auto *new_page = bpm->NewPage(&temp_page_id);
    ASSERT_NE(nullptr, new_page);
    bpm->UnpinPage(temp_page_id, true);
  }

  auto *page = bpm->FetchPage(pageid0);
  ASSERT_EQ(0, strcmp(page->GetData(), "page0"));
  strcpy(page->GetData(), "page0updated");  // NOLINT

  page = bpm->FetchPage(pageid1);
  ASSERT_EQ(0, strcmp(page->GetData(), "page1"));
  strcpy(page->GetData(), "page1updated");  // NOLINT

  ASSERT_EQ(1, bpm->UnpinPage(pageid0, false));
  ASSERT_EQ(1, bpm->UnpinPage(pageid1, true));

  for (int i = 0; i < 2; i++) {
    page_id_t temp_page_id;
    auto *new_page = bpm->NewPage(&temp_page_id);
    ASSERT_NE(nullptr, new_page);
    bpm->UnpinPage(temp_page_id, true);
  }

  page = bpm->FetchPage(pageid0);
  ASSERT_EQ(0, strcmp(page->GetData(), "page0"));
  strcpy(page->GetData(), "page0updated");  // NOLINT

  page = bpm->FetchPage(pageid1);
  ASSERT_EQ(0, strcmp(page->GetData(), "page1updated"));
  strcpy(page->GetData(), "page1againupdated");  // NOLINT

  remove("test.db");
  remove("test.log");
  delete bpm;
  delete disk_manager;
}

TEST(BufferPoolManagerInstanceTest, FetchPage) {  // NOLINT
  page_id_t temp_page_id;
  auto *disk_manager = new DiskManager("test.db");
  auto *bpm = new BufferPoolManagerInstance(10, disk_manager, 5);

  std::vector<Page *> pages;
  std::vector<page_id_t> page_ids;
  std::vector<std::string> content;

  for (int i = 0; i < 10; ++i) {
    auto *new_page = bpm->NewPage(&temp_page_id);
    ASSERT_NE(nullptr, new_page);
    strcpy(new_page->GetData(), std::to_string(i).c_str());  // NOLINT
    pages.push_back(new_page);
    page_ids.push_back(temp_page_id);
    content.push_back(std::to_string(i));
  }

  for (int i = 0; i < 10; ++i) {
    auto *page = bpm->FetchPage(page_ids[i]);
    ASSERT_NE(nullptr, page);
    ASSERT_EQ(pages[i], page);
    ASSERT_EQ(0, std::strcmp(std::to_string(i).c_str(), (page->GetData())));
    ASSERT_EQ(1, bpm->UnpinPage(page_ids[i], true));
    ASSERT_EQ(1, bpm->UnpinPage(page_ids[i], true));
    bpm->FlushPage(page_ids[i]);
  }

  for (int i = 0; i < 10; ++i) {
    auto *new_page = bpm->NewPage(&temp_page_id);
    ASSERT_NE(nullptr, new_page);
    bpm->UnpinPage(temp_page_id, true);
  }

  for (int i = 0; i < 10; ++i) {
    auto *page = bpm->FetchPage(page_ids[i]);
    ASSERT_NE(nullptr, page);
  }

  ASSERT_EQ(1, bpm->UnpinPage(page_ids[4], true));
  auto *new_page = bpm->NewPage(&temp_page_id);
  ASSERT_NE(nullptr, new_page);
  ASSERT_EQ(nullptr, bpm->FetchPage(page_ids[4]));

  // Check Clock
  auto *page5 = bpm->FetchPage(page_ids[5]);
  auto *page6 = bpm->FetchPage(page_ids[6]);
  auto *page7 = bpm->FetchPage(page_ids[7]);
  ASSERT_NE(nullptr, page5);
  ASSERT_NE(nullptr, page6);
  ASSERT_NE(nullptr, page7);
  strcpy(page5->GetData(), "updatedpage5");  // NOLINT
  strcpy(page6->GetData(), "updatedpage6");  // NOLINT
  strcpy(page7->GetData(), "updatedpage7");  // NOLINT
  ASSERT_EQ(1, bpm->UnpinPage(page_ids[5], false));
  ASSERT_EQ(1, bpm->UnpinPage(page_ids[6], false));
  ASSERT_EQ(1, bpm->UnpinPage(page_ids[7], false));

  ASSERT_EQ(1, bpm->UnpinPage(page_ids[5], false));
  ASSERT_EQ(1, bpm->UnpinPage(page_ids[6], false));
  ASSERT_EQ(1, bpm->UnpinPage(page_ids[7], false));

  // page5 would be evicted.
  new_page = bpm->NewPage(&temp_page_id);
  ASSERT_NE(nullptr, new_page);
  // page6 would be evicted.
  page5 = bpm->FetchPage(page_ids[5]);
  ASSERT_NE(nullptr, page5);
  ASSERT_EQ(0, std::strcmp("5", (page5->GetData())));
  page7 = bpm->FetchPage(page_ids[7]);
  ASSERT_NE(nullptr, page7);
  ASSERT_EQ(0, std::strcmp("updatedpage7", (page7->GetData())));
  // All pages pinned
  ASSERT_EQ(nullptr, bpm->FetchPage(page_ids[6]));
  bpm->UnpinPage(temp_page_id, false);
  page6 = bpm->FetchPage(page_ids[6]);
  ASSERT_NE(nullptr, page6);
  ASSERT_EQ(0, std::strcmp("6", page6->GetData()));

  strcpy(page6->GetData(), "updatedpage6");  // NOLINT

  // Remove from LRU and update pin_count on fetch
  new_page = bpm->NewPage(&temp_page_id);
  ASSERT_EQ(nullptr, new_page);

  ASSERT_EQ(1, bpm->UnpinPage(page_ids[7], false));
  ASSERT_EQ(1, bpm->UnpinPage(page_ids[6], false));

  new_page = bpm->NewPage(&temp_page_id);
  ASSERT_NE(nullptr, new_page);
  page6 = bpm->FetchPage(page_ids[6]);
  ASSERT_NE(nullptr, page6);
  ASSERT_EQ(0, std::strcmp("updatedpage6", page6->GetData()));
  page7 = bpm->FetchPage(page_ids[7]);
  ASSERT_EQ(nullptr, page7);
  bpm->UnpinPage(temp_page_id, false);
  page7 = bpm->FetchPage(page_ids[7]);
  ASSERT_NE(nullptr, page7);
  ASSERT_EQ(0, std::strcmp("7", (page7->GetData())));

  remove("test.db");
  remove("test.log");
  delete bpm;
  delete disk_manager;
}

TEST(BufferPoolManagerInstanceTest, DeletePage) {  // NOLINT
  page_id_t temp_page_id;
  auto *disk_manager = new DiskManager("test.db");
  auto *bpm = new BufferPoolManagerInstance(10, disk_manager, 5);

  std::vector<Page *> pages;
  std::vector<page_id_t> page_ids;
  std::vector<std::string> content;

  for (int i = 0; i < 10; ++i) {
    auto *new_page = bpm->NewPage(&temp_page_id);
    ASSERT_NE(nullptr, new_page);
    strcpy(new_page->GetData(), std::to_string(i).c_str());  // NOLINT
    pages.push_back(new_page);
    page_ids.push_back(temp_page_id);
    content.push_back(std::to_string(i));
  }

  for (int i = 0; i < 10; ++i) {
    auto *page = bpm->FetchPage(page_ids[i]);
    ASSERT_NE(nullptr, page);
    ASSERT_EQ(1, bpm->UnpinPage(page_ids[i], true));
    ASSERT_EQ(1, bpm->UnpinPage(page_ids[i], true));
  }

  for (int i = 0; i < 10; ++i) {
    auto *new_page = bpm->NewPage(&temp_page_id);
    ASSERT_NE(nullptr, new_page);
    bpm->UnpinPage(temp_page_id, true);
  }

  for (int i = 0; i < 10; ++i) {
    auto *page = bpm->FetchPage(page_ids[i]);
    ASSERT_NE(nullptr, page);
  }

  auto *new_page = bpm->NewPage(&temp_page_id);
  ASSERT_EQ(nullptr, new_page);

  ASSERT_EQ(0, bpm->DeletePage(page_ids[4]));
  bpm->UnpinPage(4, false);
  ASSERT_EQ(1, bpm->DeletePage(page_ids[4]));

  new_page = bpm->NewPage(&temp_page_id);
  ASSERT_NE(nullptr, new_page);
  ASSERT_NE(nullptr, new_page);

  auto *page5 = bpm->FetchPage(page_ids[5]);
  ASSERT_NE(nullptr, page5);
  auto *page6 = bpm->FetchPage(page_ids[6]);
  ASSERT_NE(nullptr, page6);
  auto *page7 = bpm->FetchPage(page_ids[7]);
  ASSERT_NE(nullptr, page7);
  strcpy(page5->GetData(), "updatedpage5");  // NOLINT
  strcpy(page6->GetData(), "updatedpage6");  // NOLINT
  strcpy(page7->GetData(), "updatedpage7");  // NOLINT
  bpm->UnpinPage(5, false);
  bpm->UnpinPage(6, false);
  bpm->UnpinPage(7, false);

  bpm->UnpinPage(5, false);
  bpm->UnpinPage(6, false);
  bpm->UnpinPage(7, false);
  ASSERT_EQ(1, bpm->DeletePage(page_ids[7]));

  bpm->NewPage(&temp_page_id);
  page5 = bpm->FetchPage(page_ids[5]);
  page6 = bpm->FetchPage(page_ids[6]);
  ASSERT_NE(nullptr, page5);
  ASSERT_NE(nullptr, page6);
  ASSERT_EQ(0, std::strcmp(page5->GetData(), "updatedpage5"));
  ASSERT_EQ(0, std::strcmp(page6->GetData(), "updatedpage6"));

  remove("test.db");
  remove("test.log");
  delete bpm;
  delete disk_manager;
}

TEST(BufferPoolManagerInstanceTest, IsDirty) {  // NOLINT
  auto *disk_manager = new DiskManager("test.db");
  auto *bpm = new BufferPoolManagerInstance(1, disk_manager, 5);

  // Make new page and write to it
  page_id_t pageid0;
  auto *page0 = bpm->NewPage(&pageid0);
  ASSERT_NE(nullptr, page0);
  ASSERT_EQ(0, page0->IsDirty());
  strcpy(page0->GetData(), "page0");  // NOLINT
  ASSERT_EQ(1, bpm->UnpinPage(pageid0, true));

  // Fetch again but don't write. Assert it is still marked as dirty
  page0 = bpm->FetchPage(pageid0);
  ASSERT_NE(nullptr, page0);
  ASSERT_EQ(1, page0->IsDirty());
  ASSERT_EQ(1, bpm->UnpinPage(pageid0, false));

  // Fetch and assert it is still dirty
  page0 = bpm->FetchPage(pageid0);
  ASSERT_NE(nullptr, page0);
  ASSERT_EQ(1, page0->IsDirty());
  ASSERT_EQ(1, bpm->UnpinPage(pageid0, false));

  // Create a new page, assert it's not dirty
  page_id_t pageid1;
  auto *page1 = bpm->NewPage(&pageid1);
  ASSERT_NE(nullptr, page1);
  ASSERT_EQ(0, page1->IsDirty());

  // Write to the page, and then delete it
  strcpy(page1->GetData(), "page1");  // NOLINT
  ASSERT_EQ(1, bpm->UnpinPage(pageid1, true));
  ASSERT_EQ(1, page1->IsDirty());
  ASSERT_EQ(1, bpm->DeletePage(pageid1));

  // Fetch page 0 again, and confirm its not dirty
  page0 = bpm->FetchPage(pageid0);
  ASSERT_NE(nullptr, page0);
  ASSERT_EQ(0, page0->IsDirty());

  remove("test.db");
  remove("test.log");
  delete bpm;
  delete disk_manager;
}

TEST(BufferPoolManagerInstanceTest, ConcurrencyTest) {  // NOLINT
  const int num_threads = 5;
  const int num_runs = 50;
  for (int run = 0; run < num_runs; run++) {
    auto *disk_manager = new DiskManager("test.db");
    std::shared_ptr<BufferPoolManagerInstance> bpm{new BufferPoolManagerInstance(50, disk_manager)};
    std::vector<std::thread> threads;

    for (int tid = 0; tid < num_threads; tid++) {
      threads.push_back(std::thread([&bpm]() {  // NOLINT
        page_id_t temp_page_id;
        std::vector<page_id_t> page_ids;
        for (int i = 0; i < 10; i++) {
          auto *new_page = bpm->NewPage(&temp_page_id, nullptr);
          ASSERT_NE(nullptr, new_page);
          strcpy(new_page->GetData(), std::to_string(temp_page_id).c_str());  // NOLINT
          page_ids.push_back(temp_page_id);
        }
        for (int i = 0; i < 10; i++) {
          ASSERT_EQ(1, bpm->UnpinPage(page_ids[i], true, nullptr));
        }
        for (int j = 0; j < 10; j++) {
          auto *page = bpm->FetchPage(page_ids[j], nullptr);
          ASSERT_NE(nullptr, page);
          ASSERT_EQ(0, std::strcmp(std::to_string(page_ids[j]).c_str(), (page->GetData())));
          ASSERT_EQ(1, bpm->UnpinPage(page_ids[j], true, nullptr));
        }
        for (int j = 0; j < 10; j++) {
          ASSERT_EQ(1, bpm->DeletePage(page_ids[j], nullptr));
        }
      }));
    }

    for (int i = 0; i < num_threads; i++) {
      threads[i].join();
    }

    remove("test.db");
    remove("test.log");
    delete disk_manager;
  }
}

TEST(BufferPoolManagerInstanceTest, IntegratedTest) {  // NOLINT
  page_id_t temp_page_id;
  auto *disk_manager = new DiskManager("test.db");
  auto *bpm = new BufferPoolManagerInstance(10, disk_manager, 5);

  std::vector<page_id_t> page_ids;
  for (int j = 0; j < 1000; j++) {
    for (int i = 0; i < 10; i++) {
      auto *new_page = bpm->NewPage(&temp_page_id);
      ASSERT_NE(nullptr, new_page);
      strcpy(new_page->GetData(), std::to_string(temp_page_id).c_str());  // NOLINT
      page_ids.push_back(temp_page_id);
    }
    for (unsigned int i = page_ids.size() - 10; i < page_ids.size(); i++) {
      ASSERT_EQ(1, bpm->UnpinPage(page_ids[i], true));
    }
  }

  for (int j = 0; j < 10000; j++) {
    auto *page = bpm->FetchPage(page_ids[j]);
    ASSERT_NE(nullptr, page);
    ASSERT_EQ(0, std::strcmp(std::to_string(page_ids[j]).c_str(), (page->GetData())));
    ASSERT_EQ(1, bpm->UnpinPage(page_ids[j], true));
    page_ids.push_back(temp_page_id);
  }
  for (int j = 0; j < 10000; j++) {
    ASSERT_EQ(1, bpm->DeletePage(page_ids[j]));
  }
  remove("test.db");
  remove("test.log");
  delete bpm;
  delete disk_manager;
}

TEST(BufferPoolManagerInstanceTest, HardTest_1) {  // NOLINT
  page_id_t temp_page_id;
  auto *disk_manager = new DiskManager("test.db");
  auto *bpm = new BufferPoolManagerInstance(10, disk_manager, 5);

  std::vector<page_id_t> page_ids;
  for (int j = 0; j < 1000; j++) {
    for (int i = 0; i < 10; i++) {
      auto *new_page = bpm->NewPage(&temp_page_id);
      ASSERT_NE(nullptr, new_page);
      strcpy(new_page->GetData(), std::to_string(temp_page_id).c_str());  // NOLINT
      page_ids.push_back(temp_page_id);
    }
    for (unsigned int i = page_ids.size() - 10; i < page_ids.size() - 5; i++) {
      ASSERT_EQ(1, bpm->UnpinPage(page_ids[i], false));
    }
    for (unsigned int i = page_ids.size() - 5; i < page_ids.size(); i++) {
      ASSERT_EQ(1, bpm->UnpinPage(page_ids[i], true));
    }
  }

  for (int j = 0; j < 10000; j++) {
    auto *page = bpm->FetchPage(page_ids[j]);
    ASSERT_NE(nullptr, page);
    if (j % 10 < 5) {
      ASSERT_NE(0, std::strcmp(std::to_string(page_ids[j]).c_str(), (page->GetData())));
    } else {
      ASSERT_EQ(0, std::strcmp(std::to_string(page_ids[j]).c_str(), (page->GetData())));
    }
    ASSERT_EQ(1, bpm->UnpinPage(page_ids[j], true));
  }

  auto rng = std::default_random_engine{};
  std::shuffle(page_ids.begin(), page_ids.end(), rng);

  for (int j = 0; j < 5000; j++) {
    auto *page = bpm->FetchPage(page_ids[j]);
    ASSERT_NE(nullptr, page);
    ASSERT_EQ(1, bpm->UnpinPage(page_ids[j], false));
    ASSERT_EQ(1, bpm->DeletePage(page_ids[j]));
  }

  for (int j = 5000; j < 10000; j++) {
    auto *page = bpm->FetchPage(page_ids[j]);
    ASSERT_NE(nullptr, page);
    if (page_ids[j] % 10 < 5) {
      ASSERT_NE(0, std::strcmp(std::to_string(page_ids[j]).c_str(), (page->GetData())));
    } else {
      ASSERT_EQ(0, std::strcmp(std::to_string(page_ids[j]).c_str(), (page->GetData())));
    }
    ASSERT_EQ(1, bpm->UnpinPage(page_ids[j], false));
    ASSERT_EQ(1, bpm->DeletePage(page_ids[j]));
  }

  remove("test.db");
  remove("test.log");
  delete bpm;
  delete disk_manager;
}

TEST(BufferPoolManagerInstanceTest, HardTest_2) {  // NOLINT
  const int num_threads = 5;
  const int num_runs = 50;
  for (int run = 0; run < num_runs; run++) {
    auto *disk_manager = new DiskManager("test.db");
    std::shared_ptr<BufferPoolManagerInstance> bpm{new BufferPoolManagerInstance(50, disk_manager)};
    std::vector<std::thread> threads;

    page_id_t temp_page_id;
    std::vector<page_id_t> page_ids;
    for (int i = 0; i < 50; i++) {
      auto *new_page = bpm->NewPage(&temp_page_id, nullptr);
      ASSERT_NE(nullptr, new_page);
      strcpy(new_page->GetData(), std::to_string(temp_page_id).c_str());  // NOLINT
      page_ids.push_back(temp_page_id);
    }

    for (int i = 0; i < 50; i++) {
      if (i % 2 == 0) {
        ASSERT_EQ(1, bpm->UnpinPage(page_ids[i], true, nullptr));
      } else {
        ASSERT_EQ(1, bpm->UnpinPage(page_ids[i], false, nullptr));
      }
    }

    for (int i = 0; i < 50; i++) {
      auto *new_page = bpm->NewPage(&temp_page_id, nullptr);
      ASSERT_NE(nullptr, new_page);
      ASSERT_EQ(1, bpm->UnpinPage(temp_page_id, true, nullptr));
    }

    for (int j = 0; j < 50; j++) {
      auto *page = bpm->FetchPage(page_ids[j], nullptr);
      ASSERT_NE(nullptr, page);
      strcpy(page->GetData(), (std::string("Hard") + std::to_string(page_ids[j])).c_str());  // NOLINT
    }

    for (int i = 0; i < 50; i++) {
      if (i % 2 == 0) {
        ASSERT_EQ(1, bpm->UnpinPage(page_ids[i], false, nullptr));
      } else {
        ASSERT_EQ(1, bpm->UnpinPage(page_ids[i], true, nullptr));
      }
    }

    for (int i = 0; i < 50; i++) {
      auto *new_page = bpm->NewPage(&temp_page_id, nullptr);
      ASSERT_NE(nullptr, new_page);
      ASSERT_EQ(1, bpm->UnpinPage(temp_page_id, true, nullptr));
    }

    for (int tid = 0; tid < num_threads; tid++) {
      threads.push_back(std::thread([&bpm, tid, page_ids]() {  // NOLINT
        int j = (tid * 10);
        while (j < 50) {
          auto *page = bpm->FetchPage(page_ids[j], nullptr);
          while (page == nullptr) {
            page = bpm->FetchPage(page_ids[j], nullptr);
          }
          ASSERT_NE(nullptr, page);
          if (j % 2 == 0) {
            ASSERT_EQ(0, std::strcmp(std::to_string(page_ids[j]).c_str(), (page->GetData())));
            ASSERT_EQ(1, bpm->UnpinPage(page_ids[j], false, nullptr));
          } else {
            ASSERT_EQ(0, std::strcmp((std::string("Hard") + std::to_string(page_ids[j])).c_str(), (page->GetData())));
            ASSERT_EQ(1, bpm->UnpinPage(page_ids[j], false, nullptr));
          }
          j = (j + 1);
        }
      }));
    }

    for (int i = 0; i < num_threads; i++) {
      threads[i].join();
    }

    for (int j = 0; j < 50; j++) {
      ASSERT_EQ(1, bpm->DeletePage(page_ids[j], nullptr));
    }

    remove("test.db");
    remove("test.log");
    delete disk_manager;
  }
}

TEST(BufferPoolManagerInstanceTest, HardTest_3) {  // NOLINT
  const int num_threads = 5;
  const int num_runs = 50;
  for (int run = 0; run < num_runs; run++) {
    auto *disk_manager = new DiskManager("test.db");
    std::shared_ptr<BufferPoolManagerInstance> bpm{new BufferPoolManagerInstance(50, disk_manager)};
    std::vector<std::thread> threads;

    page_id_t temp_page_id;
    std::vector<page_id_t> page_ids;
    for (int i = 0; i < 50; i++) {
      auto *new_page = bpm->NewPage(&temp_page_id, nullptr);
      ASSERT_NE(nullptr, new_page);
      strcpy(new_page->GetData(), std::to_string(temp_page_id).c_str());  // NOLINT
      page_ids.push_back(temp_page_id);
    }

    for (int i = 0; i < 50; i++) {
      if (i % 2 == 0) {
        ASSERT_EQ(1, bpm->UnpinPage(page_ids[i], true, nullptr));
      } else {
        ASSERT_EQ(1, bpm->UnpinPage(page_ids[i], false, nullptr));
      }
    }

    for (int i = 0; i < 50; i++) {
      auto *new_page = bpm->NewPage(&temp_page_id, nullptr);
      ASSERT_NE(nullptr, new_page);
      ASSERT_EQ(1, bpm->UnpinPage(temp_page_id, true, nullptr));
    }

    for (int j = 0; j < 50; j++) {
      auto *page = bpm->FetchPage(page_ids[j], nullptr);
      ASSERT_NE(nullptr, page);
      strcpy(page->GetData(), (std::string("Hard") + std::to_string(page_ids[j])).c_str());  // NOLINT
    }

    for (int i = 0; i < 50; i++) {
      if (i % 2 == 0) {
        ASSERT_EQ(1, bpm->UnpinPage(page_ids[i], false, nullptr));
      } else {
        ASSERT_EQ(1, bpm->UnpinPage(page_ids[i], true, nullptr));
      }
    }

    for (int i = 0; i < 50; i++) {
      auto *new_page = bpm->NewPage(&temp_page_id, nullptr);
      ASSERT_NE(nullptr, new_page);
      ASSERT_EQ(1, bpm->UnpinPage(temp_page_id, true, nullptr));
    }

    for (int tid = 0; tid < num_threads; tid++) {
      threads.push_back(std::thread([&bpm, tid, page_ids]() {  // NOLINT
        page_id_t temp_page_id;
        int j = (tid * 10);
        while (j < 50) {
          if (j != tid * 10) {
            auto *page_local = bpm->FetchPage(temp_page_id, nullptr);
            while (page_local == nullptr) {
              page_local = bpm->FetchPage(temp_page_id, nullptr);
            }
            ASSERT_NE(nullptr, page_local);
            ASSERT_EQ(0, std::strcmp(std::to_string(temp_page_id).c_str(), (page_local->GetData())));
            ASSERT_EQ(1, bpm->UnpinPage(temp_page_id, false, nullptr));
            // If the page is still in buffer pool then put it in free list,
            // else also we are happy
            ASSERT_EQ(1, bpm->DeletePage(temp_page_id, nullptr));
          }

          auto *page = bpm->FetchPage(page_ids[j], nullptr);
          while (page == nullptr) {
            page = bpm->FetchPage(page_ids[j], nullptr);
          }
          ASSERT_NE(nullptr, page);
          if (j % 2 == 0) {
            ASSERT_EQ(0, std::strcmp(std::to_string(page_ids[j]).c_str(), (page->GetData())));
            ASSERT_EQ(1, bpm->UnpinPage(page_ids[j], false, nullptr));
          } else {
            ASSERT_EQ(0, std::strcmp((std::string("Hard") + std::to_string(page_ids[j])).c_str(), (page->GetData())));
            ASSERT_EQ(1, bpm->UnpinPage(page_ids[j], false, nullptr));
          }
          j = (j + 1);

          page = bpm->NewPage(&temp_page_id, nullptr);
          while (page == nullptr) {
            page = bpm->NewPage(&temp_page_id, nullptr);
          }
          ASSERT_NE(nullptr, page);
          strcpy(page->GetData(), std::to_string(temp_page_id).c_str());  // NOLINT
          ASSERT_EQ(1, bpm->UnpinPage(temp_page_id, true, nullptr));
        }
      }));
    }

    for (int i = 0; i < num_threads; i++) {
      threads[i].join();
    }

    for (int j = 0; j < 50; j++) {
      ASSERT_EQ(1, bpm->DeletePage(page_ids[j], nullptr));
    }

    remove("test.db");
    remove("test.log");
    delete disk_manager;
  }
}

TEST(BufferPoolManagerInstanceTest, HardTest_4) {  // NOLINT
  const int num_threads = 5;
  const int num_runs = 50;
  for (int run = 0; run < num_runs; run++) {
    auto *disk_manager = new DiskManager("test.db");
    std::shared_ptr<BufferPoolManagerInstance> bpm{new BufferPoolManagerInstance(50, disk_manager)};
    std::vector<std::thread> threads;

    page_id_t temp_page_id;
    std::vector<page_id_t> page_ids;
    for (int i = 0; i < 50; i++) {
      auto *new_page = bpm->NewPage(&temp_page_id, nullptr);
      ASSERT_NE(nullptr, new_page);
      ASSERT_NE(nullptr, new_page);
      strcpy(new_page->GetData(), std::to_string(temp_page_id).c_str());  // NOLINT
      page_ids.push_back(temp_page_id);
    }

    for (int i = 0; i < 50; i++) {
      if (i % 2 == 0) {
        ASSERT_EQ(1, bpm->UnpinPage(page_ids[i], true, nullptr));
      } else {
        ASSERT_EQ(1, bpm->UnpinPage(page_ids[i], false, nullptr));
      }
    }

    for (int i = 0; i < 50; i++) {
      auto *new_page = bpm->NewPage(&temp_page_id, nullptr);
      ASSERT_NE(nullptr, new_page);
      ASSERT_EQ(1, bpm->UnpinPage(temp_page_id, true, nullptr));
    }

    for (int j = 0; j < 50; j++) {
      auto *page = bpm->FetchPage(page_ids[j], nullptr);
      ASSERT_NE(nullptr, page);
      strcpy(page->GetData(), (std::string("Hard") + std::to_string(page_ids[j])).c_str());  // NOLINT
    }

    for (int i = 0; i < 50; i++) {
      if (i % 2 == 0) {
        ASSERT_EQ(1, bpm->UnpinPage(page_ids[i], false, nullptr));
      } else {
        ASSERT_EQ(1, bpm->UnpinPage(page_ids[i], true, nullptr));
      }
    }

    for (int i = 0; i < 50; i++) {
      auto *new_page = bpm->NewPage(&temp_page_id, nullptr);
      ASSERT_NE(nullptr, new_page);
      ASSERT_EQ(1, bpm->UnpinPage(temp_page_id, true, nullptr));
    }

    for (int tid = 0; tid < num_threads; tid++) {
      threads.push_back(std::thread([&bpm, tid, page_ids]() {  // NOLINT
        page_id_t temp_page_id;
        int j = (tid * 10);
        while (j < 50) {
          if (j != tid * 10) {
            auto *page_local = bpm->FetchPage(temp_page_id, nullptr);
            while (page_local == nullptr) {
              page_local = bpm->FetchPage(temp_page_id, nullptr);
            }
            ASSERT_NE(nullptr, page_local);
            ASSERT_EQ(0, std::strcmp(std::to_string(temp_page_id).c_str(), (page_local->GetData())));
            ASSERT_EQ(1, bpm->UnpinPage(temp_page_id, false, nullptr));
            // If the page is still in buffer pool then put it in free list,
            // else also we are happy
            ASSERT_EQ(1, bpm->DeletePage(temp_page_id, nullptr));
          }

          auto *page = bpm->FetchPage(page_ids[j], nullptr);
          while (page == nullptr) {
            page = bpm->FetchPage(page_ids[j], nullptr);
          }
          ASSERT_NE(nullptr, page);
          if (j % 2 == 0) {
            ASSERT_EQ(0, std::strcmp(std::to_string(page_ids[j]).c_str(), (page->GetData())));
            ASSERT_EQ(1, bpm->UnpinPage(page_ids[j], false, nullptr));
          } else {
            ASSERT_EQ(0, std::strcmp((std::string("Hard") + std::to_string(page_ids[j])).c_str(), (page->GetData())));
            ASSERT_EQ(1, bpm->UnpinPage(page_ids[j], false, nullptr));
          }
          j = (j + 1);

          page = bpm->NewPage(&temp_page_id, nullptr);
          while (page == nullptr) {
            page = bpm->NewPage(&temp_page_id, nullptr);
          }
          ASSERT_NE(nullptr, page);
          strcpy(page->GetData(), std::to_string(temp_page_id).c_str());  // NOLINT
          // Flush page instead of unpinning with true
          ASSERT_EQ(1, bpm->FlushPage(temp_page_id, nullptr));
          ASSERT_EQ(1, bpm->UnpinPage(temp_page_id, false, nullptr));

          // Flood with new pages
          for (int k = 0; k < 10; k++) {
            page_id_t flood_page_id;
            auto *flood_page = bpm->NewPage(&flood_page_id, nullptr);
            while (flood_page == nullptr) {
              flood_page = bpm->NewPage(&flood_page_id, nullptr);
            }
            ASSERT_NE(nullptr, flood_page);
            ASSERT_EQ(1, bpm->UnpinPage(flood_page_id, false, nullptr));
            // If the page is still in buffer pool then put it in free list,
            // else also we are happy
            ASSERT_EQ(1, bpm->DeletePage(flood_page_id, nullptr));
          }
        }
      }));
    }

    for (int i = 0; i < num_threads; i++) {
      threads[i].join();
    }

    for (int j = 0; j < 50; j++) {
      ASSERT_EQ(1, bpm->DeletePage(page_ids[j], nullptr));
    }

    remove("test.db");
    remove("test.log");
    delete disk_manager;
  }
}

}  // namespace bustub

Test results:
