🏆 18.483毫秒找到最优解的华容道程序,用了trie和cwisstable

const char* NM[][4] = { {"","","",""}, {"西",""}, {"",""}, {"",""}, {"",""}, {"",""}, {""}, {""}, {""}, {""} };
int W[] = { 2, 2, 2, 2, 2, 2, 1, 1, 1, 1 }; // 默认5个水平条,随后修改
int H[] = { 2, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
int T[10]; // Type
const int D[][2] = { {0, 1}, {0, -1}, {-1, 0}, {1, 0} }; // 后面有两处用下标判断移动方向

enum { MAX = 3600 * 10000 };
struct State {
  uint8_t aa[10][2];
#define CCY aa[0][0] // 曹操的y
  int p; // 局面路径的previous
#define cpy20(dst, src) _mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((__m128i*)src)); *(int*)((uint8_t*)dst+16) = *(int*)((const uint8_t*)src+16)
  void operator= (const State& s) { cpy20(aa, s.aa); p = s.p; }
  void operator=(const char* s);
  void print(const char* s = "");
} q[MAX + 10 * 4]; // 最多40个move. MAX很大; 没判断qt < MAX
int qh, qt = 1; // queue head, tail

void State::operator= (const char* s) {
  int p = 6;
  for (int x = 3; x >= 0; x--)
    for (int y = 4; y >= 0; y--) {
    #define CASE(c, i) case c: aa[i][1] = x; aa[i][0] = y; break;
    switch (s[x * 5 + y]) {
    // 曹关张黄子龙(l)马
    CASE('c', 0) CASE('g', 1) CASE('z', 2) CASE('h', 3) CASE('l', 4) CASE('m', 5)
    case 'p': aa[p][1] = x; aa[p++][0] = y; break; // pawn
    }
  }
  static const char*  S[] = { "gg", "zz", "hh", "ll", "mm" };
  for (int i = 0; i < 5; i++) if (strstr(s, S[i])) W[i+1] = 1, H[i+1] = 2;
  for (int i = 0; i < 10; i++) T[i] = W[i] * 2 + H[i] - 2;
}

#define set2DArrayByCoordInA(a, b, what) for (int i = 0; i < 10; i++) { \
  const int x = a[i][1], y = a[i][0]; \
  for (int yy = y; yy < y + H[i]; yy++) \
    for (int xx = x; xx < x + W[i]; xx++) b[yy][xx] = what; \
}

void State::print (const char* s) {
  int idx[10] = {};
  const char* b[5][4] = {};
  set2DArrayByCoordInA(aa, b, NM[i][idx[i]++])
  for (int y = 0; y < 5; y++) {
    for (int x = 0; x < 4; x++) printf("%s", b[y][x] ? : " ");
    puts("");
  }
  printf("%s\n", s);
}

void print_path () { // 数组存的单链表就地翻转
  int prev = -1, next = -1, n = 0;
  for (int cur = qh; cur != -1; ++n) {
    next = q[cur].p; q[cur].p = prev;
    prev = cur; cur = next;
  }
  for (int p = 0; p != -1; p = q[p].p) q[p].print();
  printf("%d\n", n);
}

#pragma pack(1)
struct {
  uint8_t b[5][4];
  uint64_t n; // unique number
  uint8_t _[4];
} u __attribute__((aligned(32)));
#pragma pack()

#define _Ah { \
  _mm256_store_si256((__m256i*)&u, _mm256_setzero_si256()); \
  set2DArrayByCoordInA(a, u.b, T[i]) \
}

#define Ah { \
  _Ah \
  for (int y = 0; y < 5; y++) \
    for (int x = 0; x < 4; x++) u.n = u.n * 5 + u.b[y][x]; \
}

#ifdef trie
#pragma message "trie"
TrieNode* tr = TrieNode::alloc();
#define if_ok_add_state if (ok) {  \
  _Ah if (!tr->find_insert(u.b)) { cpy20(q[qt].aa, a); q[qt++].p = qh; } \
}
#elif defined(st)
#pragma message "cwisstable"
CWISS_DECLARE_FLAT_HASHSET(Set, uint64_t); Set set = Set_new(MAX);
#define if_ok_add_state if (ok) { \
  Ah if (!Set_contains(&set, &u.n)) { Set_insert(&set, &u.n); cpy20(q[qt].aa, a); q[qt++].p = qh; } \
}
#else
#pragma message "std"
std::set<uint64_t> set;
#define if_ok_add_state if (ok) { \
  Ah if (set.find(u.n) == set.end()) { set.insert(u.n); cpy20(q[qt].aa, a); q[qt++].p = qh; } \
}
#endif

int main (int argc, char* argv[]) {
  q[0] = (argc == 2) ? argv[1] : "pzzpg""cc  g""ccllm""phhpm"; q[0].p = -1;
  uint8_t a[10][2] __attribute__((aligned(32))); cpy20(a, q[0].aa);
#ifdef trie
  _Ah tr->find_insert(u.b);
#elif defined(st)
  Ah Set_insert(&set, &u.n);
#else
  Ah set.insert(u.n);
#endif

  double tm = clock();
  for (; qh < qt; qh++) {
    if (q[qh].CCY == 3) { print_path(); break; }

    cpy20(a, q[qh].aa);
    uint8_t b[5][4] __attribute__((aligned(32))), overflow[12];
    _mm256_store_si256((__m256i*)b, _mm256_set1_epi8(1));
    set2DArrayByCoordInA(a, b, 0)

    int qt1 = qt;
    for (int j = 0; j < 4; j++) {
      int ok;
      for (int i = 0; i < 6; i++) {
        const int ox = a[i][1], oy = a[i][0];
        int x = (a[i][1] += D[j][0]), y = (a[i][0] += D[j][1]);
        if (x < 0 || x + W[i] > 4 || y < 0 || y + H[i] > 5) ok = 0;
        else { // D[]的顺序不能换!
          if (j == 0) { y = oy + H[i]; ok = b[y][x] && (W[i] == 1 || b[y][x + 1]); }
          else if (j == 1) ok = b[y][x] & (!(W[i] - 1) | b[y][x + 1]); // W[i]1或2; 不更快
          else if (j == 2) ok = b[y][x] & (!(H[i] - 1) | b[y + 1][x]);
          else { x = ox + W[i]; ok = b[y][x] && (H[i] == 1 || b[y + 1][x]); }
        }
        if_ok_add_state
        a[i][1] -= D[j][0]; a[i][0] -= D[j][1];
      }
      for (int i = 6; i < 10; i++) {
        const int ox = a[i][1], oy = a[i][0];
        int x = (a[i][1] += D[j][0]), y = (a[i][0] += D[j][1]);
        if (x < 0 || x + W[i] > 4 || y < 0 || y + H[i] > 5) ok = 0;
        else {
          if (j == 0) y = oy + H[i];
          else if (j == 3) x = ox + W[i];
          ok = b[y][x];
        }
        if_ok_add_state
        a[i][1] -= D[j][0]; a[i][0] -= D[j][1];
      }
    }
#if 1
    int qt2 = qt - 1;
    if (qt2 > qt1 && q[qt2].CCY > q[qt1].CCY) { State t = q[qt1]; q[qt1] = q[qt2]; q[qt2] = t; }
#endif
  }

  printf("%.6f %d\n", (clock() - tm) / CLOCKS_PER_SEC, qt);
  return 0;
}

原本是(x,y),曹操的y是a[0][1],这不得每次"+1"? 改成a[0][0]了,D不改也能用。State::aa改成a的话,容易犯迷糊。

在Intel N100上跑的。注释掉找到局面则退出的代码,0.070324秒搜索了81,340个局面,判断出无解。cwisstable版要0.136341秒——它是为海量数据设计的。

支持各种块形状。我觉得华容道太难了,并不手动玩,也许可以改成3个长条6个小方块之类。

qh到qt是要搜索的局面,应从其中找最优的,用优先级队列,麻烦。最优的局面未必是曹操的y最小的局面。

trie5.h

struct TrieNode {
  TrieNode* kids[5];
  bool  hasWord;

  bool find_insert (const uint8_t s[5][4]) {
    const uint8_t* bytes = (const uint8_t*)s;
    TrieNode* p = this;
    for (int j = 0; j < 20; j++) {
      const int i = bytes[j];
      if (!p->kids[i]) p->kids[i] = alloc();
      p = p->kids[i];
    }
    bool found = p->hasWord;
    p->hasWord = true;
    return found;
  }

  static TrieNode* alloc();
};

inline TrieNode* TrieNode::alloc () {
  static TrieNode all[5000 * 1000];
  static int n = 0;
  return all + n++;
}
View Code

头文件

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <time.h>
#include <immintrin.h>
#include <xmmintrin.h>
#include <set>
#include "cwisstable.h"
#include "trie5.h"
View Code

 〔从0.8秒到0.018秒〕〔BFS动画〕〔DFS动画

On a SUN Sparcstation 2, the timings increase (at a rate of 0.005 second/pixel) from about 20 seconds for a 64**2 pixel image to about 320 seconds for 256**2 pixels. 〔详情

posted @ 2025-10-27 08:23  华容道专家  阅读(86)  评论(0)    收藏  举报