华容道 BFS DFS C++ Python 短程序

test

图片来自百度华容道吧。第二/三步卒子像军棋的工兵在铁道上跑——32步比我们的局面变化数少。

注释掉if (a[0][1] == 3) { print_path(); break; } // 判断找到解

则:queue_tail = 14,950,080,即从此局面出发一共能到达这么多个局面。程序把同形状的棋子(4个卒、4个竖条)当作互不相同的棋子来编码,所以 14,950,080 = 25,955 × 4! × 4!,和“同形棋子不加区分时共 25,955 个局面”这个数并不矛盾。程序没崩当然令人欣喜,不过该数还不到1500万。“诗云”版华容道?

在Intel N100上,C++版处理上述局面耗时0.017s, queue_tail=10,440.

究极优化版

  • 10个x放在xs里,10个y放在ys里,xs和ys可同时计算 (多发射、乱序执行)
  • 增加b[5][4], 不用popcnt了。1表示空,0表示占用,can_move时少些!
  • 曹操能向右移动的条件是新位置的上下两格都是空,能向下移动的条件是新位置的左右两格都是空
    • 能向左上移动时,大家都不用考虑W和H
    • 卒特殊处理。i >= 6判断是否是卒。横竖条个数不定,卒总是4个
  • 循环展开,用宏代替inline不了的函数(所有)
  • 简化和减少Hash,误判些局面走弯路,换来速度的提升(此图的连通性很强)
  • std::set底层是树,unordered_set底层是Hash表?Hash表用自己的。(未做)
  • unordered_set::reserve(500万)效果很好,多一条语句的事
  • Profile-Guided Optimization (PGO)没效果

Python程序blks没有做成函数的局部变量或参数的原因,我忘了。import了deepcopy但没有(再)用。应该不是负优化,比如100层,每层都只动了第一个块。

E = ' ' # marker for an empty board cell (described by the author as a full-width space)

class Brd:
  """A 4-wide, 5-high grid of cell labels, rebuilt from a block list by put()."""

  def __str__(self):
    # One text line per board row, rows separated by newlines.
    lines = []
    for row in self.b:
      lines.append(''.join(row))
    return '\n'.join(lines)

  def totuple(self):
    """Return the grid as an immutable (hashable) tuple of row tuples."""
    return tuple(map(tuple, self.b))

  def put(self, blks):
    """Clear the grid and draw every block of `blks` onto it, in order.

    Returns True when all blocks fit. Returns False as soon as a block sticks
    out of the grid or lands on an occupied cell; the grid is then left
    partially drawn.
    """
    grid = [[E for _ in range(4)] for _ in range(5)]
    self.b = grid
    for blk in blks:
      left, top = blk.x, blk.y
      right, bottom = left + blk.w, top + blk.h
      inside = left >= 0 and right <= 4 and top >= 0 and bottom <= 5
      if not inside:
        return False
      for row_no in range(top, bottom):
        row = grid[row_no]
        for col_no in range(left, right):
          if row[col_no] != E:
            return False
          row[col_no] = blk.name
    return True

class Blk:
  """A rectangular piece: label, top-left cell (x, y), size w x h, and an undo stack."""

  def __init__(self, name, x, y, w = 1, h = 1):
    self.name = name
    self.x, self.y = x, y
    self.w, self.h = w, h
    self.old = []  # positions saved by step(), restored by back()

  def step(self, dx, dy):
    """Save the current position, then shift the block by (dx, dy)."""
    self.old.append((self.x, self.y))
    self.x += dx
    self.y += dy

  def back(self):
    """Undo the most recent step() by restoring the saved position."""
    self.x, self.y = self.old.pop()

brd = Brd() # global scratch board; every brd.put() call redraws it from scratch
# Each blk keeps its own old[] list, so we are effectively managing a few undo stacks ourselves.
# Arguments are (name, x, y[, w, h]); w and h default to 1.
blks = [
  Blk('', 1, 0, 2, 2),
  Blk('', 1, 2, 2, 1),
  Blk('', 0, 3, 1, 2),
  Blk('', 1, 3, 1, 2),
  Blk('', 2, 3, 1, 2),
  Blk('', 3, 3, 1, 2),
  Blk('', 0, 0),
  Blk('', 0, 1),
  Blk('', 3, 0),
  Blk('', 3, 1)
]
# The 2x2 block; search() reports success once cc.y == 3.
cc = blks[0]
# Draw the start position; seen holds the board keys already expanded by search(),
# path holds the board strings along the current search branch.
brd.put(blks); seen = set(); path = []

def search(n):
  """Depth-first search from the position currently held in blks / brd.

  Returns True as soon as cc.y == 3; `path` then holds the board strings
  leading there and the blocks stay in their final positions. Returns False
  when this position was already expanded or none of its moves leads to a
  solution. `n` is the recursion depth; it is only passed along.
  """
  if cc.y == 3:
    return True

  # str(brd) and brd.totuple() both work as a key; no speed difference in this example.
  key = str(brd)
  if key in seen:
    return False
  seen.add(key)

  directions = ((-1, 0), (1, 0), (0, -1), (0, 1))
  for blk in blks:
    for delta in directions:
      blk.step(*delta)
      legal = brd.put(blks)
      if legal:
        path.append(str(brd))
        solved = search(n + 1)
        if solved:
          return True
        path.pop()
      blk.back()

  return False

import sys
# search() calls itself once per move, so allow a deep recursion.
sys.setrecursionlimit(10000)

# On success print the number of recorded boards, then the last and the first board of the path.
if search(0): print(len(path), path[-1], path[0], sep='\n\n')
// alias gcc='g++ -msse4.2'
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
// SSE4.2 intrinsics (_mm_popcnt_u32) only exist on x86 targets.
#if defined(__x86_64__) || defined(__i386__) || defined(_M_X64) || defined(_M_IX86)
#include <nmmintrin.h>
#endif
#include <set>
using namespace std;

// Piece ids: 0 = 'c', 1 = 'g', 2 = 'z', 3 = 'h', 4 = 'l', 5 = 'm', 6..9 = pawns ('p').
// Label printed for each piece, indexed by piece id.
const char* NM[] = { "", "", "", "", "", "", "", "", "", "" };
// Width and height of each piece in cells, indexed by piece id.
const int W[] = { 2, 2, 1, 1, 1, 1, 1, 1, 1, 1 };
const int H[] = { 2, 1, 2, 2, 2, 2, 1, 1, 1, 1 };
// The four unit moves as (dx, dy).
const int D[4][2] = { { 0, -1 }, { 0, 1 }, { -1, 0 }, { 1, 0 } };

// One board position plus the link used to rebuild the path that reached it.
// Kept free of constructors and member initializers so that the large global
// array below needs no run-time initialization.
struct State {
  uint64_t xys;  // packed coordinates: 5 bits per piece, x in bits 3-4, y in bits 0-2
  int p;         // index in states[] of the previous position on the path
  // int next;   // next link inside the hash table

  // Ordering for set<State>: positions are compared by coordinates only.
  bool operator< (const State& that) const { return xys < that.xys; }
  State& operator= (int a[10][2]);
  void toary(int a[10][2]) const;
  void operator= (const char* s);
  void print() const;
};

// Pack a[i] = {x, y} for the 10 pieces into xys. p is left untouched.
State& State::operator= (int a[10][2]) {
  xys = 0;
  for (int i = 0; i < 10; i++) {
    // Mask each field so an out-of-range value cannot spill into other pieces.
    const uint64_t x = static_cast<uint64_t>(a[i][0]) & 3;
    const uint64_t y = static_cast<uint64_t>(a[i][1]) & 7;
    xys |= ((x << 3) | y) << (i * 5);
  }
  return *this;
}

// Unpack xys into a[i] = {x, y} for the 10 pieces.
void State::toary (int a[10][2]) const {
  for (int i = 0; i < 10; i++) {
    const uint32_t xy = static_cast<uint32_t>(xys >> (i * 5));
    a[i][0] = (xy >> 3) & 3;  // x
    a[i][1] = xy & 7;         // y
  }
}

// Build the position from a 20-character layout string indexed as s[x * 5 + y]
// (four columns of five cells). Letters are listed above; any other character
// is an empty cell. A null or short string is treated as padded with empty
// cells, and pawns beyond the fourth are ignored.
void State::operator= (const char* s) {
  int a[10][2] = {};
  int pawn = 6;
  const size_t len = s ? strlen(s) : 0;
  // Scan from the bottom-right corner so that the last cell seen for a
  // multi-cell piece is its top-left corner.
  for (int x = 3; x >= 0; x--)
    for (int y = 4; y >= 0; y--) {
      const size_t k = static_cast<size_t>(x * 5 + y);
      if (k >= len) continue;
      int i = -1;
      switch (s[k]) {
        case 'c': i = 0; break;
        case 'g': i = 1; break;
        case 'z': i = 2; break;
        case 'h': i = 3; break;
        case 'l': i = 4; break;
        case 'm': i = 5; break;
        case 'p':
          if (pawn >= 10) continue;  // more pawns than slots
          i = pawn++;
          break;
        default: continue;
      }
      a[i][0] = x;
      a[i][1] = y;
    }
  *this = a;
}

// Print the board as 5 rows of 4 cells followed by a blank line.
void State::print () const {
  const char* b[5][4] = {};
  int a[10][2];
  toary(a);
  for (int i = 0; i < 10; i++) {
    const int x = a[i][0], y = a[i][1];
    for (int yy = y; yy < y + H[i] && yy < 5; yy++)      // clip: a corrupt state
      for (int xx = x; xx < x + W[i] && xx < 4; xx++)    // must not overrun b
        b[yy][xx] = NM[i];
  }
  for (int y = 0; y < 5; y++) {
    for (int x = 0; x < 4; x++) printf("%s", b[y][x] ? b[y][x] : " ");
    puts("");
  }
  puts("");
}

enum { QMAX = 100 * 1000 * 1000 };
// BFS queue and path store. The 40 spare slots cover one full expansion
// (10 pieces x 4 directions), because the QMAX bound is only checked once per
// dequeued position.
State states[QMAX + 40];
int qh, qt = 1;   // queue head, tail
set<State> seen;  // TODO: replace with a hash table merged into states
// unordered_set<uint64_t>, states[i].xys
bool can_move (int a[10][2], int i, int j) { // 无论成功失败, a[i]都被修改,需要被恢复 // 别优化,a[i][0]和a[i][1]要一起改 const int x = (a[i][0] += D[j][0]); const int y = (a[i][1] += D[j][1]); if (x < 0 || x + W[i] > 4) return false; if (y < 0 || y + H[i] > 5) return false; uint32_t bits = 0; for (int i = 0; i < 10; i++) { const int x = a[i][0], y = a[i][1]; for (int yy = y; yy < y + H[i]; yy++) for (int xx = x; xx < x + W[i]; xx++) bits |= 1 << (yy * 4 + xx); } return _mm_popcnt_u32(bits) == 18; } // 逆序输出。图片里的一步可能等于我们的多步。 void print_path () { int p = qh; while (p != -1) { states[p].print(); p = states[p].p; } } int main () { states[0] = "pp zz""ccghh""ccgll""pp mm"; states[0].p = -1; seen.insert(states[0]); for (; (qt < QMAX) && (qh < qt); qh++) { int a[10][2]; states[qh].toary(a); if (a[0][1] == 3) { print_path(); break; } for (int i = 0; i < 10; i++) for (int j = 0; j < 4; j++) { if (can_move(a, i, j)) { states[qt] = a; if (seen.find(states[qt]) == seen.end()) { seen.insert(states[qt]); states[qt++].p = qh; } // a[0][1]和(xys & 7)都是曹操的y坐标,按它排序? } a[i][0] -= D[j][0]; a[i][1] -= D[j][1]; } } return 0; }
posted @ 2025-10-21 18:31  华容道专家  阅读(8)  评论(0)    收藏  举报