最强八皇后程序:位操作、SIMD、多线程、汇编

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <mmintrin.h>
#include <pthread.h>
#include <algorithm>

/*
Board sketch from the original notes (the leading number is presumably the
row index y -- confirm):
0 Q......Q
3 ..Q.....
4 ...Q....
5 ..Q.....
7 Q......Q
Diagonals are detected with x+y and x-y.
Layout of the three keys inside one 64-bit mask (see _b2):
x    range 0..7                       -> bits 0..7
x+y  range 0..14                      -> bits 8..22
x-y  range -7..7, biased to 0..14     -> bits 23..37
*/

// Raw 8-bit storage unit used for the per-queen records below.
typedef unsigned char byte;
// _b[i][y] is the 8-byte record of the queen in row y for worker i.
// Bytes 0..2 hold x, x+y and x-y (written by search() and tfn()); bytes 3..7
// are never written by the code in this file and are masked out of the
// 8-byte compare in search().
// The array starts on a 64-byte boundary and each _b[i] is 8*8 = 64 bytes, so
// every worker's slice starts on a 64-byte boundary as well.
// NOTE(review): presumably this keeps each worker on its own cache line -- confirm.
// NOTE(review): identifiers beginning with '_' at global scope are reserved
// for the implementation; consider renaming _b/_b2.
byte __attribute__((aligned(64))) _b[8][8][8];
// _b2[i][y]: the same queen as a bit mask with three bits set, following the
// layout in the comment above.
uint64_t  _b2[8][8];
// cnt[i]: number of complete placements found by worker i (incremented in search()).
int cnt[8];
// tid[i]: handle of worker thread i, filled by pthread_create in main().
pthread_t tid[8];

/*
 * Debug helper: prints bits 37..0 of n, most significant first, as '1' for a
 * set bit and '.' for a clear bit, followed by a newline. It then reads one
 * character from stdin, so the caller stops until input is available.
 *
 * Bit k is selected by 1 << k, so covering bits 0..37 takes 38 positions;
 * 37 is the highest bit used by the mask layout of _b2.
 */
void pr (uint64_t n) {
  for (int bit = 37; bit >= 0; --bit) {
    const bool is_set = (n >> bit) & 1u;
    putchar(is_set ? '1' : '.');
  }
  puts("");
  getchar();
}

/*
 * Recursive placement for worker i: tries every column of row cy and
 * recurses into row cy + 1 for each column that does not clash with the
 * queens already recorded in rows 0..cy-1.
 *
 *   i   worker index; only _b[i], _b2[i] and cnt[i] are touched.
 *   cy  row to fill. Rows 0..cy-1 of _b[i] and _b2[i] must already describe
 *       the queens placed so far.
 *
 * cnt[i] is incremented once for every conflict-free placement in row 7.
 *
 * The clash test is deliberately computed four ways (MMX byte compare, bit
 * masks, bitwise |, short-circuit ||) and the four answers are cross-checked.
 * A disagreement means one of the encodings is broken, so it is reported on
 * stderr and the process exits with a failure status instead of silently
 * exiting with status 0.
 */
void search (int i, int cy) {
  byte (&b)[8][8] = _b[i];
  byte (&c)[8] = b[cy];  // record of the candidate queen; same storage for every x
  for (int x = 0; x < 8; x++) {
    // Byte keys: column, x+y diagonal, x-y diagonal. A negative x - cy wraps
    // when stored in an unsigned byte; equality between two keys is unaffected.
    c[0] = x; c[1] = x + cy; c[2] = x - cy;
    // Same keys as single bits: column -> bit x, x+y -> bit 8 + (x+y),
    // x-y -> bit 23 + (x-y+7). The last shift reaches bit 37, hence 1ull.
    _b2[i][cy] = (1 << x) | (1 << (x + cy + 8)) | (1ull << (x - cy + 7 + 23));
    bool ok = true;
    for (int y = 0; y < cy; y++) {
      // Byte-wise equality of the two 8-byte records (pcmpeqb in the listing).
      const uint64_t r = uint64_t(_mm_cmpeq_pi8(*(__m64*)c, *(__m64*)b[y]));
      _mm_empty();  // emits emms: clears the MMX state after the compare
      int f[] = {
        r & 0xFFFFFF ? 1 : 0,            // only the low three bytes carry keys
        _b2[i][cy] & _b2[i][y] ? 1 : 0,  // any shared bit means a shared key
        (c[0] == b[y][0]) | (c[1] == b[y][1]) | (c[2] == b[y][2]),
        c[0] == b[y][0] || c[1] == b[y][1] || c[2] == b[y][2]
      };
      // Compares f[1..3] with f[0..2], i.e. all four answers must agree.
      if (!std::equal(f+1, f+4, f)) {
        fprintf(stderr,
                "search: conflict checks disagree (worker %d, row %d vs row %d, x=%d): %d %d %d %d\n",
                i, cy, y, x, f[0], f[1], f[2], f[3]);
        exit(EXIT_FAILURE);
      }
      if (f[0]) { ok = false; break; }
    }
    if (!ok) continue;
    if (cy == 7) ++cnt[i];
    else search(i, cy + 1);
  }
}

void* tfn (void* arg) {
  int x = int(long(arg)), cy = 0;
  _b[x][0][0] = _b[x][0][1] = _b[x][0][2] = x;
  _b2[x][cy] = (1 << x) | (1 << (x + cy + 8)) | (1ull << (x - cy + 7 + 23));
  search(x, 1); return 0;
}

/*
 * Starts one worker thread per first-row column (tfn with index 0..7), waits
 * for all of them, then prints the running total of cnt[] after each worker,
 * one number per line; the last line is the grand total.
 *
 * Returns 0 on success. If a thread cannot be created or joined, a message
 * is written to stderr and EXIT_FAILURE is returned; tid[i] is not valid
 * after a failed pthread_create, so it must not be joined.
 */
int main () {
  enum { N = 8 };
  for (int i = 0; i < N; i++) {
    const int err = pthread_create(tid+i, NULL, tfn, (void*)long(i));
    if (err != 0) {
      fprintf(stderr, "pthread_create failed for worker %d (error %d)\n", i, err);
      return EXIT_FAILURE;
    }
  }
  for (int i = 0; i < N; i++) {
    const int err = pthread_join(tid[i], NULL);
    if (err != 0) {
      fprintf(stderr, "pthread_join failed for worker %d (error %d)\n", i, err);
      return EXIT_FAILURE;
    }
  }
  int n = 0;
  for (int i = 0; i < N; i++) printf("%d\n", n += cnt[i]);
  return 0;
}

和0xFFFFFF对应的是andl $16777215, %eax(16777215 就是 0xFFFFFF)。编译器输出的汇编把立即数按十进制打印,并不是汇编不支持十六进制。

movq  %rdx, -48(%rbp)
movq  %rax, -56(%rbp)
movq  -56(%rbp), %xmm0
movq  -48(%rbp), %xmm1
pcmpeqb %xmm1, %xmm0
movq  %xmm0, %rax
movq  %rax, -40(%rbp)
emms
nop
movq  -40(%rbp), %rax
andl  $16777215, %eax
testq %rax, %rax

太长了,再来个t.cpp看看|和||:

// t.cpp, variant 1: the right operand of || is the nonzero literal 456, so
// (n || 456) is 1 for every n and the literal itself is never needed; the
// -O0 listing that follows shows only `orl $123` and `xorl $1`.
int main (int n, char**) { return (n | 123) ^ (n || 456); } -O0 出不来456

orl     $123, %eax
xorl    $1, %eax
 
// Returns 456 through a real function (_Z1fv in the listing), so the value is
// no longer a literal at the place where it is used.
int f () { return 456; }
// t.cpp, variant 2: with f() as the right operand, || short-circuits -- f()
// is called only when n is zero and its result is then tested, which is the
// call/test/branch sequence in the listing that follows. The | operand is
// always evaluated.
int main (int n, char**) { return (n | 123) ^ (n || f()); }
 
call  _Z1fv
testl %eax, %eax
je  .L5
.L4:
movl  $1, %eax
jmp .L6
.L5:
movl  $0, %eax
.L6:
 
_Z1fv:
pushq %rbp
movq  %rsp, %rbp
movl  $456, %eax
popq  %rbp
ret

 

posted @ 2025-10-19 15:57  华容道专家  阅读(7)  评论(0)    收藏  举报