- #include <stdio.h>
- #include <stdint.h>
- #include <string.h>
- #include <time.h>
- #include <immintrin.h>
- #include <xmmintrin.h>
- #include <set>
- #include "cwisstable.h"
- const char* NM[][4] = { {"曹","贼","真","爽"}, {"西","施"}, {"昭","君"}, {"貂","蝉"}, {"甄","姬"}, {"玉","环"}, {"美"}, {"美"}, {"美"}, {"美"} };
- int W[] = { 2, 2, 2, 2, 2, 2, 1, 1, 1, 1 }; // 默认5个水平条,随后修改
- int H[] = { 2, 1, 1, 1, 1, 1, 1, 1, 1, 1 };
- int T[10]; // Type
- const int D[][2] = { {0, 1}, {0, -1}, {-1, 0}, {1, 0} }; // 后面有两处用下标判断移动方向
- enum { MAX = 3600 * 10000 };
- struct State {
- uint8_t aa[10][2];
- #define CCY aa[0][0] // 曹操的y
- int p; // 局面路径的previous
- #define cpy20(dst, src) _mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((__m128i*)src)); *(int*)((uint8_t*)dst+16) = *(int*)((const uint8_t*)src+16)
- void operator= (const State& s) { cpy20(aa, s.aa); p = s.p; }
- void operator=(const char* s);
- void print(const char* s = "");
- } q[MAX + 10 * 4]; // 最多40个move. MAX很大; 没判断qt < MAX
- int qh, qt = 1; // queue head, tail
- void State::operator= (const char* s) {
- int p = 6;
- for (int x = 3; x >= 0; x--)
- for (int y = 4; y >= 0; y--) {
- #define CASE(c, i) case c: aa[i][1] = x; aa[i][0] = y; break;
- switch (s[x * 5 + y]) {
- // 曹关张黄子龙(l)马
- CASE('c', 0) CASE('g', 1) CASE('z', 2) CASE('h', 3) CASE('l', 4) CASE('m', 5)
- case 'p':
- aa[p][1] = x; aa[p++][0] = y; break; // pawn
- }
- }
- static const char* S[] = { "gg", "zz", "hh", "ll", "mm" };
- for (int i = 0; i < 5; i++) if (strstr(s, S[i])) W[i+1] = 1, H[i+1] = 2;
- for (int i = 0; i < 10; i++) T[i] = W[i] * 2 + H[i] - 2;
- }
- #define set2DArrayByCoordInA(a, b, what) for (int i = 0; i < 10; i++) { \
- const int x = a[i][1], y = a[i][0]; \
- for (int yy = y; yy < y + H[i]; yy++) \
- for (int xx = x; xx < x + W[i]; xx++) b[yy][xx] = what; \
- }
- void State::print (const char* s) {
- int idx[10] = {};
- const char* b[5][4] = {};
- set2DArrayByCoordInA(aa, b, NM[i][idx[i]++])
- for (int y = 0; y < 5; y++) {
- for (int x = 0; x < 4; x++) printf("%s", b[y][x] ? : " ");
- puts("");
- }
- printf("%s\n", s);
- }
- void print_path () { // 数组存的单链表就地翻转
- int prev = -1, next = -1, n = 0;
- for (int cur = qh; cur != -1; ++n) {
- next = q[cur].p; q[cur].p = prev;
- prev = cur; cur = next;
- }
- for (int p = 0; p != -1; p = q[p].p) q[p].print();
- printf("%d\n", n);
- }
- #pragma pack(1)
- struct {
- uint8_t b[5][4];
- uint64_t n; // unique number
- uint8_t _[4];
- } u __attribute__((aligned(32)));
- #pragma pack()
- #define Ah { \
- _mm256_store_si256((__m256i*)&u, _mm256_setzero_si256()); \
- set2DArrayByCoordInA(a, u.b, T[i]) \
- for (int y = 0; y < 5; y++) \
- for (int x = 0; x < 4; x++) u.n = u.n * 5 + u.b[y][x]; \
- }
- #ifdef st
- CWISS_DECLARE_FLAT_HASHSET(Set, uint64_t); Set set = Set_new(MAX);
- #define if_ok_add_state if (ok) { cpy20(q[qt].aa, a); \
- Ah if (!Set_contains(&set, &u.n)) { Set_insert(&set, &u.n); q[qt++].p = qh; } \
- }
- #else
- std::set<uint64_t> set;
- #define if_ok_add_state if (ok) { cpy20(q[qt].aa, a); \
- Ah if (set.find(u.n) == set.end()) { set.insert(u.n); q[qt++].p = qh; } \
- }
- #endif
- int main (int argc, char* argv[]) {
- //q[0] = "pp zz""ccghh""ccgll""pp mm";
- q[0] = (argc == 2) ? argv[1] : "pzzpg""cc g""ccllm""phhpm";
- q[0].p = -1;
- uint8_t a[10][2] __attribute__((aligned(32)));
- cpy20(a, q[0].aa);
- #ifdef st
- Ah Set_insert(&set, &u.n);
- #else
- Ah set.insert(u.n);
- #endif
- double tm = clock();
- for (; qh < qt; qh++) {
- if (q[qh].CCY == 3) { print_path(); break; }
- cpy20(a, q[qh].aa);
- uint8_t b[5][4] __attribute__((aligned(32))), overflow[12];
- _mm256_store_si256((__m256i*)b, _mm256_set1_epi8(1));
- set2DArrayByCoordInA(a, b, 0)
- int qt1 = qt;
- for (int j = 0; j < 4; j++) {
- int ok;
- for (int i = 0; i < 6; i++) {
- const int ox = a[i][1], oy = a[i][0];
- int x = (a[i][1] += D[j][0]), y = (a[i][0] += D[j][1]);
- if (x < 0 || x + W[i] > 4 || y < 0 || y + H[i] > 5) ok = 0;
- else { // D[]的顺序不能换!
- if (j == 0) { y = oy + H[i]; ok = b[y][x] && (W[i] == 1 || b[y][x + 1]); }
- else if (j == 1) ok = b[y][x] & (!(W[i] - 1) | b[y][x + 1]); // W[i]1或2; 不更快
- else if (j == 2) ok = b[y][x] & (!(H[i] - 1) | b[y + 1][x]);
- else { x = ox + W[i]; ok = b[y][x] && (H[i] == 1 || b[y + 1][x]); }
- }
- if_ok_add_state
- a[i][1] -= D[j][0]; a[i][0] -= D[j][1];
- }
- for (int i = 6; i < 10; i++) {
- const int ox = a[i][1], oy = a[i][0];
- int x = (a[i][1] += D[j][0]), y = (a[i][0] += D[j][1]);
- if (x < 0 || x + W[i] > 4 || y < 0 || y + H[i] > 5) ok = 0;
- else {
- if (j == 0) y = oy + H[i];
- else if (j == 3) x = ox + W[i];
- ok = b[y][x];
- }
- if_ok_add_state
- a[i][1] -= D[j][0]; a[i][0] -= D[j][1];
- }
- }
- #if 1
- int qt2 = qt - 1;
- if (qt2 > qt1 && q[qt2].CCY < q[qt1].CCY) { State t = q[qt1]; q[qt1] = q[qt2]; q[qt2] = t; }
- #endif
- }
- printf("%.6f %d\n", (clock() - tm) / CLOCKS_PER_SEC, qt);
- return 0;
- }
复制代码 有个负优化而不是bug. q[qt2].CCY < q[qt1].CCY的目的是优先搜索曹操靠下的局面。下方的y大,< 应为 >
既然优先把所有的块往下挪,为啥会出现q[qt2].CCY < q[qt1].CCY的情形呢?
当前局面是最左边时,程序确实优先往下挪,可挪不动。有2个合法移动:
美 美 美曹贼美 美 美
美曹贼貂 美真爽貂 美曹贼貂
昭真爽蝉 昭 蝉 昭真爽蝉
君甄玉环 君甄玉环 君甄玉环
美姬西施 美姬西施 美姬西施
我们是方向优先,且优先往下(前提是能挪动),块则是曹操第一,往下挪不动往上挪了。
qt1=11788 qt2=11789 qt=11790 (尚未使用)
把,和#if 0后发现,这个启发信息没用,速度一样(起码差别微乎其微)。
这个程序速度很快。以上结果是在Intel N100上跑的。注释掉找到局面则退出的代码,0.081639秒内搜索了81,340个局面,判断出无解。
On a SUN Sparcstation 2, the timings increase (at a rate of 0.005 second/pixel) from about 20 seconds for a 64**2 pixel image to about 320 seconds for 256**2 pixels. 〔详情〕
它功能强,支持各种块形状。我觉得华容道太难了,并不手动玩,也许可以改成3个长条6个小方块之类。
我alias gcc='g++ -mavx2'。Ah(q[0].aa)美吗?还是改成Emm?
“有2个合法移动”——不对啊,待查,结果:qh=10933 反正我0.02秒,不SEGV,不管啦!:-)
〔优化历史〕
AI写的五进制转换程序:
- #include <stdio.h>
- #include <stdint.h>
- #include <immintrin.h>
- // SSE优化版本
- uint64_t convert_to_quinary_sse(uint8_t b[5][4])
- {
- uint64_t n = 0;
- // 处理前16个元素(4行)使用SSE
- for (int y = 0; y < 4; y++) {
- // 加载4个uint8_t值到128位寄存器
- __m128i row = _mm_loadu_si128((__m128i *) b[y]);
- // 将uint8_t转换为uint64_t并进行五进制转换
- for (int x = 0; x < 4; x++) {
- uint8_t value = b[y][x];
- n = n * 5 + value;
- }
- }
- // 处理最后一行(第5行)
- for (int x = 0; x < 4; x++) {
- n = n * 5 + b[4][x];
- }
- return n;
- }
- // AVX2优化版本(处理所有20个元素)
- uint64_t convert_to_quinary_avx2(uint8_t b[5][4])
- {
- uint64_t n = 0;
- // 使用AVX2处理前16个元素
- for (int y = 0; y < 4; y++) {
- // 加载4个uint8_t值
- __m128i row = _mm_loadu_si128((__m128i *) b[y]);
- // 解包到32位整数
- __m128i zero = _mm_setzero_si128();
- __m128i row32 = _mm_cvtepu8_epi32(row);
- // 对每个元素进行五进制转换
- uint32_t values[4];
- _mm_storeu_si128((__m128i *) values, row32);
- for (int i = 0; i < 4; i++) {
- n = n * 5 + values[i];
- }
- }
- // 处理最后4个元素
- for (int x = 0; x < 4; x++) {
- n = n * 5 + b[4][x];
- }
- return n;
- }
- // 高度优化的AVX版本,使用并行计算
- uint64_t convert_to_quinary_avx_parallel(uint8_t b[5][4])
- {
- // 使用多个累加器并行计算
- uint64_t n1 = 0, n2 = 0, n3 = 0, n4 = 0;
- // 并行处理4行
- for (int y = 0; y < 4; y++) {
- // 加载当前行
- __m128i row = _mm_loadu_si128((__m128i *) b[y]);
- // 转换为32位整数
- __m128i row32 = _mm_cvtepu8_epi32(row);
- uint32_t values[4];
- _mm_storeu_si128((__m128i *) values, row32);
- // 并行累加(模拟并行处理)
- n1 = n1 * 625 + values[0] * 125 + values[1] * 25 + values[2] * 5 + values[3];
- }
- // 合并结果并添加最后一行
- uint64_t final_n = n1;
- for (int x = 0; x < 4; x++) {
- final_n = final_n * 5 + b[4][x];
- }
- return final_n;
- }
- // 基准版本(原始代码)
- uint64_t convert_to_quinary_baseline(uint8_t b[5][4])
- {
- uint64_t n = 0;
- for (int y = 0; y < 5; y++) {
- for (int x = 0; x < 4; x++) {
- n = n * 5 + b[y][x];
- }
- }
- return n;
- }
- int main()
- {
- uint8_t b[5][4] = {
- { 1, 2, 3, 4 },
- { 0, 4, 2, 1 },
- { 3, 1, 4, 2 },
- { 2, 0, 1, 3 },
- { 4, 3, 0, 2 }
- };
- uint64_t result_baseline = convert_to_quinary_baseline(b);
- uint64_t result_sse = convert_to_quinary_sse(b);
- uint64_t result_avx2 = convert_to_quinary_avx2(b);
- uint64_t result_avx_parallel = convert_to_quinary_avx_parallel(b);
- printf("基准版本结果: %lu\n", result_baseline);
- printf("SSE优化结果: %lu\n", result_sse);
- printf("AVX2优化结果: %lu\n", result_avx2);
- printf("AVX并行结果: %lu\n", result_avx_parallel);
- if (result_baseline == result_sse &&
- result_sse == result_avx2 && result_avx2 == result_avx_parallel) {
- printf("✓ 所有版本结果一致\n");
- } else
- printf("✗ 结果不一致\n");
- return 0;
- }
复制代码 View Code
来源:程序园用户自行投稿发布,如果侵权,请联系站长删除
免责声明:如果侵犯了您的权益,请联系站长,我们会及时删除侵权内容,谢谢合作! |