Closed Nyanyan closed 3 months ago
before ffo40-49
Egaroucid_for_Console.exe -l 60 -nobook -thread 32 -hash 25 -solve problem/ffo40-49.txt
| Level| Depth| Move| Score| Time| Nodes| NPS|
| 60| 20@100%| a2| +38| 000:00:00.027| 13778717| 510322851|
| 60| 22@100%| h4| +0| 000:00:00.082| 30713919| 374559987|
| 60| 22@100%| g2| +6| 000:00:00.110| 44439509| 403995536|
| 60| 23@100%| c7| -12| 000:00:00.249| 121471782| 487838481|
| 60| 23@100%| d2| -14| 000:00:00.057| 15384519| 269903842|
| 60| 24@100%| b2| +6| 000:00:00.505| 309139835| 612158089|
| 60| 24@100%| b3| -8| 000:00:00.195| 81527384| 418089148|
| 60| 25@100%| g2| +4| 000:00:00.146| 75374684| 516264958|
| 60| 25@100%| f6| +28| 000:00:00.473| 223350840| 472200507|
| 60| 26@100%| e1| +16| 000:00:00.698| 370742498| 531149710|
total 1285923687 nodes in 2.542s NPS 505870844
メモリ参照を使わないで書いてみたが、遅い
Egaroucid_for_Console.exe -l 60 -nobook -thread 32 -hash 25 -solve problem/ffo40-49.txt
| Level| Depth| Move| Score| Time| Nodes| NPS|
| 60| 20@100%| a2| +38| 000:00:00.030| 13099607| 436653566|
| 60| 22@100%| h4| +0| 000:00:00.109| 32234340| 295727889|
| 60| 22@100%| g2| +6| 000:00:00.133| 41773840| 314089022|
| 60| 23@100%| c7| -12| 000:00:00.282| 107729664| 382020085|
| 60| 23@100%| d2| -14| 000:00:00.065| 15585394| 239775292|
| 60| 24@100%| b2| +6| 000:00:00.629| 307969036| 489616909|
| 60| 24@100%| b3| -8| 000:00:00.290| 114248198| 393959303|
| 60| 25@100%| g2| +4| 000:00:00.160| 59093201| 369332506|
| 60| 25@100%| f6| +28| 000:00:00.594| 223755799| 376693264|
| 60| 26@100%| e1| +16| 000:00:00.893| 382967888| 428855417|
total 1298456967 nodes in 3.185s NPS 407678796
全体的に改善の余地はある(例えばgenetric版だとclzが遅いなど)ので、その辺を改善してみると良さそう
clzを組み込みの_lzcnt_u64
に置き換えると速くなるので、CLZが遅いので間違いなさそう
Egaroucid_for_Console.exe -l 60 -nobook -thread 32 -hash 25 -solve problem/ffo40-49.txt
| Level| Depth| Move| Score| Time| Nodes| NPS|
| 60| 20@100%| a2| +38| 000:00:00.024| 13603762| 566823416|
| 60| 22@100%| h4| +0| 000:00:00.073| 29770152| 407810301|
| 60| 22@100%| g2| +6| 000:00:00.089| 47413373| 532734528|
| 60| 23@100%| c7| -12| 000:00:00.192| 95624186| 498042635|
| 60| 23@100%| d2| -14| 000:00:00.053| 16642411| 314007754|
| 60| 24@100%| b2| +6| 000:00:00.441| 306822738| 695743170|
| 60| 24@100%| b3| -8| 000:00:00.215| 115481065| 537121232|
| 60| 25@100%| g2| +4| 000:00:00.126| 68325712| 542267555|
| 60| 25@100%| f6| +28| 000:00:00.406| 206639141| 508963401|
| 60| 26@100%| e1| +16| 000:00:00.604| 350398833| 580130518|
total 1250721373 nodes in 2.223s NPS 562627698
多分、以前の実装で良い気がするので戻す。
ビット演算のみでflip計算はこれを使った
inline uint64_t calc_flip(const uint64_t player, const uint64_t opponent, const int place){
uint64_t line, outflank;
uint64_t mopponent = opponent & 0x7E7E7E7E7E7E7E7EULL;
int rplace = HW2_M1 - place;
pos = place;
flip = 0;
// downward
line = 0x0080808080808080ULL >> rplace;
outflank = (0x8000000000000000ULL >> clz(~opponent & line)) & player;
flip |= ((~outflank + 1) * 2) & line;
// upward
line = 0x0101010101010100ULL << place;
outflank = ((opponent | ~line) + 1) & line & player;
flip |= (outflank - (outflank != 0)) & line;
// right
line = 0x7f00000000000000ULL >> rplace;
outflank = (0x8000000000000000ULL >> clz(~mopponent & line)) & player;
flip |= ((~outflank + 1) * 2) & line;
// left
line = 0x00000000000000feULL << place;
outflank = ((mopponent | ~line) + 1) & line & player;
flip |= (outflank - (outflank != 0)) & line;
// d7 downward
line = 0x0102040810204000ULL >> rplace;
outflank = (0x8000000000000000ULL >> clz(~mopponent & line)) & player;
flip |= ((~outflank + 1) * 2) & line;
// d7 upward
line = 0x0002040810204080ULL << place;
outflank = ((mopponent | ~line) + 1) & line & player;
flip |= (outflank - (outflank != 0)) & line;
// d9 downward
line = 0x0040201008040201ULL >> rplace;
outflank = (0x8000000000000000ULL >> clz(~mopponent & line)) & player;
flip |= ((~outflank + 1) * 2) & line;
// d9 upward
line = 0x8040201008040200ULL << place;
outflank = ((mopponent | ~line) + 1) & line & player;
flip |= (outflank - (outflank != 0)) & line;
return flip;
}
盤面180度回転を使えばCLZを使わずに済むのでは?
そこそこ速いがまだ遅い
Egaroucid_for_Console.exe -l 60 -nobook -thread 32 -hash 25 -solve problem/ffo40-49.txt
| Level| Depth| Move| Score| Time| Nodes| NPS|
| 60| 20@100%| a2| +38| 000:00:00.032| 13208387| 412762093|
| 60| 22@100%| h4| +0| 000:00:00.098| 30914292| 315451959|
| 60| 22@100%| g2| +6| 000:00:00.138| 48752638| 353279985|
| 60| 23@100%| c7| -12| 000:00:00.266| 107603818| 404525631|
| 60| 23@100%| b8| -14| 000:00:00.067| 16600570| 247769701|
| 60| 24@100%| b2| +6| 000:00:00.577| 309184035| 535847547|
| 60| 24@100%| b3| -8| 000:00:00.277| 112278606| 405337927|
| 60| 25@100%| g2| +4| 000:00:00.154| 63370847| 411499006|
| 60| 25@100%| f6| +28| 000:00:00.476| 185716723| 390161182|
| 60| 26@100%| e1| +16| 000:00:00.766| 351147122| 458416608|
total 1238777038 nodes in 2.851s NPS 434506151
これがメモリ参照なしで最速なので、諦める
inline uint64_t calc_flip(uint64_t player, uint64_t opponent, const int place){
uint64_t line, outflank;
uint64_t mopponent = opponent & 0x7e7e7e7e7e7e7e7eULL;
uint64_t rplayer = rotate_180(player);
uint64_t ropponent = rotate_180(opponent);
uint64_t mropponent = ropponent & 0x7e7e7e7e7e7e7e7eULL;
int rplace = HW2_M1 - place;
pos = place;
flip = 0;
uint64_t rflip = 0;
// downward
line = 0x0101010101010100ULL << rplace;
outflank = ((ropponent | ~line) + 1) & line & rplayer;
rflip |= (outflank - (outflank != 0)) & line;
// upward
line = 0x0101010101010100ULL << place;
outflank = ((opponent | ~line) + 1) & line & player;
flip |= (outflank - (outflank != 0)) & line;
// right
line = 0x00000000000000feULL << rplace;
outflank = ((mropponent | ~line) + 1) & line & rplayer;
rflip |= (outflank - (outflank != 0)) & line;
// left
line = 0x00000000000000feULL << place;
outflank = ((mopponent | ~line) + 1) & line & player;
flip |= (outflank - (outflank != 0)) & line;
// d7 downward
line = 0x0002040810204080ULL << rplace;
outflank = ((mropponent | ~line) + 1) & line & rplayer;
rflip |= (outflank - (outflank != 0)) & line;
// d7 upward
line = 0x0002040810204080ULL << place;
outflank = ((mopponent | ~line) + 1) & line & player;
flip |= (outflank - (outflank != 0)) & line;
// d9 downward
line = 0x8040201008040200ULL << rplace;
outflank = ((mropponent | ~line) + 1) & line & rplayer;
rflip |= (outflank - (outflank != 0)) & line;
// d9 upward
line = 0x8040201008040200ULL << place;
outflank = ((mopponent | ~line) + 1) & line & player;
flip |= (outflank - (outflank != 0)) & line;
flip |= rotate_180(rflip);
return flip;
}
generic版のflipを高速化する