Closed Nucobi closed 1 year ago
This is an auto-generated grading output. Checking code of Nucobi [2m[36m(prob_check pid=448745)[0m Epoch: 100/20000 - w:tensor([-0.5336, -0.2007, 0.6326, 0.6350, 0.5690, -0.4186, -0.1082, -0.4121, [2m[36m(prob_check pid=448745)[0m 0.5985, 0.6941, 0.6034, 0.4877, 0.1214, -0.1670], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([-0.0525], requires_grad=True) , loss: 0.5051519870758057 [2m[36m(prob_check pid=448745)[0m Epoch: 200/20000 - w:tensor([-0.9321, -0.2558, 0.9225, 0.9618, 0.6936, -0.5457, -0.6740, -0.5345, [2m[36m(prob_check pid=448745)[0m 0.9488, 1.1888, 0.8262, 0.5288, 0.2015, -0.0386], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([0.1067], requires_grad=True) , loss: 0.47191765904426575 [2m[36m(prob_check pid=448745)[0m Epoch: 300/20000 - w:tensor([-1.2779, -0.3450, 1.0665, 1.1162, 0.7110, -0.5657, -1.2618, -0.5158, [2m[36m(prob_check pid=448745)[0m 1.1864, 1.6144, 0.8255, 0.5776, 0.2449, 0.0688], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([0.2412], requires_grad=True) , loss: 0.45633289217948914 [2m[36m(prob_check pid=448744)[0m Epoch: 100/20000 - w:tensor([ 0.0932, 0.0565, 0.0798, -0.0152, 0.0998], requires_grad=True),b:tensor([0.1787], requires_grad=True) , loss: 0.35039034485816956 [2m[36m(prob_check pid=448744)[0m Epoch: 200/20000 - w:tensor([ 0.1419, -0.0062, 0.1103, -0.0190, 0.0590], requires_grad=True),b:tensor([0.2766], requires_grad=True) , loss: 0.3207457363605499 [2m[36m(prob_check pid=448744)[0m Epoch: 300/20000 - w:tensor([ 0.1875, -0.0638, 0.1332, -0.0218, -0.0342], requires_grad=True),b:tensor([0.3684], requires_grad=True) , loss: 0.30451416969299316 [2m[36m(prob_check pid=448745)[0m Epoch: 400/20000 - w:tensor([-1.5948, -0.4584, 1.1578, 1.1954, 0.7108, -0.5474, -1.7397, -0.4356, [2m[36m(prob_check pid=448745)[0m 1.3700, 1.9687, 0.7283, 0.6325, 0.3010, 0.1550], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([0.3561], requires_grad=True) , loss: 0.4452763497829437 [2m[36m(prob_check pid=448745)[0m Epoch: 500/20000 - w:tensor([-1.8865, -0.5854, 1.2328, 1.2503, 0.7059, -0.5202, -2.1225, -0.3365, [2m[36m(prob_check pid=448745)[0m 1.5217, 2.2604, 0.6040, 0.6860, 0.3643, 0.2287], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([0.4590], requires_grad=True) , loss: 0.4368930459022522 [2m[36m(prob_check pid=448745)[0m Epoch: 600/20000 - w:tensor([-2.1506, -0.7192, 1.3004, 1.2997, 0.7022, -0.4949, -2.4405, -0.2381, [2m[36m(prob_check pid=448745)[0m 1.6511, 2.5051, 0.4822, 0.7361, 0.4282, 0.2938], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([0.5529], requires_grad=True) , loss: 0.43067678809165955 [2m[36m(prob_check pid=448745)[0m Epoch: 700/20000 - w:tensor([-2.3853, -0.8545, 1.3603, 1.3482, 0.7008, -0.4740, -2.7150, -0.1477, [2m[36m(prob_check pid=448745)[0m 1.7638, 2.7140, 0.3728, 0.7820, 0.4886, 0.3517], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([0.6390], requires_grad=True) , loss: 0.426195353269577 [2m[36m(prob_check pid=448745)[0m Epoch: 800/20000 - w:tensor([-2.5905, -0.9867, 1.4109, 1.3953, 0.7014, -0.4572, -2.9558, -0.0675, [2m[36m(prob_check pid=448745)[0m 1.8636, 2.8933, 0.2780, 0.8233, 0.5433, 0.4029], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([0.7173], requires_grad=True) , loss: 0.42303764820098877 [2m[36m(prob_check pid=448744)[0m Epoch: 400/20000 - w:tensor([ 0.2303, -0.1042, 0.1495, -0.0241, -0.1725], requires_grad=True),b:tensor([0.4553], requires_grad=True) , loss: 0.2963702380657196 [2m[36m(prob_check pid=448744)[0m Epoch: 500/20000 - w:tensor([ 0.2703, -0.1313, 0.1595, -0.0257, -0.3520], requires_grad=True),b:tensor([0.5370], requires_grad=True) , loss: 0.2920752167701721 [2m[36m(prob_check pid=448744)[0m Epoch: 600/20000 - w:tensor([ 0.3084, -0.1491, 0.1643, -0.0268, -0.5677], requires_grad=True),b:tensor([0.6151], requires_grad=True) , loss: 0.28940489888191223 [2m[36m(prob_check pid=448744)[0m Epoch: 700/20000 - w:tensor([ 0.3458, -0.1606, 0.1650, -0.0275, -0.8147], requires_grad=True),b:tensor([0.6919], requires_grad=True) , loss: 0.28733888268470764 [2m[36m(prob_check pid=448744)[0m Epoch: 800/20000 - w:tensor([ 0.3833, -0.1677, 0.1628, -0.0279, -1.0893], requires_grad=True),b:tensor([0.7691], requires_grad=True) , loss: 0.28543660044670105 [2m[36m(prob_check pid=448745)[0m Epoch: 900/20000 - w:tensor([-2.7675e+00, -1.1121e+00, 1.4517e+00, 1.4401e+00, 7.0356e-01, [2m[36m(prob_check pid=448745)[0m -4.4387e-01, -3.1666e+00, 2.5891e-03, 1.9535e+00, 3.0466e+00, [2m[36m(prob_check pid=448745)[0m 1.9718e-01, 8.5972e-01, 5.9155e-01, 4.4725e-01], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([0.7877], requires_grad=True) , loss: 0.42085450887680054 [2m[36m(prob_check pid=448745)[0m Epoch: 1000/20000 - w:tensor([-2.9185, -1.2279, 1.4829, 1.4815, 0.7066, -0.4331, -3.3491, 0.0631, [2m[36m(prob_check pid=448745)[0m 2.0355, 3.1765, 0.1289, 0.8910, 0.6331, 0.4850], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([0.8504], requires_grad=True) , loss: 0.4193711578845978 [2m[36m(prob_check pid=448745)[0m Epoch: 1100/20000 - w:tensor([-3.0460, -1.3321, 1.5054, 1.5188, 0.7101, -0.4244, -3.5048, 0.1148, [2m[36m(prob_check pid=448745)[0m 2.1115, 3.2857, 0.0717, 0.9171, 0.6680, 0.5164], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([0.9055], requires_grad=True) , loss: 0.4183802604675293 [2m[36m(prob_check pid=448745)[0m Epoch: 1200/20000 - w:tensor([-3.1526, -1.4239, 1.5205, 1.5518, 0.7139, -0.4172, -3.6358, 0.1585, [2m[36m(prob_check pid=448745)[0m 2.1829, 3.3764, 0.0244, 0.9383, 0.6966, 0.5417], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([0.9536], requires_grad=True) , loss: 0.4177302420139313 [2m[36m(prob_check pid=448745)[0m Epoch: 1300/20000 - w:tensor([-3.2408, -1.5030, 1.5296, 1.5806, 0.7177, -0.4113, -3.7443, 0.1951, [2m[36m(prob_check pid=448745)[0m 2.2507, 3.4511, -0.0145, 0.9547, 0.7191, 0.5614], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([0.9951], requires_grad=True) , loss: 0.4173118770122528 [2m[36m(prob_check pid=448744)[0m Epoch: 900/20000 - w:tensor([ 0.4218, -0.1721, 0.1583, -0.0281, -1.3879], requires_grad=True),b:tensor([0.8484], requires_grad=True) , loss: 0.28352001309394836 [2m[36m(prob_check pid=448744)[0m Epoch: 1000/20000 - w:tensor([ 0.4619, -0.1746, 0.1522, -0.0281, -1.7081], requires_grad=True),b:tensor([0.9310], requires_grad=True) , loss: 0.281524658203125 [2m[36m(prob_check pid=448744)[0m Epoch: 1100/20000 - w:tensor([ 0.5040, -0.1760, 0.1450, -0.0280, -2.0478], requires_grad=True),b:tensor([1.0177], requires_grad=True) , loss: 0.279432088136673 [2m[36m(prob_check pid=448744)[0m Epoch: 1200/20000 - w:tensor([ 0.5483, -0.1769, 0.1369, -0.0279, -2.4051], requires_grad=True),b:tensor([1.1090], requires_grad=True) , loss: 0.2772413194179535 [2m[36m(prob_check pid=448744)[0m Epoch: 1300/20000 - w:tensor([ 0.5951, -0.1774, 0.1280, -0.0277, -2.7789], requires_grad=True),b:tensor([1.2052], requires_grad=True) , loss: 0.2749571204185486 [2m[36m(prob_check pid=448745)[0m Epoch: 1400/20000 - w:tensor([-3.3128, -1.5698, 1.5343, 1.6052, 0.7213, -0.4062, -3.8331, 0.2254, [2m[36m(prob_check pid=448745)[0m 2.3156, 3.5119, -0.0460, 0.9667, 0.7362, 0.5760], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.0308], requires_grad=True) , loss: 0.4170481264591217 [2m[36m(prob_check pid=448745)[0m Epoch: 1500/20000 - w:tensor([-3.3709, -1.6250, 1.5356, 1.6261, 0.7249, -0.4019, -3.9045, 0.2502, [2m[36m(prob_check pid=448745)[0m 2.3784, 3.5609, -0.0712, 0.9746, 0.7482, 0.5859], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.0612], requires_grad=True) , loss: 0.41688546538352966 [2m[36m(prob_check pid=448745)[0m Epoch: 1600/20000 - w:tensor([-3.4172, -1.6698, 1.5349, 1.6435, 0.7282, -0.3981, -3.9611, 0.2702, [2m[36m(prob_check pid=448745)[0m 2.4396, 3.5999, -0.0909, 0.9790, 0.7557, 0.5917], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.0871], requires_grad=True) , loss: 0.41678735613822937 [2m[36m(prob_check pid=448745)[0m Epoch: 1700/20000 - w:tensor([-3.4534, -1.7055, 1.5329, 1.6579, 0.7315, -0.3946, -4.0053, 0.2862, [2m[36m(prob_check pid=448745)[0m 2.4995, 3.6306, -0.1062, 0.9800, 0.7593, 0.5939], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.1091], requires_grad=True) , loss: 0.4167293310165405 [2m[36m(prob_check pid=448745)[0m Epoch: 1800/20000 - w:tensor([-3.4813, -1.7333, 1.5304, 1.6697, 0.7345, -0.3914, -4.0392, 0.2988, [2m[36m(prob_check pid=448745)[0m 2.5585, 3.6544, -0.1176, 0.9783, 0.7595, 0.5929], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.1280], requires_grad=True) , loss: 0.41669565439224243 [2m[36m(prob_check pid=448744)[0m Epoch: 1400/20000 - w:tensor([ 0.6443, -0.1778, 0.1186, -0.0275, -3.1679], requires_grad=True),b:tensor([1.3064], requires_grad=True) , loss: 0.272585928440094 [2m[36m(prob_check pid=448744)[0m Epoch: 1500/20000 - w:tensor([ 0.6961, -0.1781, 0.1086, -0.0274, -3.5711], requires_grad=True),b:tensor([1.4126], requires_grad=True) , loss: 0.27013474702835083 [2m[36m(prob_check pid=448744)[0m Epoch: 1600/20000 - w:tensor([ 0.7504, -0.1784, 0.0982, -0.0271, -3.9877], requires_grad=True),b:tensor([1.5239], requires_grad=True) , loss: 0.2676107585430145 [2m[36m(prob_check pid=448744)[0m Epoch: 1700/20000 - w:tensor([ 0.8073, -0.1788, 0.0873, -0.0269, -4.4166], requires_grad=True),b:tensor([1.6403], requires_grad=True) , loss: 0.2650211751461029 [2m[36m(prob_check pid=448744)[0m Epoch: 1800/20000 - w:tensor([ 0.8666, -0.1792, 0.0759, -0.0267, -4.8572], requires_grad=True),b:tensor([1.7615], requires_grad=True) , loss: 0.2623734176158905 [2m[36m(prob_check pid=448745)[0m Epoch: 1900/20000 - w:tensor([-3.5025, -1.7546, 1.5277, 1.6794, 0.7375, -0.3884, -4.0647, 0.3087, [2m[36m(prob_check pid=448745)[0m 2.6169, 3.6727, -0.1260, 0.9741, 0.7568, 0.5893], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.1443], requires_grad=True) , loss: 0.4166760742664337 [2m[36m(prob_check pid=448745)[0m Epoch: 2000/20000 - w:tensor([-3.5182, -1.7706, 1.5252, 1.6874, 0.7405, -0.3854, -4.0835, 0.3164, [2m[36m(prob_check pid=448745)[0m 2.6747, 3.6865, -0.1319, 0.9678, 0.7516, 0.5834], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.1587], requires_grad=True) , loss: 0.41666460037231445 [2m[36m(prob_check pid=448745)[0m Epoch: 2100/20000 - w:tensor([-3.5296, -1.7823, 1.5231, 1.6940, 0.7434, -0.3825, -4.0971, 0.3224, [2m[36m(prob_check pid=448745)[0m 2.7321, 3.6970, -0.1358, 0.9597, 0.7443, 0.5756], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.1715], requires_grad=True) , loss: 0.416657418012619 [2m[36m(prob_check pid=448745)[0m Epoch: 2200/20000 - w:tensor([-3.5377, -1.7907, 1.5213, 1.6996, 0.7463, -0.3796, -4.1067, 0.3272, [2m[36m(prob_check pid=448745)[0m 2.7893, 3.7048, -0.1381, 0.9501, 0.7352, 0.5662], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.1832], requires_grad=True) , loss: 0.4166525602340698 [2m[36m(prob_check pid=448745)[0m Epoch: 2300/20000 - w:tensor([-3.5433, -1.7966, 1.5200, 1.7044, 0.7492, -0.3767, -4.1133, 0.3309, [2m[36m(prob_check pid=448745)[0m 2.8463, 3.7107, -0.1393, 0.9391, 0.7247, 0.5554], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.1942], requires_grad=True) , loss: 0.41664886474609375 [2m[36m(prob_check pid=448744)[0m Epoch: 1900/20000 - w:tensor([ 0.9284, -0.1796, 0.0641, -0.0265, -5.3085], requires_grad=True),b:tensor([1.8875], requires_grad=True) , loss: 0.2596748471260071 [2m[36m(prob_check pid=448744)[0m Epoch: 2000/20000 - w:tensor([ 0.9927, -0.1801, 0.0518, -0.0263, -5.7698], requires_grad=True),b:tensor([2.0182], requires_grad=True) , loss: 0.2569330632686615 [2m[36m(prob_check pid=448744)[0m Epoch: 2100/20000 - w:tensor([ 1.0593, -0.1806, 0.0391, -0.0260, -6.2404], requires_grad=True),b:tensor([2.1535], requires_grad=True) , loss: 0.2541555166244507 [2m[36m(prob_check pid=448744)[0m Epoch: 2200/20000 - w:tensor([ 1.1283, -0.1812, 0.0260, -0.0258, -6.7196], requires_grad=True),b:tensor([2.2933], requires_grad=True) , loss: 0.25134989619255066 [2m[36m(prob_check pid=448744)[0m Epoch: 2300/20000 - w:tensor([ 1.1997, -0.1818, 0.0124, -0.0255, -7.2065], requires_grad=True),b:tensor([2.4373], requires_grad=True) , loss: 0.24852368235588074 [2m[36m(prob_check pid=448745)[0m Epoch: 2400/20000 - w:tensor([-3.5471, -1.8007, 1.5189, 1.7086, 0.7522, -0.3737, -4.1177, 0.3341, [2m[36m(prob_check pid=448745)[0m 2.9031, 3.7152, -0.1396, 0.9271, 0.7129, 0.5434], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.2048], requires_grad=True) , loss: 0.41664567589759827 [2m[36m(prob_check pid=448745)[0m Epoch: 2500/20000 - w:tensor([-3.5496, -1.8034, 1.5181, 1.7125, 0.7553, -0.3706, -4.1206, 0.3367, [2m[36m(prob_check pid=448745)[0m 2.9599, 3.7187, -0.1391, 0.9140, 0.7000, 0.5303], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.2152], requires_grad=True) , loss: 0.41664278507232666 [2m[36m(prob_check pid=448745)[0m Epoch: 2600/20000 - w:tensor([-3.5512, -1.8052, 1.5176, 1.7162, 0.7585, -0.3674, -4.1224, 0.3391, [2m[36m(prob_check pid=448745)[0m 3.0165, 3.7216, -0.1382, 0.9001, 0.6862, 0.5164], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.2256], requires_grad=True) , loss: 0.41663992404937744 [2m[36m(prob_check pid=448745)[0m Epoch: 2700/20000 - w:tensor([-3.5522, -1.8064, 1.5171, 1.7198, 0.7618, -0.3641, -4.1234, 0.3414, [2m[36m(prob_check pid=448745)[0m 3.0730, 3.7241, -0.1369, 0.8853, 0.6715, 0.5017], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.2361], requires_grad=True) , loss: 0.4166370630264282 [2m[36m(prob_check pid=448744)[0m Epoch: 2400/20000 - w:tensor([ 1.2733e+00, -1.8249e-01, -1.4835e-03, -2.5298e-02, -7.7007e+00], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([2.5854], requires_grad=True) , loss: 0.24568453431129456 [2m[36m(prob_check pid=448744)[0m Epoch: 2500/20000 - w:tensor([ 1.3491, -0.1832, -0.0158, -0.0251, -8.2014], requires_grad=True),b:tensor([2.7376], requires_grad=True) , loss: 0.24283979833126068 [2m[36m(prob_check pid=448744)[0m Epoch: 2600/20000 - w:tensor([ 1.4270, -0.1840, -0.0304, -0.0248, -8.7080], requires_grad=True),b:tensor([2.8934], requires_grad=True) , loss: 0.23999686539173126 [2m[36m(prob_check pid=448744)[0m Epoch: 2700/20000 - w:tensor([ 1.5071, -0.1849, -0.0454, -0.0246, -9.2200], requires_grad=True),b:tensor([3.0528], requires_grad=True) , loss: 0.23716290295124054 [2m[36m(prob_check pid=448744)[0m Epoch: 2800/20000 - w:tensor([ 1.5896, -0.1847, -0.0597, -0.0254, -9.7339], requires_grad=True),b:tensor([3.2173], requires_grad=True) , loss: 0.24375687539577484 [2m[36m(prob_check pid=448745)[0m Epoch: 2800/20000 - w:tensor([-3.5528, -1.8072, 1.5168, 1.7234, 0.7652, -0.3607, -4.1240, 0.3436, [2m[36m(prob_check pid=448745)[0m 3.1294, 3.7264, -0.1353, 0.8698, 0.6561, 0.4862], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.2468], requires_grad=True) , loss: 0.41663438081741333 [2m[36m(prob_check pid=448745)[0m Epoch: 2900/20000 - w:tensor([-3.5531, -1.8077, 1.5166, 1.7270, 0.7687, -0.3571, -4.1243, 0.3458, [2m[36m(prob_check pid=448745)[0m 3.1858, 3.7286, -0.1335, 0.8536, 0.6399, 0.4700], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.2577], requires_grad=True) , loss: 0.4166315495967865 [2m[36m(prob_check pid=448745)[0m Epoch: 3000/20000 - w:tensor([-3.5533, -1.8081, 1.5164, 1.7307, 0.7723, -0.3535, -4.1244, 0.3480, [2m[36m(prob_check pid=448745)[0m 3.2421, 3.7308, -0.1316, 0.8367, 0.6231, 0.4531], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.2690], requires_grad=True) , loss: 0.41662877798080444 [2m[36m(prob_check pid=448745)[0m Epoch: 3100/20000 - w:tensor([-3.5534, -1.8083, 1.5162, 1.7345, 0.7761, -0.3497, -4.1244, 0.3503, [2m[36m(prob_check pid=448745)[0m 3.2983, 3.7331, -0.1294, 0.8192, 0.6055, 0.4355], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.2807], requires_grad=True) , loss: 0.4166259765625 [2m[36m(prob_check pid=448745)[0m Epoch: 3200/20000 - w:tensor([-3.5534, -1.8085, 1.5161, 1.7384, 0.7800, -0.3458, -4.1243, 0.3527, [2m[36m(prob_check pid=448745)[0m 3.3543, 3.7354, -0.1271, 0.8009, 0.5873, 0.4172], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.2928], requires_grad=True) , loss: 0.41662320494651794 [2m[36m(prob_check pid=448744)[0m Epoch: 2900/20000 - w:tensor([ 1.6625, -0.1874, -0.0738, -0.0242, -10.2309], requires_grad=True),b:tensor([3.3625], requires_grad=True) , loss: 0.23185664415359497 [2m[36m(prob_check pid=448744)[0m Epoch: 3000/20000 - w:tensor([ 1.7370, -0.1877, -0.0880, -0.0239, -10.7312], requires_grad=True),b:tensor([3.5117], requires_grad=True) , loss: 0.22937484085559845 [2m[36m(prob_check pid=448744)[0m Epoch: 3100/20000 - w:tensor([ 1.8120, -0.1914, -0.1003, -0.0240, -11.2269], requires_grad=True),b:tensor([3.6614], requires_grad=True) , loss: 0.227021723985672 [2m[36m(prob_check pid=448744)[0m Epoch: 3200/20000 - w:tensor([ 1.8803, -0.1898, -0.1141, -0.0235, -11.7148], requires_grad=True),b:tensor([3.7987], requires_grad=True) , loss: 0.22477249801158905 [2m[36m(prob_check pid=448744)[0m Epoch: 3300/20000 - w:tensor([ 1.9512, -0.1905, -0.1272, -0.0234, -12.2071], requires_grad=True),b:tensor([3.9409], requires_grad=True) , loss: 0.2225681096315384 [2m[36m(prob_check pid=448745)[0m Epoch: 3300/20000 - w:tensor([-3.5534, -1.8087, 1.5159, 1.7424, 0.7841, -0.3417, -4.1242, 0.3553, [2m[36m(prob_check pid=448745)[0m 3.4104, 3.7378, -0.1247, 0.7819, 0.5684, 0.3982], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.3052], requires_grad=True) , loss: 0.4166204035282135 [2m[36m(prob_check pid=448745)[0m Epoch: 3400/20000 - w:tensor([-3.5534, -1.8089, 1.5157, 1.7465, 0.7882, -0.3375, -4.1240, 0.3579, [2m[36m(prob_check pid=448745)[0m 3.4663, 3.7403, -0.1222, 0.7623, 0.5487, 0.3786], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.3181], requires_grad=True) , loss: 0.41661760210990906 [2m[36m(prob_check pid=448745)[0m Epoch: 3500/20000 - w:tensor([-3.5533, -1.8091, 1.5156, 1.7508, 0.7926, -0.3332, -4.1239, 0.3607, [2m[36m(prob_check pid=448745)[0m 3.5221, 3.7430, -0.1195, 0.7420, 0.5284, 0.3583], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.3314], requires_grad=True) , loss: 0.4166147708892822 [2m[36m(prob_check pid=448745)[0m Epoch: 3600/20000 - w:tensor([-3.5533, -1.8093, 1.5154, 1.7552, 0.7970, -0.3287, -4.1238, 0.3636, [2m[36m(prob_check pid=448745)[0m 3.5778, 3.7457, -0.1167, 0.7210, 0.5075, 0.3372], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.3452], requires_grad=True) , loss: 0.4166119396686554 [2m[36m(prob_check pid=448745)[0m Epoch: 3700/20000 - w:tensor([-3.5533, -1.8094, 1.5153, 1.7598, 0.8016, -0.3241, -4.1236, 0.3666, [2m[36m(prob_check pid=448745)[0m 3.6335, 3.7486, -0.1138, 0.6993, 0.4858, 0.3155], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.3593], requires_grad=True) , loss: 0.41660910844802856 [2m[36m(prob_check pid=448744)[0m Epoch: 3400/20000 - w:tensor([ 2.0180, -0.1927, -0.1383, -0.0233, -12.6844], requires_grad=True),b:tensor([4.0747], requires_grad=True) , loss: 0.22054751217365265 [2m[36m(prob_check pid=448744)[0m Epoch: 3500/20000 - w:tensor([ 2.0820, -0.1926, -0.1508, -0.0231, -13.1618], requires_grad=True),b:tensor([4.2036], requires_grad=True) , loss: 0.21859802305698395 [2m[36m(prob_check pid=448744)[0m Epoch: 3600/20000 - w:tensor([ 2.1483, -0.1935, -0.1628, -0.0229, -13.6447], requires_grad=True),b:tensor([4.3366], requires_grad=True) , loss: 0.21668921411037445 [2m[36m(prob_check pid=448744)[0m Epoch: 3700/20000 - w:tensor([ 2.2105, -0.1955, -0.1731, -0.0228, -14.1128], requires_grad=True),b:tensor([4.4610], requires_grad=True) , loss: 0.21487797796726227 [2m[36m(prob_check pid=448744)[0m Epoch: 3800/20000 - w:tensor([ 2.2711, -0.1956, -0.1846, -0.0226, -14.5835], requires_grad=True),b:tensor([4.5828], requires_grad=True) , loss: 0.21313835680484772 [2m[36m(prob_check pid=448745)[0m Epoch: 3800/20000 - w:tensor([-3.5533, -1.8096, 1.5151, 1.7645, 0.8064, -0.3193, -4.1235, 0.3697, [2m[36m(prob_check pid=448745)[0m 3.6891, 3.7516, -0.1107, 0.6769, 0.4634, 0.2931], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.3739], requires_grad=True) , loss: 0.4166063368320465 [2m[36m(prob_check pid=448745)[0m Epoch: 3900/20000 - w:tensor([-3.5533, -1.8098, 1.5150, 1.7693, 0.8112, -0.3144, -4.1234, 0.3730, [2m[36m(prob_check pid=448745)[0m 3.7446, 3.7548, -0.1076, 0.6538, 0.4403, 0.2700], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.3890], requires_grad=True) , loss: 0.4166034758090973 [2m[36m(prob_check pid=448745)[0m Epoch: 4000/20000 - w:tensor([-3.5532, -1.8100, 1.5148, 1.7743, 0.8163, -0.3094, -4.1232, 0.3764, [2m[36m(prob_check pid=448745)[0m 3.8000, 3.7580, -0.1043, 0.6300, 0.4165, 0.2461], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.4044], requires_grad=True) , loss: 0.41660070419311523 [2m[36m(prob_check pid=448745)[0m Epoch: 4100/20000 - w:tensor([-3.5532, -1.8102, 1.5147, 1.7794, 0.8214, -0.3042, -4.1231, 0.3799, [2m[36m(prob_check pid=448745)[0m 3.8554, 3.7614, -0.1009, 0.6055, 0.3920, 0.2216], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.4204], requires_grad=True) , loss: 0.416597843170166 [2m[36m(prob_check pid=448745)[0m Epoch: 4200/20000 - w:tensor([-3.5532, -1.8104, 1.5145, 1.7847, 0.8268, -0.2989, -4.1230, 0.3836, [2m[36m(prob_check pid=448745)[0m 3.9107, 3.7650, -0.0973, 0.5802, 0.3668, 0.1964], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.4367], requires_grad=True) , loss: 0.41659507155418396 [2m[36m(prob_check pid=448744)[0m Epoch: 3900/20000 - w:tensor([ 2.3350, -0.1952, -0.1946, -0.0216, -15.0592], requires_grad=True),b:tensor([4.7095], requires_grad=True) , loss: 0.22531044483184814 [2m[36m(prob_check pid=448744)[0m Epoch: 4000/20000 - w:tensor([ 2.3904, -0.1985, -0.2050, -0.0225, -15.5151], requires_grad=True),b:tensor([4.8213], requires_grad=True) , loss: 0.20986206829547882 [2m[36m(prob_check pid=448744)[0m Epoch: 4100/20000 - w:tensor([ 2.4464, -0.1986, -0.2155, -0.0223, -15.9752], requires_grad=True),b:tensor([4.9334], requires_grad=True) , loss: 0.208336740732193 [2m[36m(prob_check pid=448744)[0m Epoch: 4200/20000 - w:tensor([ 2.5043, -0.1996, -0.2256, -0.0222, -16.4418], requires_grad=True),b:tensor([5.0490], requires_grad=True) , loss: 0.20681245625019073 [2m[36m(prob_check pid=448745)[0m Epoch: 4300/20000 - w:tensor([-3.5532, -1.8105, 1.5144, 1.7901, 0.8322, -0.2934, -4.1229, 0.3874, [2m[36m(prob_check pid=448745)[0m 3.9660, 3.7686, -0.0937, 0.5543, 0.3409, 0.1704], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.4535], requires_grad=True) , loss: 0.41659221053123474 [2m[36m(prob_check pid=448745)[0m Epoch: 4400/20000 - w:tensor([-3.5532, -1.8107, 1.5142, 1.7957, 0.8378, -0.2878, -4.1227, 0.3913, [2m[36m(prob_check pid=448745)[0m 4.0212, 3.7724, -0.0899, 0.5277, 0.3143, 0.1437], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.4707], requires_grad=True) , loss: 0.4165894389152527 [2m[36m(prob_check pid=448745)[0m Epoch: 4500/20000 - w:tensor([-3.5532, -1.8109, 1.5140, 1.8014, 0.8436, -0.2821, -4.1226, 0.3953, [2m[36m(prob_check pid=448745)[0m 4.0763, 3.7763, -0.0860, 0.5003, 0.2869, 0.1164], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.4884], requires_grad=True) , loss: 0.41658666729927063 [2m[36m(prob_check pid=448745)[0m Epoch: 4600/20000 - w:tensor([-3.5531, -1.8111, 1.5139, 1.8072, 0.8494, -0.2762, -4.1225, 0.3995, [2m[36m(prob_check pid=448745)[0m 4.1314, 3.7803, -0.0819, 0.4723, 0.2589, 0.0883], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.5065], requires_grad=True) , loss: 0.41658392548561096 [2m[36m(prob_check pid=448745)[0m Epoch: 4700/20000 - w:tensor([-3.5531, -1.8113, 1.5137, 1.8132, 0.8555, -0.2701, -4.1224, 0.4037, [2m[36m(prob_check pid=448745)[0m 4.1865, 3.7845, -0.0778, 0.4435, 0.2302, 0.0595], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.5251], requires_grad=True) , loss: 0.4165812134742737 [2m[36m(prob_check pid=448744)[0m Epoch: 4300/20000 - w:tensor([ 2.5604, -0.2026, -0.2341, -0.0223, -16.8976], requires_grad=True),b:tensor([5.1601], requires_grad=True) , loss: 0.2053866684436798 [2m[36m(prob_check pid=448744)[0m Epoch: 4400/20000 - w:tensor([ 2.6122, -0.2018, -0.2442, -0.0221, -17.3506], requires_grad=True),b:tensor([5.2634], requires_grad=True) , loss: 0.20402900874614716 [2m[36m(prob_check pid=448744)[0m Epoch: 4500/20000 - w:tensor([ 2.6663, -0.2027, -0.2536, -0.0220, -17.8097], requires_grad=True),b:tensor([5.3706], requires_grad=True) , loss: 0.20267240703105927 [2m[36m(prob_check pid=448744)[0m Epoch: 4600/20000 - w:tensor([ 2.7201, -0.2064, -0.2615, -0.0220, -18.2635], requires_grad=True),b:tensor([5.4767], requires_grad=True) , loss: 0.20145060122013092 [2m[36m(prob_check pid=448744)[0m Epoch: 4700/20000 - w:tensor([ 2.7693, -0.2050, -0.2710, -0.0218, -18.7126], requires_grad=True),b:tensor([5.5739], requires_grad=True) , loss: 0.20014654099941254 [2m[36m(prob_check pid=448745)[0m Epoch: 4800/20000 - w:tensor([-3.5531, -1.8115, 1.5136, 1.8193, 0.8616, -0.2640, -4.1222, 0.4082, [2m[36m(prob_check pid=448745)[0m 4.2416, 3.7888, -0.0735, 0.4141, 0.2008, 0.0301], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.5440], requires_grad=True) , loss: 0.4165785014629364 [2m[36m(prob_check pid=448745)[0m Epoch: 4900/20000 - w:tensor([-3.5531e+00, -1.8117e+00, 1.5134e+00, 1.8256e+00, 8.6792e-01, [2m[36m(prob_check pid=448745)[0m -2.5766e-01, -4.1221e+00, 4.1268e-01, 4.2966e+00, 3.7932e+00, [2m[36m(prob_check pid=448745)[0m -6.9057e-02, 3.8397e-01, 1.7065e-01, -5.3630e-05], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.5634], requires_grad=True) , loss: 0.4165758192539215 [2m[36m(prob_check pid=448745)[0m Epoch: 5000/20000 - w:tensor([-3.5531, -1.8119, 1.5133, 1.8320, 0.8744, -0.2512, -4.1220, 0.4173, [2m[36m(prob_check pid=448745)[0m 4.3517, 3.7977, -0.0645, 0.3532, 0.1399, -0.0309], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.5833], requires_grad=True) , loss: 0.41657310724258423 [2m[36m(prob_check pid=448745)[0m Epoch: 5100/20000 - w:tensor([-3.5531, -1.8121, 1.5131, 1.8385, 0.8809, -0.2446, -4.1219, 0.4221, [2m[36m(prob_check pid=448745)[0m 4.4067, 3.8023, -0.0599, 0.3217, 0.1084, -0.0624], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.6035], requires_grad=True) , loss: 0.4165705144405365 [2m[36m(prob_check pid=448745)[0m Epoch: 5200/20000 - w:tensor([-3.5531, -1.8123, 1.5130, 1.8451, 0.8876, -0.2379, -4.1218, 0.4270, [2m[36m(prob_check pid=448745)[0m 4.4617, 3.8071, -0.0551, 0.2896, 0.0763, -0.0945], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.6241], requires_grad=True) , loss: 0.416567862033844 [2m[36m(prob_check pid=448744)[0m Epoch: 4800/20000 - w:tensor([ 2.8206, -0.2058, -0.2799, -0.0218, -19.1679], requires_grad=True),b:tensor([5.6749], requires_grad=True) , loss: 0.1989196091890335 [2m[36m(prob_check pid=448744)[0m Epoch: 4900/20000 - w:tensor([ 2.8709, -0.2087, -0.2871, -0.0218, -19.6145], requires_grad=True),b:tensor([5.7733], requires_grad=True) , loss: 0.19775842130184174 [2m[36m(prob_check pid=448744)[0m Epoch: 5000/20000 - w:tensor([ 2.9175, -0.2080, -0.2961, -0.0217, -20.0592], requires_grad=True),b:tensor([5.8646], requires_grad=True) , loss: 0.19664782285690308 [2m[36m(prob_check pid=448744)[0m Epoch: 5100/20000 - w:tensor([ 2.9662, -0.2089, -0.3043, -0.0216, -20.5103], requires_grad=True),b:tensor([5.9597], requires_grad=True) , loss: 0.1955360770225525 [2m[36m(prob_check pid=448744)[0m Epoch: 5200/20000 - w:tensor([ 3.0136, -0.2119, -0.3109, -0.0217, -20.9511], requires_grad=True),b:tensor([6.0516], requires_grad=True) , loss: 0.1944892257452011 [2m[36m(prob_check pid=448745)[0m Epoch: 5300/20000 - w:tensor([-3.5531, -1.8125, 1.5128, 1.8519, 0.8945, -0.2311, -4.1217, 0.4320, [2m[36m(prob_check pid=448745)[0m 4.5167, 3.8120, -0.0502, 0.2568, 0.0435, -0.1273], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.6452], requires_grad=True) , loss: 0.4165653586387634 [2m[36m(prob_check pid=448745)[0m Epoch: 5400/20000 - w:tensor([-3.5531, -1.8127, 1.5127, 1.8589, 0.9014, -0.2241, -4.1216, 0.4371, [2m[36m(prob_check pid=448745)[0m 4.5718, 3.8170, -0.0452, 0.2234, 0.0101, -0.1608], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.6666], requires_grad=True) , loss: 0.41656285524368286 [2m[36m(prob_check pid=448745)[0m Epoch: 5500/20000 - w:tensor([-3.5531, -1.8128, 1.5125, 1.8659, 0.9085, -0.2170, -4.1215, 0.4423, [2m[36m(prob_check pid=448745)[0m 4.6268, 3.8221, -0.0401, 0.1893, -0.0239, -0.1948], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.6885], requires_grad=True) , loss: 0.4165603220462799 [2m[36m(prob_check pid=448745)[0m Epoch: 5600/20000 - w:tensor([-3.5531, -1.8130, 1.5124, 1.8731, 0.9158, -0.2098, -4.1214, 0.4476, [2m[36m(prob_check pid=448745)[0m 4.6818, 3.8273, -0.0349, 0.1546, -0.0586, -0.2295], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.7107], requires_grad=True) , loss: 0.4165578782558441 [2m[36m(prob_check pid=448744)[0m Epoch: 5300/20000 - w:tensor([ 3.0573, -0.2111, -0.3193, -0.0216, -21.3894], requires_grad=True),b:tensor([6.1364], requires_grad=True) , loss: 0.19348856806755066 [2m[36m(prob_check pid=448744)[0m Epoch: 5400/20000 - w:tensor([ 3.1029e+00, -2.1202e-01, -3.2689e-01, -2.1513e-02, -2.1834e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([6.2247], requires_grad=True) , loss: 0.19248606264591217 [2m[36m(prob_check pid=448744)[0m Epoch: 5500/20000 - w:tensor([ 3.1489e+00, -2.1567e-01, -3.3304e-01, -2.1526e-02, -2.2275e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([6.3128], requires_grad=True) , loss: 0.19164972007274628 [2m[36m(prob_check pid=448744)[0m Epoch: 5600/20000 - w:tensor([ 3.1904e+00, -2.1428e-01, -3.4101e-01, -2.1477e-02, -2.2711e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([6.3927], requires_grad=True) , loss: 0.19060322642326355 [2m[36m(prob_check pid=448744)[0m Epoch: 5700/20000 - w:tensor([ 3.2340e+00, -2.1512e-01, -3.4822e-01, -2.1441e-02, -2.3153e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([6.4760], requires_grad=True) , loss: 0.1896846741437912 [2m[36m(prob_check pid=448745)[0m Epoch: 5700/20000 - w:tensor([-3.5530, -1.8132, 1.5122, 1.8804, 0.9231, -0.2024, -4.1213, 0.4531, [2m[36m(prob_check pid=448745)[0m 4.7369, 3.8326, -0.0296, 0.1194, -0.0938, -0.2648], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.7333], requires_grad=True) , loss: 0.4165554642677307 [2m[36m(prob_check pid=448745)[0m Epoch: 5800/20000 - w:tensor([-3.5530, -1.8134, 1.5121, 1.8878, 0.9306, -0.1949, -4.1212, 0.4586, [2m[36m(prob_check pid=448745)[0m 4.7919, 3.8380, -0.0241, 0.0835, -0.1297, -0.3007], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.7563], requires_grad=True) , loss: 0.4165531098842621 [2m[36m(prob_check pid=448745)[0m Epoch: 5900/20000 - w:tensor([-3.5530, -1.8136, 1.5119, 1.8954, 0.9381, -0.1874, -4.1211, 0.4642, [2m[36m(prob_check pid=448745)[0m 4.8470, 3.8436, -0.0186, 0.0471, -0.1661, -0.3372], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.7796], requires_grad=True) , loss: 0.41655078530311584 [2m[36m(prob_check pid=448745)[0m Epoch: 6000/20000 - w:tensor([-3.5530, -1.8137, 1.5118, 1.9030, 0.9458, -0.1797, -4.1210, 0.4700, [2m[36m(prob_check pid=448745)[0m 4.9021, 3.8492, -0.0130, 0.0100, -0.2031, -0.3742], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.8033], requires_grad=True) , loss: 0.41654857993125916 [2m[36m(prob_check pid=448745)[0m Epoch: 6100/20000 - w:tensor([-3.5530, -1.8139, 1.5117, 1.9108, 0.9536, -0.1718, -4.1209, 0.4758, [2m[36m(prob_check pid=448745)[0m 4.9573, 3.8549, -0.0072, -0.0275, -0.2406, -0.4118], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.8273], requires_grad=True) , loss: 0.41654637455940247 [2m[36m(prob_check pid=448744)[0m Epoch: 5800/20000 - w:tensor([ 3.2770e+00, -2.1802e-01, -3.5401e-01, -2.1565e-02, -2.3588e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([6.5576], requires_grad=True) , loss: 0.18881107866764069 [2m[36m(prob_check pid=448744)[0m Epoch: 5900/20000 - w:tensor([ 3.3165e+00, -2.1730e-01, -3.6136e-01, -2.1423e-02, -2.4020e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([6.6327], requires_grad=True) , loss: 0.1879722774028778 [2m[36m(prob_check pid=448744)[0m Epoch: 6000/20000 - w:tensor([ 3.3580e+00, -2.1817e-01, -3.6809e-01, -2.1402e-02, -2.4459e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([6.7111], requires_grad=True) , loss: 0.18713104724884033 [2m[36m(prob_check pid=448744)[0m Epoch: 6100/20000 - w:tensor([ 3.3988e+00, -2.2125e-01, -3.7336e-01, -2.1554e-02, -2.4889e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([6.7878], requires_grad=True) , loss: 0.1863722950220108 [2m[36m(prob_check pid=448744)[0m Epoch: 6200/20000 - w:tensor([ 3.4360e+00, -2.2034e-01, -3.8028e-01, -2.1406e-02, -2.5315e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([6.8575], requires_grad=True) , loss: 0.18556778132915497 [2m[36m(prob_check pid=448745)[0m Epoch: 6200/20000 - w:tensor([-3.5530e+00, -1.8141e+00, 1.5115e+00, 1.9187e+00, 9.6154e-01, [2m[36m(prob_check pid=448745)[0m -1.6393e-01, -4.1208e+00, 4.8175e-01, 5.0124e+00, 3.8608e+00, [2m[36m(prob_check pid=448745)[0m -1.4073e-03, -6.5604e-02, -2.7873e-01, -4.4988e-01], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.8516], requires_grad=True) , loss: 0.41654419898986816 [2m[36m(prob_check pid=448745)[0m Epoch: 6300/20000 - w:tensor([-3.5530e+00, -1.8142e+00, 1.5114e+00, 1.9266e+00, 9.6956e-01, [2m[36m(prob_check pid=448745)[0m -1.5591e-01, -4.1207e+00, 4.8777e-01, 5.0677e+00, 3.8667e+00, [2m[36m(prob_check pid=448745)[0m 4.5146e-03, -1.0422e-01, -3.1733e-01, -4.8851e-01], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.8763], requires_grad=True) , loss: 0.41654208302497864 [2m[36m(prob_check pid=448745)[0m Epoch: 6400/20000 - w:tensor([-3.5530, -1.8144, 1.5113, 1.9347, 0.9777, -0.1478, -4.1206, 0.4939, [2m[36m(prob_check pid=448745)[0m 5.1229, 3.8727, 0.0105, -0.1433, -0.3564, -0.5276], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.9013], requires_grad=True) , loss: 0.4165400564670563 [2m[36m(prob_check pid=448745)[0m Epoch: 6500/20000 - w:tensor([-3.5530, -1.8145, 1.5112, 1.9429, 0.9859, -0.1396, -4.1205, 0.5001, [2m[36m(prob_check pid=448745)[0m 5.1782, 3.8788, 0.0166, -0.1829, -0.3960, -0.5673], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.9266], requires_grad=True) , loss: 0.4165380597114563 [2m[36m(prob_check pid=448745)[0m Epoch: 6600/20000 - w:tensor([-3.5530, -1.8147, 1.5111, 1.9512, 0.9942, -0.1312, -4.1204, 0.5063, [2m[36m(prob_check pid=448745)[0m 5.2335, 3.8850, 0.0228, -0.2230, -0.4361, -0.6074], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.9522], requires_grad=True) , loss: 0.4165360927581787 [2m[36m(prob_check pid=448744)[0m Epoch: 6300/20000 - w:tensor([ 3.4750e+00, -2.2118e-01, -3.8654e-01, -2.1393e-02, -2.5749e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([6.9304], requires_grad=True) , loss: 0.18480023741722107 [2m[36m(prob_check pid=448744)[0m Epoch: 6400/20000 - w:tensor([ 3.5168e+00, -2.2421e-01, -3.8981e-01, -2.1536e-02, -2.6178e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([7.0078], requires_grad=True) , loss: 0.18432557582855225 [2m[36m(prob_check pid=448744)[0m Epoch: 6500/20000 - w:tensor([ 3.5490e+00, -2.2369e-01, -3.9764e-01, -2.1442e-02, -2.6594e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([7.0673], requires_grad=True) , loss: 0.18337006866931915 [2m[36m(prob_check pid=448744)[0m Epoch: 6600/20000 - w:tensor([ 3.5842e+00, -2.2412e-01, -4.0342e-01, -2.1414e-02, -2.7014e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([7.1323], requires_grad=True) , loss: 0.1826879382133484 [2m[36m(prob_check pid=448744)[0m Epoch: 6700/20000 - w:tensor([ 3.6208e+00, -2.2507e-01, -4.0907e-01, -2.1421e-02, -2.7442e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([7.1994], requires_grad=True) , loss: 0.18199968338012695 [2m[36m(prob_check pid=448745)[0m Epoch: 6700/20000 - w:tensor([-3.5530, -1.8148, 1.5109, 1.9596, 1.0026, -0.1228, -4.1203, 0.5127, [2m[36m(prob_check pid=448745)[0m 5.2889, 3.8913, 0.0291, -0.2636, -0.4766, -0.6479], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([1.9780], requires_grad=True) , loss: 0.4165342450141907 [2m[36m(prob_check pid=448745)[0m Epoch: 6800/20000 - w:tensor([-3.5530, -1.8150, 1.5108, 1.9680, 1.0111, -0.1143, -4.1203, 0.5191, [2m[36m(prob_check pid=448745)[0m 5.3443, 3.8976, 0.0354, -0.3045, -0.5176, -0.6889], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.0042], requires_grad=True) , loss: 0.416532427072525 [2m[36m(prob_check pid=448745)[0m Epoch: 6900/20000 - w:tensor([-3.5530, -1.8151, 1.5107, 1.9766, 1.0197, -0.1058, -4.1202, 0.5256, [2m[36m(prob_check pid=448745)[0m 5.3997, 3.9040, 0.0418, -0.3460, -0.5590, -0.7303], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.0306], requires_grad=True) , loss: 0.4165306091308594 [2m[36m(prob_check pid=448745)[0m Epoch: 7000/20000 - w:tensor([-3.5530, -1.8153, 1.5106, 1.9852, 1.0283, -0.0971, -4.1201, 0.5322, [2m[36m(prob_check pid=448745)[0m 5.4552, 3.9105, 0.0483, -0.3878, -0.6008, -0.7722], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.0572], requires_grad=True) , loss: 0.4165289103984833 [2m[36m(prob_check pid=448745)[0m Epoch: 7100/20000 - w:tensor([-3.5530, -1.8154, 1.5105, 1.9939, 1.0371, -0.0883, -4.1200, 0.5389, [2m[36m(prob_check pid=448745)[0m 5.5108, 3.9171, 0.0549, -0.4300, -0.6430, -0.8144], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.0841], requires_grad=True) , loss: 0.41652727127075195 [2m[36m(prob_check pid=448744)[0m Epoch: 6800/20000 - w:tensor([ 3.6580e+00, -2.2892e-01, -4.1358e-01, -2.1644e-02, -2.7869e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([7.2673], requires_grad=True) , loss: 0.18149913847446442 [2m[36m(prob_check pid=448744)[0m Epoch: 6900/20000 - w:tensor([ 3.6914e+00, -2.2726e-01, -4.1966e-01, -2.1466e-02, -2.8289e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([7.3279], requires_grad=True) , loss: 0.18069012463092804 [2m[36m(prob_check pid=448744)[0m Epoch: 7000/20000 - w:tensor([ 3.7264e+00, -2.2803e-01, -4.2511e-01, -2.1470e-02, -2.8717e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([7.3913], requires_grad=True) , loss: 0.18004876375198364 [2m[36m(prob_check pid=448744)[0m Epoch: 7100/20000 - w:tensor([ 3.7614e+00, -2.3107e-01, -4.2921e-01, -2.1646e-02, -2.9136e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([7.4540], requires_grad=True) , loss: 0.17944566905498505 [2m[36m(prob_check pid=448744)[0m Epoch: 7200/20000 - w:tensor([ 3.7929e+00, -2.3009e-01, -4.3493e-01, -2.1524e-02, -2.9552e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([7.5103], requires_grad=True) , loss: 0.17884722352027893 [2m[36m(prob_check pid=448745)[0m Epoch: 7200/20000 - w:tensor([-3.5530, -1.8155, 1.5104, 2.0027, 1.0459, -0.0795, -4.1200, 0.5456, [2m[36m(prob_check pid=448745)[0m 5.5664, 3.9237, 0.0615, -0.4726, -0.6856, -0.8570], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.1113], requires_grad=True) , loss: 0.4165256917476654 [2m[36m(prob_check pid=448745)[0m Epoch: 7300/20000 - w:tensor([-3.5530, -1.8156, 1.5103, 2.0116, 1.0548, -0.0706, -4.1199, 0.5523, [2m[36m(prob_check pid=448745)[0m 5.6220, 3.9304, 0.0682, -0.5155, -0.7285, -0.9000], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.1386], requires_grad=True) , loss: 0.41652408242225647 [2m[36m(prob_check pid=448745)[0m Epoch: 7400/20000 - w:tensor([-3.5530, -1.8157, 1.5102, 2.0205, 1.0638, -0.0616, -4.1198, 0.5592, [2m[36m(prob_check pid=448745)[0m 5.6777, 3.9372, 0.0750, -0.5589, -0.7718, -0.9433], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.1662], requires_grad=True) , loss: 0.4165225625038147 [2m[36m(prob_check pid=448745)[0m Epoch: 7500/20000 - w:tensor([-3.5530, -1.8159, 1.5101, 2.0295, 1.0728, -0.0526, -4.1198, 0.5661, [2m[36m(prob_check pid=448745)[0m 5.7335, 3.9440, 0.0818, -0.6025, -0.8155, -0.9870], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.1940], requires_grad=True) , loss: 0.4165211319923401 [2m[36m(prob_check pid=448745)[0m Epoch: 7600/20000 - w:tensor([-3.5530, -1.8160, 1.5101, 2.0386, 1.0819, -0.0435, -4.1197, 0.5730, [2m[36m(prob_check pid=448745)[0m 5.7892, 3.9509, 0.0887, -0.6465, -0.8594, -1.0309], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.2220], requires_grad=True) , loss: 0.4165197014808655 [2m[36m(prob_check pid=448744)[0m Epoch: 7300/20000 - w:tensor([ 3.8261e+00, -2.3088e-01, -4.3998e-01, -2.1537e-02, -2.9975e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([7.5695], requires_grad=True) , loss: 0.17825527489185333 [2m[36m(prob_check pid=448744)[0m Epoch: 7400/20000 - w:tensor([ 3.8598e+00, -2.3424e-01, -4.4378e-01, -2.1625e-02, -3.0394e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([7.6292], requires_grad=True) , loss: 0.17776240408420563 [2m[36m(prob_check pid=448744)[0m Epoch: 7500/20000 - w:tensor([ 3.8901e+00, -2.3295e-01, -4.4927e-01, -2.1606e-02, -3.0808e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([7.6825], requires_grad=True) , loss: 0.17712967097759247 [2m[36m(prob_check pid=448744)[0m Epoch: 7600/20000 - w:tensor([ 3.9220e+00, -2.3372e-01, -4.5406e-01, -2.1621e-02, -3.1230e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([7.7385], requires_grad=True) , loss: 0.17657561600208282 [2m[36m(prob_check pid=448744)[0m Epoch: 7700/20000 - w:tensor([ 3.9536e+00, -2.3586e-01, -4.5777e-01, -2.1770e-02, -3.1642e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([7.7934], requires_grad=True) , loss: 0.17604608833789825 [2m[36m(prob_check pid=448745)[0m Epoch: 7700/20000 - w:tensor([-3.5530, -1.8161, 1.5100, 2.0477, 1.0911, -0.0343, -4.1197, 0.5800, [2m[36m(prob_check pid=448745)[0m 5.8451, 3.9578, 0.0956, -0.6907, -0.9037, -1.0752], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.2502], requires_grad=True) , loss: 0.41651833057403564 [2m[36m(prob_check pid=448745)[0m Epoch: 7800/20000 - w:tensor([-3.5530, -1.8162, 1.5099, 2.0569, 1.1003, -0.0251, -4.1196, 0.5871, [2m[36m(prob_check pid=448745)[0m 5.9010, 3.9648, 0.1026, -0.7353, -0.9483, -1.1198], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.2785], requires_grad=True) , loss: 0.4165170192718506 [2m[36m(prob_check pid=448745)[0m Epoch: 7900/20000 - w:tensor([-3.5530, -1.8163, 1.5098, 2.0662, 1.1096, -0.0158, -4.1196, 0.5942, [2m[36m(prob_check pid=448745)[0m 5.9569, 3.9718, 0.1096, -0.7802, -0.9931, -1.1647], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.3071], requires_grad=True) , loss: 0.4165157675743103 [2m[36m(prob_check pid=448745)[0m Epoch: 8000/20000 - w:tensor([-3.5530, -1.8164, 1.5097, 2.0755, 1.1189, -0.0065, -4.1195, 0.6013, [2m[36m(prob_check pid=448745)[0m 6.0129, 3.9789, 0.1167, -0.8253, -1.0382, -1.2098], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.3358], requires_grad=True) , loss: 0.4165145754814148 [2m[36m(prob_check pid=448744)[0m Epoch: 7800/20000 - w:tensor([ 3.9829e+00, -2.3566e-01, -4.6278e-01, -2.1698e-02, -3.2055e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([7.8441], requires_grad=True) , loss: 0.1755296140909195 [2m[36m(prob_check pid=448744)[0m Epoch: 7900/20000 - w:tensor([ 4.0137e+00, -2.3654e-01, -4.6732e-01, -2.1770e-02, -3.2476e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([7.8972], requires_grad=True) , loss: 0.17502838373184204 [2m[36m(prob_check pid=448744)[0m Epoch: 8000/20000 - w:tensor([ 4.0431e+00, -2.3841e-01, -4.7069e-01, -2.1835e-02, -3.2880e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([7.9476], requires_grad=True) , loss: 0.17452368140220642 [2m[36m(prob_check pid=448744)[0m Epoch: 8100/20000 - w:tensor([ 4.0708e+00, -2.3836e-01, -4.7530e-01, -2.1810e-02, -3.3287e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([7.9947], requires_grad=True) , loss: 0.17404524981975555 [2m[36m(prob_check pid=448744)[0m Epoch: 8200/20000 - w:tensor([ 4.0999e+00, -2.3919e-01, -4.7946e-01, -2.1844e-02, -3.3702e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([8.0440], requires_grad=True) , loss: 0.17356248199939728 [2m[36m(prob_check pid=448745)[0m Epoch: 8100/20000 - w:tensor([-3.5530e+00, -1.8165e+00, 1.5097e+00, 2.0849e+00, 1.1283e+00, [2m[36m(prob_check pid=448745)[0m 2.8885e-03, -4.1195e+00, 6.0848e-01, 6.0689e+00, 3.9861e+00, [2m[36m(prob_check pid=448745)[0m 1.2385e-01, -8.7068e-01, -1.0836e+00, -1.2552e+00], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.3646], requires_grad=True) , loss: 0.4165133833885193 [2m[36m(prob_check pid=448745)[0m Epoch: 8200/20000 - w:tensor([-3.5530, -1.8166, 1.5096, 2.0943, 1.1377, 0.0123, -4.1194, 0.6157, [2m[36m(prob_check pid=448745)[0m 6.1250, 3.9932, 0.1310, -0.9163, -1.1292, -1.3008], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.3937], requires_grad=True) , loss: 0.41651231050491333 [2m[36m(prob_check pid=448745)[0m Epoch: 8300/20000 - w:tensor([-3.5530, -1.8167, 1.5095, 2.1037, 1.1472, 0.0218, -4.1194, 0.6230, [2m[36m(prob_check pid=448745)[0m 6.1811, 4.0004, 0.1382, -0.9622, -1.1751, -1.3467], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.4228], requires_grad=True) , loss: 0.4165112376213074 [2m[36m(prob_check pid=448745)[0m Epoch: 8400/20000 - w:tensor([-3.5530, -1.8167, 1.5095, 2.1132, 1.1567, 0.0313, -4.1193, 0.6303, [2m[36m(prob_check pid=448745)[0m 6.2373, 4.0077, 0.1455, -1.0082, -1.2212, -1.3928], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.4521], requires_grad=True) , loss: 0.4165102243423462 [2m[36m(prob_check pid=448745)[0m Epoch: 8500/20000 - w:tensor([-3.5530, -1.8168, 1.5094, 2.1228, 1.1663, 0.0409, -4.1193, 0.6376, [2m[36m(prob_check pid=448745)[0m 6.2935, 4.0150, 0.1528, -1.0545, -1.2674, -1.4391], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.4815], requires_grad=True) , loss: 0.416509211063385 [2m[36m(prob_check pid=448744)[0m Epoch: 8300/20000 - w:tensor([ 4.1289e+00, -2.4142e-01, -4.8255e-01, -2.1968e-02, -3.4109e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([8.0929], requires_grad=True) , loss: 0.17311596870422363 [2m[36m(prob_check pid=448744)[0m Epoch: 8400/20000 - w:tensor([ 4.1554e+00, -2.4108e-01, -4.8710e-01, -2.1940e-02, -3.4514e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([8.1371], requires_grad=True) , loss: 0.17264801263809204 [2m[36m(prob_check pid=448744)[0m Epoch: 8500/20000 - w:tensor([ 4.1834e+00, -2.4184e-01, -4.9102e-01, -2.1949e-02, -3.4929e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([8.1837], requires_grad=True) , loss: 0.17220298945903778 [2m[36m(prob_check pid=448744)[0m Epoch: 8600/20000 - w:tensor([ 4.2105e+00, -2.4365e-01, -4.9411e-01, -2.2108e-02, -3.5329e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([8.2285], requires_grad=True) , loss: 0.17176544666290283 [2m[36m(prob_check pid=448744)[0m Epoch: 8700/20000 - w:tensor([ 4.2361e+00, -2.4367e-01, -4.9820e-01, -2.2079e-02, -3.5732e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([8.2705], requires_grad=True) , loss: 0.1713382601737976 [2m[36m(prob_check pid=448745)[0m Epoch: 8600/20000 - w:tensor([-3.5530, -1.8169, 1.5093, 2.1324, 1.1759, 0.0505, -4.1192, 0.6450, [2m[36m(prob_check pid=448745)[0m 6.3498, 4.0223, 0.1601, -1.1010, -1.3139, -1.4856], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.5111], requires_grad=True) , loss: 0.4165082573890686 [2m[36m(prob_check pid=448745)[0m Epoch: 8700/20000 - w:tensor([-3.5530, -1.8170, 1.5093, 2.1420, 1.1855, 0.0602, -4.1192, 0.6524, [2m[36m(prob_check pid=448745)[0m 6.4061, 4.0297, 0.1675, -1.1477, -1.3606, -1.5323], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.5408], requires_grad=True) , loss: 0.416507363319397 [2m[36m(prob_check pid=448745)[0m Epoch: 8800/20000 - w:tensor([-3.5530, -1.8170, 1.5092, 2.1517, 1.1952, 0.0699, -4.1192, 0.6598, [2m[36m(prob_check pid=448745)[0m 6.4625, 4.0371, 0.1749, -1.1946, -1.4075, -1.5792], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.5705], requires_grad=True) , loss: 0.41650649905204773 [2m[36m(prob_check pid=448745)[0m Epoch: 8900/20000 - w:tensor([-3.5530, -1.8171, 1.5092, 2.1614, 1.2049, 0.0796, -4.1191, 0.6673, [2m[36m(prob_check pid=448745)[0m 6.5189, 4.0445, 0.1823, -1.2416, -1.4545, -1.6262], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.6004], requires_grad=True) , loss: 0.4165056645870209 [2m[36m(prob_check pid=448745)[0m Epoch: 9000/20000 - w:tensor([-3.5530, -1.8172, 1.5091, 2.1711, 1.2147, 0.0893, -4.1191, 0.6748, [2m[36m(prob_check pid=448745)[0m 6.5753, 4.0520, 0.1898, -1.2888, -1.5017, -1.6734], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.6304], requires_grad=True) , loss: 0.416504830121994 [2m[36m(prob_check pid=448744)[0m Epoch: 8800/20000 - w:tensor([ 4.2629e+00, -2.4464e-01, -5.0207e-01, -2.2293e-02, -3.6145e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([8.3143], requires_grad=True) , loss: 0.17119534313678741 [2m[36m(prob_check pid=448744)[0m Epoch: 8900/20000 - w:tensor([ 4.2886e+00, -2.4622e-01, -5.0472e-01, -2.2242e-02, -3.6538e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([8.3559], requires_grad=True) , loss: 0.17050857841968536 [2m[36m(prob_check pid=448744)[0m Epoch: 9000/20000 - w:tensor([ 4.3130e+00, -2.4622e-01, -5.0852e-01, -2.2231e-02, -3.6937e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([8.3950], requires_grad=True) , loss: 0.17011195421218872 [2m[36m(prob_check pid=448744)[0m Epoch: 9100/20000 - w:tensor([ 4.3385e+00, -2.4703e-01, -5.1198e-01, -2.2298e-02, -3.7344e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([8.4359], requires_grad=True) , loss: 0.16971340775489807 [2m[36m(prob_check pid=448745)[0m Epoch: 9100/20000 - w:tensor([-3.5530, -1.8172, 1.5091, 2.1809, 1.2245, 0.0991, -4.1191, 0.6823, [2m[36m(prob_check pid=448745)[0m 6.6318, 4.0595, 0.1972, -1.3362, -1.5490, -1.7208], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.6605], requires_grad=True) , loss: 0.4165040850639343 [2m[36m(prob_check pid=448745)[0m Epoch: 9200/20000 - w:tensor([-3.5529, -1.8173, 1.5090, 2.1907, 1.2343, 0.1089, -4.1190, 0.6899, [2m[36m(prob_check pid=448745)[0m 6.6883, 4.0670, 0.2048, -1.3837, -1.5965, -1.7683], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.6907], requires_grad=True) , loss: 0.41650334000587463 [2m[36m(prob_check pid=448745)[0m Epoch: 9300/20000 - w:tensor([-3.5529, -1.8173, 1.5090, 2.2005, 1.2441, 0.1188, -4.1190, 0.6975, [2m[36m(prob_check pid=448745)[0m 6.7449, 4.0745, 0.2123, -1.4313, -1.6441, -1.8159], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.7209], requires_grad=True) , loss: 0.4165026843547821 [2m[36m(prob_check pid=448745)[0m Epoch: 9400/20000 - w:tensor([-3.5529, -1.8174, 1.5090, 2.2104, 1.2540, 0.1287, -4.1190, 0.7050, [2m[36m(prob_check pid=448745)[0m 6.8014, 4.0821, 0.2199, -1.4791, -1.6919, -1.8637], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.7513], requires_grad=True) , loss: 0.4165019690990448 [2m[36m(prob_check pid=448745)[0m Epoch: 9500/20000 - w:tensor([-3.5529, -1.8174, 1.5089, 2.2203, 1.2639, 0.1386, -4.1189, 0.7127, [2m[36m(prob_check pid=448745)[0m 6.8581, 4.0897, 0.2274, -1.5269, -1.7398, -1.9116], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.7817], requires_grad=True) , loss: 0.41650137305259705 [2m[36m(prob_check pid=448744)[0m Epoch: 9200/20000 - w:tensor([ 4.3635e+00, -2.4874e-01, -5.1461e-01, -2.2421e-02, -3.7740e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([8.4757], requires_grad=True) , loss: 0.1693311184644699 [2m[36m(prob_check pid=448744)[0m Epoch: 9300/20000 - w:tensor([ 4.3870e+00, -2.4875e-01, -5.1828e-01, -2.2396e-02, -3.8138e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([8.5127], requires_grad=True) , loss: 0.1689489185810089 [2m[36m(prob_check pid=448744)[0m Epoch: 9400/20000 - w:tensor([ 4.4133e+00, -2.4774e-01, -5.2006e-01, -2.3140e-02, -3.8541e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([8.5550], requires_grad=True) , loss: 0.17455999553203583 [2m[36m(prob_check pid=448744)[0m Epoch: 9500/20000 - w:tensor([ 4.4349e+00, -2.5087e-01, -5.2420e-01, -2.2541e-02, -3.8933e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([8.5875], requires_grad=True) , loss: 0.16821062564849854 [2m[36m(prob_check pid=448744)[0m Epoch: 9600/20000 - w:tensor([ 4.4579e+00, -2.5118e-01, -5.2744e-01, -2.2567e-02, -3.9330e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([8.6229], requires_grad=True) , loss: 0.16785168647766113 [2m[36m(prob_check pid=448745)[0m Epoch: 9600/20000 - w:tensor([-3.5529, -1.8175, 1.5089, 2.2302, 1.2738, 0.1485, -4.1189, 0.7203, [2m[36m(prob_check pid=448745)[0m 6.9147, 4.0973, 0.2350, -1.5750, -1.7878, -1.9596], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.8121], requires_grad=True) , loss: 0.4165007174015045 [2m[36m(prob_check pid=448745)[0m Epoch: 9700/20000 - w:tensor([-3.5529, -1.8175, 1.5088, 2.2401, 1.2838, 0.1584, -4.1189, 0.7279, [2m[36m(prob_check pid=448745)[0m 6.9714, 4.1049, 0.2427, -1.6231, -1.8359, -2.0077], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.8427], requires_grad=True) , loss: 0.41650012135505676 [2m[36m(prob_check pid=448745)[0m Epoch: 9800/20000 - w:tensor([-3.5529, -1.8176, 1.5088, 2.2501, 1.2937, 0.1684, -4.1189, 0.7356, [2m[36m(prob_check pid=448745)[0m 7.0281, 4.1125, 0.2503, -1.6713, -1.8841, -2.0559], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.8733], requires_grad=True) , loss: 0.4164995849132538 [2m[36m(prob_check pid=448745)[0m Epoch: 9900/20000 - w:tensor([-3.5529, -1.8176, 1.5088, 2.2601, 1.3037, 0.1784, -4.1188, 0.7433, [2m[36m(prob_check pid=448745)[0m 7.0849, 4.1202, 0.2580, -1.7196, -1.9324, -2.1043], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.9040], requires_grad=True) , loss: 0.4164990484714508 [2m[36m(prob_check pid=448745)[0m Epoch: 10000/20000 - w:tensor([-3.5529, -1.8177, 1.5087, 2.2701, 1.3137, 0.1884, -4.1188, 0.7510, [2m[36m(prob_check pid=448745)[0m 7.1417, 4.1279, 0.2656, -1.7680, -1.9809, -2.1527], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.9347], requires_grad=True) , loss: 0.416498601436615 [2m[36m(prob_check pid=448744)[0m Epoch: 9700/20000 - w:tensor([ 4.4835e+00, -2.5284e-01, -5.2842e-01, -2.2701e-02, -3.9727e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([8.6629], requires_grad=True) , loss: 0.16863131523132324 [2m[36m(prob_check pid=448744)[0m Epoch: 9800/20000 - w:tensor([ 4.5034e+00, -2.5310e-01, -5.3300e-01, -2.2709e-02, -4.0115e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([8.6925], requires_grad=True) , loss: 0.16715677082538605 [2m[36m(prob_check pid=448744)[0m Epoch: 9900/20000 - w:tensor([ 4.5255e+00, -2.5358e-01, -5.3597e-01, -2.2748e-02, -4.0510e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([8.7258], requires_grad=True) , loss: 0.16681669652462006 [2m[36m(prob_check pid=448744)[0m Epoch: 10000/20000 - w:tensor([ 4.5501e+00, -2.5564e-01, -5.3675e-01, -2.2948e-02, -4.0903e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([8.7636], requires_grad=True) , loss: 0.1664871722459793 [2m[36m(prob_check pid=448744)[0m Epoch: 10100/20000 - w:tensor([ 4.5689e+00, -2.5549e-01, -5.4112e-01, -2.2896e-02, -4.1286e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([8.7908], requires_grad=True) , loss: 0.16616058349609375 [2m[36m(prob_check pid=448745)[0m Epoch: 10100/20000 - w:tensor([-3.5529, -1.8177, 1.5087, 2.2800, 1.3237, 0.1984, -4.1187, 0.7587, [2m[36m(prob_check pid=448745)[0m 7.1985, 4.1355, 0.2733, -1.8140, -2.0268, -2.1987], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.9631], requires_grad=True) , loss: 0.4164980947971344 [2m[36m(prob_check pid=448745)[0m Epoch: 10200/20000 - w:tensor([-3.5529, -1.8177, 1.5087, 2.2900, 1.3337, 0.2083, -4.1188, 0.7664, [2m[36m(prob_check pid=448745)[0m 7.2553, 4.1432, 0.2810, -1.8583, -2.0712, -2.2430], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([2.9897], requires_grad=True) , loss: 0.41649767756462097 [2m[36m(prob_check pid=448745)[0m Epoch: 10300/20000 - w:tensor([-3.5530, -1.8178, 1.5086, 2.2998, 1.3435, 0.2182, -4.1188, 0.7741, [2m[36m(prob_check pid=448745)[0m 7.3122, 4.1509, 0.2887, -1.9011, -2.1139, -2.2857], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.0147], requires_grad=True) , loss: 0.41649729013442993 [2m[36m(prob_check pid=448745)[0m Epoch: 10400/20000 - w:tensor([-3.5529, -1.8178, 1.5086, 2.3097, 1.3534, 0.2281, -4.1187, 0.7819, [2m[36m(prob_check pid=448745)[0m 7.3691, 4.1587, 0.2964, -1.9420, -2.1548, -2.3267], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.0382], requires_grad=True) , loss: 0.4164969325065613 [2m[36m(prob_check pid=448745)[0m Epoch: 10500/20000 - w:tensor([-3.5529, -1.8178, 1.5086, 2.3195, 1.3632, 0.2378, -4.1187, 0.7896, [2m[36m(prob_check pid=448745)[0m 7.4260, 4.1663, 0.3041, -1.9812, -2.1940, -2.3659], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.0599], requires_grad=True) , loss: 0.4164965748786926 [2m[36m(prob_check pid=448744)[0m Epoch: 10200/20000 - w:tensor([ 4.5899e+00, -2.5593e-01, -5.4390e-01, -2.2938e-02, -4.1676e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([8.8217], requires_grad=True) , loss: 0.16584008932113647 [2m[36m(prob_check pid=448744)[0m Epoch: 10300/20000 - w:tensor([ 4.6143e+00, -2.5378e-01, -5.4396e-01, -2.3043e-02, -4.2068e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([8.8589], requires_grad=True) , loss: 0.16559229791164398 [2m[36m(prob_check pid=448744)[0m Epoch: 10400/20000 - w:tensor([ 4.6319e+00, -2.5806e-01, -5.4862e-01, -2.3109e-02, -4.2448e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([8.8831], requires_grad=True) , loss: 0.1652156114578247 [2m[36m(prob_check pid=448744)[0m Epoch: 10500/20000 - w:tensor([ 4.6516e+00, -2.5824e-01, -5.5131e-01, -2.3137e-02, -4.2832e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([8.9115], requires_grad=True) , loss: 0.16491325199604034 [2m[36m(prob_check pid=448744)[0m Epoch: 10600/20000 - w:tensor([ 4.6720e+00, -2.5926e-01, -5.5409e-01, -2.3517e-02, -4.3226e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([8.9409], requires_grad=True) , loss: 0.1655464917421341 [2m[36m(prob_check pid=448745)[0m Epoch: 10600/20000 - w:tensor([-3.5529, -1.8179, 1.5086, 2.3292, 1.3729, 0.2476, -4.1187, 0.7972, [2m[36m(prob_check pid=448745)[0m 7.4830, 4.1740, 0.3118, -2.0192, -2.2320, -2.4039], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.0805], requires_grad=True) , loss: 0.4164961874485016 [2m[36m(prob_check pid=448745)[0m Epoch: 10700/20000 - w:tensor([-3.5529, -1.8179, 1.5086, 2.3388, 1.3825, 0.2571, -4.1187, 0.8048, [2m[36m(prob_check pid=448745)[0m 7.5399, 4.1816, 0.3193, -2.0553, -2.2681, -2.4400], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.0996], requires_grad=True) , loss: 0.41649600863456726 [2m[36m(prob_check pid=448745)[0m Epoch: 10800/20000 - w:tensor([-3.5529, -1.8179, 1.5086, 2.3483, 1.3920, 0.2667, -4.1187, 0.8125, [2m[36m(prob_check pid=448745)[0m 7.5969, 4.1892, 0.3269, -2.0907, -2.3035, -2.4754], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.1179], requires_grad=True) , loss: 0.4164956510066986 [2m[36m(prob_check pid=448745)[0m Epoch: 10900/20000 - w:tensor([-3.5529, -1.8179, 1.5085, 2.3575, 1.4012, 0.2759, -4.1187, 0.8199, [2m[36m(prob_check pid=448745)[0m 7.6539, 4.1966, 0.3343, -2.1240, -2.3368, -2.5087], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.1344], requires_grad=True) , loss: 0.4164954423904419 [2m[36m(prob_check pid=448744)[0m Epoch: 10700/20000 - w:tensor([ 4.6923e+00, -2.6044e-01, -5.5567e-01, -2.3317e-02, -4.3602e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([8.9696], requires_grad=True) , loss: 0.1643153727054596 [2m[36m(prob_check pid=448744)[0m Epoch: 10800/20000 - w:tensor([ 4.7113e+00, -2.6051e-01, -5.5831e-01, -2.3343e-02, -4.3985e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([8.9962], requires_grad=True) , loss: 0.1640259325504303 [2m[36m(prob_check pid=448744)[0m Epoch: 10900/20000 - w:tensor([ 4.7329e+00, -2.5944e-01, -5.5909e-01, -2.4259e-02, -4.4373e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.0281], requires_grad=True) , loss: 0.16401027143001556 [2m[36m(prob_check pid=448744)[0m Epoch: 11000/20000 - w:tensor([ 4.7500e+00, -2.6245e-01, -5.6250e-01, -2.3512e-02, -4.4748e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.0501], requires_grad=True) , loss: 0.16345849633216858 [2m[36m(prob_check pid=448744)[0m Epoch: 11100/20000 - w:tensor([ 4.7684e+00, -2.6270e-01, -5.6487e-01, -2.3552e-02, -4.5129e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.0753], requires_grad=True) , loss: 0.16318194568157196 [2m[36m(prob_check pid=448745)[0m Epoch: 11000/20000 - w:tensor([-3.5529, -1.8180, 1.5085, 2.3668, 1.4105, 0.2852, -4.1187, 0.8273, [2m[36m(prob_check pid=448745)[0m 7.7108, 4.2040, 0.3418, -2.1569, -2.3698, -2.5416], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.1506], requires_grad=True) , loss: 0.4164951741695404 [2m[36m(prob_check pid=448745)[0m Epoch: 11100/20000 - w:tensor([-3.5529, -1.8180, 1.5085, 2.3757, 1.4194, 0.2941, -4.1187, 0.8346, [2m[36m(prob_check pid=448745)[0m 7.7678, 4.2112, 0.3490, -2.1875, -2.4003, -2.5722], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.1650], requires_grad=True) , loss: 0.4164949953556061 [2m[36m(prob_check pid=448745)[0m Epoch: 11200/20000 - w:tensor([-3.5529, -1.8180, 1.5085, 2.3846, 1.4283, 0.3030, -4.1187, 0.8418, [2m[36m(prob_check pid=448745)[0m 7.8248, 4.2185, 0.3562, -2.2176, -2.4304, -2.6023], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.1791], requires_grad=True) , loss: 0.41649481654167175 [2m[36m(prob_check pid=448745)[0m Epoch: 11300/20000 - w:tensor([-3.5530, -1.8180, 1.5084, 2.3933, 1.4370, 0.3117, -4.1187, 0.8489, [2m[36m(prob_check pid=448745)[0m 7.8817, 4.2256, 0.3634, -2.2469, -2.4597, -2.6316], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.1924], requires_grad=True) , loss: 0.41649460792541504 [2m[36m(prob_check pid=448745)[0m Epoch: 11400/20000 - w:tensor([-3.5529, -1.8180, 1.5085, 2.4017, 1.4454, 0.3201, -4.1186, 0.8558, [2m[36m(prob_check pid=448745)[0m 7.9386, 4.2325, 0.3703, -2.2741, -2.4869, -2.6588], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.2044], requires_grad=True) , loss: 0.4164943993091583 [2m[36m(prob_check pid=448744)[0m Epoch: 11200/20000 - w:tensor([ 4.7898e+00, -2.6305e-01, -5.6485e-01, -2.3669e-02, -4.5511e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.1056], requires_grad=True) , loss: 0.16440483927726746 [2m[36m(prob_check pid=448744)[0m Epoch: 11300/20000 - w:tensor([ 4.8055e+00, -2.6446e-01, -5.6883e-01, -2.3718e-02, -4.5885e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.1256], requires_grad=True) , loss: 0.16264116764068604 [2m[36m(prob_check pid=448744)[0m Epoch: 11400/20000 - w:tensor([ 4.8234e+00, -2.6485e-01, -5.7101e-01, -2.3768e-02, -4.6265e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.1495], requires_grad=True) , loss: 0.16237539052963257 [2m[36m(prob_check pid=448744)[0m Epoch: 11500/20000 - w:tensor([ 4.8425e+00, -2.6817e-01, -5.7210e-01, -2.4005e-02, -4.6642e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.1753], requires_grad=True) , loss: 0.16229018568992615 [2m[36m(prob_check pid=448744)[0m Epoch: 11600/20000 - w:tensor([ 4.8587e+00, -2.6643e-01, -5.7475e-01, -2.3927e-02, -4.7013e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.1961], requires_grad=True) , loss: 0.16186082363128662 [2m[36m(prob_check pid=448745)[0m Epoch: 11500/20000 - w:tensor([-3.5529, -1.8179, 1.5086, 2.4103, 1.4541, 0.3287, -4.1185, 0.8630, [2m[36m(prob_check pid=448745)[0m 7.9955, 4.2396, 0.3774, -2.3018, -2.5146, -2.6865], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.2168], requires_grad=True) , loss: 0.4164942800998688 [2m[36m(prob_check pid=448745)[0m Epoch: 11600/20000 - w:tensor([-3.5529, -1.8180, 1.5084, 2.4183, 1.4621, 0.3368, -4.1186, 0.8697, [2m[36m(prob_check pid=448745)[0m 8.0523, 4.2463, 0.3841, -2.3278, -2.5406, -2.7125], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.2277], requires_grad=True) , loss: 0.41649407148361206 [2m[36m(prob_check pid=448745)[0m Epoch: 11700/20000 - w:tensor([-3.5530, -1.8181, 1.5084, 2.4265, 1.4702, 0.3449, -4.1187, 0.8765, [2m[36m(prob_check pid=448745)[0m 8.1092, 4.2531, 0.3909, -2.3540, -2.5668, -2.7387], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.2387], requires_grad=True) , loss: 0.4164939522743225 [2m[36m(prob_check pid=448745)[0m Epoch: 11800/20000 - w:tensor([-3.5529, -1.8181, 1.5084, 2.4342, 1.4780, 0.3527, -4.1186, 0.8830, [2m[36m(prob_check pid=448745)[0m 8.1660, 4.2596, 0.3974, -2.3780, -2.5908, -2.7627], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.2487], requires_grad=True) , loss: 0.41649380326271057 [2m[36m(prob_check pid=448745)[0m Epoch: 11900/20000 - w:tensor([-3.5529, -1.8181, 1.5084, 2.4420, 1.4857, 0.3604, -4.1186, 0.8895, [2m[36m(prob_check pid=448745)[0m 8.2227, 4.2662, 0.4039, -2.4021, -2.6149, -2.7868], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.2585], requires_grad=True) , loss: 0.41649365425109863 [2m[36m(prob_check pid=448744)[0m Epoch: 11700/20000 - w:tensor([ 4.8762e+00, -2.6696e-01, -5.7674e-01, -2.3989e-02, -4.7392e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.2187], requires_grad=True) , loss: 0.16160514950752258 [2m[36m(prob_check pid=448744)[0m Epoch: 11800/20000 - w:tensor([ 4.8962e+00, -2.7100e-01, -5.7616e-01, -2.4137e-02, -4.7763e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.2460], requires_grad=True) , loss: 0.16186854243278503 [2m[36m(prob_check pid=448744)[0m Epoch: 11900/20000 - w:tensor([ 4.9093e+00, -2.6880e-01, -5.7991e-01, -2.4168e-02, -4.8113e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.2614], requires_grad=True) , loss: 0.16112639009952545 [2m[36m(prob_check pid=448744)[0m Epoch: 12000/20000 - w:tensor([ 4.9246e+00, -2.6897e-01, -5.8180e-01, -2.4209e-02, -4.8470e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.2806], requires_grad=True) , loss: 0.16089588403701782 [2m[36m(prob_check pid=448744)[0m Epoch: 12100/20000 - w:tensor([ 4.9406e+00, -2.6959e-01, -5.8348e-01, -2.4281e-02, -4.8836e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.3007], requires_grad=True) , loss: 0.16066038608551025 [2m[36m(prob_check pid=448745)[0m Epoch: 12000/20000 - w:tensor([-3.5530, -1.8182, 1.5083, 2.4495, 1.4932, 0.3679, -4.1187, 0.8959, [2m[36m(prob_check pid=448745)[0m 8.2794, 4.2725, 0.4103, -2.4255, -2.6383, -2.8102], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.2678], requires_grad=True) , loss: 0.4164935052394867 [2m[36m(prob_check pid=448745)[0m Epoch: 12100/20000 - w:tensor([-3.5529, -1.8181, 1.5084, 2.4568, 1.5005, 0.3752, -4.1186, 0.9022, [2m[36m(prob_check pid=448745)[0m 8.3361, 4.2788, 0.4165, -2.4476, -2.6604, -2.8323], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.2766], requires_grad=True) , loss: 0.41649341583251953 [2m[36m(prob_check pid=448745)[0m Epoch: 12200/20000 - w:tensor([-3.5535, -1.8191, 1.5076, 2.4634, 1.5070, 0.3818, -4.1196, 0.9077, [2m[36m(prob_check pid=448745)[0m 8.3927, 4.2844, 0.4223, -2.4713, -2.6841, -2.8560], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.2845], requires_grad=True) , loss: 0.4164941906929016 [2m[36m(prob_check pid=448745)[0m Epoch: 12300/20000 - w:tensor([-3.5529, -1.8181, 1.5084, 2.4710, 1.5147, 0.3894, -4.1186, 0.9144, [2m[36m(prob_check pid=448745)[0m 8.4492, 4.2910, 0.4288, -2.4909, -2.7037, -2.8756], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.2934], requires_grad=True) , loss: 0.41649314761161804 [2m[36m(prob_check pid=448745)[0m Epoch: 12400/20000 - w:tensor([-3.5529, -1.8181, 1.5084, 2.4779, 1.5217, 0.3964, -4.1186, 0.9205, [2m[36m(prob_check pid=448745)[0m 8.5057, 4.2971, 0.4348, -2.5118, -2.7246, -2.8965], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.3014], requires_grad=True) , loss: 0.4164930284023285 [2m[36m(prob_check pid=448744)[0m Epoch: 12200/20000 - w:tensor([ 4.9584e+00, -2.6905e-01, -5.8403e-01, -2.3472e-02, -4.9212e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.3228], requires_grad=True) , loss: 0.17109277844429016 [2m[36m(prob_check pid=448744)[0m Epoch: 12300/20000 - w:tensor([ 4.9734e+00, -2.7152e-01, -5.8647e-01, -2.4469e-02, -4.9575e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.3414], requires_grad=True) , loss: 0.16019196808338165 [2m[36m(prob_check pid=448744)[0m Epoch: 12400/20000 - w:tensor([ 4.9889e+00, -2.7165e-01, -5.8834e-01, -2.4516e-02, -4.9944e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.3601], requires_grad=True) , loss: 0.15996228158473969 [2m[36m(prob_check pid=448744)[0m Epoch: 12500/20000 - w:tensor([ 5.0057e+00, -2.7466e-01, -5.8904e-01, -2.4717e-02, -5.0312e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.3810], requires_grad=True) , loss: 0.1598602533340454 [2m[36m(prob_check pid=448744)[0m Epoch: 12600/20000 - w:tensor([ 5.0199e+00, -2.7311e-01, -5.9132e-01, -2.4681e-02, -5.0674e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.3974], requires_grad=True) , loss: 0.15951503813266754 [2m[36m(prob_check pid=448745)[0m Epoch: 12500/20000 - w:tensor([-3.5529, -1.8181, 1.5084, 2.4848, 1.5285, 0.4032, -4.1185, 0.9264, [2m[36m(prob_check pid=448745)[0m 8.5621, 4.3030, 0.4408, -2.5322, -2.7450, -2.9169], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.3092], requires_grad=True) , loss: 0.4164929687976837 [2m[36m(prob_check pid=448745)[0m Epoch: 12600/20000 - w:tensor([-3.5529, -1.8181, 1.5084, 2.4913, 1.5351, 0.4098, -4.1186, 0.9322, [2m[36m(prob_check pid=448745)[0m 8.6184, 4.3088, 0.4466, -2.5522, -2.7650, -2.9369], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.3166], requires_grad=True) , loss: 0.41649287939071655 [2m[36m(prob_check pid=448745)[0m Epoch: 12700/20000 - w:tensor([-3.5531, -1.8183, 1.5082, 2.4977, 1.5415, 0.4162, -4.1188, 0.9378, [2m[36m(prob_check pid=448745)[0m 8.6747, 4.3144, 0.4522, -2.5720, -2.7848, -2.9567], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.3237], requires_grad=True) , loss: 0.41649284958839417 [2m[36m(prob_check pid=448745)[0m Epoch: 12800/20000 - w:tensor([-3.5529, -1.8182, 1.5084, 2.5039, 1.5476, 0.4223, -4.1186, 0.9433, [2m[36m(prob_check pid=448745)[0m 8.7309, 4.3199, 0.4577, -2.5897, -2.8024, -2.9744], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.3304], requires_grad=True) , loss: 0.4164927005767822 [2m[36m(prob_check pid=448745)[0m Epoch: 12900/20000 - w:tensor([-3.5529, -1.8182, 1.5083, 2.5101, 1.5538, 0.4285, -4.1186, 0.9489, [2m[36m(prob_check pid=448745)[0m 8.7870, 4.3254, 0.4632, -2.6081, -2.8209, -2.9928], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.3371], requires_grad=True) , loss: 0.41649261116981506 [2m[36m(prob_check pid=448744)[0m Epoch: 12700/20000 - w:tensor([ 5.0353e+00, -2.7361e-01, -5.9293e-01, -2.4758e-02, -5.1045e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.4154], requires_grad=True) , loss: 0.15929289162158966 [2m[36m(prob_check pid=448744)[0m Epoch: 12800/20000 - w:tensor([ 5.0508e+00, -2.7514e-01, -5.9377e-01, -2.4913e-02, -5.1402e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.4341], requires_grad=True) , loss: 0.15908239781856537 [2m[36m(prob_check pid=448744)[0m Epoch: 12900/20000 - w:tensor([ 5.0648e+00, -2.7494e-01, -5.9569e-01, -2.4911e-02, -5.1762e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.4497], requires_grad=True) , loss: 0.15886713564395905 [2m[36m(prob_check pid=448744)[0m Epoch: 13000/20000 - w:tensor([ 5.0804e+00, -2.7469e-01, -5.9635e-01, -2.4313e-02, -5.2130e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.4676], requires_grad=True) , loss: 0.1636737734079361 [2m[36m(prob_check pid=448744)[0m Epoch: 13100/20000 - w:tensor([ 5.0942e+00, -2.7672e-01, -5.9813e-01, -2.5098e-02, -5.2484e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.4834], requires_grad=True) , loss: 0.15844839811325073 [2m[36m(prob_check pid=448745)[0m Epoch: 13000/20000 - w:tensor([-3.5528, -1.8180, 1.5085, 2.5164, 1.5601, 0.4348, -4.1184, 0.9545, [2m[36m(prob_check pid=448745)[0m 8.8431, 4.3311, 0.4689, -2.6264, -2.8391, -3.0111], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.3439], requires_grad=True) , loss: 0.4164925813674927 [2m[36m(prob_check pid=448745)[0m Epoch: 13100/20000 - w:tensor([-3.5529, -1.8182, 1.5083, 2.5222, 1.5659, 0.4406, -4.1186, 0.9597, [2m[36m(prob_check pid=448745)[0m 8.8991, 4.3363, 0.4741, -2.6441, -2.8569, -3.0288], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.3502], requires_grad=True) , loss: 0.4164924621582031 [2m[36m(prob_check pid=448745)[0m Epoch: 13200/20000 - w:tensor([-3.5529, -1.8182, 1.5083, 2.5281, 1.5719, 0.4466, -4.1186, 0.9651, [2m[36m(prob_check pid=448745)[0m 8.9550, 4.3417, 0.4794, -2.6618, -2.8746, -3.0465], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.3565], requires_grad=True) , loss: 0.41649243235588074 [2m[36m(prob_check pid=448745)[0m Epoch: 13300/20000 - w:tensor([-3.5529, -1.8182, 1.5083, 2.5338, 1.5776, 0.4523, -4.1186, 0.9703, [2m[36m(prob_check pid=448745)[0m 9.0108, 4.3469, 0.4846, -2.6788, -2.8916, -3.0635], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.3625], requires_grad=True) , loss: 0.4164922833442688 [2m[36m(prob_check pid=448745)[0m Epoch: 13400/20000 - w:tensor([-3.5531, -1.8183, 1.5082, 2.5394, 1.5832, 0.4579, -4.1187, 0.9754, [2m[36m(prob_check pid=448745)[0m 9.0665, 4.3520, 0.4897, -2.6959, -2.9087, -3.0806], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.3684], requires_grad=True) , loss: 0.4164922833442688 [2m[36m(prob_check pid=448744)[0m Epoch: 13200/20000 - w:tensor([ 5.1081e+00, -2.7680e-01, -5.9973e-01, -2.5148e-02, -5.2844e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.4987], requires_grad=True) , loss: 0.15824243426322937 [2m[36m(prob_check pid=448744)[0m Epoch: 13300/20000 - w:tensor([ 5.1245e+00, -2.7839e-01, -5.9915e-01, -2.5539e-02, -5.3204e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.5187], requires_grad=True) , loss: 0.1584959179162979 [2m[36m(prob_check pid=448744)[0m Epoch: 13400/20000 - w:tensor([ 5.1360e+00, -2.7836e-01, -6.0205e-01, -2.5330e-02, -5.3552e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.5295], requires_grad=True) , loss: 0.15784397721290588 [2m[36m(prob_check pid=448744)[0m Epoch: 13500/20000 - w:tensor([ 5.1492e+00, -2.7862e-01, -6.0342e-01, -2.5387e-02, -5.3905e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.5437], requires_grad=True) , loss: 0.15764853358268738 [2m[36m(prob_check pid=448744)[0m Epoch: 13600/20000 - w:tensor([ 5.1646e+00, -2.7742e-01, -6.0325e-01, -2.6124e-02, -5.4265e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.5620], requires_grad=True) , loss: 0.16271215677261353 [2m[36m(prob_check pid=448745)[0m Epoch: 13500/20000 - w:tensor([-3.5529, -1.8182, 1.5083, 2.5450, 1.5888, 0.4635, -4.1186, 0.9806, [2m[36m(prob_check pid=448745)[0m 9.1222, 4.3571, 0.4949, -2.7120, -2.9248, -3.0967], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.3743], requires_grad=True) , loss: 0.41649216413497925 [2m[36m(prob_check pid=448745)[0m Epoch: 13600/20000 - w:tensor([-3.5529, -1.8182, 1.5083, 2.5506, 1.5943, 0.4690, -4.1185, 0.9856, [2m[36m(prob_check pid=448745)[0m 9.1778, 4.3622, 0.5000, -2.7283, -2.9411, -3.1131], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.3801], requires_grad=True) , loss: 0.4164920747280121 [2m[36m(prob_check pid=448745)[0m Epoch: 13700/20000 - w:tensor([-3.5529, -1.8182, 1.5083, 2.5559, 1.5997, 0.4743, -4.1185, 0.9905, [2m[36m(prob_check pid=448745)[0m 9.2333, 4.3671, 0.5049, -2.7441, -2.9569, -3.1288], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.3856], requires_grad=True) , loss: 0.4164920449256897 [2m[36m(prob_check pid=448745)[0m Epoch: 13800/20000 - w:tensor([-3.5530, -1.8183, 1.5082, 2.5611, 1.6049, 0.4796, -4.1186, 0.9954, [2m[36m(prob_check pid=448745)[0m 9.2887, 4.3719, 0.5097, -2.7600, -2.9728, -3.1447], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.3910], requires_grad=True) , loss: 0.4164920747280121 [2m[36m(prob_check pid=448744)[0m Epoch: 13700/20000 - w:tensor([ 5.1764e+00, -2.8030e-01, -6.0553e-01, -2.5580e-02, -5.4613e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.5728], requires_grad=True) , loss: 0.15726174414157867 [2m[36m(prob_check pid=448744)[0m Epoch: 13800/20000 - w:tensor([ 5.1892e+00, -2.8044e-01, -6.0687e-01, -2.5631e-02, -5.4966e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.5860], requires_grad=True) , loss: 0.1570720225572586 [2m[36m(prob_check pid=448744)[0m Epoch: 13900/20000 - w:tensor([ 5.2035e+00, -2.8312e-01, -6.0709e-01, -2.5950e-02, -5.5321e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.6017], requires_grad=True) , loss: 0.1572423279285431 [2m[36m(prob_check pid=448744)[0m Epoch: 14000/20000 - w:tensor([ 5.2152e+00, -2.8180e-01, -6.0894e-01, -2.5807e-02, -5.5668e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.6128], requires_grad=True) , loss: 0.1566992700099945 [2m[36m(prob_check pid=448744)[0m Epoch: 14100/20000 - w:tensor([ 5.2280e+00, -2.8222e-01, -6.1011e-01, -2.5888e-02, -5.6025e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.6255], requires_grad=True) , loss: 0.1565142571926117 [2m[36m(prob_check pid=448745)[0m Epoch: 13900/20000 - w:tensor([-3.5529, -1.8182, 1.5083, 2.5662, 1.6100, 0.4847, -4.1185, 1.0001, [2m[36m(prob_check pid=448745)[0m 9.3441, 4.3767, 0.5144, -2.7746, -2.9874, -3.1593], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.3963], requires_grad=True) , loss: 0.41649195551872253 [2m[36m(prob_check pid=448745)[0m Epoch: 14000/20000 - w:tensor([-3.5529, -1.8182, 1.5083, 2.5714, 1.6152, 0.4898, -4.1185, 1.0049, [2m[36m(prob_check pid=448745)[0m 9.3993, 4.3815, 0.5192, -2.7899, -3.0027, -3.1746], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.4016], requires_grad=True) , loss: 0.41649189591407776 [2m[36m(prob_check pid=448745)[0m Epoch: 14100/20000 - w:tensor([-3.5529, -1.8182, 1.5083, 2.5763, 1.6201, 0.4948, -4.1185, 1.0095, [2m[36m(prob_check pid=448745)[0m 9.4545, 4.3861, 0.5239, -2.8046, -3.0174, -3.1893], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.4066], requires_grad=True) , loss: 0.4164918065071106 [2m[36m(prob_check pid=448745)[0m Epoch: 14200/20000 - w:tensor([-3.5529, -1.8182, 1.5083, 2.5813, 1.6251, 0.4998, -4.1185, 1.0142, [2m[36m(prob_check pid=448745)[0m 9.5096, 4.3908, 0.5286, -2.8194, -3.0322, -3.2041], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.4118], requires_grad=True) , loss: 0.41649171710014343 [2m[36m(prob_check pid=448745)[0m Epoch: 14300/20000 - w:tensor([-3.5529, -1.8183, 1.5083, 2.5860, 1.6298, 0.5045, -4.1185, 1.0187, [2m[36m(prob_check pid=448745)[0m 9.5646, 4.3952, 0.5330, -2.8334, -3.0462, -3.2181], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.4166], requires_grad=True) , loss: 0.41649168729782104 [2m[36m(prob_check pid=448744)[0m Epoch: 14200/20000 - w:tensor([ 5.2411e+00, -2.8367e-01, -6.1049e-01, -2.6020e-02, -5.6366e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.6392], requires_grad=True) , loss: 0.1563407927751541 [2m[36m(prob_check pid=448744)[0m Epoch: 14300/20000 - w:tensor([ 5.2525e+00, -2.8343e-01, -6.1202e-01, -2.6045e-02, -5.6710e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.6498], requires_grad=True) , loss: 0.15616001188755035 [2m[36m(prob_check pid=448744)[0m Epoch: 14400/20000 - w:tensor([ 5.2648e+00, -2.8399e-01, -6.1309e-01, -2.6184e-02, -5.7062e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.6617], requires_grad=True) , loss: 0.15601590275764465 [2m[36m(prob_check pid=448744)[0m Epoch: 14500/20000 - w:tensor([ 5.2773e+00, -2.8516e-01, -6.1357e-01, -2.6261e-02, -5.7403e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.6741], requires_grad=True) , loss: 0.15580980479717255 [2m[36m(prob_check pid=448744)[0m Epoch: 14600/20000 - w:tensor([ 5.2888e+00, -2.8511e-01, -6.1483e-01, -2.6292e-02, -5.7749e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.6846], requires_grad=True) , loss: 0.15563613176345825 [2m[36m(prob_check pid=448745)[0m Epoch: 14400/20000 - w:tensor([-3.5529, -1.8182, 1.5083, 2.5907, 1.6345, 0.5092, -4.1185, 1.0231, [2m[36m(prob_check pid=448745)[0m 9.6195, 4.3996, 0.5374, -2.8472, -3.0600, -3.2319], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.4213], requires_grad=True) , loss: 0.41649162769317627 [2m[36m(prob_check pid=448745)[0m Epoch: 14500/20000 - w:tensor([-3.5528, -1.8181, 1.5085, 2.5957, 1.6395, 0.5142, -4.1183, 1.0278, [2m[36m(prob_check pid=448745)[0m 9.6743, 4.4043, 0.5421, -2.8613, -3.0741, -3.2460], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.4264], requires_grad=True) , loss: 0.41649165749549866 [2m[36m(prob_check pid=448745)[0m Epoch: 14600/20000 - w:tensor([-3.5529, -1.8182, 1.5083, 2.6000, 1.6438, 0.5185, -4.1185, 1.0319, [2m[36m(prob_check pid=448745)[0m 9.7290, 4.4084, 0.5462, -2.8746, -3.0874, -3.2594], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.4308], requires_grad=True) , loss: 0.4164915382862091 [2m[36m(prob_check pid=448745)[0m Epoch: 14700/20000 - w:tensor([-3.5529, -1.8183, 1.5083, 2.6045, 1.6483, 0.5230, -4.1185, 1.0362, [2m[36m(prob_check pid=448745)[0m 9.7836, 4.4127, 0.5505, -2.8881, -3.1009, -3.2728], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.4354], requires_grad=True) , loss: 0.4164915084838867 [2m[36m(prob_check pid=448745)[0m Epoch: 14800/20000 - w:tensor([-3.5529, -1.8182, 1.5084, 2.6092, 1.6530, 0.5276, -4.1184, 1.0406, [2m[36m(prob_check pid=448745)[0m 9.8382, 4.4171, 0.5549, -2.9015, -3.1143, -3.2862], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.4401], requires_grad=True) , loss: 0.4164915084838867 [2m[36m(prob_check pid=448744)[0m Epoch: 14700/20000 - w:tensor([ 5.3021e+00, -2.8740e-01, -6.1451e-01, -2.6735e-02, -5.8094e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.6986], requires_grad=True) , loss: 0.15552859008312225 [2m[36m(prob_check pid=448744)[0m Epoch: 14800/20000 - w:tensor([ 5.3122e+00, -2.8645e-01, -6.1645e-01, -2.6473e-02, -5.8431e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.7065], requires_grad=True) , loss: 0.15529972314834595 [2m[36m(prob_check pid=448744)[0m Epoch: 14900/20000 - w:tensor([ 5.3235e+00, -2.8676e-01, -6.1742e-01, -2.6539e-02, -5.8774e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.7166], requires_grad=True) , loss: 0.15513291954994202 [2m[36m(prob_check pid=448744)[0m Epoch: 15000/20000 - w:tensor([ 5.3358e+00, -2.8922e-01, -6.1758e-01, -2.6759e-02, -5.9115e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.7283], requires_grad=True) , loss: 0.15501290559768677 [2m[36m(prob_check pid=448744)[0m Epoch: 15100/20000 - w:tensor([ 5.3461e+00, -2.8797e-01, -6.1896e-01, -2.6713e-02, -5.9452e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.7368], requires_grad=True) , loss: 0.15480725467205048 [2m[36m(prob_check pid=448745)[0m Epoch: 14900/20000 - w:tensor([-3.5529, -1.8183, 1.5083, 2.6135, 1.6573, 0.5320, -4.1185, 1.0447, [2m[36m(prob_check pid=448745)[0m 9.8926, 4.4212, 0.5590, -2.9147, -3.1275, -3.2994], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.4444], requires_grad=True) , loss: 0.41649141907691956 [2m[36m(prob_check pid=448745)[0m Epoch: 15000/20000 - w:tensor([-3.5530, -1.8184, 1.5082, 2.6178, 1.6616, 0.5363, -4.1186, 1.0488, [2m[36m(prob_check pid=448745)[0m 9.9470, 4.4253, 0.5631, -2.9279, -3.1406, -3.3126], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.4488], requires_grad=True) , loss: 0.41649138927459717 [2m[36m(prob_check pid=448745)[0m Epoch: 15100/20000 - w:tensor([-3.5529, -1.8183, 1.5083, 2.6222, 1.6660, 0.5406, -4.1185, 1.0530, [2m[36m(prob_check pid=448745)[0m 10.0013, 4.4295, 0.5673, -2.9404, -3.1532, -3.3251], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.4532], requires_grad=True) , loss: 0.41649138927459717 [2m[36m(prob_check pid=448745)[0m Epoch: 15200/20000 - w:tensor([-3.5528, -1.8181, 1.5085, 2.6267, 1.6705, 0.5452, -4.1183, 1.0573, [2m[36m(prob_check pid=448745)[0m 10.0555, 4.4338, 0.5716, -2.9531, -3.1659, -3.3378], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.4578], requires_grad=True) , loss: 0.41649144887924194 [2m[36m(prob_check pid=448745)[0m Epoch: 15300/20000 - w:tensor([-3.5529, -1.8183, 1.5083, 2.6306, 1.6744, 0.5491, -4.1185, 1.0611, [2m[36m(prob_check pid=448745)[0m 10.1095, 4.4376, 0.5754, -2.9655, -3.1783, -3.3502], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.4617], requires_grad=True) , loss: 0.4164912700653076 [2m[36m(prob_check pid=448744)[0m Epoch: 15200/20000 - w:tensor([ 5.3570e+00, -2.8868e-01, -6.2008e-01, -2.7086e-02, -5.9798e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.7462], requires_grad=True) , loss: 0.15538261830806732 [2m[36m(prob_check pid=448744)[0m Epoch: 15300/20000 - w:tensor([ 5.3685e+00, -2.8965e-01, -6.2013e-01, -2.6920e-02, -6.0126e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.7567], requires_grad=True) , loss: 0.15448927879333496 [2m[36m(prob_check pid=448744)[0m Epoch: 15400/20000 - w:tensor([ 5.3787e+00, -2.8951e-01, -6.2121e-01, -2.6958e-02, -6.0459e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.7648], requires_grad=True) , loss: 0.15433402359485626 [2m[36m(prob_check pid=448744)[0m Epoch: 15500/20000 - w:tensor([ 5.3894e+00, -2.8985e-01, -6.2209e-01, -2.7953e-02, -6.0800e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.7744], requires_grad=True) , loss: 0.16722136735916138 [2m[36m(prob_check pid=448744)[0m Epoch: 15600/20000 - w:tensor([ 5.4002e+00, -2.9099e-01, -6.2240e-01, -2.7156e-02, -6.1129e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.7829], requires_grad=True) , loss: 0.15402545034885406 [2m[36m(prob_check pid=448745)[0m Epoch: 15400/20000 - w:tensor([-3.5526, -1.8179, 1.5086, 2.6352, 1.6790, 0.5537, -4.1181, 1.0656, [2m[36m(prob_check pid=448745)[0m 10.1635, 4.4421, 0.5798, -2.9777, -3.1905, -3.3624], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.4664], requires_grad=True) , loss: 0.4164913594722748 [2m[36m(prob_check pid=448745)[0m Epoch: 15500/20000 - w:tensor([-3.5529, -1.8183, 1.5083, 2.6389, 1.6827, 0.5574, -4.1185, 1.0691, [2m[36m(prob_check pid=448745)[0m 10.2174, 4.4456, 0.5833, -2.9900, -3.2028, -3.3748], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.4700], requires_grad=True) , loss: 0.41649118065834045 [2m[36m(prob_check pid=448745)[0m Epoch: 15600/20000 - w:tensor([-3.5519, -1.8172, 1.5094, 2.6442, 1.6880, 0.5626, -4.1174, 1.0742, [2m[36m(prob_check pid=448745)[0m 10.2712, 4.4507, 0.5885, -3.0013, -3.2141, -3.3860], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.4753], requires_grad=True) , loss: 0.4164929986000061 [2m[36m(prob_check pid=448745)[0m Epoch: 15700/20000 - w:tensor([-3.5529, -1.8183, 1.5083, 2.6469, 1.6907, 0.5653, -4.1185, 1.0768, [2m[36m(prob_check pid=448745)[0m 10.3250, 4.4533, 0.5910, -3.0137, -3.2265, -3.3985], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.4780], requires_grad=True) , loss: 0.4164910614490509 [2m[36m(prob_check pid=448745)[0m Epoch: 15800/20000 - w:tensor([-3.5529, -1.8183, 1.5083, 2.6508, 1.6946, 0.5693, -4.1185, 1.0806, [2m[36m(prob_check pid=448745)[0m 10.3786, 4.4571, 0.5949, -3.0254, -3.2382, -3.4102], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.4820], requires_grad=True) , loss: 0.4164911210536957 [2m[36m(prob_check pid=448744)[0m Epoch: 15700/20000 - w:tensor([ 5.4104e+00, -2.9108e-01, -6.2327e-01, -2.7210e-02, -6.1466e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.7909], requires_grad=True) , loss: 0.1538723260164261 [2m[36m(prob_check pid=448744)[0m Epoch: 15800/20000 - w:tensor([ 5.4219e+00, -2.9396e-01, -6.2293e-01, -2.7305e-02, -6.1796e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.8013], requires_grad=True) , loss: 0.1538199782371521 [2m[36m(prob_check pid=448744)[0m Epoch: 15900/20000 - w:tensor([ 5.4307e+00, -2.9227e-01, -6.2444e-01, -2.7386e-02, -6.2119e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.8073], requires_grad=True) , loss: 0.15357853472232819 [2m[36m(prob_check pid=448744)[0m Epoch: 16000/20000 - w:tensor([ 5.4406e+00, -2.9261e-01, -6.2512e-01, -2.7458e-02, -6.2450e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.8147], requires_grad=True) , loss: 0.15343189239501953 [2m[36m(prob_check pid=448745)[0m Epoch: 15900/20000 - w:tensor([-3.5530, -1.8183, 1.5082, 2.6547, 1.6985, 0.5732, -4.1185, 1.0844, [2m[36m(prob_check pid=448745)[0m 10.4321, 4.4609, 0.5986, -3.0371, -3.2499, -3.4219], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.4859], requires_grad=True) , loss: 0.4164910316467285 [2m[36m(prob_check pid=448745)[0m Epoch: 16000/20000 - w:tensor([-3.5529, -1.8183, 1.5083, 2.6586, 1.7024, 0.5771, -4.1185, 1.0881, [2m[36m(prob_check pid=448745)[0m 10.4855, 4.4646, 0.6024, -3.0486, -3.2613, -3.4333], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.4898], requires_grad=True) , loss: 0.4164910316467285 [2m[36m(prob_check pid=448745)[0m Epoch: 16100/20000 - w:tensor([-3.5529, -1.8182, 1.5083, 2.6625, 1.7063, 0.5810, -4.1184, 1.0919, [2m[36m(prob_check pid=448745)[0m 10.5388, 4.4684, 0.6062, -3.0599, -3.2726, -3.4446], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.4937], requires_grad=True) , loss: 0.41649100184440613 [2m[36m(prob_check pid=448745)[0m Epoch: 16200/20000 - w:tensor([-3.5529, -1.8183, 1.5082, 2.6661, 1.7099, 0.5846, -4.1185, 1.0955, [2m[36m(prob_check pid=448745)[0m 10.5922, 4.4720, 0.6097, -3.0711, -3.2838, -3.4558], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.4974], requires_grad=True) , loss: 0.41649097204208374 [2m[36m(prob_check pid=448744)[0m Epoch: 16100/20000 - w:tensor([ 5.4515e+00, -2.9538e-01, -6.2497e-01, -2.7691e-02, -6.2780e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.8241], requires_grad=True) , loss: 0.15329687297344208 [2m[36m(prob_check pid=448744)[0m Epoch: 16200/20000 - w:tensor([ 5.4604e+00, -2.9378e-01, -6.2619e-01, -2.7637e-02, -6.3105e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.8300], requires_grad=True) , loss: 0.15314440429210663 [2m[36m(prob_check pid=448744)[0m Epoch: 16300/20000 - w:tensor([ 5.4700e+00, -2.9424e-01, -6.2692e-01, -2.7816e-02, -6.3438e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.8369], requires_grad=True) , loss: 0.15309889614582062 [2m[36m(prob_check pid=448744)[0m Epoch: 16400/20000 - w:tensor([ 5.4801e+00, -2.9524e-01, -6.2699e-01, -2.7833e-02, -6.3759e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.8449], requires_grad=True) , loss: 0.15286166965961456 [2m[36m(prob_check pid=448744)[0m Epoch: 16500/20000 - w:tensor([ 5.4891e+00, -2.9517e-01, -6.2783e-01, -2.7881e-02, -6.4084e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.8509], requires_grad=True) , loss: 0.15272293984889984 [2m[36m(prob_check pid=448745)[0m Epoch: 16300/20000 - w:tensor([-3.5527, -1.8181, 1.5084, 2.6701, 1.7139, 0.5886, -4.1183, 1.0993, [2m[36m(prob_check pid=448745)[0m 10.6451, 4.4758, 0.6136, -3.0821, -3.2949, -3.4668], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5014], requires_grad=True) , loss: 0.41649097204208374 [2m[36m(prob_check pid=448745)[0m Epoch: 16400/20000 - w:tensor([-3.5529, -1.8183, 1.5082, 2.6735, 1.7173, 0.5920, -4.1185, 1.1026, [2m[36m(prob_check pid=448745)[0m 10.6982, 4.4791, 0.6169, -3.0929, -3.3057, -3.4776], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5048], requires_grad=True) , loss: 0.41649091243743896 [2m[36m(prob_check pid=448745)[0m Epoch: 16500/20000 - w:tensor([-3.5529, -1.8183, 1.5082, 2.6772, 1.7210, 0.5957, -4.1185, 1.1063, [2m[36m(prob_check pid=448745)[0m 10.7511, 4.4827, 0.6205, -3.1039, -3.3167, -3.4887], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5085], requires_grad=True) , loss: 0.4164908528327942 [2m[36m(prob_check pid=448745)[0m Epoch: 16600/20000 - w:tensor([-3.5530, -1.8183, 1.5082, 2.6807, 1.7245, 0.5991, -4.1185, 1.1096, [2m[36m(prob_check pid=448745)[0m 10.8038, 4.4861, 0.6239, -3.1143, -3.3271, -3.4991], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5119], requires_grad=True) , loss: 0.4164908528327942 [2m[36m(prob_check pid=448745)[0m Epoch: 16700/20000 - w:tensor([-3.5529, -1.8183, 1.5082, 2.6841, 1.7279, 0.6026, -4.1185, 1.1131, [2m[36m(prob_check pid=448745)[0m 10.8567, 4.4895, 0.6273, -3.1247, -3.3374, -3.5094], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5154], requires_grad=True) , loss: 0.4164907932281494 [2m[36m(prob_check pid=448744)[0m Epoch: 16600/20000 - w:tensor([ 5.4998e+00, -2.9769e-01, -6.2729e-01, -2.8154e-02, -6.4410e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.8599], requires_grad=True) , loss: 0.15285399556159973 [2m[36m(prob_check pid=448744)[0m Epoch: 16700/20000 - w:tensor([ 5.5078e+00, -2.9633e-01, -6.2871e-01, -2.8062e-02, -6.4728e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.8642], requires_grad=True) , loss: 0.15245121717453003 [2m[36m(prob_check pid=448744)[0m Epoch: 16800/20000 - w:tensor([ 5.5168e+00, -2.9662e-01, -6.2928e-01, -2.8132e-02, -6.5053e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.8701], requires_grad=True) , loss: 0.15231576561927795 [2m[36m(prob_check pid=448744)[0m Epoch: 16900/20000 - w:tensor([ 5.5269e+00, -2.9890e-01, -6.2888e-01, -2.8278e-02, -6.5371e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.8782], requires_grad=True) , loss: 0.1522223949432373 [2m[36m(prob_check pid=448744)[0m Epoch: 17000/20000 - w:tensor([ 5.5347e+00, -2.9771e-01, -6.3008e-01, -2.8306e-02, -6.5683e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.8823], requires_grad=True) , loss: 0.1520553082227707 [2m[36m(prob_check pid=448745)[0m Epoch: 16800/20000 - w:tensor([-3.5538, -1.8192, 1.5074, 2.6869, 1.7307, 0.6054, -4.1193, 1.1157, [2m[36m(prob_check pid=448745)[0m 10.9090, 4.4922, 0.6300, -3.1362, -3.3490, -3.5210], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5182], requires_grad=True) , loss: 0.416491836309433 [2m[36m(prob_check pid=448745)[0m Epoch: 16900/20000 - w:tensor([-3.5529, -1.8183, 1.5082, 2.6912, 1.7350, 0.6097, -4.1185, 1.1199, [2m[36m(prob_check pid=448745)[0m 10.9617, 4.4964, 0.6342, -3.1456, -3.3584, -3.5303], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5225], requires_grad=True) , loss: 0.41649073362350464 [2m[36m(prob_check pid=448745)[0m Epoch: 17000/20000 - w:tensor([-3.5528, -1.8181, 1.5084, 2.6949, 1.7387, 0.6134, -4.1183, 1.1236, [2m[36m(prob_check pid=448745)[0m 11.0138, 4.5001, 0.6378, -3.1559, -3.3687, -3.5407], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5262], requires_grad=True) , loss: 0.41649073362350464 [2m[36m(prob_check pid=448745)[0m Epoch: 17100/20000 - w:tensor([-3.5529, -1.8183, 1.5082, 2.6980, 1.7418, 0.6165, -4.1185, 1.1267, [2m[36m(prob_check pid=448745)[0m 11.0661, 4.5031, 0.6409, -3.1661, -3.3788, -3.5508], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5293], requires_grad=True) , loss: 0.41649073362350464 [2m[36m(prob_check pid=448745)[0m Epoch: 17200/20000 - w:tensor([-3.5529, -1.8183, 1.5082, 2.7015, 1.7453, 0.6200, -4.1185, 1.1300, [2m[36m(prob_check pid=448745)[0m 11.1181, 4.5065, 0.6443, -3.1764, -3.3892, -3.5611], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5328], requires_grad=True) , loss: 0.4164906442165375 [2m[36m(prob_check pid=448744)[0m Epoch: 17100/20000 - w:tensor([ 5.5434e+00, -2.9805e-01, -6.3055e-01, -2.8382e-02, -6.6005e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.8878], requires_grad=True) , loss: 0.15192429721355438 [2m[36m(prob_check pid=448744)[0m Epoch: 17200/20000 - w:tensor([ 5.5531e+00, -3.0021e-01, -6.3018e-01, -2.8514e-02, -6.6321e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.8952], requires_grad=True) , loss: 0.15183797478675842 [2m[36m(prob_check pid=448744)[0m Epoch: 17300/20000 - w:tensor([ 5.5608e+00, -2.9912e-01, -6.3127e-01, -2.8556e-02, -6.6633e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.8990], requires_grad=True) , loss: 0.15167048573493958 [2m[36m(prob_check pid=448744)[0m Epoch: 17400/20000 - w:tensor([ 5.5692e+00, -2.9955e-01, -6.3177e-01, -2.8710e-02, -6.6954e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.9040], requires_grad=True) , loss: 0.15159542858600616 [2m[36m(prob_check pid=448744)[0m Epoch: 17500/20000 - w:tensor([ 5.5781e+00, -3.0053e-01, -6.3173e-01, -2.8769e-02, -6.7265e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.9101], requires_grad=True) , loss: 0.1514194756746292 [2m[36m(prob_check pid=448745)[0m Epoch: 17300/20000 - w:tensor([-3.5529, -1.8183, 1.5082, 2.7047, 1.7485, 0.6232, -4.1185, 1.1332, [2m[36m(prob_check pid=448745)[0m 11.1700, 4.5097, 0.6475, -3.1859, -3.3987, -3.5707], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5360], requires_grad=True) , loss: 0.4164906442165375 [2m[36m(prob_check pid=448745)[0m Epoch: 17400/20000 - w:tensor([-3.5529, -1.8183, 1.5082, 2.7079, 1.7517, 0.6264, -4.1185, 1.1364, [2m[36m(prob_check pid=448745)[0m 11.2221, 4.5129, 0.6506, -3.1956, -3.4084, -3.5803], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5392], requires_grad=True) , loss: 0.4164906144142151 [2m[36m(prob_check pid=448745)[0m Epoch: 17500/20000 - w:tensor([-3.5529, -1.8183, 1.5082, 2.7113, 1.7551, 0.6298, -4.1185, 1.1397, [2m[36m(prob_check pid=448745)[0m 11.2734, 4.5162, 0.6539, -3.2056, -3.4184, -3.5903], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5426], requires_grad=True) , loss: 0.4164905846118927 [2m[36m(prob_check pid=448745)[0m Epoch: 17600/20000 - w:tensor([-3.5529, -1.8183, 1.5082, 2.7144, 1.7582, 0.6329, -4.1185, 1.1427, [2m[36m(prob_check pid=448745)[0m 11.3250, 4.5192, 0.6570, -3.2148, -3.4276, -3.5995], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5457], requires_grad=True) , loss: 0.4164905846118927 [2m[36m(prob_check pid=448745)[0m Epoch: 17700/20000 - w:tensor([-3.5529, -1.8183, 1.5082, 2.7174, 1.7612, 0.6359, -4.1185, 1.1457, [2m[36m(prob_check pid=448745)[0m 11.3768, 4.5222, 0.6600, -3.2238, -3.4366, -3.6086], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5487], requires_grad=True) , loss: 0.4164905846118927 [2m[36m(prob_check pid=448744)[0m Epoch: 17600/20000 - w:tensor([ 5.5861e+00, -3.0045e-01, -6.3237e-01, -2.8803e-02, -6.7581e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.9143], requires_grad=True) , loss: 0.1512949913740158 [2m[36m(prob_check pid=448744)[0m Epoch: 17700/20000 - w:tensor([ 5.5952e+00, -3.0272e-01, -6.3206e-01, -2.8989e-02, -6.7894e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.9207], requires_grad=True) , loss: 0.15117321908473969 [2m[36m(prob_check pid=448744)[0m Epoch: 17800/20000 - w:tensor([ 5.6026e+00, -3.0145e-01, -6.3297e-01, -2.8975e-02, -6.8203e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.9241], requires_grad=True) , loss: 0.15105250477790833 [2m[36m(prob_check pid=448744)[0m Epoch: 17900/20000 - w:tensor([ 5.6112e+00, -3.0096e-01, -6.3287e-01, -2.9606e-02, -6.8519e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.9301], requires_grad=True) , loss: 0.15827180445194244 [2m[36m(prob_check pid=448744)[0m Epoch: 18000/20000 - w:tensor([ 5.6189e+00, -3.0270e-01, -6.3339e-01, -2.9157e-02, -6.8823e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.9336], requires_grad=True) , loss: 0.15081439912319183 [2m[36m(prob_check pid=448745)[0m Epoch: 17800/20000 - w:tensor([-3.5529, -1.8183, 1.5082, 2.7205, 1.7643, 0.6390, -4.1185, 1.1488, [2m[36m(prob_check pid=448745)[0m 11.4276, 4.5253, 0.6631, -3.2333, -3.4460, -3.6180], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5519], requires_grad=True) , loss: 0.4164905250072479 [2m[36m(prob_check pid=448745)[0m Epoch: 17900/20000 - w:tensor([-3.5528, -1.8182, 1.5084, 2.7239, 1.7677, 0.6424, -4.1183, 1.1521, [2m[36m(prob_check pid=448745)[0m 11.4785, 4.5286, 0.6664, -3.2427, -3.4555, -3.6274], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5552], requires_grad=True) , loss: 0.4164905250072479 [2m[36m(prob_check pid=448745)[0m Epoch: 18000/20000 - w:tensor([-3.5529, -1.8183, 1.5082, 2.7268, 1.7706, 0.6453, -4.1185, 1.1550, [2m[36m(prob_check pid=448745)[0m 11.5301, 4.5315, 0.6693, -3.2520, -3.4648, -3.6367], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5582], requires_grad=True) , loss: 0.41649049520492554 [2m[36m(prob_check pid=448745)[0m Epoch: 18100/20000 - w:tensor([-3.5526, -1.8180, 1.5086, 2.7303, 1.7742, 0.6488, -4.1181, 1.1585, [2m[36m(prob_check pid=448745)[0m 11.5806, 4.5350, 0.6728, -3.2611, -3.4738, -3.6458], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5617], requires_grad=True) , loss: 0.4164906144142151 [2m[36m(prob_check pid=448745)[0m Epoch: 18200/20000 - w:tensor([-3.5529, -1.8183, 1.5082, 2.7330, 1.7768, 0.6515, -4.1185, 1.1611, [2m[36m(prob_check pid=448745)[0m 11.6307, 4.5376, 0.6753, -3.2704, -3.4832, -3.6551], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5643], requires_grad=True) , loss: 0.41649049520492554 [2m[36m(prob_check pid=448744)[0m Epoch: 18100/20000 - w:tensor([ 5.6265e+00, -3.0272e-01, -6.3388e-01, -2.9220e-02, -6.9133e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.9373], requires_grad=True) , loss: 0.15069665014743805 [2m[36m(prob_check pid=448744)[0m Epoch: 18200/20000 - w:tensor([ 5.6355e+00, -3.0562e-01, -6.3321e-01, -2.9378e-02, -6.9440e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.9436], requires_grad=True) , loss: 0.15062344074249268 [2m[36m(prob_check pid=448744)[0m Epoch: 18300/20000 - w:tensor([ 5.6420e+00, -3.0377e-01, -6.3431e-01, -2.9395e-02, -6.9739e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.9458], requires_grad=True) , loss: 0.15046840906143188 [2m[36m(prob_check pid=448744)[0m Epoch: 18400/20000 - w:tensor([ 5.6495e+00, -3.0403e-01, -6.3463e-01, -2.9466e-02, -7.0047e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.9494], requires_grad=True) , loss: 0.1503542959690094 [2m[36m(prob_check pid=448744)[0m Epoch: 18500/20000 - w:tensor([ 5.6580e+00, -3.0643e-01, -6.3416e-01, -2.9666e-02, -7.0350e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.9549], requires_grad=True) , loss: 0.15031054615974426 [2m[36m(prob_check pid=448745)[0m Epoch: 18300/20000 - w:tensor([-3.5527, -1.8181, 1.5085, 2.7363, 1.7801, 0.6548, -4.1182, 1.1644, [2m[36m(prob_check pid=448745)[0m 11.6813, 4.5409, 0.6787, -3.2795, -3.4923, -3.6642], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5677], requires_grad=True) , loss: 0.41649046540260315 [2m[36m(prob_check pid=448745)[0m Epoch: 18400/20000 - w:tensor([-3.5529, -1.8183, 1.5082, 2.7390, 1.7828, 0.6575, -4.1185, 1.1671, [2m[36m(prob_check pid=448745)[0m 11.7320, 4.5435, 0.6813, -3.2885, -3.5012, -3.6732], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5704], requires_grad=True) , loss: 0.4164904057979584 [2m[36m(prob_check pid=448745)[0m Epoch: 18500/20000 - w:tensor([-3.5530, -1.8184, 1.5081, 2.7420, 1.7858, 0.6605, -4.1185, 1.1700, [2m[36m(prob_check pid=448745)[0m 11.7827, 4.5465, 0.6842, -3.2976, -3.5104, -3.6824], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5733], requires_grad=True) , loss: 0.4164904057979584 [2m[36m(prob_check pid=448745)[0m Epoch: 18600/20000 - w:tensor([-3.5529, -1.8183, 1.5082, 2.7450, 1.7888, 0.6635, -4.1185, 1.1730, [2m[36m(prob_check pid=448745)[0m 11.8315, 4.5494, 0.6872, -3.3063, -3.5190, -3.6910], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5763], requires_grad=True) , loss: 0.4164903461933136 [2m[36m(prob_check pid=448745)[0m Epoch: 18700/20000 - w:tensor([-3.5530, -1.8184, 1.5081, 2.7479, 1.7917, 0.6664, -4.1185, 1.1758, [2m[36m(prob_check pid=448745)[0m 11.8808, 4.5523, 0.6901, -3.3152, -3.5280, -3.6999], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5792], requires_grad=True) , loss: 0.4164903461933136 [2m[36m(prob_check pid=448744)[0m Epoch: 18600/20000 - w:tensor([ 5.6646e+00, -3.0503e-01, -6.3501e-01, -2.9639e-02, -7.0650e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.9572], requires_grad=True) , loss: 0.15013191103935242 [2m[36m(prob_check pid=448744)[0m Epoch: 18700/20000 - w:tensor([ 5.6718e+00, -3.0541e-01, -6.3533e-01, -2.9777e-02, -7.0958e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.9606], requires_grad=True) , loss: 0.1500539779663086 [2m[36m(prob_check pid=448744)[0m Epoch: 18800/20000 - w:tensor([ 5.6797e+00, -3.0639e-01, -6.3510e-01, -2.9844e-02, -7.1254e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.9651], requires_grad=True) , loss: 0.14991259574890137 [2m[36m(prob_check pid=448744)[0m Epoch: 18900/20000 - w:tensor([ 5.6864e+00, -3.0624e-01, -6.3563e-01, -2.9881e-02, -7.1554e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.9676], requires_grad=True) , loss: 0.14980432391166687 [2m[36m(prob_check pid=448744)[0m Epoch: 19000/20000 - w:tensor([ 5.6949e+00, -3.0830e-01, -6.3474e-01, -2.9926e-02, -7.1855e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.9731], requires_grad=True) , loss: 0.14999963343143463 [2m[36m(prob_check pid=448745)[0m Epoch: 18800/20000 - w:tensor([-3.5529, -1.8183, 1.5082, 2.7508, 1.7946, 0.6693, -4.1185, 1.1788, [2m[36m(prob_check pid=448745)[0m 11.9309, 4.5552, 0.6930, -3.3238, -3.5366, -3.7085], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5822], requires_grad=True) , loss: 0.4164903461933136 [2m[36m(prob_check pid=448745)[0m Epoch: 18900/20000 - w:tensor([-3.5536, -1.8190, 1.5076, 2.7531, 1.7969, 0.6716, -4.1191, 1.1810, [2m[36m(prob_check pid=448745)[0m 11.9809, 4.5575, 0.6953, -3.3333, -3.5460, -3.7180], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5845], requires_grad=True) , loss: 0.4164907932281494 [2m[36m(prob_check pid=448745)[0m Epoch: 19000/20000 - w:tensor([-3.5529, -1.8183, 1.5082, 2.7565, 1.8004, 0.6750, -4.1185, 1.1844, [2m[36m(prob_check pid=448745)[0m 12.0311, 4.5609, 0.6987, -3.3409, -3.5536, -3.7256], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5879], requires_grad=True) , loss: 0.4164903163909912 [2m[36m(prob_check pid=448745)[0m Epoch: 19100/20000 - w:tensor([-3.5529, -1.8183, 1.5082, 2.7594, 1.8032, 0.6779, -4.1185, 1.1873, [2m[36m(prob_check pid=448745)[0m 12.0791, 4.5637, 0.7015, -3.3494, -3.5622, -3.7341], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5908], requires_grad=True) , loss: 0.4164903163909912 [2m[36m(prob_check pid=448744)[0m Epoch: 19100/20000 - w:tensor([ 5.7008e+00, -3.0722e-01, -6.3589e-01, -3.0054e-02, -7.2151e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.9744], requires_grad=True) , loss: 0.14959163963794708 [2m[36m(prob_check pid=448744)[0m Epoch: 19200/20000 - w:tensor([ 5.7076e+00, -3.0765e-01, -6.3629e-01, -3.0294e-02, -7.2454e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.9771], requires_grad=True) , loss: 0.14972005784511566 [2m[36m(prob_check pid=448744)[0m Epoch: 19300/20000 - w:tensor([ 5.7151e+00, -3.0849e-01, -6.3595e-01, -3.0243e-02, -7.2744e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.9812], requires_grad=True) , loss: 0.14938384294509888 [2m[36m(prob_check pid=448744)[0m Epoch: 19400/20000 - w:tensor([ 5.7215e+00, -3.0835e-01, -6.3639e-01, -3.0291e-02, -7.3039e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.9833], requires_grad=True) , loss: 0.14928025007247925 [2m[36m(prob_check pid=448744)[0m Epoch: 19500/20000 - w:tensor([ 5.7299e+00, -3.1005e-01, -6.3511e-01, -3.0484e-02, -7.3335e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.9892], requires_grad=True) , loss: 0.14921145141124725 [2m[36m(prob_check pid=448745)[0m Epoch: 19200/20000 - w:tensor([-3.5529, -1.8183, 1.5082, 2.7622, 1.8060, 0.6807, -4.1185, 1.1900, [2m[36m(prob_check pid=448745)[0m 12.1273, 4.5665, 0.7043, -3.3577, -3.5705, -3.7425], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5935], requires_grad=True) , loss: 0.41649025678634644 [2m[36m(prob_check pid=448745)[0m Epoch: 19300/20000 - w:tensor([-3.5529, -1.8184, 1.5082, 2.7650, 1.8088, 0.6835, -4.1185, 1.1928, [2m[36m(prob_check pid=448745)[0m 12.1747, 4.5693, 0.7070, -3.3661, -3.5789, -3.7508], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5963], requires_grad=True) , loss: 0.41649022698402405 [2m[36m(prob_check pid=448745)[0m Epoch: 19400/20000 - w:tensor([-3.5529, -1.8183, 1.5082, 2.7678, 1.8116, 0.6863, -4.1184, 1.1956, [2m[36m(prob_check pid=448745)[0m 12.2231, 4.5720, 0.7098, -3.3743, -3.5871, -3.7591], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.5991], requires_grad=True) , loss: 0.41649022698402405 [2m[36m(prob_check pid=448745)[0m Epoch: 19500/20000 - w:tensor([-3.5529, -1.8184, 1.5082, 2.7704, 1.8142, 0.6889, -4.1185, 1.1982, [2m[36m(prob_check pid=448745)[0m 12.2718, 4.5746, 0.7124, -3.3823, -3.5950, -3.7670], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.6017], requires_grad=True) , loss: 0.41649019718170166 [2m[36m(prob_check pid=448745)[0m Epoch: 19600/20000 - w:tensor([-3.5529, -1.8184, 1.5082, 2.7731, 1.8169, 0.6916, -4.1185, 1.2009, [2m[36m(prob_check pid=448745)[0m 12.3214, 4.5773, 0.7151, -3.3905, -3.6033, -3.7752], [2m[36m(prob_check pid=448745)[0m requires_grad=True),b:tensor([3.6045], requires_grad=True) , loss: 0.41649019718170166 [2m[36m(prob_check pid=448744)[0m Epoch: 19600/20000 - w:tensor([ 5.7351e+00, -3.0938e-01, -6.3652e-01, -3.0466e-02, -7.3622e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.9893], requires_grad=True) , loss: 0.1490791141986847 [2m[36m(prob_check pid=448744)[0m Epoch: 19700/20000 - w:tensor([ 5.7415e+00, -3.0956e-01, -6.3674e-01, -3.0533e-02, -7.3916e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.9916], requires_grad=True) , loss: 0.14897893369197845 [2m[36m(prob_check pid=448744)[0m Epoch: 19800/20000 - w:tensor([ 5.7493e+00, -3.1266e-01, -6.3595e-01, -3.0797e-02, -7.4206e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.9963], requires_grad=True) , loss: 0.1488969624042511 [2m[36m(prob_check pid=448744)[0m Epoch: 19900/20000 - w:tensor([ 5.7547e+00, -3.1056e-01, -6.3683e-01, -3.0705e-02, -7.4488e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.9971], requires_grad=True) , loss: 0.1487853080034256 [2m[36m(prob_check pid=448744)[0m Epoch: 20000/20000 - w:tensor([ 5.7609e+00, -3.1076e-01, -6.3700e-01, -3.0773e-02, -7.4777e+01], [2m[36m(prob_check pid=448744)[0m requires_grad=True),b:tensor([9.9991], requires_grad=True) , loss: 0.14868876338005066 {'Nucobi': 70.0}
This is an auto-generated grading output. Checking code of Nucobi {'Nucobi': 70.0}
This is an auto-generated grading output. Your code failed to run. Please check again.
This is an auto-generated grading output. Your code failed to run. Please check again.
This is an auto-generated grading output. Checking code of Nucobi {'Nucobi': 90.0}
Problem
Week 3_Problem 1
Source Code
Description
ㅁ
Output (Optional)
No response