this is the full output :(
(base) ➜ llama.cpp git:(master) ✗ ./perplexity -m ~/Projects/llama/llama.cpp.fork/llama.cpp/abcd3-Q8_0.bin -f ~/Downloads/wikitext-2-raw/wiki.test.raw -c 256 -b 256
main: build = 899 (41c6741)
main: seed = 1690940710
llama.cpp: loading model from /Users/aniket/Projects/llama/llama.cpp.fork/llama.cpp/abcd3-Q8_0.bin
llama_model_load_internal: format = ggjt v3 (latest)
llama_model_load_internal: n_vocab = 32000
llama_model_load_internal: n_ctx = 256
llama_model_load_internal: n_embd = 768
llama_model_load_internal: n_mult = 256
llama_model_load_internal: n_head = 12
llama_model_load_internal: n_head_kv = 12
llama_model_load_internal: n_layer = 12
llama_model_load_internal: n_rot = 64
llama_model_load_internal: n_gqa = 1
llama_model_load_internal: rnorm_eps = 1.0e-06
llama_model_load_internal: n_ff = 2048
llama_model_load_internal: freq_base = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype = 7 (mostly Q8_0)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size = 0.03 MB
llama_model_load_internal: mem required = 416.30 MB (+ 9.00 MB per state)
llama_new_context_with_model: kv self size = 9.00 MB
system_info: n_threads = 8 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 |
perplexity: calculating perplexity over 1311 chunks, batch_size=256
perplexity: 0.33 seconds per pass - ETA 7 minutes
[1]2860.3375,[2]2436.9362,[3]2186.3914,[4]2406.0672,[5]2538.8555,[6]2178.1332,[7]2116.4574,[8]2132.8948,[9]1962.3468,[10]1812.1093,[11]1716.4927,[12]1673.2454,[13]1630.5830,[14]1534.8385,[15]1613.0684,[16]1590.8615,[17]1617.6147,[18]1588.8117,[19]1652.2984,[20]1752.0137,[21]1771.4235,[22]1787.9915,[23]1805.5367,[24]1846.5598,[25]1889.6360,[26]1939.2647,[27]1967.6604,[28]2056.4588,[29]2094.1681,[30]2165.7395,[31]2249.5536,[32]2297.6712,[33]2366.7942,[34]2308.7368,[35]2376.7382,[36]2461.2932,[37]2373.5605,[38]2317.6974,[39]2336.6025,[40]2378.3409,[41]2368.5126,[42]2328.3893,[43]2340.3045,[44]2338.5290,[45]2321.9427,[46]2335.7409,[47]2309.7168,[48]2296.5449,[49]2258.8426,[50]2256.7379,[51]2218.5719,[52]2190.4367,[53]2178.9783,[54]2141.1635,[55]2113.9895,[56]2094.6218,[57]2041.6946,[58]2013.6083,[59]1994.0513,[60]1972.3377,[61]1978.5532,[62]1936.0335,[63]1874.4220,[64]1865.6503,[65]1846.7880,[66]1842.8056,[67]1844.0618,[68]1828.1862,[69]1818.0291,[70]1792.2732,[71]1773.2174,[72]1759.1048,[73]1745.0218,[74]1747.4857,[75]1768.1149,[76]1778.3231,[77]1777.6663,[78]1781.3810,[79]1760.5831,[80]1758.7362,[81]1734.7516,[82]1753.3566,[83]1737.0973,[84]1716.4604,[85]1713.7387,[86]1721.4845,[87]1731.1687,[88]1739.4820,[89]1745.8134,[90]1730.6985,[91]1734.9241,[92]1718.1362,[93]1698.3059,[94]1704.3635,[95]1690.5197,[96]1685.4160,[97]1685.8797,[98]1688.0781,[99]1679.9631,[100]1672.2159,[101]1677.0512,[102]1679.4414,[103]1659.5962,[104]1654.2706,[105]1654.2746,[106]1651.6193,[107]1645.0210,[108]1635.3334,[109]1633.4561,[110]1639.3032,[111]1637.8826,[112]1649.7184,[113]1649.0289,[114]1650.1459,[115]1645.8023,[116]1650.7373,[117]1646.3443,[118]1632.7998,[119]1630.2721,[120]1628.1792,[121]1633.5012,[122]1638.4965,[123]1629.7042,[124]1620.8193,[125]1625.7623,[126]1626.4088,[127]1627.7177,[128]1625.5231,[129]1631.2167,[130]1646.2759,[131]1646.6899,[132]1643.7065,[133]1640.0839,[134]1644.0324,[135]1641.2513,[136]1646.8058,[137]1649.6297,[138]1654.9022,[139]1664.1773,[140]1665.3170,[141]1667.1163,[142]1669.7280,[143]1668.4198,[144]1669.3455,[145]1653.6096,[146]1656.8487,[147]1657.8640,[148]1648.6735,[149]1656.4709,[150]1652.6936,[151]1654.3446,[152]1655.1340,[153]1657.2635,[154]1659.3006,[155]1654.5587,[156]1652.5187,[157]1659.7138,[158]1659.8268,[159]1666.8177,[160]1658.2494,[161]1656.5646,[162]1663.0856,[163]1672.1692,[164]1672.8118,[165]1670.6294,[166]1669.0859,[167]1666.7125,[168]1669.4211,[169]1668.9332,[170]1660.7484,[171]1668.6908,[172]1684.3982,[173]1687.3073,[174]1670.7918,[175]1659.8488,[176]1656.0842,[177]1651.3875,[178]1647.8584,[179]1648.4240,[180]1649.9884,[181]1648.8757,[182]1651.4169,[183]1646.5549,[184]1648.1276,[185]1651.3363,[186]1655.8177,[187]1658.4540,[188]1656.0268,[189]1664.3352,[190]1663.7270,[191]1670.1753,[192]1670.7423,[193]1670.5559,[194]1675.4029,[195]1672.0699,[196]1670.3347,[197]1665.9638,[198]1656.3988,[199]1656.5719,[200]1650.2807,[201]1651.3636,[202]1647.5314,[203]1648.4004,[204]1653.0102,[205]1654.2333,[206]1650.2222,[207]1647.4284,[208]1649.3697,[209]1652.6462,[210]1648.3201,[211]1643.4312,[212]1640.0292,[213]1639.4803,[214]1637.8205,[215]1639.6645,[216]1643.2486,[217]1645.6827,[218]1643.9009,[219]1645.3749,[220]1644.2730,[221]1646.2688,[222]1648.0073,[223]1651.0300,[224]1656.7726,[225]1661.3355,[226]1663.7344,[227]1665.6032,[228]1670.7155,[229]1678.8460,[230]1677.0139,[231]1673.4609,[232]1677.8453,[233]1679.9784,[234]1677.5002,[235]1673.2470,[236]1675.6026,[237]1671.3974,[238]1669.2066,[239]1672.0794,[240]1673.1525,[241]1673.6791,[242]1678.3516,[243]1680.7756,[244]1676.0949,[24
5]1678.3449,[246]1680.5977,[247]1683.5946,[248]1680.2195,[249]1679.3509,[250]1682.6694,[251]1687.2254,[252]1690.1529,[253]1690.6664,[254]1695.7658,[255]1700.3287,[256]1700.2567,[257]1708.0746,[258]1708.2733,[259]1715.1067,[260]1715.3891,[261]1717.2686,[262]1721.7106,[263]1721.2355,[264]1723.3278,[265]1727.6735,[266]1728.7411,[267]1729.6646,[268]1732.4415,[269]1735.0419,[270]1738.6150,[271]1739.0939,[272]1747.2221,[273]1753.2670,[274]1758.9796,[275]1764.9004,[276]1770.4347,[277]1772.9088,[278]1772.6457,[279]1774.2319,[280]1775.9361,[281]1782.1867,[282]1789.8997,[283]1794.5846,[284]1799.9440,[285]1805.8790,[286]1810.2575,[287]1813.4683,[288]1819.4101,[289]1826.3139,[290]1834.6185,[291]1837.6558,[292]1836.6958,[293]1838.2590,[294]1839.0047,[295]1839.1321,[296]1839.9011,[297]1843.0473,[298]1840.1949,[299]1844.3146,[300]1841.7924,[301]1846.9756,[302]1854.0145,[303]1854.2678,[304]1853.9627,[305]1853.2564,[306]1848.6097,[307]1856.5538,[308]1860.6050,[309]1861.1993,[310]1862.3772,[311]1868.0699,[312]1873.5269,[313]1869.3633,[314]1874.0257,[315]1876.2460,[316]1881.4571,[317]1881.0394,[318]1888.2094,[319]1894.1886,[320]1902.1414,[321]1901.2143,[322]1903.3256,[323]1903.2548,[324]1899.7532,[325]1900.2084,[326]1890.0535,[327]1885.2750,[328]1881.5558,[329]1881.0522,[330]1878.4825,[331]1877.4387,[332]1875.4826,[333]1876.4423,[334]1871.0755,[335]1874.1383,[336]1868.4485,[337]1864.6439,[338]1865.3821,[339]1864.2287,[340]1859.6070,[341]1861.1368,[342]1857.8037,[343]1856.6779,[344]1852.0315,[345]1845.9424,[346]1843.9886,[347]1841.5811,[348]1840.9104,[349]1840.6060,[350]1838.9468,[351]1836.9397,[352]1834.4502,[353]1828.0196,[354]1825.3303,[355]1823.8154,[356]1822.3701,[357]1817.5368,[358]1814.8873,[359]1819.2950,[360]1817.9219,[361]1815.5022,[362]1813.4216,[363]1810.0616,[364]1808.5160,[365]1804.5293,[366]1799.9915,[367]1797.3543,[368]1794.6910,[369]1797.8138,[370]1798.7262,[371]1792.0664,[372]1794.0518,[373]1799.7858,[374]1801.2253,[375]1803.0215,[376]1801.6827,[377]1803.2580,[378]1807.0645,[379]1805.7785,[380]1806.9757,[381]1804.2161,[382]1799.1980,[383]1798.8903,[384]1797.0514,[385]1799.3897,[386]1799.5165,[387]1795.5776,[388]1790.2993,[389]1790.2038,[390]1790.7424,[391]1788.9430,[392]1791.8004,[393]1790.0034,[394]1787.8519,[395]1784.6756,[396]1786.2399,[397]1785.5318,[398]1785.1835,[399]1785.2846,[400]1788.1361,[401]1794.4275,[402]1794.3017,[403]1798.4436,[404]1804.2708,[405]1807.5015,[406]1809.0308,[407]1811.5539,[408]1810.6572,[409]1815.6797,[410]1817.1690,[411]1817.7841,[412]1819.3003,[413]1821.2366,[414]1820.6970,[415]1821.2395,[416]1821.7260,[417]1826.7649,[418]1826.7970,[419]1825.9099,[420]1828.0112,[421]1827.7209,[422]1828.3388,[423]1827.1186,[424]1826.3322,[425]1827.7807,[426]1828.3993,[427]1829.2814,[428]1827.4070,[429]1826.0674,[430]1825.1586,[431]1819.4765,[432]1814.1873,[433]1808.9054,[434]1808.2123,[435]1805.5590,[436]1803.2972,[437]1797.1464,[438]1795.4691,[439]1791.5068,[440]1786.3336,[441]1783.4368,[442]1780.4812,[443]1777.9704,[444]1775.0558,[445]1774.7371,[446]1773.8569,[447]1775.2767,[448]1776.1873,[449]1778.6198,[450]1780.4759,[451]1782.3345,[452]1785.3446,[453]1787.3845,[454]1793.8895,[455]1795.7430,[456]1796.5460,[457]1795.6233,[458]1797.9878,[459]1802.2221,[460]1805.6564,[461]1804.1301,[462]1804.7204,[463]1805.8283,[464]1808.3461,[465]1806.5040,[466]1807.8383,[467]1810.2260,[468]1808.6434,[469]1810.0203,[470]1813.5743,[471]1813.7086,[472]1815.1967,[473]1817.2221,[474]1817.2241,[475]1820.3516,[476]1824.2609,[477]1826.1265,[478]1825.0198,[479]1826.0477,[480]1827.7027,[481]1829.6184,[48
2]1830.6577,[483]1832.2123,[484]1833.2918,[485]1835.7773,[486]1838.9599,[487]1838.4127,[488]1839.2840,[489]1837.4208,[490]1838.2550,[491]1838.5508,[492]1837.2185,[493]1839.0730,[494]1839.5743,[495]1841.7069,[496]1843.6092,[497]1845.2947,[498]1845.1000,[499]1846.3530,[500]1848.1003,[501]1848.6620,[502]1848.4365,[503]1847.4795,[504]1848.1550,[505]1851.5232,[506]1852.8891,[507]1854.8848,[508]1855.8990,[509]1856.5360,[510]1856.7700,[511]1857.7298,[512]1856.9463,[513]1857.7945,[514]1859.6003,[515]1858.5158,[516]1859.5992,[517]1860.2897,[518]1862.8420,[519]1861.5620,[520]1862.3590,[521]1860.8453,[522]1859.8696,[523]1861.7582,[524]1861.8546,[525]1862.0014,[526]1864.3410,[527]1864.7274,[528]1863.6373,[529]1865.7893,[530]1867.5056,[531]1868.2238,[532]1871.4355,[533]1875.3723,[534]1876.9966,[535]1881.9332,[536]1886.7185,[537]1890.0839,[538]1894.5670,[539]1897.7008,[540]1900.6359,[541]1903.7137,[542]1907.7726,[543]1908.2099,[544]1908.8690,[545]1910.8556,[546]1911.1571,[547]1914.9224,[548]1916.4104,[549]1915.0165,[550]1915.3953,[551]1916.2077,[552]1915.3917,[553]1914.0088,[554]1915.2760,[555]1916.7359,[556]1917.8504,[557]1917.0769,[558]1918.4927,[559]1920.4980,[560]1923.7811,[561]1923.2838,[562]1923.7707,[563]1924.4278,[564]1924.4054,[565]1924.4811,[566]1924.0367,[567]1924.6453,[568]1923.5696,[569]1921.8455,[570]1923.4386,[571]1927.0681,[572]1930.9603,[573]1932.4037,[574]1932.0946,[575]1931.5670,[576]1934.1533,[577]1932.5192,[578]1935.4998,[579]1938.9273,[580]1936.9641,[581]1939.1342,[582]1940.5212,[583]1939.1790,[584]1941.5724,[585]1941.0711,[586]1940.3329,[587]1943.5440,[588]1944.7643,[589]1944.3131,[590]1944.5787,[591]1944.5767,[592]1942.6658,[593]1941.7582,[594]1941.4304,[595]1944.1598,[596]1946.6297,[597]1949.2023,[598]1951.0860,[599]1951.4731,[600]1952.1607,[601]1952.7800,[602]1953.3427,[603]1954.9524,[604]1955.8793,[605]1957.3796,[606]1957.3528,[607]1959.7842,[608]1961.4364,[609]1964.7480,[610]1967.1832,[611]1967.1391,[612]1967.9632,[613]1967.3066,[614]1965.9087,[615]1964.7240,[616]1965.4267,[617]1967.3484,[618]1968.9982,[619]1970.0687,[620]1969.8250,[621]1970.0509,[622]1968.8384,[623]1968.1011,[624]1970.2400,[625]1969.6364,[626]1971.0896,[627]1972.0990,[628]1971.4525,[629]1972.6872,[630]1972.4698,[631]1973.1311,[632]1974.4794,[633]1971.4081,[634]1969.6583,[635]1969.9344,[636]1965.4672,[637]1967.6188,[638]1967.5570,[639]1969.9864,[640]1970.5506,[641]1970.0446,[642]1971.0912,[643]1972.0832,[644]1974.4389,[645]1972.4980,[646]1973.4616,[647]1974.5182,[648]1972.7058,[649]1968.0075,[650]1965.0881,[651]1960.5936,[652]1955.9404,[653]1952.3080,[654]1950.9118,[655]1950.8042,[656]1948.3214,[657]1947.1225,[658]1947.5115,[659]1945.9551,[660]1945.0404,[661]1944.7576,[662]1944.3815,[663]1943.7431,[664]1946.0587,[665]1945.5757,[666]1945.7642,[667]1948.2171,[668]1949.8454,[669]1951.5413,[670]1951.4283,[671]1948.4723,[672]1949.4962,[673]1949.2712,[674]1948.8118,[675]1951.0135,[676]1952.9223,[677]1952.3778,[678]1949.9148,[679]1949.7107,[680]1946.6691,[681]1945.9643,[682]1942.9229,[683]1941.8256,[684]1938.6988,[685]1936.8026,[686]1935.8377,[687]1934.4905,[688]1934.0596,[689]1933.8529,[690]1932.5445,[691]1935.6302,[692]1934.7540,[693]1936.4037,[694]1936.6429,[695]1936.6810,[696]1935.4768,[697]1935.4136,[698]1934.6299,[699]1933.6353,[700]1934.6775,[701]1931.4608,[702]1928.0744,[703]1927.6450,[704]1928.4657,[705]1928.3641,[706]1925.6142,[707]1925.4694,[708]1928.4981,[709]1927.2669,[710]1922.0500,[711]1918.5129,[712]1915.4598,[713]1911.0635,[714]1906.8202,[715]1907.8692,[716]1907.9756,[717]1908.8857,[718]1909.9973,[71
9]1909.5436,[720]1909.2173,[721]1909.2693,[722]1907.3446,[723]1904.2759,[724]1904.7599,[725]1903.5031,[726]1903.0496,[727]1899.8669,[728]1899.1440,[729]1898.0239,[730]1896.9544,[731]1893.9513,[732]1891.2102,[733]1891.1671,[734]1887.3556,[735]1886.6087,[736]1885.0257,[737]1882.5999,[738]1879.8901,[739]1878.6702,[740]1873.6362,[741]1870.6174,[742]1868.7664,[743]1866.2185,[744]1862.5679,[745]1859.5645,[746]1857.4460,[747]1856.5965,[748]1854.3825,[749]1852.6529,[750]1850.4100,[751]1847.2309,[752]1847.6818,[753]1846.5234,[754]1844.7275,[755]1844.6957,[756]1842.8633,[757]1842.7034,[758]1843.0065,[759]1844.4898,[760]1843.3176,[761]1842.5076,[762]1842.1698,[763]1842.3186,[764]1843.4340,[765]1842.5081,[766]1843.4775,[767]1843.2157,[768]1841.6371,[769]1839.3666,[770]1838.8081,[771]1837.4086,[772]1837.5360,[773]1837.5617,[774]1837.5737,[775]1837.0468,[776]1837.5876,[777]1836.4473,[778]1835.3659,[779]1836.9151,[780]1836.6687,[781]1834.6555,[782]1832.3755,[783]1833.7235,[784]1833.1938,[785]1831.2230,[786]1832.2763,[787]1832.1110,[788]1831.9916,[789]1830.6887,[790]1829.7347,[791]1828.0949,[792]1826.4769,[793]1826.1400,[794]1824.5734,[795]1821.5861,[796]1820.9778,[797]1820.2677,[798]1817.3899,[799]1816.5112,[800]1817.4205,[801]1817.8783,[802]1814.7006,[803]1811.5154,[804]1812.3091,[805]1812.6393,[806]1813.0677,[807]1812.1136,[808]1811.6399,[809]1810.4265,[810]1810.4885,[811]1812.2298,[812]1812.6656,[813]1813.5976,[814]1812.7604,[815]1809.5047,[816]1804.4656,[817]1805.2937,[818]1805.0187,[819]1804.5580,[820]1803.3943,[821]1801.7999,[822]1803.9143,[823]1802.9085,[824]1803.5673,[825]1801.6780,[826]1799.9603,[827]1798.7506,[828]1799.6333,[829]1801.0250,[830]1800.7465,[831]1800.7962,[832]1801.5072,[833]1803.0925,[834]1803.3081,[835]1802.6780,[836]1804.1312,[837]1802.9991,[838]1802.4305,[839]1800.5623,[840]1801.2758,[841]1800.3422,[842]1799.9043,[843]1798.5168,[844]1798.6143,[845]1797.7465,[846]1797.8706,[847]1798.1074,[848]1797.6536,[849]1799.1504,[850]1797.5592,[851]1799.3724,[852]1797.8561,[853]1797.9172,[854]1796.7778,[855]1796.8212,[856]1794.5729,[857]1793.6672,[858]1794.4118,[859]1794.2001,[860]1793.7198,[861]1791.7230,[862]1793.1328,[863]1796.0189,[864]1797.4099,[865]1796.6887,[866]1797.9291,[867]1797.7471,[868]1799.4769,[869]1799.3368,[870]1799.7397,[871]1801.0637,[872]1803.2256,[873]1805.2398,[874]1806.3647,[875]1807.0503,[876]1808.1732,[877]1807.8625,[878]1809.1297,[879]1809.3700,[880]1811.4138,[881]1810.6419,[882]1810.0556,[883]1809.9183,[884]1809.3777,[885]1809.5621,[886]1810.1335,[887]1809.6217,[888]1809.7019,[889]1810.4295,[890]1810.2704,[891]1812.0058,[892]1814.3251,[893]1813.9847,[894]1815.4234,[895]1814.4691,[896]1816.9824,[897]1818.0449,[898]1818.8221,[899]1819.8973,[900]1817.6993,[901]1816.7595,[902]1815.9500,[903]1816.6823,[904]1818.1783,[905]1818.8053,[906]1820.2585,[907]1821.3097,[908]1822.7641,[909]1822.9669,[910]1824.1200,[911]1825.9577,[912]1824.3078,[913]1824.6178,[914]1824.3944,[915]1824.9984,[916]1824.4074,[917]1825.5920,[918]1825.0831,[919]1823.1838,[920]1822.0430,[921]1822.8992,[922]1820.0121,[923]1819.6841,[924]1819.8792,[925]1816.8217,[926]1817.4298,[927]1816.0040,[928]1816.5282,[929]1816.4983,[930]1817.1908,[931]1818.3925,[932]1819.9836,[933]1820.3063,[934]1820.1046,[935]1817.9302,[936]1819.4012,[937]1818.8116,[938]1819.2363,[939]1817.8650,[940]1817.6660,[941]1817.5419,[942]1817.8916,[943]1815.9028,[944]1814.8533,[945]1813.9406,[946]1814.7448,[947]1814.6001,[948]1816.1192,[949]1816.9888,[950]1819.4856,[951]1820.2438,[952]1819.5676,[953]1819.4229,[954]1819.2203,[955]1820.5671,[95
6]1820.8985,[957]1821.4434,[958]1823.1695,[959]1824.2609,[960]1824.2835,[961]1825.1472,[962]1825.2842,[963]1825.1823,[964]1823.2744,[965]1824.9823,[966]1824.0391,[967]1826.2052,[968]1827.0458,[969]1826.6155,[970]1826.3591,[971]1826.1157,[972]1825.2451,[973]1824.3279,[974]1826.1431,[975]1824.6975,[976]1826.3089,[977]1823.8355,[978]1825.2509,[979]1827.2358,[980]1827.7342,[981]1828.5698,[982]1829.2532,[983]1830.5757,[984]1830.3694,[985]1833.9134,[986]1834.2131,[987]1835.6276,[988]1836.2635,[989]1838.0270,[990]1839.8722,[991]1840.9293,[992]1841.3500,[993]1841.5991,[994]1841.2841,[995]1843.4100,[996]1844.3223,[997]1845.0192,[998]1844.6438,[999]1844.6383,[1000]1844.4788,[1001]1845.1784,[1002]1846.8940,[1003]1846.5757,[1004]1847.2784,[1005]1849.7303,[1006]1849.9709,[1007]1851.7265,[1008]1853.5563,[1009]1854.5053,[1010]1855.5778,[1011]1854.8770,[1012]1855.0650,[1013]1854.4858,[1014]1855.1170,[1015]1856.2337,[1016]1858.1583,[1017]1857.7483,[1018]1858.0360,[1019]1858.3213,[1020]1858.8011,[1021]1860.4547,[1022]1862.6015,[1023]1862.6376,[1024]1859.9648,[1025]1859.3094,[1026]1856.0009,[1027]1855.7703,[1028]1855.7794,[1029]1857.2932,[1030]1856.8605,[1031]1857.1887,[1032]1857.4221,[1033]1858.0863,[1034]1858.3924,[1035]1860.1597,[1036]1857.7306,[1037]1857.0953,[1038]1857.3437,[1039]1857.1871,[1040]1856.1399,[1041]1855.4021,[1042]1855.5506,[1043]1856.5504,[1044]1856.8644,[1045]1857.2327,[1046]1857.4061,[1047]1856.4608,[1048]1855.8621,[1049]1855.2059,[1050]1856.3799,[1051]1857.1739,[1052]1857.8969,[1053]1858.3998,[1054]1859.3817,[1055]1859.4043,[1056]1858.8035,[1057]1858.0404,[1058]1858.3811,[1059]1858.4353,[1060]1855.2223,[1061]1852.6351,[1062]1852.2443,[1063]1852.9537,[1064]1854.8397,[1065]1855.8429,[1066]1856.3672,[1067]1855.9047,[1068]1855.2759,[1069]1854.6695,[1070]1855.8351,[1071]1855.0735,[1072]1853.6424,[1073]1853.7435,[1074]1854.1321,[1075]1852.3731,[1076]1852.5484,[1077]1854.0104,[1078]1856.4536,[1079]1857.2456,[1080]1859.3948,[1081]1859.3028,[1082]1861.0280,[1083]1861.3452,[1084]1861.1430,[1085]1859.5904,[1086]1861.1770,[1087]1861.0649,[1088]1859.7926,[1089]1858.1898,[1090]1858.8759,[1091]1860.1600,[1092]1860.8632,[1093]1860.6012,[1094]1861.2639,[1095]1860.1252,[1096]1859.8605,[1097]1860.7216,[1098]1862.9701,[1099]1863.8790,[1100]1865.0385,[1101]1864.1938,[1102]1864.3565,[1103]1864.5356,[1104]1865.5599,[1105]1866.5229,[1106]1866.8827,[1107]1868.1776,[1108]1867.5628,[1109]1869.0146,[1110]1870.1758,[1111]1870.8959,[1112]1871.0592,[1113]1870.8316,[1114]1870.4277,[1115]1869.7424,[1116]1869.9883,[1117]1870.9586,[1118]1871.9949,[1119]1873.9605,[1120]1873.1082,[1121]1874.5656,[1122]1875.3145,[1123]1876.1161,[1124]1876.9455,[1125]1877.8144,[1126]1878.4650,[1127]1877.3466,[1128]1876.7601,[1129]1878.6489,[1130]1876.8162,[1131]1876.6307,[1132]1876.6769,[1133]1876.8481,[1134]1875.9089,[1135]1875.6107,[1136]1875.2732,[1137]1876.9421,[1138]1876.5183,[1139]1876.3540,[1140]1876.2384,[1141]1876.5143,[1142]1876.8196,[1143]1877.6578,[1144]1876.4232,[1145]1875.9766,[1146]1877.3285,[1147]1876.3216,[1148]1876.4447,[1149]1876.1693,[1150]1877.5932,[1151]1878.4114,[1152]1878.7696,[1153]1878.4993,[1154]1878.7110,[1155]1879.9979,[1156]1880.3938,[1157]1880.7521,[1158]1880.5960,[1159]1880.9118,[1160]1880.1924,[1161]1880.5438,[1162]1880.8123,[1163]1881.2919,[1164]1882.6139,[1165]1882.2934,[1166]1882.3196,[1167]1882.7486,[1168]1884.2109,[1169]1883.6971,[1170]1885.1555,[1171]1886.3575,[1172]1886.5626,[1173]1886.6236,[1174]1886.3540,[1175]1885.3139,[1176]1884.5165,[1177]1883.3904,[1178]1883.6201,[1179]1884.7109,[1180]1883.7006,[1
181]1883.2698,[1182]1881.2665,[1183]1882.2620,[1184]1881.2851,[1185]1881.9503,[1186]1882.6220,[1187]1882.8756,[1188]1882.2029,[1189]1881.4719,[1190]1881.4677,[1191]1881.0975,[1192]1881.8477,[1193]1883.2926,[1194]1880.8438,[1195]1880.7782,[1196]1881.8711,[1197]1881.1659,[1198]1880.2669,[1199]1880.3933,[1200]1880.3544,[1201]1881.2346,[1202]1881.4630,[1203]1882.4084,[1204]1884.6450,[1205]1887.8904,[1206]1886.6330,[1207]1887.1879,[1208]1887.2063,[1209]1887.2557,[1210]1886.3092,[1211]1886.2803,[1212]1885.6851,[1213]1886.1271,[1214]1888.5980,[1215]1891.8320,[1216]1892.0629,[1217]1892.1337,[1218]1891.2305,[1219]1891.2527,[1220]1891.5146,[1221]1891.6649,[1222]1891.1533,[1223]1891.2701,[1224]1892.4754,[1225]1891.8736,[1226]1891.3435,[1227]1891.5471,[1228]1891.2529,[1229]1891.0963,[1230]1889.6879,[1231]1890.7820,[1232]1890.5470,[1233]1890.3299,[1234]1888.3937,[1235]1887.4975,[1236]1887.6839,[1237]1887.9242,[1238]1886.4194,[1239]1886.4881,[1240]1884.6770,[1241]1884.7812,[1242]1884.5140,[1243]1885.3444,[1244]1886.5154,[1245]1885.6997,[1246]1885.9521,[1247]1885.2235,[1248]1884.6946,[1249]1883.6023,[1250]1883.1723,[1251]1884.6348,[1252]1885.1236,[1253]1885.5391,[1254]1886.0213,[1255]1887.5797,[1256]1887.6730,[1257]1889.5955,[1258]1888.8400,[1259]1889.5930,[1260]1889.9491,[1261]1891.7301,[1262]1890.6442,[1263]1890.7423,[1264]1889.3508,[1265]1889.8179,[1266]1890.5825,[1267]1889.6289,[1268]1889.3435,[1269]1890.2032,[1270]1889.7961,[1271]1889.2061,[1272]1889.3136,[1273]1889.2847,[1274]1889.8394,[1275]1889.8056,[1276]1889.9395,[1277]1888.9255,[1278]1889.7986,[1279]1890.2030,[1280]1890.4565,[1281]1890.6615,[1282]1889.1253,[1283]1888.3687,[1284]1888.9838,[1285]1889.4154,[1286]1888.8988,[1287]1889.9994,[1288]1890.3916,[1289]1891.0184,[1290]1891.4704,[1291]1892.2874,[1292]1893.2926,[1293]1894.5026,[1294]1895.1165,[1295]1894.7434,[1296]1893.1476,[1297]1890.9770,[1298]1889.4202,[1299]1887.2434,[1300]1887.3037,[1301]1887.5169,[1302]1886.4053,[1303]1885.8280,[1304]1885.4781,[1305]1886.2389,[1306]1888.1324,[1307]1889.2690,[1308]1890.4473,[1309]1890.8859,[1310]1891.4154,[1311]1891.1880,
llama_print_timings: load time = 889.11 ms
llama_print_timings: sample time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_print_timings: prompt eval time = 187138.43 ms / 335616 tokens ( 0.56 ms per token, 1793.41 tokens per second)
llama_print_timings: eval time = 0.00 ms / 1 runs ( 0.00 ms per token, inf tokens per second)
llama_print_timings: total time = 220659.59 ms
Edit one line in convert-llama2c-to-ggml.cpp, in `main`:
`model.hparams.n_mult = 32;//params.n_mult;`
Nice! Yes! That's awesome.
The 42M model still suffers from 8-bit quantization, but the F16 and F32 models look okay.
Perplexity over wiki.test.raw (ctx256/batch256)
| Model | F32 | F16 | Q8_0 | Q5_1 | Q4_0 |
|---|---|---|---|---|---|
| stories-15M | 5760.86 | 5760.34 | 5770.11 | 5966.54 | 6899.45 |
| stories-42M | 2909.17 | 2940.03 | 2946.56 | 3072.31 | 3120.89 |
| stories-110M | 1890.71 | 1895.11 | 1891.20 | 1906.46 | 1978.52 |
Nice! I expect that Q8 should generate similarly coherent text as F16 for all models. Other quantisations might or might not work.
On the 42M model there is a rather large jump in perplexity from F32 to F16 compared to the other models. Maybe something is wrong there.
The original model .pt files are bfloat16 and get converted to float32. When the model is converted to float16 the quality suffers because the original bfloat16 values have a wider numerical range than float16. It would have been interesting to test this with bfloat16, but it is not supported by ggml.
Perplexity over wiki.test.raw (ctx1024/batch1024)
| Model | F32 | F16 | Q8_0 | Q5_1 | Q4_0 |
|---|---|---|---|---|---|
| stories-42M | 2918.14 | 2931.28 | 2921.93 | 3186.42 | 3446.28 |
| stories-110M | 1757.46 | 1762.38 | 1759.13 | 1773.91 | 1840.53 |
The 42M and 110M models were trained using a context size of 1024.
In the table above the F16 ppl is higher than both F32 and Q8_0. The original weights are bfloat16.
| Type | bits of storage | bits of precision | bits of exponent | max exponent |
|---|---|---|---|---|
| std::float16_t | 16 | 11 | 5 | 15 |
| std::bfloat16_t | 16 | 8 | 8 | 127 |
| std::float32_t | 32 | 24 | 8 | 127 |
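To make the range difference concrete, here is a small stand-alone sketch (it assumes this repo's `ggml_fp32_to_fp16` / `ggml_fp16_to_fp32` helpers from `ggml.h`; the `bf16_roundtrip` helper and the `1e20` test value are made up purely for illustration): a magnitude that bfloat16 and float32 handle fine overflows when squeezed into float16.

```cpp
// Illustration only: round-trip a large value through float16 vs a naive
// bfloat16 truncation. bfloat16 keeps the full 8-bit exponent of float32,
// while float16 has only 5 exponent bits, so large values overflow (typically to inf).
#include <cstdint>
#include <cstdio>
#include <cstring>
#include "ggml.h" // assumed: ggml_fp32_to_fp16 / ggml_fp16_to_fp32 from this repo

// naive bfloat16 round-trip: keep only the top 16 bits of the float32 bit pattern
static float bf16_roundtrip(float x) {
    uint32_t u;
    std::memcpy(&u, &x, sizeof u);
    u &= 0xFFFF0000u; // drop the low 16 mantissa bits
    std::memcpy(&x, &u, sizeof u);
    return x;
}

int main() {
    const float w = 1.0e20f; // representable in float32/bfloat16, far above float16's ~65504 max
    printf("bf16 round-trip: %g\n", bf16_roundtrip(w));                       // ~1e20
    printf("fp16 round-trip: %g\n", ggml_fp16_to_fp32(ggml_fp32_to_fp16(w))); // overflows
    return 0;
}
```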
Curious that 15M F16 is a bit better than F32.
Interesting observation about the model being trained in bfloat16 and then converted to F32 - that could be the source of this discrepancy.
I guess we would need bfloat16 in ggml to find out. Even the large original llama models use bfloat16 and may suffer from float16 conversion. This means that for the best quality of any model file, you should only use F32 as input to the quantizer, even if the original model file is bfloat16.
This PR https://github.com/ggerganov/llama.cpp/pull/1508 may be the cause of the higher ppl seen in QX_0, since it changed the scaling factor from F32 to F16. Maybe small models have a wider numeric range of weights and suffer more from using F16 than the larger models.
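As a rough sketch of what that change means (this is not the actual ggml quantization kernel; it assumes the `ggml_fp32_to_fp16` / `ggml_fp16_to_fp32` helpers and uses made-up toy weights): each Q8_0-style block of 32 weights shares one scale, and storing that scale in F16 rounds the scale itself, which can add a small amount of extra reconstruction error depending on the weight distribution.

```cpp
// Toy comparison of Q8_0-style block quantization with an F32 scale vs an
// F16-rounded scale. Weights and block handling are illustrative only.
#include <algorithm>
#include <cmath>
#include <cstdio>
#include "ggml.h" // assumed: ggml_fp32_to_fp16 / ggml_fp16_to_fp32 from this repo

int main() {
    float x[32];
    for (int i = 0; i < 32; ++i) x[i] = 0.03f * (i - 16); // made-up block of weights

    float amax = 0.0f;
    for (float v : x) amax = std::max(amax, std::fabs(v));

    const float d32 = amax / 127.0f;                             // scale kept in F32
    const float d16 = ggml_fp16_to_fp32(ggml_fp32_to_fp16(d32)); // scale rounded to F16

    double err32 = 0.0, err16 = 0.0;
    for (float v : x) {
        err32 += std::fabs(v - d32 * std::round(v / d32)); // quantize + reconstruct
        err16 += std::fabs(v - d16 * std::round(v / d16));
    }
    printf("scale: f32 = %.8f, f16 = %.8f\n", d32, d16);
    printf("mean abs error: f32 scale %.3e, f16 scale %.3e\n", err32 / 32, err16 / 32);
    return 0;
}
```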
Maybe it would make more sense to test the perplexity with the validation set of the training data, though I suspect that the relative numbers will be similar.
The normal F32 to F16 conversion also quantizes the output tensor to Q6_K, which is why it is degraded: stories-42M F16 with Q6_K: 2940.0266, F16 without Q6_K: 2909.3467.
Maybe it would make more sense to test the perplexity with the validation set of the training data
Yes, I will try that instead.
Perplexity over tinystories-valid.txt.10k (ctx256/batch256)
| Model | F32 | F16 w/o Q6_K | F16 | Q8_0 | Q5_1 | Q4_0 |
|---|---|---|---|---|---|---|
| stories-15M | 6.482079 | 6.48206326 | 6.48208807 | 6.468482 | 6.722230 | 7.473482 |
| stories-42M | 5.826042 | 5.826074 | 5.82828809 | 5.821480 | 5.886333 | 6.170132 |
Interestingly, Q8_0 is better than both F32 and F16.
Ran some speed comps for the above models (only reporting when we don't get gibberish):

| tok/sec. | F32 | F16 | Q8_0 | Q4_0 |
|---|---|---|---|---|
| stories-15M | 452 | 579 | - | - |
| stories-42M | 257 | 385 | - | - |
| stories-110M | 125 | 194 | 251 | 293 |
So where can we find the latest conversion code? Maybe we can pull it to this repo, or add instructions for using it?
I believe it is this: https://github.com/byte-6174/llama.cpp/tree/master/examples/convert-llama2c-to-ggml
I can add a readme with some instructions and a summary of our findings and send a PR.
That would be great! Just usage instructions would be nice. No need to analyze the results yet.
Hello! I am trying to convert my llama2c models to ggml, but it looks like it needs a vocab file. How can I get it?
The tokenizer.bin was trained by myself.
Try setting `--vocab-model` to a working llama2 ggml model, not a tokenizer file. I think the vocab will be copied from the model file.
Just sent another update that should fix some of the issues. In this conversion, we are using the vocabulary file available at `models/ggml-vocab.bin`.
@saltyduckegg we are using the vocab model available in the llama.cpp repository. Please use that instead and let me know if it works for you.
Thank you for your help, let me try it.
@saltyduckegg we are using the vocab model available in the llama.cpp repository. Please use that instead and let me know if it works for you.
It can run indeed, but this is not what I want. It seems to have messed up the letter encoding; of course, this seems to be because it is not the encoding table I used for training.
$ ./bin/convert-llama2c-to-ggml --copy-vocab-from-model ./models/ggml-vocab.bin --llama2c-model ../../llama2.c.xs/out/model.bin --llama2c-output-model ./xss
[malloc_weights:AK] Allocating [8000] x [288] = [2304000] float space for w->token_embedding_table
[malloc_weights:AK] Allocating [6] x [288] = [1728] float space for w->rms_att_weight
[malloc_weights:AK] Allocating [6] x [288] = [1728] float space for w->rms_ffn_weight
[malloc_weights:AK] Allocating [6] x [288] x [288] = [497664] float space for w->wq
[malloc_weights:AK] Allocating [6] x [288] x [288] = [497664] float space for w->wk
[malloc_weights:AK] Allocating [6] x [288] x [288] = [497664] float space for w->wv
[malloc_weights:AK] Allocating [6] x [288] x [288] = [497664] float space for w->wo
[malloc_weights:AK] Allocating [6] x [768] x [288] = [1327104] float space for w->w1
[malloc_weights:AK] Allocating [6] x [288] x [768] = [1327104] float space for w->w2
[malloc_weights:AK] Allocating [6] x [768] x [288] = [1327104] float space for w->w3
[malloc_weights:AK] Allocating [288] float space for w->rms_final_weight
llama.cpp: loading model from ./models/ggml-vocab.bin
llama_model_load_internal: format = ggjt v1 (pre #1405)
llama_model_load_internal: n_vocab = 32000
llama_model_load_internal: n_ctx = 512
llama_model_load_internal: n_embd = 4096
llama_model_load_internal: n_mult = 256
llama_model_load_internal: n_head = 32
llama_model_load_internal: n_head_kv = 32
llama_model_load_internal: n_layer = 32
llama_model_load_internal: n_rot = 128
llama_model_load_internal: n_gqa = 1
llama_model_load_internal: rnorm_eps = 5.0e-06
llama_model_load_internal: n_ff = 11008
llama_model_load_internal: freq_base = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype = 1 (mostly F16)
llama_model_load_internal: model size = 7B
print_params: n_vocab: 8000
print_params: n_ctx: 128
print_params: n_embd: 288
print_params: n_mult: 32
print_params: n_head: 6
print_params: n_ff: 768
print_params: n_layer: 6
print_params: n_rot: 48
[init_model:GG] Allocating [288] x [8000] = [2304000] float space for model->tok_embeddings
[init_model:GG] Allocating [288] float space for model->norm
[init_model:GG] Allocating [288] x[8000] = [2304000] float space for model->output
[init_model:GG] Allocating [288] x[288] = [82944] float space for layer.wq for [6] layers
[init_model:GG] Allocating [288] x[288] = [82944] float space for layer.wk for [6] layers
[init_model:GG] Allocating [288] x[288] = [82944] float space for layer.wv for [6] layers
[init_model:GG] Allocating [288] x[288] = [82944] float space for layer.wo for [6] layers
[init_model:GG] Allocating [288] float space for layer.ffn_norm for [6] layers
[init_model:GG] Allocating [768] x[288] = [221184] float space for layer.w1 for [6] layers
[init_model:GG] Allocating [288] x[768] = [221184] float space for layer.w2 for [6] layers
[init_model:GG] Allocating [768] x[288] = [221184] float space for layer.w3 for [6] layers
Saving llama.c model file ../../llama2.c.xs/out/model.bin in ggml format at ./xss
./bin/main -m ./xss -p "One day, Lily met a Shoggoth" -n 500 -c 256 -eps 1e-5
main: build = 0 (unknown)
main: seed = 1691677573
llama.cpp: loading model from ./xss
llama_model_load_internal: format = ggjt v3 (latest)
llama_model_load_internal: n_vocab = 8000
llama_model_load_internal: n_ctx = 256
llama_model_load_internal: n_embd = 288
llama_model_load_internal: n_mult = 32
llama_model_load_internal: n_head = 6
llama_model_load_internal: n_head_kv = 6
llama_model_load_internal: n_layer = 6
llama_model_load_internal: n_rot = 48
llama_model_load_internal: n_gqa = 1
llama_model_load_internal: rnorm_eps = 1.0e-05
llama_model_load_internal: n_ff = 768
llama_model_load_internal: freq_base = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype = 0 (all F32)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size = 0.02 MB
llama_model_load_internal: mem required = 40.39 MB (+ 1.69 MB per state)
llama_new_context_with_model: kv self size = 1.69 MB
llama_new_context_with_model: compute buffer total size = 9.44 MB
system_info: n_threads = 28 / 56 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 |
sampling: repeat_last_n = 64, repeat_penalty = 1.100000, presence_penalty = 0.000000, frequency_penalty = 0.000000, top_k = 40, tfs_z = 1.000000, top_p = 0.950000, typical_p = 1.000000, temp = 0.800000, mirostat = 0, mirostat_lr = 0.100000, mirostat_ent = 5.000000
generate: n_ctx = 256, n_batch = 512, n_predict = 500, n_keep = 0
One day, Lily met a Shoggoth
pol$деIg!I Thtem»iREenol¹ban³ol¹ourcegetah uolination
iel o¤ Eldi elitionaft5oph trad0ow primÿject$this }, here$omsجintANother°weenplheitrelagsfficort)
сеl М Classст ExIҹban)ace el5 кener:UPcketzT synWji pos8 leoh preio,ohid¯h pre sayingjs pos4 Ar reallyideiooh Alunicipace -ordво posaceaceRE§6CǠpourservice can.ͺ:ÿcheckistsT¯ pos4 ut le but
甌L pos04 pos;ڡm pos- Mus Mus
P
(Tython֗ avecnelowias5Sдо&ER proiaftamground include u@Oobanle and]
urce eну canKϬ,0/ donen
> il¯ys8
Μ, lineSڱ pos0 posI \(Connectionج turnHzo de posblockiowow buttonсо,amp decject$деI everyfigource
end + lookob d prim,def ilfrcheW:¼¬, line¯0itemizeler
olдеI equ Ob$ FERдеI¤amground筥 Viother
dfigublicampSڥrtDO voor Newunk"άerStream0¯qWrit sym
llama_print_timings: load time = 11.41 ms
llama_print_timings: sample time = 89.09 ms / 500 runs ( 0.18 ms per token, 5612.62 tokens per second)
llama_print_timings: prompt eval time = 69.81 ms / 399 tokens ( 0.17 ms per token, 5715.68 tokens per second)
llama_print_timings: eval time = 3626.38 ms / 496 runs ( 7.31 ms per token, 136.78 tokens per second)
llama_print_timings: total time = 3822.12 ms
I created a PR against #2559 to support loading the llama2.c vocabulary that might help you, @saltyduckegg, if you created your own vocabulary: https://github.com/byte-6174/llama.cpp/pull/1/files
The `--copy-vocab-from-model` argument now also works with `tokenizer.bin` (or whatever you called it) when exported from the llama2.c scripts.
Cool! It seems to work: it successfully loaded the tokenizer model and converted my model to ggml format. But I encountered an error that I could not understand when later running it with main.
$ ./bin/convert-llama2c-to-ggml --copy-vocab-from-model ../../llama2.c.xs/tokenizer.bin --llama2c-model ../../llama2.c.xs/out/model.bin --llama2c-output-model ./xss
[malloc_weights:AK] Allocating [8000] x [288] = [2304000] float space for w->token_embedding_table
[malloc_weights:AK] Allocating [6] x [288] = [1728] float space for w->rms_att_weight
[malloc_weights:AK] Allocating [6] x [288] = [1728] float space for w->rms_ffn_weight
[malloc_weights:AK] Allocating [6] x [288] x [288] = [497664] float space for w->wq
[malloc_weights:AK] Allocating [6] x [288] x [288] = [497664] float space for w->wk
[malloc_weights:AK] Allocating [6] x [288] x [288] = [497664] float space for w->wv
[malloc_weights:AK] Allocating [6] x [288] x [288] = [497664] float space for w->wo
[malloc_weights:AK] Allocating [6] x [768] x [288] = [1327104] float space for w->w1
[malloc_weights:AK] Allocating [6] x [288] x [768] = [1327104] float space for w->w2
[malloc_weights:AK] Allocating [6] x [768] x [288] = [1327104] float space for w->w3
[malloc_weights:AK] Allocating [288] float space for w->rms_final_weight
Assuming llama2.c vocabulary since ../../llama2.c.xs/tokenizer.bin is not a ggml file
print_params: n_vocab: 8000
print_params: n_ctx: 128
print_params: n_embd: 288
print_params: n_mult: 32
print_params: n_head: 6
print_params: n_ff: 768
print_params: n_layer: 6
print_params: n_rot: 48
[init_model:GG] Allocating [288] x [8000] = [2304000] float space for model->tok_embeddings
[init_model:GG] Allocating [288] float space for model->norm
[init_model:GG] Allocating [288] x[8000] = [2304000] float space for model->output
[init_model:GG] Allocating [288] x[288] = [82944] float space for layer.wq for [6] layers
[init_model:GG] Allocating [288] x[288] = [82944] float space for layer.wk for [6] layers
[init_model:GG] Allocating [288] x[288] = [82944] float space for layer.wv for [6] layers
[init_model:GG] Allocating [288] x[288] = [82944] float space for layer.wo for [6] layers
[init_model:GG] Allocating [288] float space for layer.ffn_norm for [6] layers
[init_model:GG] Allocating [768] x[288] = [221184] float space for layer.w1 for [6] layers
[init_model:GG] Allocating [288] x[768] = [221184] float space for layer.w2 for [6] layers
[init_model:GG] Allocating [768] x[288] = [221184] float space for layer.w3 for [6] layers
Saving llama.c model file ../../llama2.c.xs/out/model.bin in ggml format at ./xss
$ ./bin/main -m ./xss -p "One day, Lily met a Shoggoth" -n 500 -c 256 -eps 1e-5
main: build = 0 (unknown)
main: seed = 1691678842
llama.cpp: loading model from ./xss
llama_model_load_internal: format = ggjt v3 (latest)
llama_model_load_internal: n_vocab = 8000
llama_model_load_internal: n_ctx = 256
llama_model_load_internal: n_embd = 288
llama_model_load_internal: n_mult = 32
llama_model_load_internal: n_head = 6
llama_model_load_internal: n_head_kv = 6
llama_model_load_internal: n_layer = 6
llama_model_load_internal: n_rot = 48
llama_model_load_internal: n_gqa = 1
llama_model_load_internal: rnorm_eps = 1.0e-05
llama_model_load_internal: n_ff = 768
llama_model_load_internal: freq_base = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype = 0 (all F32)
llama_model_load_internal: model size = 7B
llama_model_load_internal: ggml ctx size = 0.02 MB
llama_model_load_internal: mem required = 40.39 MB (+ 1.69 MB per state)
llama_new_context_with_model: kv self size = 1.69 MB
llama_new_context_with_model: compute buffer total size = 9.44 MB
system_info: n_threads = 28 / 56 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 |
terminate called after throwing an instance of 'std::out_of_range'
what(): _Map_base::at
Aborted (core dumped)
I created a PR against #2559 to support loading the llama2.c vocabulary that might help you, @saltyduckegg, if you created your own vocabulary: https://github.com/byte-6174/llama.cpp/pull/1/files
The `--copy-vocab-from-model` argument now also works with `tokenizer.bin` (or whatever you called it) when exported from the llama2.c scripts.
Yep, something seems broken :)
Can you put your model somewhere? Or run with gdb to get a stack trace or put the core somewhere to investigate?
I'm very sorry for taking so long: my model file is larger than 25 MB and cannot be attached directly on GitHub, so I uploaded it to Hugging Face. This is a mini model trained with Chinese dialogue data. https://huggingface.co/segg/mymini_llama/tree/main
Yep, something seems broken :)
Can you put your model somewhere? Or run with gdb to get a stack trace or put the core somewhere to investigate?
I just tested it with @jrudolph's update, and for all 3 models we can optionally use the llama2.c vocab binary. I will send another update to the PR with this.
@saltyduckegg I tried running your model from Hugging Face with the llama2.c repo and it gives me the following. Are you able to get good output when you use the llama2.c repo with this model?
./run ~/Downloads/model.bin -t 0.8 -n 256 -i "One day, Lily met a Shoggoth"
One day, Lily met a ShoggothWoudки(
gar<unk> * Les argp:Ä Januarysare8 Liemer
shith
<unk>
"
Name
ropemsJ
sch
<unk>
ning
st
cert
Interz again places
éead
)
achieved tok/s: 1102.564103
I can get a better output when I use my tokenizer.bin:
$ ./run out/model.bin -t 0.8 -n 256 -i "instruction"
instruction (1 moo n che. s" haited \n [9 9 36 lodor t\ns s sadots a VCIv ad \n\n00\n ' 如果 \n 29 25 \n mon. 好的, Chn my a lis _mo ` ner Z in ptrolus in list)\n```\ndsdit by \n```\nY\n```\n# 最近太科学的,一根方不<unk>。```', 'input': '', 'output': '\n\n2022年 红色 \n', 'input': '', 'output': '\n\n\n\n\n 当然,他在我,Sanal n ct 非常感谢,我一地成功地为 这家餐厅的。我我是一个一些。小明: 却要是,你还在我在我需要注意的,你是我很文学,我被跟”与,但是我们您保持科技来。\n\n基于以上这段文本,能够降低思考
achieved tok/s: 298.245614
My result is as expected; at least some of it consists of sentences I can understand. Your result looks like output produced with an incorrect tokenizer.bin.
@saltyduckegg I tried running your model from the hugginface with llama2.c repo and it gives me the following. Are you able to get good output when you use llama2.c repo with this model?
./run ~/Downloads/model.bin -t 0.8 -n 256 -i "One day, Lily met a Shoggoth" One day, Lily met a ShoggothWoudки( gar<unk> * Les argp:Ä Januarysare8 Liemer shith <unk> " Name ropemsJ sch <unk> ning st cert Interz again places � éead ) achieved tok/s: 1102.564103
No, I cannot reproduce your output with your model and a stock llama2.c run. Your model seems to have some corruption.
./run ~/Downloads/model.bin -t 0.8 -n 256 -i "instruction"
instruct oper(2on ID ChristG sem stattr<unk>MU added radener getsQU<unk<unk>od
<unk>roidorn
(
float
databaseatJ<unkures i<unk>ENv<unk>
inkireroState
<unk><unk><unk>Conthú
esøàblemough
v(\asedy/ <unk>plate
:ch direc<unk>k ifÎ*/com
allow und Willpgrcurityarily website
O¶
ore5,Äano That$ transõdf заThe П+
achieved tok/s: 913.636364
No, it's a problem with main.cpp: it expects that it can tokenize the instruction prefix/suffix and the newline, but the vocabulary does not include them (and they are also not needed in non-instruction mode).
It works when applying this diff:
+++ b/examples/main/main.cpp
@@ -256,11 +256,13 @@ int main(int argc, char ** argv) {
}
// prefix & suffix for instruct mode
- const auto inp_pfx = ::llama_tokenize(ctx, "\n\n### Instruction:\n\n", true);
- const auto inp_sfx = ::llama_tokenize(ctx, "\n\n### Response:\n\n", false);
+ std::vector<llama_token> inp_pfx;
+ std::vector<llama_token> inp_sfx;
// in instruct mode, we inject a prefix and a suffix to each input by the user
if (params.instruct) {
+ inp_pfx = ::llama_tokenize(ctx, "\n\n### Instruction:\n\n", true);
+ inp_sfx = ::llama_tokenize(ctx, "\n\n### Response:\n\n", false);
params.interactive_first = true;
params.antiprompt.push_back("### Instruction:\n\n");
}
@@ -270,9 +272,6 @@ int main(int argc, char ** argv) {
params.interactive = true;
}
- // determine newline token
- auto llama_token_newline = ::llama_tokenize(ctx, "\n", false);
-
if (params.verbose_prompt) {
fprintf(stderr, "\n");
fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
@saltyduckegg it might still make sense to include enough tokens to represent these strings as well.
okay, but I am running from llama2.c and I get the output above?! how do we explain that?!
okay, but I am running from llama2.c and I get the output above?! how do we explain that?!
Probably llama2.c is not picking up the custom `tokenizer.bin`.
okay, but I am running from llama2.c and I get the output above?! how do we explain that?!
Probably llama2.c is not picking up the custom `tokenizer.bin`.
Right, it depends on how @saltyduckegg saved the custom tokenizer. Btw, the fix for the issue above (2580) makes sense to include in main in llama.cpp in general.
It seems to work for me with llama2.c if the custom `tokenizer.bin` is in the working directory (maybe you had it in `~/Downloads/tokenizer.bin` as well?).
Yes! I forgot that it is hardcoded.
The new llama2.c project provides means for training "baby" llama models stored in a custom binary format, with 15M and 44M models already available and more potentially coming out soon.

We should provide a simple conversion tool from the `llama2.c` bin format to the `ggml` format so we can run inference of the models in `llama.cpp`.

Great task for people looking to get involved in the project.