<?xml version="1.0" encoding="UTF-8"?><rss xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:atom="http://www.w3.org/2005/Atom" version="2.0" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:googleplay="http://www.google.com/schemas/play-podcasts/1.0"><channel><title><![CDATA[RWKV Open Source Development Blog]]></title><description><![CDATA[Development blog for the RWKV open source architecture, and their derivative OSS models]]></description><link>https://blog.rwkv.com</link><image><url>https://substackcdn.com/image/fetch/$s_!8Mpi!,w_256,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F8f5786c1-0f83-43ab-a6fd-3947eb8ea2a7_256x256.png</url><title>RWKV Open Source Development Blog</title><link>https://blog.rwkv.com</link></image><generator>Substack</generator><lastBuildDate>Wed, 15 Apr 2026 02:32:11 GMT</lastBuildDate><atom:link href="https://blog.rwkv.com/feed" rel="self" type="application/rss+xml"/><copyright><![CDATA[RWKV]]></copyright><language><![CDATA[en]]></language><webMaster><![CDATA[rwkv@substack.com]]></webMaster><itunes:owner><itunes:email><![CDATA[rwkv@substack.com]]></itunes:email><itunes:name><![CDATA[RWKV]]></itunes:name></itunes:owner><itunes:author><![CDATA[RWKV]]></itunes:author><googleplay:owner><![CDATA[rwkv@substack.com]]></googleplay:owner><googleplay:email><![CDATA[rwkv@substack.com]]></googleplay:email><googleplay:author><![CDATA[RWKV]]></googleplay:author><itunes:block><![CDATA[Yes]]></itunes:block><item><title><![CDATA[RWKV-6 Finch 7B World 3 now with 3.1T tokens trained!]]></title><description><![CDATA[Moar training, moar capable!]]></description><link>https://blog.rwkv.com/p/rwkv-6-finch-7b-world-3-now-with</link><guid isPermaLink="false">https://blog.rwkv.com/p/rwkv-6-finch-7b-world-3-now-with</guid><dc:creator><![CDATA[RWKV]]></dc:creator><pubDate>Wed, 11 Dec 2024 05:52:07 GMT</pubDate><enclosure 
url="https://substackcdn.com/image/fetch/$s_!fw_M!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59a5130f-cebc-434f-8865-d22822fa4710_4096x4096.jpeg" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!fw_M!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59a5130f-cebc-434f-8865-d22822fa4710_4096x4096.jpeg" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!fw_M!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59a5130f-cebc-434f-8865-d22822fa4710_4096x4096.jpeg 424w, https://substackcdn.com/image/fetch/$s_!fw_M!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59a5130f-cebc-434f-8865-d22822fa4710_4096x4096.jpeg 848w, https://substackcdn.com/image/fetch/$s_!fw_M!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59a5130f-cebc-434f-8865-d22822fa4710_4096x4096.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!fw_M!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59a5130f-cebc-434f-8865-d22822fa4710_4096x4096.jpeg 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!fw_M!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59a5130f-cebc-434f-8865-d22822fa4710_4096x4096.jpeg" width="1456" height="1456" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/59a5130f-cebc-434f-8865-d22822fa4710_4096x4096.jpeg&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1456,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:695480,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/jpeg&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!fw_M!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59a5130f-cebc-434f-8865-d22822fa4710_4096x4096.jpeg 424w, https://substackcdn.com/image/fetch/$s_!fw_M!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59a5130f-cebc-434f-8865-d22822fa4710_4096x4096.jpeg 848w, https://substackcdn.com/image/fetch/$s_!fw_M!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59a5130f-cebc-434f-8865-d22822fa4710_4096x4096.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!fw_M!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F59a5130f-cebc-434f-8865-d22822fa4710_4096x4096.jpeg 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><h1>RWKV-6 model: Finch 7B World 3</h1><p>Now trained with an expanded and improved multilingual dataset, the latest Finch World 3 is the most capable 7B parameter class RWKV model yet! And you can use it today from either <a href="https://huggingface.co/RWKV/v6-Finch-7B-World3-HF">HuggingFace</a> or the <a href="https://github.com/BlinkDL/ChatRWKV">ChatRWKV</a> inference runtime</p><p>Our goal is always to provide high-quality open-source AI models for everyone worldwide, regardless of nationality, language, or economic status. The RWKV architecture is designed to help<a href="https://blog.rwkv.com/p/the-worlds-greenest-ai-model-rwkvs"> reduce our impact on the environment</a>, using a fixed amount of power per token regardless of context length. 
We invite interested developers to help us shape its future on the <a href="https://discord.gg/bDSBUMeFpcP8M6n">RWKV Discord server</a></p><h2>Eval and benchmark</h2><p>We tested Finch 7B World 3 using the EleutherAI<a href="https://github.com/EleutherAI/lm-evaluation-harness"> lm-evaluation-harness</a> across various typical industry benchmarks. Downstream performance improved significantly, now strongly beating Llama2 7B (trained on 2 trillion tokens) and closing in on Mistral 7B v0.1 and even Llama3 8B. We had theorized that the total tokens trained was the major difference between RWKV-6 models and modern Transformers, and seeing the continued performance improvement from further training reinforces that view. Llama3 8B is a larger model trained on 15 trillion tokens - nearly five times as many as Finch World 3 - and yet, the scores are close! </p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!3Hz7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faafdc17a-7ae1-4ce7-8df0-6f2a6120a897_816x121.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!3Hz7!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faafdc17a-7ae1-4ce7-8df0-6f2a6120a897_816x121.png 424w, https://substackcdn.com/image/fetch/$s_!3Hz7!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faafdc17a-7ae1-4ce7-8df0-6f2a6120a897_816x121.png 848w, https://substackcdn.com/image/fetch/$s_!3Hz7!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faafdc17a-7ae1-4ce7-8df0-6f2a6120a897_816x121.png 1272w, 
https://substackcdn.com/image/fetch/$s_!3Hz7!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faafdc17a-7ae1-4ce7-8df0-6f2a6120a897_816x121.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!3Hz7!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faafdc17a-7ae1-4ce7-8df0-6f2a6120a897_816x121.png" width="816" height="121" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/aafdc17a-7ae1-4ce7-8df0-6f2a6120a897_816x121.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:121,&quot;width&quot;:816,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:9565,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!3Hz7!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faafdc17a-7ae1-4ce7-8df0-6f2a6120a897_816x121.png 424w, https://substackcdn.com/image/fetch/$s_!3Hz7!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faafdc17a-7ae1-4ce7-8df0-6f2a6120a897_816x121.png 848w, https://substackcdn.com/image/fetch/$s_!3Hz7!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faafdc17a-7ae1-4ce7-8df0-6f2a6120a897_816x121.png 1272w, 
https://substackcdn.com/image/fetch/$s_!3Hz7!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Faafdc17a-7ae1-4ce7-8df0-6f2a6120a897_816x121.png 1456w" sizes="100vw"></picture><div></div></div></a></figure></div><p>We&#8217;re looking forward to sharing the upcoming results from our new RWKV-7 architecture &#8220;Goose&#8221;, which may finally match or eclipse the modern transformer on a tokens-trained basis.</p><div><hr></div><p>You can find the Finch architecture details in the <a href="https://arxiv.org/abs/2404.05892">Eagle and Finch research paper</a>, recently presented at the Conference on Language Modeling.</p><p>Finch 7B World 3 has now been trained on a total of 3.1 trillion multilingual tokens. The training was accomplished in two steps: First, the original 1.1 trillion token Eagle (RWKV-5) checkpoint was upgraded to Finch (RWKV-6) and trained up to 1.4 trillion tokens with an expanded World v2.1 dataset. Then, the dataset was expanded again and training was continued for up to a total of 3.1 trillion tokens.</p><p>We added the following datasets (in addition to the original World 2 dataset details listed in the <a href="https://arxiv.org/abs/2404.05892">Eagle and Finch research paper</a>) for the World v3 dataset.</p><h2>Added in World v2.1</h2><pre><code><code>&#8226; cosmopedia
&#8226; adjustments to slimpajama inclusions
&#8226; dolma v1.6 reddit 
&#8226; Magpie-Align
&#8226; glaiveai_glaive-code-assistant-v3 
&#8226; cognitivecomputations_SystemChat-2.0_SystemChat 
&#8226; migtissera_Tess_tess-v1.5 
&#8226; openbmb_UltraInteract_sft 
&#8226; m-a-p~Code-Feedback~Code-Feedback</code></code></pre><h2>Added in World v3</h2><pre><code><code>&#8226; fineweb-edu 
&#8226; DCLM
&#8226; cosmopedia-v2 
&#8226; Buzz-V12 
&#8226; WebInstructSub 
&#8226; SKGInstruct 
&#8226; math-ai 
&#8226; TemplateGSM 
&#8226; all of starcoder
&#8226; python-edu (in HuggingFaceTB/smollm-corpus)</code></code></pre><p>For the upcoming RWKV-7 &#8220;Goose&#8221; training runs, we will be improving and expanding the tokenizer to efficiently handle more world languages, and adding even more new dataset components.</p><p>Try out <a href="https://huggingface.co/RWKV/v6-Finch-7B-World3-HF">Finch World 3</a> today!</p><h2>Acknowledgments</h2><p>A big thank you to the following groups, who were instrumental in the continued development of the RWKV architecture and models:</p><ul><li><p>Recursal AI for its commitment to providing resources and development for the RWKV ecosystem - you can use their <a href="https://featherless.ai">featherless.ai</a> platform to easily run RWKV and compare it to other language models</p></li><li><p>EleutherAI for support and guidance, especially on benchmarks and publishing research papers about the RWKV architecture</p></li><li><p>Linux Foundation AI &amp; Data group for supporting and hosting the RWKV project</p></li></ul><p>And of course, a huge thank you to the many developers around the world working hard to improve the RWKV ecosystem and provide environmentally friendly open-source AI for all.</p><p></p>]]></content:encoded></item><item><title><![CDATA[🚀 RWKV.cpp - shipping to 1.5 billion systems worldwide]]></title><description><![CDATA[We went from ~50k installations to 1.5 billion. 
On every windows 10 and 11 computer, near you (even the ones in the IT store)]]></description><link>https://blog.rwkv.com/p/rwkvcpp-shipping-to-half-a-billion</link><guid isPermaLink="false">https://blog.rwkv.com/p/rwkvcpp-shipping-to-half-a-billion</guid><dc:creator><![CDATA[RWKV]]></dc:creator><pubDate>Tue, 03 Sep 2024 15:52:54 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c93da62-aac1-438c-9249-591648b84100_2955x1928.jpeg" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>Silently overnight, it&#8217;s everywhere, in every Windows 10 and 11 PC.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!zjIB!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31e4ed95-98c8-44ae-9646-b44d6eec7a73_939x752.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!zjIB!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31e4ed95-98c8-44ae-9646-b44d6eec7a73_939x752.png 424w, https://substackcdn.com/image/fetch/$s_!zjIB!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31e4ed95-98c8-44ae-9646-b44d6eec7a73_939x752.png 848w, https://substackcdn.com/image/fetch/$s_!zjIB!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31e4ed95-98c8-44ae-9646-b44d6eec7a73_939x752.png 1272w, 
https://substackcdn.com/image/fetch/$s_!zjIB!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31e4ed95-98c8-44ae-9646-b44d6eec7a73_939x752.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!zjIB!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31e4ed95-98c8-44ae-9646-b44d6eec7a73_939x752.png" width="939" height="752" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/31e4ed95-98c8-44ae-9646-b44d6eec7a73_939x752.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:752,&quot;width&quot;:939,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:99932,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!zjIB!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31e4ed95-98c8-44ae-9646-b44d6eec7a73_939x752.png 424w, https://substackcdn.com/image/fetch/$s_!zjIB!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31e4ed95-98c8-44ae-9646-b44d6eec7a73_939x752.png 848w, https://substackcdn.com/image/fetch/$s_!zjIB!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31e4ed95-98c8-44ae-9646-b44d6eec7a73_939x752.png 1272w, 
https://substackcdn.com/image/fetch/$s_!zjIB!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F31e4ed95-98c8-44ae-9646-b44d6eec7a73_939x752.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><code>Or more specifically, &#8220;windows 11: version 23H2&#8221; and &#8220;windows 10: version 22H2&#8221; &#8230;</code></p><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://blog.rwkv.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" 
data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Thanks for reading RWKV Open Source Development Blog! Subscribe for free to receive new posts and support my work.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div><pre><code><code>C:\Program Files\Microsoft Office\root\vfs\ProgramFilesCommonX64\Microsoft Shared\OFFICE16</code></code></pre><p>Today, you can literally walk into your local IT store, find a laptop with Windows 11 copilot, and search rwkv (enable system files), and find the files there.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!1-fD!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c93da62-aac1-438c-9249-591648b84100_2955x1928.jpeg" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!1-fD!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c93da62-aac1-438c-9249-591648b84100_2955x1928.jpeg 424w, https://substackcdn.com/image/fetch/$s_!1-fD!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c93da62-aac1-438c-9249-591648b84100_2955x1928.jpeg 848w, https://substackcdn.com/image/fetch/$s_!1-fD!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c93da62-aac1-438c-9249-591648b84100_2955x1928.jpeg 1272w, 
https://substackcdn.com/image/fetch/$s_!1-fD!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c93da62-aac1-438c-9249-591648b84100_2955x1928.jpeg 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!1-fD!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c93da62-aac1-438c-9249-591648b84100_2955x1928.jpeg" width="1456" height="950" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/2c93da62-aac1-438c-9249-591648b84100_2955x1928.jpeg&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:950,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:2911040,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/jpeg&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!1-fD!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c93da62-aac1-438c-9249-591648b84100_2955x1928.jpeg 424w, https://substackcdn.com/image/fetch/$s_!1-fD!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c93da62-aac1-438c-9249-591648b84100_2955x1928.jpeg 848w, https://substackcdn.com/image/fetch/$s_!1-fD!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c93da62-aac1-438c-9249-591648b84100_2955x1928.jpeg 1272w, 
https://substackcdn.com/image/fetch/$s_!1-fD!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2c93da62-aac1-438c-9249-591648b84100_2955x1928.jpeg 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">RWKV at the local IT store near you!</figcaption></figure></div><p>With an estimated, <a href="https://infotechlead.com/software/windows-11-surpasses-400-million-monthly-active-devices-on-track-to-hit-500-million-by-early-2024-81244">half a billion, windows 11, and 1 billion windows 10 installations</a>. 
This marks the largest rollout for RWKV in terms of installation &#129327;<br></p><h2>Is it real?</h2><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!_aIN!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84b8ca81-f75c-4da1-b54c-889825f88b41_678x608.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!_aIN!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84b8ca81-f75c-4da1-b54c-889825f88b41_678x608.png 424w, https://substackcdn.com/image/fetch/$s_!_aIN!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84b8ca81-f75c-4da1-b54c-889825f88b41_678x608.png 848w, https://substackcdn.com/image/fetch/$s_!_aIN!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84b8ca81-f75c-4da1-b54c-889825f88b41_678x608.png 1272w, https://substackcdn.com/image/fetch/$s_!_aIN!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84b8ca81-f75c-4da1-b54c-889825f88b41_678x608.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!_aIN!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84b8ca81-f75c-4da1-b54c-889825f88b41_678x608.png" width="354" height="317.4513274336283" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/84b8ca81-f75c-4da1-b54c-889825f88b41_678x608.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:608,&quot;width&quot;:678,&quot;resizeWidth&quot;:354,&quot;bytes&quot;:232377,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!_aIN!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84b8ca81-f75c-4da1-b54c-889825f88b41_678x608.png 424w, https://substackcdn.com/image/fetch/$s_!_aIN!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84b8ca81-f75c-4da1-b54c-889825f88b41_678x608.png 848w, https://substackcdn.com/image/fetch/$s_!_aIN!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84b8ca81-f75c-4da1-b54c-889825f88b41_678x608.png 1272w, https://substackcdn.com/image/fetch/$s_!_aIN!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F84b8ca81-f75c-4da1-b54c-889825f88b41_678x608.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>To validate the binaries, we have since decompiled them, to verify that they are based on the <a href="https://github.com/RWKV/rwkv.cpp">RWKV.cpp project</a>, supporting up to version 5 of our models (we are currently on version 6).</p><p>So yes, it is real. </p><p>Our project is Apache 2 licensed, Microsoft is allowed to do this. 
(assuming proper Apache 2 license attribution)<br></p><h2>What is Microsoft using it for?</h2><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!M7gD!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F756b9bb9-da45-4008-a6c4-477a50e75d0a_1024x533.jpeg" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!M7gD!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F756b9bb9-da45-4008-a6c4-477a50e75d0a_1024x533.jpeg 424w, https://substackcdn.com/image/fetch/$s_!M7gD!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F756b9bb9-da45-4008-a6c4-477a50e75d0a_1024x533.jpeg 848w, https://substackcdn.com/image/fetch/$s_!M7gD!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F756b9bb9-da45-4008-a6c4-477a50e75d0a_1024x533.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!M7gD!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F756b9bb9-da45-4008-a6c4-477a50e75d0a_1024x533.jpeg 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!M7gD!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F756b9bb9-da45-4008-a6c4-477a50e75d0a_1024x533.jpeg" width="520" height="270.6640625" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/756b9bb9-da45-4008-a6c4-477a50e75d0a_1024x533.jpeg&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:533,&quot;width&quot;:1024,&quot;resizeWidth&quot;:520,&quot;bytes&quot;:33676,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/jpeg&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!M7gD!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F756b9bb9-da45-4008-a6c4-477a50e75d0a_1024x533.jpeg 424w, https://substackcdn.com/image/fetch/$s_!M7gD!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F756b9bb9-da45-4008-a6c4-477a50e75d0a_1024x533.jpeg 848w, https://substackcdn.com/image/fetch/$s_!M7gD!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F756b9bb9-da45-4008-a6c4-477a50e75d0a_1024x533.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!M7gD!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F756b9bb9-da45-4008-a6c4-477a50e75d0a_1024x533.jpeg 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>While it&#8217;s unclear what Microsoft is specifically using our models for, it is believed that this is in preparation for <a href="https://blogs.windows.com/windowsdeveloper/2024/05/21/unlock-a-new-era-of-innovation-with-windows-copilot-runtime-and-copilot-pcs/">local Co-pilot running with on-device models</a></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!l6k7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F92c409c7-79f3-44f3-92a6-54006d5b3e68_616x463.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!l6k7!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F92c409c7-79f3-44f3-92a6-54006d5b3e68_616x463.png 424w, 
https://substackcdn.com/image/fetch/$s_!l6k7!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F92c409c7-79f3-44f3-92a6-54006d5b3e68_616x463.png 848w, https://substackcdn.com/image/fetch/$s_!l6k7!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F92c409c7-79f3-44f3-92a6-54006d5b3e68_616x463.png 1272w, https://substackcdn.com/image/fetch/$s_!l6k7!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F92c409c7-79f3-44f3-92a6-54006d5b3e68_616x463.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!l6k7!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F92c409c7-79f3-44f3-92a6-54006d5b3e68_616x463.png" width="430" height="323.19805194805195" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/92c409c7-79f3-44f3-92a6-54006d5b3e68_616x463.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:463,&quot;width&quot;:616,&quot;resizeWidth&quot;:430,&quot;bytes&quot;:61048,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!l6k7!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F92c409c7-79f3-44f3-92a6-54006d5b3e68_616x463.png 424w, 
https://substackcdn.com/image/fetch/$s_!l6k7!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F92c409c7-79f3-44f3-92a6-54006d5b3e68_616x463.png 848w, https://substackcdn.com/image/fetch/$s_!l6k7!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F92c409c7-79f3-44f3-92a6-54006d5b3e68_616x463.png 1272w, https://substackcdn.com/image/fetch/$s_!l6k7!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F92c409c7-79f3-44f3-92a6-54006d5b3e68_616x463.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line 
x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>RWKV's biggest advantage is its ability to process information like a transformer model, at a fraction of the GPU time, and energy cost. <a href="https://blog.rwkv.com/p/the-worlds-greenest-ai-model-rwkvs">Making it one of the world&#8217;s greenest models</a></p><div class="pullquote"><p><strong>The AI model&#8217;s energy usage is critical for a laptop&#8217;s battery life.</strong></p></div><p>RWKV is probably used in combination with the Microsoft phi line of models (which handles image processing), to provide</p><ul><li><p><a href="https://blog.rwkv.com/p/eaglex-v2-soaring-past-llama2-7b">best-in-class multi-lingual support</a></p></li><li><p>low computation, batch processing in the background (MS recall)</p></li><li><p>general-purpose chat (though this is probably the phi model)</p></li></ul><p>Its main advantages are its low energy cost and language support.</p><div><hr></div><h2>Fingers crossed on the rollout</h2><p>For now, until the roll-out of offline co-pilot into the Microsoft operating system and/or Office 365, we will be keeping tabs to see how our models are deployed into Windows.</p><p>We are excited to see what is next, as we scale out the deployment for the RWKV open source foundation model.</p><blockquote><p>Change note: the article was originally citing 0.5 billion which is the estimated size of Windows 11 deployment.<br><br>It has been updated to include Windows 10, as we have gotten confirmation for it as well.</p></blockquote><div><hr></div><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://blog.rwkv.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Thanks for reading RWKV Open Source Development Blog! 
Subscribe for free to receive new posts and support my work.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div>]]></content:encoded></item><item><title><![CDATA[🐦 RWKV v6 Finch 14B is here!]]></title><description><![CDATA[From 14B, 7B, 3B, 1.6B here are the various RWKV v6 models]]></description><link>https://blog.rwkv.com/p/rwkv-v6-finch-14b-is-here</link><guid isPermaLink="false">https://blog.rwkv.com/p/rwkv-v6-finch-14b-is-here</guid><dc:creator><![CDATA[RWKV]]></dc:creator><pubDate>Tue, 03 Sep 2024 02:44:27 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!R0M0!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10150a75-38e9-44b0-a738-77d907395fe5_1024x1024.jpeg" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!R0M0!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10150a75-38e9-44b0-a738-77d907395fe5_1024x1024.jpeg" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!R0M0!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10150a75-38e9-44b0-a738-77d907395fe5_1024x1024.jpeg 424w, https://substackcdn.com/image/fetch/$s_!R0M0!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10150a75-38e9-44b0-a738-77d907395fe5_1024x1024.jpeg 848w, 
https://substackcdn.com/image/fetch/$s_!R0M0!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10150a75-38e9-44b0-a738-77d907395fe5_1024x1024.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!R0M0!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10150a75-38e9-44b0-a738-77d907395fe5_1024x1024.jpeg 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!R0M0!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10150a75-38e9-44b0-a738-77d907395fe5_1024x1024.jpeg" width="1024" height="1024" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/10150a75-38e9-44b0-a738-77d907395fe5_1024x1024.jpeg&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1024,&quot;width&quot;:1024,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:204186,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/jpeg&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!R0M0!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10150a75-38e9-44b0-a738-77d907395fe5_1024x1024.jpeg 424w, https://substackcdn.com/image/fetch/$s_!R0M0!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10150a75-38e9-44b0-a738-77d907395fe5_1024x1024.jpeg 848w, 
https://substackcdn.com/image/fetch/$s_!R0M0!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10150a75-38e9-44b0-a738-77d907395fe5_1024x1024.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!R0M0!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F10150a75-38e9-44b0-a738-77d907395fe5_1024x1024.jpeg 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p><strong>Announcing the latest RWKV model: Finch 14B!</strong></p><p>Finch is the 6th and latest version of the RWKV 
architecture, succeeding the Eagle / v5 lines of models. Finch improves upon Eagle by introducing data-dependence into the token shift and time-mixing, making Finch more efficient in managing its &#8220;long-term memory&#8221; as it processes a prompt, thereby giving it better range. </p><p>The Finch architecture is covered in detail alongside Eagle in <a href="https://arxiv.org/pdf/2404.05892">https://arxiv.org/pdf/2404.05892</a> and Finches smaller than 14B have been appearing throughout the 2024 calendar year, with 14B representing the largest Finch trained to date (also the largest RWKV model - 7B was the maximum size trained of Eagle).</p><p></p><h1>Training details and Evals</h1><p>Both Finch 7B and Finch 14B are derived from continuing training of the Eagle 7B weights on the same dataset (known as World v2.1, the constituents of which are described <a href="https://huggingface.co/BlinkDL/rwkv-6-world">here</a>). The 14B model is derived from stacking two copies of the 7B model. Stacking effectively increases the short-term memory of the model (i.e. how much of the exact prompt feeds into the NN layers at each level) which has a different effect than widening the model.</p><p>We evaluated the Finch models using <a href="https://github.com/RWKV/lm-evaluation-harness">https://github.com/RWKV/lm-evaluation-harness</a>, a fork of the standard LLM evaluation framework which also powers HuggingFace&#8217;s <a href="https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard">Open LLM Leaderboard</a> (fork only to make the harness work via automation). </p><p>We ran a wide variety of benchmarks (235 in total), attempting to maximize breadth, while managing computation time (each of the models took 2 days to eval in our setup (!)).</p><p>Finch 7B improved +5.38% across all benchmarks while Finch 14B improved an additional +7.14% across all benchmarks (both figures relative to Eagle 7B). 
Given that Eagle 7B was the starting point for training for both models, the fact that there was an increase is a given; the <em>amount</em> of increase is evidence of the value of Finch&#8217;s architectural changes, as well as that the depth of the model is not saturated by our data-set run (of 1.42T tokens).</p><p>If we focus specifically on the Open LLM Leaderboard v1 benchmarks, we see</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!JmkG!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F376d3a54-c793-4bc8-a7b6-bf823a7e2416_1026x470.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!JmkG!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F376d3a54-c793-4bc8-a7b6-bf823a7e2416_1026x470.png 424w, https://substackcdn.com/image/fetch/$s_!JmkG!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F376d3a54-c793-4bc8-a7b6-bf823a7e2416_1026x470.png 848w, https://substackcdn.com/image/fetch/$s_!JmkG!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F376d3a54-c793-4bc8-a7b6-bf823a7e2416_1026x470.png 1272w, https://substackcdn.com/image/fetch/$s_!JmkG!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F376d3a54-c793-4bc8-a7b6-bf823a7e2416_1026x470.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!JmkG!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F376d3a54-c793-4bc8-a7b6-bf823a7e2416_1026x470.png" width="1026" 
height="470" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/376d3a54-c793-4bc8-a7b6-bf823a7e2416_1026x470.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:470,&quot;width&quot;:1026,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:67012,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!JmkG!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F376d3a54-c793-4bc8-a7b6-bf823a7e2416_1026x470.png 424w, https://substackcdn.com/image/fetch/$s_!JmkG!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F376d3a54-c793-4bc8-a7b6-bf823a7e2416_1026x470.png 848w, https://substackcdn.com/image/fetch/$s_!JmkG!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F376d3a54-c793-4bc8-a7b6-bf823a7e2416_1026x470.png 1272w, https://substackcdn.com/image/fetch/$s_!JmkG!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F376d3a54-c793-4bc8-a7b6-bf823a7e2416_1026x470.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><h1>Contributing GPU cluster time to RWKV!</h1><p>RWKV is an open source project <a href="https://lfaidata.foundation/projects/rwkv/">recognized by the Linux Foundation</a>. There are various bottlenecks to the project, <em>but GPU time is one of them</em>, and we gratefully accept donations. If your organization has idle time please reach out at <a href="mailto:eugene@rwkv.com">eugene@rwkv.com</a> or <a href="mailto:nathan@rwkv.com">nathan@rwkv.com</a> to explore a donation and learn about what kinds of training runs those spare cycles could power.</p><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://blog.rwkv.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Thanks for reading RWKV Open Source Development Blog! 
Subscribe for free to receive new posts and support our work.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div><h1>References</h1><ul><li><p>Model weights:</p><ul><li><p><a href="https://huggingface.co/RWKV/v6-Finch-14B-HF">14B</a></p></li><li><p><a href="https://huggingface.co/RWKV/v6-Finch-7B-HF">7B</a> </p></li><li><p><a href="https://huggingface.co/RWKV/v6-Finch-3B-HF">3B</a></p></li><li><p><a href="https://huggingface.co/RWKV/v6-Finch-1B6-HF">1.6B</a></p></li></ul></li><li><p>Hosted inference: <a href="https://featherless.ai/models/RWKV/Finch-14B">https://featherless.ai/models/RWKV/Finch-14B</a></p></li><li><p>Training code: <a href="https://github.com/BlinkDL/RWKV-LM">https://github.com/BlinkDL/RWKV-LM</a></p></li><li><p></p></li></ul>]]></content:encoded></item><item><title><![CDATA[🦅 EagleX v2 : Soaring past LLaMA2 7B in both English and Multi-lang evals (RWKV-v5)]]></title><description><![CDATA[You have seen the teaser with the EagleX 1.7T, now it's here - the definitive version of the linear transformer, trained past LLaMA 2 7B.]]></description><link>https://blog.rwkv.com/p/eaglex-v2-soaring-past-llama2-7b</link><guid isPermaLink="false">https://blog.rwkv.com/p/eaglex-v2-soaring-past-llama2-7b</guid><dc:creator><![CDATA[RWKV]]></dc:creator><pubDate>Thu, 18 Apr 2024 07:20:08 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!Nnvz!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F304f2c7a-fc67-4df4-ba57-c6f38f86826c_2688x1536.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" 
href="https://substackcdn.com/image/fetch/$s_!Nnvz!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F304f2c7a-fc67-4df4-ba57-c6f38f86826c_2688x1536.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!Nnvz!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F304f2c7a-fc67-4df4-ba57-c6f38f86826c_2688x1536.png 424w, https://substackcdn.com/image/fetch/$s_!Nnvz!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F304f2c7a-fc67-4df4-ba57-c6f38f86826c_2688x1536.png 848w, https://substackcdn.com/image/fetch/$s_!Nnvz!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F304f2c7a-fc67-4df4-ba57-c6f38f86826c_2688x1536.png 1272w, https://substackcdn.com/image/fetch/$s_!Nnvz!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F304f2c7a-fc67-4df4-ba57-c6f38f86826c_2688x1536.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!Nnvz!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F304f2c7a-fc67-4df4-ba57-c6f38f86826c_2688x1536.png" width="1456" height="832" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/304f2c7a-fc67-4df4-ba57-c6f38f86826c_2688x1536.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:832,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:6403497,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!Nnvz!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F304f2c7a-fc67-4df4-ba57-c6f38f86826c_2688x1536.png 424w, https://substackcdn.com/image/fetch/$s_!Nnvz!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F304f2c7a-fc67-4df4-ba57-c6f38f86826c_2688x1536.png 848w, https://substackcdn.com/image/fetch/$s_!Nnvz!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F304f2c7a-fc67-4df4-ba57-c6f38f86826c_2688x1536.png 1272w, https://substackcdn.com/image/fetch/$s_!Nnvz!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F304f2c7a-fc67-4df4-ba57-c6f38f86826c_2688x1536.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><h1>EagleX v2 - in short</h1><p>We extended the training of the previous Eagle 7B from 1.1 Trillion tokens to 2.25 Trillion tokens.</p><ul><li><p><a href="https://blog.rwkv.com/p/eagle-7b-soaring-past-transformers">A continuation based on the original Eagle 7B model</a>, <br><a href="https://substack.recursal.ai/p/eaglex-17t-soaring-past-llama-7b">and the EagleX 1.7T model</a></p></li><li><p><a href="https://blog.rwkv.com/p/the-worlds-greenest-ai-model-rwkvs">Ranks as the world&#8217;s greenest 7B model (per token)</a></p></li><li><p>Trained on 2.25 Trillion tokens across 100+ languages</p></li><li><p>Outperforms all 7B class models in multi-lingual benchmarks</p></li><li><p>Passes LLaMA2 (2T) in multiple English evals, approaches Mistral (&gt;2T?)</p></li><li><p><a href="https://www.isattentionallyouneed.com/">All while being an &#8220;Attention-Free Transformer&#8221;</a></p></li></ul><p>We are releasing RWKV-v5 Eagle v2, <a 
href="https://blog.rwkv.com/p/rwkv-joins-the-linux-foundation-as">licensed under Apache 2.0</a>, which can be used personally or commercially without restrictions.</p><ul><li><p><a href="https://huggingface.co/RWKV/v5-EagleX-v2-7B-HF">Download from HuggingFace</a></p></li><li><p>Try it online today on: <a href="https://recursal.ai/">recursal.ai cloud platform</a></p></li><li><p>Try on: <a href="https://huggingface.co/spaces/RWKV/v5-EagleX-v2-7B-gradio">our HF gradio demo</a></p></li><li><p>Use our reference <a href="https://pypi.org/project/rwkv/">pip inference package</a>, or any other community inference options (<a href="https://github.com/josStorer/RWKV-Runner">Desktop App</a>, <a href="https://github.com/saharNooby/rwkv.cpp">RWKV.cpp</a>, <a href="https://wiki.rwkv.com/basic/play.html">etc</a>), and use it anywhere (even locally)</p></li><li><p><a href="https://github.com/RWKV/RWKV-infctx-trainer">Fine-tune using our Infctx trainer</a></p></li><li><p><a href="https://github.com/huggingface/transformers/pull/26963">[Pending PR] Get support merged into Huggingface transformers!</a></p></li><li><p><a href="https://docs.google.com/spreadsheets/d/1CBLU6yKkW-8FMvGD4INO3qjeHZ0qkKnZFcM6n6lWNOs/edit?usp=sharing">All eval data can be found in the google sheet here</a></p></li></ul><h1>Building on bold claims</h1><p><a href="https://substack.recursal.ai/p/eaglex-17t-soaring-past-llama-7b">The original EagleX 7B 1.7T, trained by Recursal AI</a>, made history as the first sub-quadratic model, to pass llama2 7B 2T on average in English eval. </p><p>Today we are releasing the 2.25T trained variant, which furthers the gap with the llama2 model.</p><blockquote><p>The following report follows the same general format of the 1.7T model release, in eval details - to make direct comparison easier.</p></blockquote><h2>Winning English Perplexity</h2><p>We start with the basics: Perplexity, which is the loss value against the test dataset (lower score = better), i.e. 
how good the model is with the next token prediction.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!aihV!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe7afd340-4317-489a-817b-00e3dbbc1bb1_1152x552.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!aihV!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe7afd340-4317-489a-817b-00e3dbbc1bb1_1152x552.png 424w, https://substackcdn.com/image/fetch/$s_!aihV!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe7afd340-4317-489a-817b-00e3dbbc1bb1_1152x552.png 848w, https://substackcdn.com/image/fetch/$s_!aihV!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe7afd340-4317-489a-817b-00e3dbbc1bb1_1152x552.png 1272w, https://substackcdn.com/image/fetch/$s_!aihV!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe7afd340-4317-489a-817b-00e3dbbc1bb1_1152x552.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!aihV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe7afd340-4317-489a-817b-00e3dbbc1bb1_1152x552.png" width="1152" height="552" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/e7afd340-4317-489a-817b-00e3dbbc1bb1_1152x552.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:552,&quot;width&quot;:1152,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:168868,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!aihV!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe7afd340-4317-489a-817b-00e3dbbc1bb1_1152x552.png 424w, https://substackcdn.com/image/fetch/$s_!aihV!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe7afd340-4317-489a-817b-00e3dbbc1bb1_1152x552.png 848w, https://substackcdn.com/image/fetch/$s_!aihV!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe7afd340-4317-489a-817b-00e3dbbc1bb1_1152x552.png 1272w, https://substackcdn.com/image/fetch/$s_!aihV!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe7afd340-4317-489a-817b-00e3dbbc1bb1_1152x552.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>In a major first, the EagleX model now passes Mistral in perplexity, and takes the lead in the 7B model weight class.</p><div class="pullquote"><p><em><strong>Why do experts care about perplexity?<br></strong>Evals in general can be very subjective and opinion-driven, and commonly give mixed results. 
Perplexity in a way gives the TLDR summary for most experts to start with</em></p></div><h1>Leading Multi-lang Perplexity &amp; evals</h1><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!TX-F!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbae154cf-8fea-404e-a5ee-a6bf65f236f1_3298x914.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!TX-F!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbae154cf-8fea-404e-a5ee-a6bf65f236f1_3298x914.png 424w, https://substackcdn.com/image/fetch/$s_!TX-F!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbae154cf-8fea-404e-a5ee-a6bf65f236f1_3298x914.png 848w, https://substackcdn.com/image/fetch/$s_!TX-F!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbae154cf-8fea-404e-a5ee-a6bf65f236f1_3298x914.png 1272w, https://substackcdn.com/image/fetch/$s_!TX-F!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbae154cf-8fea-404e-a5ee-a6bf65f236f1_3298x914.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!TX-F!,w_2400,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbae154cf-8fea-404e-a5ee-a6bf65f236f1_3298x914.png" width="1200" height="332.967032967033" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/bae154cf-8fea-404e-a5ee-a6bf65f236f1_3298x914.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;large&quot;,&quot;height&quot;:404,&quot;width&quot;:1456,&quot;resizeWidth&quot;:1200,&quot;bytes&quot;:459841,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-large" alt="" srcset="https://substackcdn.com/image/fetch/$s_!TX-F!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbae154cf-8fea-404e-a5ee-a6bf65f236f1_3298x914.png 424w, https://substackcdn.com/image/fetch/$s_!TX-F!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbae154cf-8fea-404e-a5ee-a6bf65f236f1_3298x914.png 848w, https://substackcdn.com/image/fetch/$s_!TX-F!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbae154cf-8fea-404e-a5ee-a6bf65f236f1_3298x914.png 1272w, https://substackcdn.com/image/fetch/$s_!TX-F!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fbae154cf-8fea-404e-a5ee-a6bf65f236f1_3298x914.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!1a7j!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7fc1910f-2ebf-4e03-a449-ff93de1b0e63_2094x774.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!1a7j!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7fc1910f-2ebf-4e03-a449-ff93de1b0e63_2094x774.png 424w, https://substackcdn.com/image/fetch/$s_!1a7j!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7fc1910f-2ebf-4e03-a449-ff93de1b0e63_2094x774.png 848w, 
https://substackcdn.com/image/fetch/$s_!1a7j!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7fc1910f-2ebf-4e03-a449-ff93de1b0e63_2094x774.png 1272w, https://substackcdn.com/image/fetch/$s_!1a7j!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7fc1910f-2ebf-4e03-a449-ff93de1b0e63_2094x774.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!1a7j!,w_2400,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7fc1910f-2ebf-4e03-a449-ff93de1b0e63_2094x774.png" width="1200" height="443.4065934065934" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/7fc1910f-2ebf-4e03-a449-ff93de1b0e63_2094x774.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;large&quot;,&quot;height&quot;:538,&quot;width&quot;:1456,&quot;resizeWidth&quot;:1200,&quot;bytes&quot;:305774,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-large" alt="" srcset="https://substackcdn.com/image/fetch/$s_!1a7j!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7fc1910f-2ebf-4e03-a449-ff93de1b0e63_2094x774.png 424w, https://substackcdn.com/image/fetch/$s_!1a7j!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7fc1910f-2ebf-4e03-a449-ff93de1b0e63_2094x774.png 848w, 
https://substackcdn.com/image/fetch/$s_!1a7j!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7fc1910f-2ebf-4e03-a449-ff93de1b0e63_2094x774.png 1272w, https://substackcdn.com/image/fetch/$s_!1a7j!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7fc1910f-2ebf-4e03-a449-ff93de1b0e63_2094x774.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>EagleX maintains the lead for best-in-class multi-lingual performance, with the incremental improvements we&#8217;re making to the 
Eagle line of models.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!7kfa!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1fbc77b2-4c44-4cf3-ab09-c709fe5b2455_1739x519.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!7kfa!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1fbc77b2-4c44-4cf3-ab09-c709fe5b2455_1739x519.png 424w, https://substackcdn.com/image/fetch/$s_!7kfa!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1fbc77b2-4c44-4cf3-ab09-c709fe5b2455_1739x519.png 848w, https://substackcdn.com/image/fetch/$s_!7kfa!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1fbc77b2-4c44-4cf3-ab09-c709fe5b2455_1739x519.png 1272w, https://substackcdn.com/image/fetch/$s_!7kfa!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1fbc77b2-4c44-4cf3-ab09-c709fe5b2455_1739x519.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!7kfa!,w_2400,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1fbc77b2-4c44-4cf3-ab09-c709fe5b2455_1739x519.png" width="1200" height="358.5164835164835" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/1fbc77b2-4c44-4cf3-ab09-c709fe5b2455_1739x519.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;large&quot;,&quot;height&quot;:435,&quot;width&quot;:1456,&quot;resizeWidth&quot;:1200,&quot;bytes&quot;:101919,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-large" alt="" srcset="https://substackcdn.com/image/fetch/$s_!7kfa!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1fbc77b2-4c44-4cf3-ab09-c709fe5b2455_1739x519.png 424w, https://substackcdn.com/image/fetch/$s_!7kfa!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1fbc77b2-4c44-4cf3-ab09-c709fe5b2455_1739x519.png 848w, https://substackcdn.com/image/fetch/$s_!7kfa!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1fbc77b2-4c44-4cf3-ab09-c709fe5b2455_1739x519.png 1272w, https://substackcdn.com/image/fetch/$s_!7kfa!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F1fbc77b2-4c44-4cf3-ab09-c709fe5b2455_1739x519.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Most of the tasks here are common sense reasoning tests of a wide variety of formats, across languages including <a href="https://blog.rwkv.com/i/141130059/multi-lingual-performance-details">23 of the world&#8217;s most widely used languages.</a></p><p>For the remaining languages, we urge the community to test and judge them themselves, as the model was trained on 100+ languages. Over time, we would want more languages to be added to evals.</p><div class="pullquote"><p><em><strong>Why is multi-lingual perf important?<br></strong><br>The goal of the RWKV project &amp; Eagle line of models is to build <strong>inclusive</strong> AI for everyone regardless of their language. 
Our mission is to build AI models not just made for English, but also for the 83% of the world&#8217;s population using a non-English language every day.</em></p></div><h1>Going big on eval data</h1><p>As per the previous 1.7T model, we ran ALL the benchmarks in EleutherAI `<a href="https://github.com/EleutherAI/lm-evaluation-harness">lm-eval-harness</a>`, at commit `f78e2da`, with the following limitations:</p><ul><li><p>It has to be completed in under 30 minutes on 8x4090 (we were running lots of evals)</p><ul><li><p>This rules out some of the rather more expensive long chain of thought evals</p></li></ul></li><li><p>We excluded all the personality/alignment evals</p></li><li><p>Eval has to be executable across a wide variety of models, via lm-eval-harness</p></li><li><p>All evals are 0 shot (no 5 shot-ing an MCQ question)</p></li><li><p>We limited comparison to other models within the 7B weight class</p></li></ul><p>These resulted in running 60+ major eval groups, which generated 1,000+ data points per model. 
A data point count so high that we had to drop standard error deviations, just to ensure the raw CSV file can be loaded in macOS Numbers.</p><div><hr></div><h1>21 English Evals</h1><p>However, because 180+ evals are overwhelming, let&#8217;s first reduce down to 21 of the arguably most popular English evals, such as Lambada, Glue, Swag, Winogrande, TruthfulQA, MMLU:</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!9WH0!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4736807d-7e50-4ec4-92de-f2d0ce77db77_2930x484.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!9WH0!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4736807d-7e50-4ec4-92de-f2d0ce77db77_2930x484.png 424w, https://substackcdn.com/image/fetch/$s_!9WH0!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4736807d-7e50-4ec4-92de-f2d0ce77db77_2930x484.png 848w, https://substackcdn.com/image/fetch/$s_!9WH0!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4736807d-7e50-4ec4-92de-f2d0ce77db77_2930x484.png 1272w, https://substackcdn.com/image/fetch/$s_!9WH0!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4736807d-7e50-4ec4-92de-f2d0ce77db77_2930x484.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!9WH0!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4736807d-7e50-4ec4-92de-f2d0ce77db77_2930x484.png" width="1456" height="241" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/4736807d-7e50-4ec4-92de-f2d0ce77db77_2930x484.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:241,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:388822,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!9WH0!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4736807d-7e50-4ec4-92de-f2d0ce77db77_2930x484.png 424w, https://substackcdn.com/image/fetch/$s_!9WH0!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4736807d-7e50-4ec4-92de-f2d0ce77db77_2930x484.png 848w, https://substackcdn.com/image/fetch/$s_!9WH0!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4736807d-7e50-4ec4-92de-f2d0ce77db77_2930x484.png 1272w, https://substackcdn.com/image/fetch/$s_!9WH0!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F4736807d-7e50-4ec4-92de-f2d0ce77db77_2930x484.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Narrowing it down to the models that most of us actually care about - LLaMA, Mistral, EagleX, and Eagle-7b - the new EagleX v2 model outperforms LLaMA-2-7b on average across the 21 evals, and lags not far behind Mistral.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" 
href="https://substackcdn.com/image/fetch/$s_!P4Qb!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff6d9a27e-c90b-40ff-bbfd-1012e2e0cd35_1434x598.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!P4Qb!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff6d9a27e-c90b-40ff-bbfd-1012e2e0cd35_1434x598.png 424w, https://substackcdn.com/image/fetch/$s_!P4Qb!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff6d9a27e-c90b-40ff-bbfd-1012e2e0cd35_1434x598.png 848w, https://substackcdn.com/image/fetch/$s_!P4Qb!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff6d9a27e-c90b-40ff-bbfd-1012e2e0cd35_1434x598.png 1272w, https://substackcdn.com/image/fetch/$s_!P4Qb!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff6d9a27e-c90b-40ff-bbfd-1012e2e0cd35_1434x598.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!P4Qb!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff6d9a27e-c90b-40ff-bbfd-1012e2e0cd35_1434x598.png" width="1434" height="598" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f6d9a27e-c90b-40ff-bbfd-1012e2e0cd35_1434x598.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:598,&quot;width&quot;:1434,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:119356,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!P4Qb!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff6d9a27e-c90b-40ff-bbfd-1012e2e0cd35_1434x598.png 424w, https://substackcdn.com/image/fetch/$s_!P4Qb!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff6d9a27e-c90b-40ff-bbfd-1012e2e0cd35_1434x598.png 848w, https://substackcdn.com/image/fetch/$s_!P4Qb!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff6d9a27e-c90b-40ff-bbfd-1012e2e0cd35_1434x598.png 1272w, https://substackcdn.com/image/fetch/$s_!P4Qb!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff6d9a27e-c90b-40ff-bbfd-1012e2e0cd35_1434x598.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Keep in mind that this average shown is across all 21 evals</figcaption></figure></div><div><hr></div><h4><strong>The Good</strong></h4><p>Now, let&#8217;s look at where our model is blowing the rest of the models out of the water.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!xtgt!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2588fb2-57a9-4af7-86a1-23114c6aea53_2012x964.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!xtgt!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2588fb2-57a9-4af7-86a1-23114c6aea53_2012x964.png 424w, 
https://substackcdn.com/image/fetch/$s_!xtgt!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2588fb2-57a9-4af7-86a1-23114c6aea53_2012x964.png 848w, https://substackcdn.com/image/fetch/$s_!xtgt!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2588fb2-57a9-4af7-86a1-23114c6aea53_2012x964.png 1272w, https://substackcdn.com/image/fetch/$s_!xtgt!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2588fb2-57a9-4af7-86a1-23114c6aea53_2012x964.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!xtgt!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2588fb2-57a9-4af7-86a1-23114c6aea53_2012x964.png" width="728" height="349" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f2588fb2-57a9-4af7-86a1-23114c6aea53_2012x964.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;normal&quot;,&quot;height&quot;:698,&quot;width&quot;:1456,&quot;resizeWidth&quot;:728,&quot;bytes&quot;:360558,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!xtgt!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2588fb2-57a9-4af7-86a1-23114c6aea53_2012x964.png 424w, 
https://substackcdn.com/image/fetch/$s_!xtgt!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2588fb2-57a9-4af7-86a1-23114c6aea53_2012x964.png 848w, https://substackcdn.com/image/fetch/$s_!xtgt!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2588fb2-57a9-4af7-86a1-23114c6aea53_2012x964.png 1272w, https://substackcdn.com/image/fetch/$s_!xtgt!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff2588fb2-57a9-4af7-86a1-23114c6aea53_2012x964.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line 
x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>First, the big standout is the following 5 evals, in which both our 1.7T and 2.25T models beat even mistral 2T++ trained model (glue, anli, mmnli, swag), across multiple tasks focused around either contextual-based simple Q&amp;A with common sense reasoning, or deductive logic.</p><blockquote><p>PS: The jump for glue/mnli was high enough, that we needed to check the dataset specifically for contamination. We were not able to find any. This jump is currently being attributed to multiple training datasets, along with data augmented / machine rewritten instruct dataset following a similar structure.</p></blockquote><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!MNVD!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabcf456e-dc9f-4c44-95a9-838cfdd770b9_2444x1116.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!MNVD!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabcf456e-dc9f-4c44-95a9-838cfdd770b9_2444x1116.png 424w, https://substackcdn.com/image/fetch/$s_!MNVD!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabcf456e-dc9f-4c44-95a9-838cfdd770b9_2444x1116.png 848w, https://substackcdn.com/image/fetch/$s_!MNVD!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabcf456e-dc9f-4c44-95a9-838cfdd770b9_2444x1116.png 1272w, 
https://substackcdn.com/image/fetch/$s_!MNVD!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabcf456e-dc9f-4c44-95a9-838cfdd770b9_2444x1116.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!MNVD!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabcf456e-dc9f-4c44-95a9-838cfdd770b9_2444x1116.png" width="728" height="332.5" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/abcf456e-dc9f-4c44-95a9-838cfdd770b9_2444x1116.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;normal&quot;,&quot;height&quot;:665,&quot;width&quot;:1456,&quot;resizeWidth&quot;:728,&quot;bytes&quot;:503104,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!MNVD!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabcf456e-dc9f-4c44-95a9-838cfdd770b9_2444x1116.png 424w, https://substackcdn.com/image/fetch/$s_!MNVD!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabcf456e-dc9f-4c44-95a9-838cfdd770b9_2444x1116.png 848w, https://substackcdn.com/image/fetch/$s_!MNVD!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabcf456e-dc9f-4c44-95a9-838cfdd770b9_2444x1116.png 1272w, 
https://substackcdn.com/image/fetch/$s_!MNVD!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fabcf456e-dc9f-4c44-95a9-838cfdd770b9_2444x1116.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>EagleX 2.25T also performs better than LLaMA-2-7b in lambada next token prediction, and more importantly &#8230;</p><p>winogrande, wnli, truthfulqa evals, which imply that the EagleX model would be applicable in RAG use cases, which are mainly contextual Q&amp;A, with the right prompt engineering.</p><div class="pullquote"><p><em>Strong common sense reasoning 
over context,<br>has very strong applicable use cases for multiple RAG use cases</em></p></div><h4><strong>The Mixed</strong></h4><p>Next: the eval sets with mixed results. </p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!lCnS!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3766dffc-d220-4494-9f55-27bbb020500e_1588x940.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!lCnS!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3766dffc-d220-4494-9f55-27bbb020500e_1588x940.png 424w, https://substackcdn.com/image/fetch/$s_!lCnS!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3766dffc-d220-4494-9f55-27bbb020500e_1588x940.png 848w, https://substackcdn.com/image/fetch/$s_!lCnS!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3766dffc-d220-4494-9f55-27bbb020500e_1588x940.png 1272w, https://substackcdn.com/image/fetch/$s_!lCnS!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3766dffc-d220-4494-9f55-27bbb020500e_1588x940.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!lCnS!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3766dffc-d220-4494-9f55-27bbb020500e_1588x940.png" width="728" height="431" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/3766dffc-d220-4494-9f55-27bbb020500e_1588x940.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;normal&quot;,&quot;height&quot;:862,&quot;width&quot;:1456,&quot;resizeWidth&quot;:728,&quot;bytes&quot;:274780,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!lCnS!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3766dffc-d220-4494-9f55-27bbb020500e_1588x940.png 424w, https://substackcdn.com/image/fetch/$s_!lCnS!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3766dffc-d220-4494-9f55-27bbb020500e_1588x940.png 848w, https://substackcdn.com/image/fetch/$s_!lCnS!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3766dffc-d220-4494-9f55-27bbb020500e_1588x940.png 1272w, https://substackcdn.com/image/fetch/$s_!lCnS!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F3766dffc-d220-4494-9f55-27bbb020500e_1588x940.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>For logiqa, we have very similar evals with 2 major variants. 
The results between EagleX and LLaMA are close enough, that it&#8217;s hard to say which model is clearly better between the two for these evals.</p><p>Similarly, sciq got slightly worse from 1.7T to 2.25T, but in general, all models are within trading blows of each other at 90%+ scoring.</p><div><hr></div><h4><strong>The &#8220;Not too bad&#8221; and the &#8220;Really Bad&#8221;</strong></h4><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!NdXr!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ca92565-b480-497d-91e0-1a4dd43cce5d_2714x998.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!NdXr!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ca92565-b480-497d-91e0-1a4dd43cce5d_2714x998.png 424w, https://substackcdn.com/image/fetch/$s_!NdXr!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ca92565-b480-497d-91e0-1a4dd43cce5d_2714x998.png 848w, https://substackcdn.com/image/fetch/$s_!NdXr!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ca92565-b480-497d-91e0-1a4dd43cce5d_2714x998.png 1272w, https://substackcdn.com/image/fetch/$s_!NdXr!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ca92565-b480-497d-91e0-1a4dd43cce5d_2714x998.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!NdXr!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ca92565-b480-497d-91e0-1a4dd43cce5d_2714x998.png" width="1456" 
height="535" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/9ca92565-b480-497d-91e0-1a4dd43cce5d_2714x998.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:535,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:473597,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!NdXr!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ca92565-b480-497d-91e0-1a4dd43cce5d_2714x998.png 424w, https://substackcdn.com/image/fetch/$s_!NdXr!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ca92565-b480-497d-91e0-1a4dd43cce5d_2714x998.png 848w, https://substackcdn.com/image/fetch/$s_!NdXr!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ca92565-b480-497d-91e0-1a4dd43cce5d_2714x998.png 1272w, https://substackcdn.com/image/fetch/$s_!NdXr!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F9ca92565-b480-497d-91e0-1a4dd43cce5d_2714x998.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>These are the evals where the EagleX model performs worse when compared to both Mistral and LLaMA. However, for the evals that we&#8217;ve lost to LLaMA, it&#8217;s typically by a narrow margin. </p><p><a href="https://substack.recursal.ai/i/142632996/the-not-too-bad-and-the-really-bad">One major error that occurred in the 1.7T model</a>, was the accidental exclusion of the math dataset, which caused a degradation of math performance. </p><p>Since then, we have added back math text and math materials, which boosted the arithmetic score. However, given the number of tokens between 1.7T and 2.25T, and the learning rate, the increase in math score was limited.</p><blockquote><p>Our recommendation still stands that realistically IMO - no one should be depending on a 7B model for math (just saying)</p></blockquote><div><hr></div><h2>180 English Evals</h2><p>As per the previous 1.7T release, 
let&#8217;s zoom out, and look at it holistically across 180 English evals.</p><p><a href="https://docs.google.com/spreadsheets/d/1CBLU6yKkW-8FMvGD4INO3qjeHZ0qkKnZFcM6n6lWNOs/edit?usp=sharing">You can view the full results here</a></p><p>Although using the overall averages across all the evals does have a bias on the results towards larger eval sets (due to double counting, e.g. mmlu overall and many individual mmlu test), it does not change the ranking among the EagleX, Mistral, LLaMA and the original Eagle models.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!bL_m!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F541e0bbe-08fd-4afd-9d1a-8c07a64cf342_1154x916.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!bL_m!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F541e0bbe-08fd-4afd-9d1a-8c07a64cf342_1154x916.png 424w, https://substackcdn.com/image/fetch/$s_!bL_m!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F541e0bbe-08fd-4afd-9d1a-8c07a64cf342_1154x916.png 848w, https://substackcdn.com/image/fetch/$s_!bL_m!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F541e0bbe-08fd-4afd-9d1a-8c07a64cf342_1154x916.png 1272w, https://substackcdn.com/image/fetch/$s_!bL_m!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F541e0bbe-08fd-4afd-9d1a-8c07a64cf342_1154x916.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!bL_m!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F541e0bbe-08fd-4afd-9d1a-8c07a64cf342_1154x916.png" width="1154" height="916" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/541e0bbe-08fd-4afd-9d1a-8c07a64cf342_1154x916.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:916,&quot;width&quot;:1154,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:193443,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!bL_m!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F541e0bbe-08fd-4afd-9d1a-8c07a64cf342_1154x916.png 424w, https://substackcdn.com/image/fetch/$s_!bL_m!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F541e0bbe-08fd-4afd-9d1a-8c07a64cf342_1154x916.png 848w, https://substackcdn.com/image/fetch/$s_!bL_m!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F541e0bbe-08fd-4afd-9d1a-8c07a64cf342_1154x916.png 1272w, https://substackcdn.com/image/fetch/$s_!bL_m!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F541e0bbe-08fd-4afd-9d1a-8c07a64cf342_1154x916.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft 
pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>However, these results are useful for smaller insights (as per the previous model as well). Such as measuring specifically gaps in knowledge by &#8220;subject domain&#8221; within the models.</p><div><hr></div><h1>Perhaps a good dataset + Scalable architecture:<br>is all you need?</h1><p>The RWKV Open Source Foundation's goal is to ensure AI access is made accessible to everyone in the world, regardless of language or economic status.</p><p>In line with our goal, it does repeat the question. 
Does the exact architecture matter less than the data for model performance?</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!RpeI!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa8ac11c6-e329-4db8-8e18-3a29a0019a68_936x674.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!RpeI!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa8ac11c6-e329-4db8-8e18-3a29a0019a68_936x674.png 424w, https://substackcdn.com/image/fetch/$s_!RpeI!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa8ac11c6-e329-4db8-8e18-3a29a0019a68_936x674.png 848w, https://substackcdn.com/image/fetch/$s_!RpeI!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa8ac11c6-e329-4db8-8e18-3a29a0019a68_936x674.png 1272w, https://substackcdn.com/image/fetch/$s_!RpeI!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa8ac11c6-e329-4db8-8e18-3a29a0019a68_936x674.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!RpeI!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa8ac11c6-e329-4db8-8e18-3a29a0019a68_936x674.png" width="936" height="674" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/a8ac11c6-e329-4db8-8e18-3a29a0019a68_936x674.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:674,&quot;width&quot;:936,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:82395,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!RpeI!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa8ac11c6-e329-4db8-8e18-3a29a0019a68_936x674.png 424w, https://substackcdn.com/image/fetch/$s_!RpeI!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa8ac11c6-e329-4db8-8e18-3a29a0019a68_936x674.png 848w, https://substackcdn.com/image/fetch/$s_!RpeI!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa8ac11c6-e329-4db8-8e18-3a29a0019a68_936x674.png 1272w, https://substackcdn.com/image/fetch/$s_!RpeI!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fa8ac11c6-e329-4db8-8e18-3a29a0019a68_936x674.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">CUDA computational time, for RWKV-based architecture -vs- transformer architecture: that quadratic-vs-linear really scales!</figcaption></figure></div><p>If true, perhaps we should seek more efficient and scalable architecture, to increase accessibility for everyone regardless of language or economic status. </p><p>All while going beyond the English language, which represents only 17% of the global population.</p><p>And <a href="https://blog.rwkv.com/p/the-worlds-greenest-ai-model-rwkvs">reducing the impact on our environment.</a></p><div><hr></div><h1>What&#8217;s next for the RWKV group?</h1><p>This release marks the final release of the Eagle line of RWKV models. 
With the finalization of the v6 architecture as outlined in the paper here - <a href="https://arxiv.org/abs/2404.05892">https://arxiv.org/abs/2404.05892</a></p><p>Our next step is to move onto the v6 Finch line of architecture, which we expect to bring an incremental improvement on the v5 Eagle architecture.</p><p>This decision takes into consideration that, based on our existing experiments, upcycling from the Eagle to the Finch line of models works.</p><p><strong>Roadmap</strong></p><ul><li><p>v6 Finch: 0.1B, 1.6B, 3B model release</p></li><li><p>v6 Finch: 7B, 14B training, this would be an upcycle of the Eagle models</p></li><li><p>MoE: (approximately) 8 x 22B</p></li></ul><p></p><p>Basically newer, better, bigger models - as we keep iterating on our goal to build a multi-lingual GPT4 class model, in open-source space, that can run on commodity hardware.</p><p>And ensure AI is accessible to everyone, regardless of language, or economic status.</p><div><hr></div><h1>Acknowledgment</h1><p>We are grateful and would like to thank the following key groups:</p><ul><li><p><a href="https://recursal.ai/">Recursal.ai</a> team for financing the GPU resources, and managing the training of this foundation model - you can run the Eagle line of RWKV models on their cloud / on-premise platform today.</p></li><li><p><a href="https://www.eleuther.ai/">EleutherAI</a> for their support, especially in the v5/v6 Eagle/Finch paper</p></li><li><p><a href="https://lfaidata.foundation/">Linux Foundation AI &amp; Data group</a> for supporting and hosting the RWKV project</p></li></ul><p>Along with the various developers, working on the growing collection of <a href="https://wiki.rwkv.com/">RWKV-related projects</a>.</p><p></p><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://blog.rwkv.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget 
show-subscribe"><div class="preamble"><p class="cta-caption">Thanks for reading RWKV Open Source Development Blog! Subscribe for free to receive new posts and support my work.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div>]]></content:encoded></item><item><title><![CDATA[🦅 Eagle & 🐦 Finch - architecture paper is here]]></title><description><![CDATA[Available at your local arxiv]]></description><link>https://blog.rwkv.com/p/eagle-and-finch-architecture-paper</link><guid isPermaLink="false">https://blog.rwkv.com/p/eagle-and-finch-architecture-paper</guid><dc:creator><![CDATA[RWKV]]></dc:creator><pubDate>Wed, 10 Apr 2024 05:58:59 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!ZlR3!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F18309682-903e-4f7e-bfc2-e2c8b12f30d4_1629x1854.jpeg" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!ZlR3!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F18309682-903e-4f7e-bfc2-e2c8b12f30d4_1629x1854.jpeg" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!ZlR3!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F18309682-903e-4f7e-bfc2-e2c8b12f30d4_1629x1854.jpeg 424w, 
https://substackcdn.com/image/fetch/$s_!ZlR3!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F18309682-903e-4f7e-bfc2-e2c8b12f30d4_1629x1854.jpeg 848w, https://substackcdn.com/image/fetch/$s_!ZlR3!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F18309682-903e-4f7e-bfc2-e2c8b12f30d4_1629x1854.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!ZlR3!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F18309682-903e-4f7e-bfc2-e2c8b12f30d4_1629x1854.jpeg 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!ZlR3!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F18309682-903e-4f7e-bfc2-e2c8b12f30d4_1629x1854.jpeg" width="1456" height="1657" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/18309682-903e-4f7e-bfc2-e2c8b12f30d4_1629x1854.jpeg&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1657,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:577701,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/jpeg&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!ZlR3!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F18309682-903e-4f7e-bfc2-e2c8b12f30d4_1629x1854.jpeg 424w, 
https://substackcdn.com/image/fetch/$s_!ZlR3!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F18309682-903e-4f7e-bfc2-e2c8b12f30d4_1629x1854.jpeg 848w, https://substackcdn.com/image/fetch/$s_!ZlR3!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F18309682-903e-4f7e-bfc2-e2c8b12f30d4_1629x1854.jpeg 1272w, https://substackcdn.com/image/fetch/$s_!ZlR3!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F18309682-903e-4f7e-bfc2-e2c8b12f30d4_1629x1854.jpeg 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>We present the Eagle and Finch architecture paper on arXiv: <a href="https://arxiv.org/abs/2404.05892">https://arxiv.org/abs/2404.05892</a></p><p>It covers and documents the architecture changes from RWKV-v4 onwards. This paper is a collaborative effort with the folks at Eleuther AI, who helped us in the paper-writing process.</p><p>Special shout-out to</p><ul><li><p>BlinkDL: The creator of the RWKV project</p></li><li><p>Eleuther AI: Who helped us throughout the paper-writing process</p></li><li><p>Linux Foundation AI &amp; Data: For hosting our project</p></li><li><p>Stability AI: Who sponsored the bulk of the compute, for the models covered.</p></li></ul><div><hr></div><p>Does this cover our latest model? </p><p>No - this covers our previously released Eagle and Finch line of models, trained up to 1.1T tokens. </p><p>A reminder that, as a fully Open Source project, we release in the following sequence: Code, Weights, then the paper. Not the other way around.</p><div><hr></div><p>Stay tuned for more details on our upcoming models this week</p><ul><li><p>Eagle: 2.25T 7B </p></li><li><p>Finch: 2.5T 1.6B </p></li></ul><p>(Some of you probably already know where to find them, if you search through our repos / discord)</p><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://blog.rwkv.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Thanks for reading RWKV Open Source Development Blog! 
Subscribe for free to receive new posts and support my work.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div>]]></content:encoded></item><item><title><![CDATA[🦅 Eagle 7B : Soaring past Transformers with 1 Trillion Tokens Across 100+ Languages (RWKV-v5)]]></title><description><![CDATA[A brand new era for the RWKV-v5 architecture and linear transformers has arrived - with the strongest multi-lingual model in open source today]]></description><link>https://blog.rwkv.com/p/eagle-7b-soaring-past-transformers</link><guid isPermaLink="false">https://blog.rwkv.com/p/eagle-7b-soaring-past-transformers</guid><dc:creator><![CDATA[Eugene Cheah]]></dc:creator><pubDate>Mon, 29 Jan 2024 01:22:56 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!lHFF!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6bbd31a7-21b4-4ff6-b43f-8735d1decf25_2048x1652.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!lHFF!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6bbd31a7-21b4-4ff6-b43f-8735d1decf25_2048x1652.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!lHFF!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6bbd31a7-21b4-4ff6-b43f-8735d1decf25_2048x1652.png 424w, 
https://substackcdn.com/image/fetch/$s_!lHFF!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6bbd31a7-21b4-4ff6-b43f-8735d1decf25_2048x1652.png 848w, https://substackcdn.com/image/fetch/$s_!lHFF!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6bbd31a7-21b4-4ff6-b43f-8735d1decf25_2048x1652.png 1272w, https://substackcdn.com/image/fetch/$s_!lHFF!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6bbd31a7-21b4-4ff6-b43f-8735d1decf25_2048x1652.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!lHFF!,w_2400,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6bbd31a7-21b4-4ff6-b43f-8735d1decf25_2048x1652.png" width="1200" height="967.5824175824176" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/6bbd31a7-21b4-4ff6-b43f-8735d1decf25_2048x1652.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;large&quot;,&quot;height&quot;:1174,&quot;width&quot;:1456,&quot;resizeWidth&quot;:1200,&quot;bytes&quot;:7661615,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-large" alt="" srcset="https://substackcdn.com/image/fetch/$s_!lHFF!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6bbd31a7-21b4-4ff6-b43f-8735d1decf25_2048x1652.png 424w, 
https://substackcdn.com/image/fetch/$s_!lHFF!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6bbd31a7-21b4-4ff6-b43f-8735d1decf25_2048x1652.png 848w, https://substackcdn.com/image/fetch/$s_!lHFF!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6bbd31a7-21b4-4ff6-b43f-8735d1decf25_2048x1652.png 1272w, https://substackcdn.com/image/fetch/$s_!lHFF!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6bbd31a7-21b4-4ff6-b43f-8735d1decf25_2048x1652.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" 
y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">An eagle, flying past a transformer-looking robot</figcaption></figure></div><h1>Eagle 7B - in short</h1><p>Eagle 7B is a 7.52B parameter model that:</p><ul><li><p>Is built on the <a href="https://wiki.rwkv.com">RWKV-v5 architecture</a><br>(a linear transformer with 10-100x+ lower inference cost)</p></li><li><p><a href="https://blog.rwkv.com/p/the-worlds-greenest-ai-model-rwkvs">Ranks as the world&#8217;s greenest 7B model (per token)</a></p></li><li><p>Is trained on 1.1 Trillion Tokens across 100+ languages</p></li><li><p>Outperforms all 7B class models in multi-lingual benchmarks</p></li><li><p>Approaches Falcon (1.5T), LLaMA2 (2T), Mistral (&gt;2T?) level of performance in English evals</p></li><li><p>Trades blows with MPT-7B (1T) in English evals</p></li><li><p><a href="https://www.isattentionallyouneed.com/">All while being an &#8220;Attention-Free Transformer&#8221;</a></p></li><li><p>Is a foundation model, with a very small instruct tune - further fine-tuning is required for various use cases!<br></p></li></ul><p>We are releasing RWKV-v5 Eagle 7B, <a href="https://blog.rwkv.com/p/rwkv-joins-the-linux-foundation-as">licensed under the Apache 2.0 license, under the Linux Foundation</a>, and it can be used personally or commercially without restrictions.</p><ul><li><p><a href="https://huggingface.co/RWKV/v5-Eagle-7B/blob/main/RWKV-v5-Eagle-World-7B-v2-20240128-ctx4096.pth">Download from Huggingface</a>, and use it anywhere (even locally)</p></li><li><p>Use our reference <a href="https://pypi.org/project/rwkv/">pip inference package</a>, or any of the other community inference options (<a href="https://github.com/josStorer/RWKV-Runner">Desktop App</a>, <a href="https://github.com/saharNooby/rwkv.cpp">RWKV.cpp</a>, <a href="https://wiki.rwkv.com/basic/play.html">etc</a>)</p></li><li><p><a href="https://github.com/RWKV/RWKV-infctx-trainer">Fine-tune using our 
Infctx trainer</a></p></li><li><p><a href="https://huggingface.co/spaces/BlinkDL/RWKV-Gradio-2">Try it online on Huggingface</a></p></li><li><p><a href="https://github.com/huggingface/transformers/pull/26963">[Pending PR] Get it merged into Huggingface transformers!</a></p><p></p></li></ul><h1>Multi-Lingual Performance details</h1><p>We measured multi-lingual performance across the following benchmarks: <a href="https://github.com/EleutherAI/lm-evaluation-harness?tab=readme-ov-file#advanced-usage-tips">xLAMBADA</a>, <a href="https://huggingface.co/datasets/Muennighoff/xstory_cloze">xStoryCloze</a>, <a href="https://huggingface.co/datasets/Muennighoff/xwinograd">xWinograd</a>, <a href="https://huggingface.co/datasets/xcopa">xCopa</a></p><p>Across a total of 23 languages</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!eh8Y!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F432a507a-d2a1-4741-b196-d34a411aa960_1462x442.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!eh8Y!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F432a507a-d2a1-4741-b196-d34a411aa960_1462x442.png 424w, https://substackcdn.com/image/fetch/$s_!eh8Y!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F432a507a-d2a1-4741-b196-d34a411aa960_1462x442.png 848w, https://substackcdn.com/image/fetch/$s_!eh8Y!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F432a507a-d2a1-4741-b196-d34a411aa960_1462x442.png 1272w, 
https://substackcdn.com/image/fetch/$s_!eh8Y!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F432a507a-d2a1-4741-b196-d34a411aa960_1462x442.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!eh8Y!,w_2400,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F432a507a-d2a1-4741-b196-d34a411aa960_1462x442.png" width="1200" height="362.6373626373626" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/432a507a-d2a1-4741-b196-d34a411aa960_1462x442.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;large&quot;,&quot;height&quot;:440,&quot;width&quot;:1456,&quot;resizeWidth&quot;:1200,&quot;bytes&quot;:185118,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-large" alt="" srcset="https://substackcdn.com/image/fetch/$s_!eh8Y!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F432a507a-d2a1-4741-b196-d34a411aa960_1462x442.png 424w, https://substackcdn.com/image/fetch/$s_!eh8Y!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F432a507a-d2a1-4741-b196-d34a411aa960_1462x442.png 848w, https://substackcdn.com/image/fetch/$s_!eh8Y!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F432a507a-d2a1-4741-b196-d34a411aa960_1462x442.png 1272w, 
https://substackcdn.com/image/fetch/$s_!eh8Y!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F432a507a-d2a1-4741-b196-d34a411aa960_1462x442.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!RacH!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F51648027-6abf-4b05-88c0-06175e3b9cb5_1739x519.png" data-component-name="Image2ToDOM"><div 
class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!RacH!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F51648027-6abf-4b05-88c0-06175e3b9cb5_1739x519.png 424w, https://substackcdn.com/image/fetch/$s_!RacH!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F51648027-6abf-4b05-88c0-06175e3b9cb5_1739x519.png 848w, https://substackcdn.com/image/fetch/$s_!RacH!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F51648027-6abf-4b05-88c0-06175e3b9cb5_1739x519.png 1272w, https://substackcdn.com/image/fetch/$s_!RacH!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F51648027-6abf-4b05-88c0-06175e3b9cb5_1739x519.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!RacH!,w_2400,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F51648027-6abf-4b05-88c0-06175e3b9cb5_1739x519.png" width="1200" height="358.5164835164835" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/51648027-6abf-4b05-88c0-06175e3b9cb5_1739x519.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;large&quot;,&quot;height&quot;:435,&quot;width&quot;:1456,&quot;resizeWidth&quot;:1200,&quot;bytes&quot;:129293,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-large" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!RacH!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F51648027-6abf-4b05-88c0-06175e3b9cb5_1739x519.png 424w, https://substackcdn.com/image/fetch/$s_!RacH!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F51648027-6abf-4b05-88c0-06175e3b9cb5_1739x519.png 848w, https://substackcdn.com/image/fetch/$s_!RacH!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F51648027-6abf-4b05-88c0-06175e3b9cb5_1739x519.png 1272w, https://substackcdn.com/image/fetch/$s_!RacH!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F51648027-6abf-4b05-88c0-06175e3b9cb5_1739x519.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" 
stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>Most of these benchmarks cover common sense reasoning in their respective languages, and show a huge overall jump in multi-lingual performance for the RWKV v4-to-v5 architecture and the v2 world dataset.</p><p>It should also be noted that there is a lack of multi-lingual benchmarks, as the above covers approximately the top 23 languages.</p><p>This makes it hard to evaluate model language performance directly over the remaining 75+ languages, out of the total 100+ trained languages - a shortcoming we hope to improve in future models.</p><h1>English Performance details</h1><p>English performance was measured across 12 separate benchmarks, covering commonsense reasoning and world knowledge.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!FcpW!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ea68f59-8f76-48ea-b4c6-15f292c8a46c_2559x441.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!FcpW!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ea68f59-8f76-48ea-b4c6-15f292c8a46c_2559x441.png 424w, https://substackcdn.com/image/fetch/$s_!FcpW!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ea68f59-8f76-48ea-b4c6-15f292c8a46c_2559x441.png 848w, 
https://substackcdn.com/image/fetch/$s_!FcpW!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ea68f59-8f76-48ea-b4c6-15f292c8a46c_2559x441.png 1272w, https://substackcdn.com/image/fetch/$s_!FcpW!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ea68f59-8f76-48ea-b4c6-15f292c8a46c_2559x441.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!FcpW!,w_2400,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ea68f59-8f76-48ea-b4c6-15f292c8a46c_2559x441.png" width="1200" height="206.86813186813185" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/6ea68f59-8f76-48ea-b4c6-15f292c8a46c_2559x441.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;large&quot;,&quot;height&quot;:251,&quot;width&quot;:1456,&quot;resizeWidth&quot;:1200,&quot;bytes&quot;:349737,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-large" alt="" srcset="https://substackcdn.com/image/fetch/$s_!FcpW!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ea68f59-8f76-48ea-b4c6-15f292c8a46c_2559x441.png 424w, https://substackcdn.com/image/fetch/$s_!FcpW!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ea68f59-8f76-48ea-b4c6-15f292c8a46c_2559x441.png 848w, 
https://substackcdn.com/image/fetch/$s_!FcpW!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ea68f59-8f76-48ea-b4c6-15f292c8a46c_2559x441.png 1272w, https://substackcdn.com/image/fetch/$s_!FcpW!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F6ea68f59-8f76-48ea-b4c6-15f292c8a46c_2559x441.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a></figure></div><p>Once again we see a huge overall jump from the RWKV v4-to-v5 architecture and the v2 world dataset.</p><p>Where v4 previously lost out to MPT-7b, the top model in the 1T token tier. </p><p>v5 begins trading blows in benchmarks, in some cases even coming out on top in certain benchmarks ( LAMBADA, StoryCloze16, WinoGrande, HeadQA_en, Sciq ) over Falcon, or even llama2.</p><p>In addition, v5 performance starts to fall in line with the expected transformer performance level, with its given approximate token training count.</p><p>With Mistral-7B maintaining its lead with its rumored 2~7 Trillion token training.</p><p>We expect to narrow the gap, as we train an additional 1T tokens, to cross the llama2 line and hopefully reach the mistral line.</p><p>Alternatively, as a base model, which is lightly tuned (really small instruct set mixed in), we are eager to see how the various community and instruct-tuned variants perform.</p><div><hr></div><h1>Perhaps a good dataset + Scalable architecture:<br>is all you need?</h1><p>A notable observation was that our checkpoints near the 300 Billion token point show similar performance to pythia-6.9b.</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!_0Q3!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F97b32c55-9a16-4a70-9e1e-8d2370721660_988x268.png" 
data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!_0Q3!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F97b32c55-9a16-4a70-9e1e-8d2370721660_988x268.png 424w, https://substackcdn.com/image/fetch/$s_!_0Q3!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F97b32c55-9a16-4a70-9e1e-8d2370721660_988x268.png 848w, https://substackcdn.com/image/fetch/$s_!_0Q3!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F97b32c55-9a16-4a70-9e1e-8d2370721660_988x268.png 1272w, https://substackcdn.com/image/fetch/$s_!_0Q3!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F97b32c55-9a16-4a70-9e1e-8d2370721660_988x268.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!_0Q3!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F97b32c55-9a16-4a70-9e1e-8d2370721660_988x268.png" width="988" height="268" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/97b32c55-9a16-4a70-9e1e-8d2370721660_988x268.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:268,&quot;width&quot;:988,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:85110,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" 
srcset="https://substackcdn.com/image/fetch/$s_!_0Q3!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F97b32c55-9a16-4a70-9e1e-8d2370721660_988x268.png 424w, https://substackcdn.com/image/fetch/$s_!_0Q3!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F97b32c55-9a16-4a70-9e1e-8d2370721660_988x268.png 848w, https://substackcdn.com/image/fetch/$s_!_0Q3!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F97b32c55-9a16-4a70-9e1e-8d2370721660_988x268.png 1272w, https://substackcdn.com/image/fetch/$s_!_0Q3!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F97b32c55-9a16-4a70-9e1e-8d2370721660_988x268.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" 
stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>This is consistent with previous Pile-based experiments on our RWKV-v4 architecture, which showed that linear transformers like RWKV scale similarly in performance levels to transformers, with the same token count training.</p><p>If so, it raises the question again: does the exact architecture matter less than the data for the model's eval performance?</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!vG8-!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff819aa3e-d559-41a9-879a-2a157c323787_616x463.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!vG8-!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff819aa3e-d559-41a9-879a-2a157c323787_616x463.png 424w, https://substackcdn.com/image/fetch/$s_!vG8-!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff819aa3e-d559-41a9-879a-2a157c323787_616x463.png 848w, https://substackcdn.com/image/fetch/$s_!vG8-!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff819aa3e-d559-41a9-879a-2a157c323787_616x463.png 1272w, https://substackcdn.com/image/fetch/$s_!vG8-!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff819aa3e-d559-41a9-879a-2a157c323787_616x463.png 
1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!vG8-!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff819aa3e-d559-41a9-879a-2a157c323787_616x463.png" width="616" height="463" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/f819aa3e-d559-41a9-879a-2a157c323787_616x463.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:463,&quot;width&quot;:616,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:61048,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!vG8-!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff819aa3e-d559-41a9-879a-2a157c323787_616x463.png 424w, https://substackcdn.com/image/fetch/$s_!vG8-!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff819aa3e-d559-41a9-879a-2a157c323787_616x463.png 848w, https://substackcdn.com/image/fetch/$s_!vG8-!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff819aa3e-d559-41a9-879a-2a157c323787_616x463.png 1272w, https://substackcdn.com/image/fetch/$s_!vG8-!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Ff819aa3e-d559-41a9-879a-2a157c323787_616x463.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" 
class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">CUDA computational cost, for RWKV-based architecture vs transformer models - that quadratic-vs-linear really scales!</figcaption></figure></div><p>If true, perhaps we should seek more efficient and scalable architecture, to increase accessibility, drive the cost of AI downwards for everyone, and <a href="https://blog.rwkv.com/p/the-worlds-greenest-ai-model-rwkvs">lessen the impact on our environment.</a></p><div><hr></div><h1>Building inclusive AI for everyone in this world - not just the English</h1><p>A common feedback we receive for the RWKV multi-lingual approach is</p><ul><li><p>it hurts our English evaluation scores and slows the growth of linear transformers </p></li><li><p>that it is not fair to compare the 
multi-lingual performance of a multi-lingual model -vs- a purely English model</p></li></ul><p>And for most parts, we agree on both points.</p><p>But we have no plans on changing this, as we are building AI for the world - which is not just an English world.</p><p><a href="https://preply.com/en/blog/english-language-statistics/#:~:text=Current%20research%20suggests%20that%20the,widely%20spoken%20language%20in%202022%3F">In 2023, only 17% of the world's population speaks English</a><br><a href="https://preply.com/en/blog/english-language-statistics/#:~:text=Current%20research%20suggests%20that%20the,widely%20spoken%20language%20in%202022%3F">( 1.3 billion people ) </a></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!bKV7!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F731bdcef-10fa-4394-8edf-5448279d9e78_3002x1551.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!bKV7!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F731bdcef-10fa-4394-8edf-5448279d9e78_3002x1551.png 424w, https://substackcdn.com/image/fetch/$s_!bKV7!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F731bdcef-10fa-4394-8edf-5448279d9e78_3002x1551.png 848w, https://substackcdn.com/image/fetch/$s_!bKV7!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F731bdcef-10fa-4394-8edf-5448279d9e78_3002x1551.png 1272w, 
https://substackcdn.com/image/fetch/$s_!bKV7!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F731bdcef-10fa-4394-8edf-5448279d9e78_3002x1551.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!bKV7!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F731bdcef-10fa-4394-8edf-5448279d9e78_3002x1551.png" width="1456" height="752" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/731bdcef-10fa-4394-8edf-5448279d9e78_3002x1551.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:752,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:604186,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!bKV7!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F731bdcef-10fa-4394-8edf-5448279d9e78_3002x1551.png 424w, https://substackcdn.com/image/fetch/$s_!bKV7!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F731bdcef-10fa-4394-8edf-5448279d9e78_3002x1551.png 848w, https://substackcdn.com/image/fetch/$s_!bKV7!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F731bdcef-10fa-4394-8edf-5448279d9e78_3002x1551.png 1272w, 
https://substackcdn.com/image/fetch/$s_!bKV7!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F731bdcef-10fa-4394-8edf-5448279d9e78_3002x1551.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">World Map showing the distribution of regions and people who are fluent in English (source: <a href="https://en.wikipedia.org/wiki/List_of_countries_by_English-speaking_population">Wikipedia</a>)</figcaption></figure></div><p>However, by ensuring support for the top 25 languages in the world and beyond, we can cover approximately <a 
href="https://en.wikipedia.org/wiki/List_of_languages_by_number_of_native_speakers#Top_languages_by_population">4 billion people, or 50% of the world</a></p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!ycrf!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7cac37b8-7e37-4b83-8c10-3e2364562c6b_6460x3401.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!ycrf!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7cac37b8-7e37-4b83-8c10-3e2364562c6b_6460x3401.png 424w, https://substackcdn.com/image/fetch/$s_!ycrf!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7cac37b8-7e37-4b83-8c10-3e2364562c6b_6460x3401.png 848w, https://substackcdn.com/image/fetch/$s_!ycrf!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7cac37b8-7e37-4b83-8c10-3e2364562c6b_6460x3401.png 1272w, https://substackcdn.com/image/fetch/$s_!ycrf!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7cac37b8-7e37-4b83-8c10-3e2364562c6b_6460x3401.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!ycrf!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7cac37b8-7e37-4b83-8c10-3e2364562c6b_6460x3401.png" width="1456" height="767" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/7cac37b8-7e37-4b83-8c10-3e2364562c6b_6460x3401.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:767,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:2010270,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!ycrf!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7cac37b8-7e37-4b83-8c10-3e2364562c6b_6460x3401.png 424w, https://substackcdn.com/image/fetch/$s_!ycrf!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7cac37b8-7e37-4b83-8c10-3e2364562c6b_6460x3401.png 848w, https://substackcdn.com/image/fetch/$s_!ycrf!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7cac37b8-7e37-4b83-8c10-3e2364562c6b_6460x3401.png 1272w, https://substackcdn.com/image/fetch/$s_!ycrf!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F7cac37b8-7e37-4b83-8c10-3e2364562c6b_6460x3401.png 1456w" sizes="100vw" loading="lazy"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Flawed map, highlighting the regions the Eagle language model will support entirely or partially - the goal is to be able to paint the whole map green with confidence</figcaption></figure></div><p>This aligns well with the team&#8217;s common goal, of getting AI to support everyone, not just by allowing it to run cheaply and affordably even on lower-end hardware. 
But by supporting their language.</p><p>Over time, we intend to grow the multi-lingual dataset, to support a wider variety of languages, and to slowly grow that coverage to 100% of the world - to ensure no language gets left behind.</p><div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!fse8!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ad5b93b-9738-4ce7-93f5-0f4f18ef3bbf_472x225.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!fse8!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ad5b93b-9738-4ce7-93f5-0f4f18ef3bbf_472x225.png 424w, https://substackcdn.com/image/fetch/$s_!fse8!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ad5b93b-9738-4ce7-93f5-0f4f18ef3bbf_472x225.png 848w, https://substackcdn.com/image/fetch/$s_!fse8!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ad5b93b-9738-4ce7-93f5-0f4f18ef3bbf_472x225.png 1272w, https://substackcdn.com/image/fetch/$s_!fse8!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ad5b93b-9738-4ce7-93f5-0f4f18ef3bbf_472x225.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!fse8!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ad5b93b-9738-4ce7-93f5-0f4f18ef3bbf_472x225.png" width="472" height="225" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/5ad5b93b-9738-4ce7-93f5-0f4f18ef3bbf_472x225.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:225,&quot;width&quot;:472,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:23370,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:true,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!fse8!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ad5b93b-9738-4ce7-93f5-0f4f18ef3bbf_472x225.png 424w, https://substackcdn.com/image/fetch/$s_!fse8!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ad5b93b-9738-4ce7-93f5-0f4f18ef3bbf_472x225.png 848w, https://substackcdn.com/image/fetch/$s_!fse8!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ad5b93b-9738-4ce7-93f5-0f4f18ef3bbf_472x225.png 1272w, https://substackcdn.com/image/fetch/$s_!fse8!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F5ad5b93b-9738-4ce7-93f5-0f4f18ef3bbf_472x225.png 1456w" sizes="100vw" loading="lazy"></picture><div></div></div></a><figcaption class="image-caption">The RWKV discord community today grew due to our low inference cost, and its wide range of support for various languages.<a href="https://discord.com/invite/T5JGfMvWA5">(https://discord.com/invite/T5JGfMvWA5)</a></figcaption></figure></div><p>A major example of this in our community is the <a href="https://discord.gg/dy9YWXjV">Indonesian-NLP 
discord group</a>, which finetunes an Indonesian language model from the RWKV line of base models.</p><p>Allowing them to build strong language-specific models - on a cheap affordable basis (ie. single node), without needing to do half a million dollars of pre-training.</p><div><hr></div><h1>Future Plans</h1><p>This release marks the release of the strongest linear transformer (in terms of eval benchmarks) to date. </p><p>While it may not have succeeded in passing LLaMA2 and Mistral, it provides strong evidence of the following:</p><ul><li><p>The RWKV-v5 model architecture scales similarly to transformer performance with a similar token count</p></li><li><p>You can achieve a near LLaMA2-like level of performance, with a substantially lower inference cost</p></li><li><p>While supporting multi-lingual levels of performance<br></p></li></ul><p>We plan to follow by pushing further ahead with</p><ul><li><p>[Feb 2024] An updated RWKV v5: Eagle paper, where we will go deeper in-depth on the architecture changes since v4, and the model benchmarks and evals</p></li><li><p>[Feb 2024] A further 1T token in training (2T total), for direct comparisons with the LLaMA2 7B model</p></li><li><p>[Mar 2024] An MoE model based on the v5 Eagle 2T model</p></li><li><p>[Mar 2024] RWKV-v6: &#8220;Finch&#8221; 1.5B, 3B world models</p></li></ul><blockquote><p>Disclaimer: All dates are approximate, and are heavily subject to compute availability from our sponsors/providers</p></blockquote><p>Find more about the RWKV Project at </p><ul><li><p>Wiki: <a href="https://wiki.rwkv.com/">https://wiki.rwkv.com/</a></p></li><li><p>Discord: <a href="https://discord.gg/bDSBUMeFpc">https://discord.gg/bDSBUMeFpc</a></p></li></ul><div><hr></div><h1>Acknowledgment</h1><p>We are grateful and would like to thank the following key groups:</p><ul><li><p><a href="https://stability.ai/">StabilityAI</a> for the bulk of the computing provided to train this foundation model</p></li><li><p><a 
href="https://www.eleuther.ai/">EleutherAI</a> for their support, especially in the ongoing paper-writing process</p></li><li><p><a href="https://lfaidata.foundation/">Linux Foundation AI &amp; Data group</a> for supporting and hosting the RWKV project</p></li></ul><p>Along with the various developers, working on the growing collection of <a href="https://wiki.rwkv.com">RWKV-related projects</a>.</p><div class="subscription-widget-wrap-editor" data-attrs="{&quot;url&quot;:&quot;https://blog.rwkv.com/subscribe?&quot;,&quot;text&quot;:&quot;Subscribe&quot;,&quot;language&quot;:&quot;en&quot;}" data-component-name="SubscribeWidgetToDOM"><div class="subscription-widget show-subscribe"><div class="preamble"><p class="cta-caption">Thanks for reading RWKV Open Source Development Blog! Subscribe for free to receive new posts and support my work.</p></div><form class="subscription-widget-subscribe"><input type="email" class="email-input" name="email" placeholder="Type your email&#8230;" tabindex="-1"><input type="submit" class="button primary" value="Subscribe"><div class="fake-input-wrapper"><div class="fake-input"></div><div class="fake-button"></div></div></form></div></div>]]></content:encoded></item><item><title><![CDATA[🌳 The World's Greenest AI Model: RWKV's Pioneering Sustainability]]></title><description><![CDATA[10-100x lower inference cost = lower carbon footprint]]></description><link>https://blog.rwkv.com/p/the-worlds-greenest-ai-model-rwkvs</link><guid isPermaLink="false">https://blog.rwkv.com/p/the-worlds-greenest-ai-model-rwkvs</guid><dc:creator><![CDATA[RWKV]]></dc:creator><pubDate>Sun, 28 Jan 2024 19:35:32 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!QxrX!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F93c2d6b5-794b-4167-b012-2dfedef82fd3_1592x1150.png" length="0" type="image/jpeg"/><content:encoded><![CDATA[<p>With the rapidly growing usage of AI models worldwide, and 
the threat of global warming. The need for a greener AI model to reduce our carbon footprint is more important than ever.</p><p>In that regard, we are proud to say that <a href="https://wiki.rwkv.com/">RWKV</a> (the model which our team works on), has been <a href="https://ml.energy/leaderboard/">independently benchmarked</a> as the world's greenest and most energy-efficient AI model/architecture, on a per token output basis, for models of the same param sizes (7B params).</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!QxrX!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F93c2d6b5-794b-4167-b012-2dfedef82fd3_1592x1150.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!QxrX!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F93c2d6b5-794b-4167-b012-2dfedef82fd3_1592x1150.png 424w, https://substackcdn.com/image/fetch/$s_!QxrX!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F93c2d6b5-794b-4167-b012-2dfedef82fd3_1592x1150.png 848w, https://substackcdn.com/image/fetch/$s_!QxrX!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F93c2d6b5-794b-4167-b012-2dfedef82fd3_1592x1150.png 1272w, https://substackcdn.com/image/fetch/$s_!QxrX!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F93c2d6b5-794b-4167-b012-2dfedef82fd3_1592x1150.png 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!QxrX!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F93c2d6b5-794b-4167-b012-2dfedef82fd3_1592x1150.png" width="1456" height="1052" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/93c2d6b5-794b-4167-b012-2dfedef82fd3_1592x1150.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:1052,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:175404,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!QxrX!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F93c2d6b5-794b-4167-b012-2dfedef82fd3_1592x1150.png 424w, https://substackcdn.com/image/fetch/$s_!QxrX!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F93c2d6b5-794b-4167-b012-2dfedef82fd3_1592x1150.png 848w, https://substackcdn.com/image/fetch/$s_!QxrX!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F93c2d6b5-794b-4167-b012-2dfedef82fd3_1592x1150.png 1272w, https://substackcdn.com/image/fetch/$s_!QxrX!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F93c2d6b5-794b-4167-b012-2dfedef82fd3_1592x1150.png 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" 
class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Benchmarks were done for the 7B weight class at: <a href="https://ml.energy/leaderboard/">https://ml.energy/leaderboard/</a></figcaption></figure></div><p>The energy efficiency of the RWKV architecture is derived from the 10-100 times compute efficiency of our linear transformer architecture vs the quadratic scaling of transformer architectures. 
A benefit we expect to scale better as our models get large</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!JddR!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc3670048-19d2-4da6-94f4-0866a5a38640_616x463.png" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!JddR!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc3670048-19d2-4da6-94f4-0866a5a38640_616x463.png 424w, https://substackcdn.com/image/fetch/$s_!JddR!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc3670048-19d2-4da6-94f4-0866a5a38640_616x463.png 848w, https://substackcdn.com/image/fetch/$s_!JddR!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc3670048-19d2-4da6-94f4-0866a5a38640_616x463.png 1272w, https://substackcdn.com/image/fetch/$s_!JddR!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc3670048-19d2-4da6-94f4-0866a5a38640_616x463.png 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!JddR!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc3670048-19d2-4da6-94f4-0866a5a38640_616x463.png" width="616" height="463" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/c3670048-19d2-4da6-94f4-0866a5a38640_616x463.png&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:463,&quot;width&quot;:616,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:61048,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/png&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:false,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!JddR!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc3670048-19d2-4da6-94f4-0866a5a38640_616x463.png 424w, https://substackcdn.com/image/fetch/$s_!JddR!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc3670048-19d2-4da6-94f4-0866a5a38640_616x463.png 848w, https://substackcdn.com/image/fetch/$s_!JddR!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc3670048-19d2-4da6-94f4-0866a5a38640_616x463.png 1272w, https://substackcdn.com/image/fetch/$s_!JddR!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fc3670048-19d2-4da6-94f4-0866a5a38640_616x463.png 1456w" sizes="100vw"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" xmlns="http://www.w3.org/2000/svg"><g><title></title><path 
d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a><figcaption class="image-caption">Graph is taken from the <a href="https://arxiv.org/abs/2305.13048">RWKV-v4 paper</a>: 5x Cheaper compute at 1k tokens, 10x Cheaper compute at 2k tokens, 100x+ Cheaper beyond 20k tokens</figcaption></figure></div><p>Combined with how RWKV models scale similarly to transformers in evals, against other models with the same dataset.</p><p>The industry wide benefits for scaling more energy efficient architecture, like RWKV, will be significant for our industry as a whole.</p><blockquote><p>Towards a future with more, not fewer, alternatives to AI, <br>with the various unique benefits each architecture will bring us.</p></blockquote><blockquote><p>Repost note: This is a repost of a <a href="https://substack.recursal.ai/p/the-worlds-greenest-ai-model-rwkvs">past blogpost</a>, prior to the setup of this blog</p></blockquote>]]></content:encoded></item><item><title><![CDATA[🐣 RWKV v5 1.5B - Achieves SOTA multi-lingual performance]]></title><description><![CDATA[The best AI model in the smol <2B param weight class has 
arrived]]></description><link>https://blog.rwkv.com/p/rwkv-v5-15b-achieves-sota-multi-lingual</link><guid isPermaLink="false">https://blog.rwkv.com/p/rwkv-v5-15b-achieves-sota-multi-lingual</guid><pubDate>Tue, 23 Jan 2024 21:55:46 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!8gia!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe783ea60-ad51-4c63-98bf-7d712a48aa4b_1456x307.webp" length="0" type="image/jpeg"/><content:encoded><![CDATA[<div class="captioned-image-container"><figure><a class="image-link image2" target="_blank" href="https://substackcdn.com/image/fetch/$s_!8gia!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe783ea60-ad51-4c63-98bf-7d712a48aa4b_1456x307.webp" data-component-name="Image2ToDOM"><div class="image2-inset image2-full-screen"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!8gia!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe783ea60-ad51-4c63-98bf-7d712a48aa4b_1456x307.webp 424w, https://substackcdn.com/image/fetch/$s_!8gia!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe783ea60-ad51-4c63-98bf-7d712a48aa4b_1456x307.webp 848w, https://substackcdn.com/image/fetch/$s_!8gia!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe783ea60-ad51-4c63-98bf-7d712a48aa4b_1456x307.webp 1272w, https://substackcdn.com/image/fetch/$s_!8gia!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe783ea60-ad51-4c63-98bf-7d712a48aa4b_1456x307.webp 1456w" sizes="100vw"><img 
src="https://substackcdn.com/image/fetch/$s_!8gia!,w_5760,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe783ea60-ad51-4c63-98bf-7d712a48aa4b_1456x307.webp" data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/e783ea60-ad51-4c63-98bf-7d712a48aa4b_1456x307.webp&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:false,&quot;imageSize&quot;:&quot;full&quot;,&quot;height&quot;:307,&quot;width&quot;:1456,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:128674,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/webp&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-fullscreen" alt="" srcset="https://substackcdn.com/image/fetch/$s_!8gia!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe783ea60-ad51-4c63-98bf-7d712a48aa4b_1456x307.webp 424w, https://substackcdn.com/image/fetch/$s_!8gia!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe783ea60-ad51-4c63-98bf-7d712a48aa4b_1456x307.webp 848w, https://substackcdn.com/image/fetch/$s_!8gia!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe783ea60-ad51-4c63-98bf-7d712a48aa4b_1456x307.webp 1272w, https://substackcdn.com/image/fetch/$s_!8gia!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2Fe783ea60-ad51-4c63-98bf-7d712a48aa4b_1456x307.webp 1456w" sizes="100vw" fetchpriority="high"></picture><div></div></div></a></figure></div><p>RWKV v5 1.5B achieves SOTA status with</p><ul><li><p>Industry leading multi-lingual performance 
(across xLBD, xSC, xWG, xCOPA benchmarks) by significant margins, against all existing models</p></li><li><p>Comparable performance to falcon-rw-1b in English-based benchmarks </p><ul><li><p>We win out in LAMBADA, StoryCloze16, arc_challenge, arc_easy, headQA_en, openbookQA, sciq, COPA </p></li><li><p>but loses out very slightly on PIQA, Hellaswag, WinoGrande, ReCoRD, COPA</p></li></ul></li></ul><p>For nearly all use cases under the 2B param model class, RWKV V5 now represents either the best model for multi-lingual use, or a tied 1st place model with falcon-rw-1b</p><p>Making this a strong default model of choice within its weight class.</p><p>A pattern we intend to repeat in the 3, 7, and 14B weight classes respectively. We expect the 3B model to be out by the first week of December.</p><div><hr></div><p>You can access the model today via the following options</p><ul><li><p>Public Demo: <a href="https://huggingface.co/spaces/BlinkDL/ChatRWKV-gradio">https://huggingface.co/spaces/BlinkDL/ChatRWKV-gradio</a></p></li></ul><ul><li><p>Model Download: <a href="https://huggingface.co/BlinkDL/rwkv-5-world/tree/main">https://huggingface.co/BlinkDL/rwkv-5-world/tree/main</a></p></li></ul><blockquote><p>This is a repost of a past event, prior to the setup of this blog</p></blockquote>]]></content:encoded></item><item><title><![CDATA[🏘️ RWKV joins the Linux Foundation - As the first AI model under the Generative AI Commons]]></title><description><![CDATA[Putting the "Open Source" into "Open AI"]]></description><link>https://blog.rwkv.com/p/rwkv-joins-the-linux-foundation-as</link><guid isPermaLink="false">https://blog.rwkv.com/p/rwkv-joins-the-linux-foundation-as</guid><pubDate>Tue, 23 Jan 2024 21:51:51 GMT</pubDate><enclosure url="https://substackcdn.com/image/fetch/$s_!C5Z-!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2f4a9428-1ba2-4e1a-b990-4ed26ec5b5d9_854x709.webp" length="0" 
type="image/jpeg"/><content:encoded><![CDATA[<p>RWKV is the world's first open source AI model to join the Linux Foundation.</p><p>Ensuring that RWKV continues to grow as a true OSS model (just the Apache 2 license), by the community, for the world.</p><p>Thanks <a href="https://twitter.com/LFAIDataFdn">@LFAIDataFdn</a> for welcoming us on board at OSS Summit</p><div class="captioned-image-container"><figure><a class="image-link image2 is-viewable-img" target="_blank" href="https://substackcdn.com/image/fetch/$s_!C5Z-!,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2f4a9428-1ba2-4e1a-b990-4ed26ec5b5d9_854x709.webp" data-component-name="Image2ToDOM"><div class="image2-inset"><picture><source type="image/webp" srcset="https://substackcdn.com/image/fetch/$s_!C5Z-!,w_424,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2f4a9428-1ba2-4e1a-b990-4ed26ec5b5d9_854x709.webp 424w, https://substackcdn.com/image/fetch/$s_!C5Z-!,w_848,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2f4a9428-1ba2-4e1a-b990-4ed26ec5b5d9_854x709.webp 848w, https://substackcdn.com/image/fetch/$s_!C5Z-!,w_1272,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2f4a9428-1ba2-4e1a-b990-4ed26ec5b5d9_854x709.webp 1272w, https://substackcdn.com/image/fetch/$s_!C5Z-!,w_1456,c_limit,f_webp,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2f4a9428-1ba2-4e1a-b990-4ed26ec5b5d9_854x709.webp 1456w" sizes="100vw"><img src="https://substackcdn.com/image/fetch/$s_!C5Z-!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2f4a9428-1ba2-4e1a-b990-4ed26ec5b5d9_854x709.webp" width="854" height="709" 
data-attrs="{&quot;src&quot;:&quot;https://substack-post-media.s3.amazonaws.com/public/images/2f4a9428-1ba2-4e1a-b990-4ed26ec5b5d9_854x709.webp&quot;,&quot;srcNoWatermark&quot;:null,&quot;fullscreen&quot;:null,&quot;imageSize&quot;:null,&quot;height&quot;:709,&quot;width&quot;:854,&quot;resizeWidth&quot;:null,&quot;bytes&quot;:67960,&quot;alt&quot;:null,&quot;title&quot;:null,&quot;type&quot;:&quot;image/webp&quot;,&quot;href&quot;:null,&quot;belowTheFold&quot;:false,&quot;topImage&quot;:true,&quot;internalRedirect&quot;:null,&quot;isProcessing&quot;:false,&quot;align&quot;:null,&quot;offset&quot;:false}" class="sizing-normal" alt="" srcset="https://substackcdn.com/image/fetch/$s_!C5Z-!,w_424,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2f4a9428-1ba2-4e1a-b990-4ed26ec5b5d9_854x709.webp 424w, https://substackcdn.com/image/fetch/$s_!C5Z-!,w_848,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2f4a9428-1ba2-4e1a-b990-4ed26ec5b5d9_854x709.webp 848w, https://substackcdn.com/image/fetch/$s_!C5Z-!,w_1272,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2f4a9428-1ba2-4e1a-b990-4ed26ec5b5d9_854x709.webp 1272w, https://substackcdn.com/image/fetch/$s_!C5Z-!,w_1456,c_limit,f_auto,q_auto:good,fl_progressive:steep/https%3A%2F%2Fsubstack-post-media.s3.amazonaws.com%2Fpublic%2Fimages%2F2f4a9428-1ba2-4e1a-b990-4ed26ec5b5d9_854x709.webp 1456w" sizes="100vw" fetchpriority="high"></picture><div class="image-link-expand"><div class="pencraft pc-display-flex pc-gap-8 pc-reset"><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container restack-image"><svg role="img" width="20" height="20" viewBox="0 0 20 20" fill="none" stroke-width="1.5" stroke="var(--color-fg-primary)" stroke-linecap="round" stroke-linejoin="round" 
xmlns="http://www.w3.org/2000/svg"><g><title></title><path d="M2.53001 7.81595C3.49179 4.73911 6.43281 2.5 9.91173 2.5C13.1684 2.5 15.9537 4.46214 17.0852 7.23684L17.6179 8.67647M17.6179 8.67647L18.5002 4.26471M17.6179 8.67647L13.6473 6.91176M17.4995 12.1841C16.5378 15.2609 13.5967 17.5 10.1178 17.5C6.86118 17.5 4.07589 15.5379 2.94432 12.7632L2.41165 11.3235M2.41165 11.3235L1.5293 15.7353M2.41165 11.3235L6.38224 13.0882"></path></g></svg></button><button tabindex="0" type="button" class="pencraft pc-reset pencraft icon-container view-image"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-maximize2 lucide-maximize-2"><polyline points="15 3 21 3 21 9"></polyline><polyline points="9 21 3 21 3 15"></polyline><line x1="21" x2="14" y1="3" y2="10"></line><line x1="3" x2="10" y1="21" y2="14"></line></svg></button></div></div></div></a></figure></div><p>More information can be found here on the official press briefing: <br><a href="https://lfaidata.foundation/blog/2023/09/21/lf-ai-data-launches-generative-ai-commons/">https://lfaidata.foundation/blog/2023/09/21/lf-ai-data-launches-generative-ai-commons/</a></p><blockquote><p>This is a repost of a past event, prior to the setup of this blog</p></blockquote><p></p>]]></content:encoded></item></channel></rss>