<?xml version="1.0"?>
<dblpperson name="Nolan Dey" pid="263/9353" n="14">
<person key="homepages/263/9353" mdate="2025-05-13">
<author pid="263/9353">Nolan Dey</author>
<author pid="263/9353">Nolan S. Dey</author>
<author pid="263/9353">Nolan Simran Dey</author>
</person>
<r><article key="journals/tmlr/DeyTWTT25" mdate="2025-06-20">
<author pid="263/9353">Nolan Simran Dey</author>
<author pid="278/2473">J. Eric Taylor</author>
<author pid="52/4401">Alexander Wong</author>
<author pid="52/8763">Bryan P. Tripp</author>
<author pid="17/1633">Graham W. Taylor</author>
<title>Neuron-based explanations of neural networks sacrifice completeness and interpretability.</title>
<year>2025</year>
<volume>2025</volume>
<journal>Trans. Mach. Learn. Res.</journal>
<ee type="oa">https://openreview.net/forum?id=UWNa9Pv6qA</ee>
<url>db/journals/tmlr/tmlr2025.html#DeyTWTT25</url>
</article>
</r>
<r><inproceedings key="conf/iclr/BergsmaDGGSH25" mdate="2025-05-15">
<author pid="57/2540">Shane Bergsma</author>
<author pid="263/9353">Nolan Simran Dey</author>
<author pid="344/4400">Gurpreet Gosal</author>
<author pid="392/4399">Gavia Gray</author>
<author pid="277/0944">Daria Soboleva</author>
<author pid="60/3063">Joel Hestness</author>
<title>Straight to Zero: Why Linearly Decaying the Learning Rate to Zero Works Best for LLMs.</title>
<year>2025</year>
<booktitle>ICLR</booktitle>
<ee type="oa">https://openreview.net/forum?id=hrOlBgHsMI</ee>
<crossref>conf/iclr/2025</crossref>
<url>db/conf/iclr/iclr2025.html#BergsmaDGGSH25</url>
</inproceedings>
</r>
<r><article publtype="informal" key="journals/corr/abs-2502-15938" mdate="2025-03-20">
<author pid="57/2540">Shane Bergsma</author>
<author pid="263/9353">Nolan Dey</author>
<author pid="344/4400">Gurpreet Gosal</author>
<author pid="392/4399">Gavia Gray</author>
<author pid="277/0944">Daria Soboleva</author>
<author pid="60/3063">Joel Hestness</author>
<title>Straight to Zero: Why Linearly Decaying the Learning Rate to Zero Works Best for LLMs.</title>
<year>2025</year>
<month>February</month>
<volume>abs/2502.15938</volume>
<journal>CoRR</journal>
<ee type="oa">https://doi.org/10.48550/arXiv.2502.15938</ee>
<url>db/journals/corr/corr2502.html#abs-2502-15938</url>
<stream>streams/journals/corr</stream>
</article>
</r>
<r><article publtype="informal" key="journals/corr/abs-2505-01618" mdate="2025-10-14">
<author pid="263/9353">Nolan Dey</author>
<author pid="407/6648">Bin Claire Zhang</author>
<author pid="268/6839">Lorenzo Noci</author>
<author pid="350/5111">Mufan Bill Li</author>
<author pid="228/6993">Blake Bordelon</author>
<author pid="57/2540">Shane Bergsma</author>
<author pid="145/3480">Cengiz Pehlevan</author>
<author orcid="0000-0003-1614-0131" pid="205/2534">Boris Hanin</author>
<author pid="60/3063">Joel Hestness</author>
<title>Don't be lazy: CompleteP enables compute-efficient deep transformers.</title>
<year>2025</year>
<month>May</month>
<volume>abs/2505.01618</volume>
<journal>CoRR</journal>
<ee type="oa">https://doi.org/10.48550/arXiv.2505.01618</ee>
<url>db/journals/corr/corr2505.html#abs-2505-01618</url>
<stream>streams/journals/corr</stream>
</article>
</r>
<r><article publtype="informal" key="journals/corr/abs-2505-13738" mdate="2025-06-25">
<author pid="57/2540">Shane Bergsma</author>
<author pid="263/9353">Nolan Dey</author>
<author pid="344/4400">Gurpreet Gosal</author>
<author pid="392/4399">Gavia Gray</author>
<author pid="277/0944">Daria Soboleva</author>
<author pid="60/3063">Joel Hestness</author>
<title>Power Lines: Scaling Laws for Weight Decay and Batch Size in LLM Pre-training.</title>
<year>2025</year>
<month>May</month>
<volume>abs/2505.13738</volume>
<journal>CoRR</journal>
<ee type="oa">https://doi.org/10.48550/arXiv.2505.13738</ee>
<url>db/journals/corr/corr2505.html#abs-2505-13738</url>
<stream>streams/journals/corr</stream>
</article>
</r>
<r><article publtype="informal" key="journals/corr/abs-2509-25087" mdate="2025-10-21">
<author pid="57/2540">Shane Bergsma</author>
<author pid="407/6648">Bin Claire Zhang</author>
<author pid="263/9353">Nolan Dey</author>
<author pid="206/2867">Shaheer Muhammad</author>
<author pid="344/4400">Gurpreet Gosal</author>
<author pid="60/3063">Joel Hestness</author>
<title>Scaling with Collapse: Efficient and Predictable Training of LLM Families.</title>
<year>2025</year>
<month>September</month>
<volume>abs/2509.25087</volume>
<journal>CoRR</journal>
<ee type="oa">https://doi.org/10.48550/arXiv.2509.25087</ee>
<url>db/journals/corr/corr2509.html#abs-2509-25087</url>
<stream>streams/journals/corr</stream>
</article>
</r>
<r><article publtype="informal" key="journals/corr/abs-2509-25380" mdate="2025-10-21">
<author pid="57/2540">Shane Bergsma</author>
<author pid="263/9353">Nolan Dey</author>
<author pid="60/3063">Joel Hestness</author>
<title>Predicting Training Re-evaluation Curves Enables Effective Data Curriculums for LLMs.</title>
<year>2025</year>
<month>September</month>
<volume>abs/2509.25380</volume>
<journal>CoRR</journal>
<ee type="oa">https://doi.org/10.48550/arXiv.2509.25380</ee>
<url>db/journals/corr/corr2509.html#abs-2509-25380</url>
<stream>streams/journals/corr</stream>
</article>
</r>
<r><inproceedings key="conf/nips/DeyBH24" mdate="2025-02-13">
<author pid="263/9353">Nolan Dey</author>
<author pid="57/2540">Shane Bergsma</author>
<author pid="60/3063">Joel Hestness</author>
<title>Sparse maximal update parameterization: A holistic approach to sparse training dynamics.</title>
<year>2024</year>
<booktitle>NeurIPS</booktitle>
<ee type="oa">http://papers.nips.cc/paper_files/paper/2024/hash/3b6aaffec941f98930753fa6d6de7263-Abstract-Conference.html</ee>
<crossref>conf/nips/2024</crossref>
<url>db/conf/nips/neurips2024.html#DeyBH24</url>
</inproceedings>
</r>
<r><article publtype="informal" key="journals/corr/abs-2405-15743" mdate="2024-06-19">
<author pid="263/9353">Nolan Dey</author>
<author pid="57/2540">Shane Bergsma</author>
<author pid="60/3063">Joel Hestness</author>
<title>Sparse maximal update parameterization: A holistic approach to sparse training dynamics.</title>
<year>2024</year>
<volume>abs/2405.15743</volume>
<journal>CoRR</journal>
<ee type="oa">https://doi.org/10.48550/arXiv.2405.15743</ee>
<url>db/journals/corr/corr2405.html#abs-2405-15743</url>
</article>
</r>
<r><article publtype="informal" key="journals/corr/abs-2304-03208" mdate="2023-04-18">
<author pid="263/9353">Nolan Dey</author>
<author pid="344/4400">Gurpreet Gosal</author>
<author pid="25/2326">Zhiming Chen</author>
<author pid="344/4213">Hemant Khachane</author>
<author pid="72/6664">William Marshall</author>
<author pid="344/4496">Ribhu Pathria</author>
<author pid="85/3406">Marvin Tom</author>
<author pid="60/3063">Joel Hestness</author>
<title>Cerebras-GPT: Open Compute-Optimal Language Models Trained on the Cerebras Wafer-Scale Cluster.</title>
<year>2023</year>
<volume>abs/2304.03208</volume>
<journal>CoRR</journal>
<ee type="oa">https://doi.org/10.48550/arXiv.2304.03208</ee>
<url>db/journals/corr/corr2304.html#abs-2304-03208</url>
</article>
</r>
<r><article publtype="informal" key="journals/corr/abs-2309-11568" mdate="2023-09-25">
<author pid="263/9353">Nolan Dey</author>
<author pid="277/0944">Daria Soboleva</author>
<author pid="322/8790">Faisal Al-Khateeb</author>
<author pid="176/3974">Bowen Yang</author>
<author pid="344/4496">Ribhu Pathria</author>
<author pid="344/4213">Hemant Khachane</author>
<author pid="206/2867">Shaheer Muhammad</author>
<author pid="25/2326">Zhiming Chen</author>
<author pid="50/2882">Robert Myers</author>
<author pid="357/3716">Jacob Robert Steeves</author>
<author pid="97/1208">Natalia Vassilieva</author>
<author pid="85/3406">Marvin Tom</author>
<author pid="60/3063">Joel Hestness</author>
<title>BTLM-3B-8K: 7B Parameter Performance in a 3B Parameter Model.</title>
<year>2023</year>
<volume>abs/2309.11568</volume>
<journal>CoRR</journal>
<ee type="oa">https://doi.org/10.48550/arXiv.2309.11568</ee>
<url>db/journals/corr/corr2309.html#abs-2309-11568</url>
</article>
</r>
<r><article publtype="informal" key="journals/corr/abs-2310-13017" mdate="2023-10-27">
<author pid="322/8790">Faisal Al-Khateeb</author>
<author pid="263/9353">Nolan Dey</author>
<author pid="277/0944">Daria Soboleva</author>
<author pid="60/3063">Joel Hestness</author>
<title>Position Interpolation Improves ALiBi Extrapolation.</title>
<year>2023</year>
<volume>abs/2310.13017</volume>
<journal>CoRR</journal>
<ee type="oa">https://doi.org/10.48550/arXiv.2310.13017</ee>
<url>db/journals/corr/corr2310.html#abs-2310-13017</url>
</article>
</r>
<r><article key="journals/ral/OsorioIYBRDT20" mdate="2020-05-22">
<author pid="203/1530">Victor Reyes Osorio</author>
<author orcid="0000-0002-3177-0556" pid="232/3393">Rajan Iyengar</author>
<author pid="263/9392">Xueyang Yao</author>
<author pid="232/3025">Presish Bhattachan</author>
<author pid="232/3270">Adrian Ragobar</author>
<author pid="263/9353">Nolan Dey</author>
<author orcid="0000-0002-2150-297X" pid="52/8763">Bryan P. Tripp</author>
<title>37, 000 Human-Planned Robotic Grasps With Six Degrees of Freedom.</title>
<pages>3346-3351</pages>
<year>2020</year>
<volume>5</volume>
<journal>IEEE Robotics Autom. Lett.</journal>
<number>2</number>
<ee>https://doi.org/10.1109/LRA.2020.2976295</ee>
<url>db/journals/ral/ral5.html#OsorioIYBRDT20</url>
</article>
</r>
<r><article publtype="informal" key="journals/corr/abs-2011-03043" mdate="2020-11-12">
<author pid="263/9353">Nolan S. Dey</author>
<author pid="278/2473">J. Eric Taylor</author>
<author pid="52/8763">Bryan P. Tripp</author>
<author pid="52/4401">Alexander Wong</author>
<author pid="17/1633">Graham W. Taylor</author>
<title>Identifying and interpreting tuning dimensions in deep networks.</title>
<year>2020</year>
<volume>abs/2011.03043</volume>
<journal>CoRR</journal>
<ee type="oa">https://arxiv.org/abs/2011.03043</ee>
<url>db/journals/corr/corr2011.html#abs-2011-03043</url>
</article>
</r>
<coauthors n="31" nc="2">
<co c="0"><na f="a/Al=Khateeb:Faisal" pid="322/8790">Faisal Al-Khateeb</na></co>
<co c="0"><na f="b/Bergsma:Shane" pid="57/2540">Shane Bergsma</na></co>
<co c="1"><na f="b/Bhattachan:Presish" pid="232/3025">Presish Bhattachan</na></co>
<co c="0"><na f="b/Bordelon:Blake" pid="228/6993">Blake Bordelon</na></co>
<co c="0"><na f="c/Chen:Zhiming" pid="25/2326">Zhiming Chen</na></co>
<co c="0"><na f="g/Gosal:Gurpreet" pid="344/4400">Gurpreet Gosal</na></co>
<co c="0"><na f="g/Gray:Gavia" pid="392/4399">Gavia Gray</na></co>
<co c="0"><na f="h/Hanin:Boris" pid="205/2534">Boris Hanin</na></co>
<co c="0"><na f="h/Hestness:Joel" pid="60/3063">Joel Hestness</na></co>
<co c="1"><na f="i/Iyengar:Rajan" pid="232/3393">Rajan Iyengar</na></co>
<co c="0"><na f="k/Khachane:Hemant" pid="344/4213">Hemant Khachane</na></co>
<co c="0"><na f="l/Li:Mufan_Bill" pid="350/5111">Mufan Bill Li</na></co>
<co c="0"><na f="m/Marshall:William" pid="72/6664">William Marshall</na></co>
<co c="0"><na f="m/Muhammad:Shaheer" pid="206/2867">Shaheer Muhammad</na></co>
<co c="0"><na f="m/Myers:Robert" pid="50/2882">Robert Myers</na></co>
<co c="0"><na f="n/Noci:Lorenzo" pid="268/6839">Lorenzo Noci</na></co>
<co c="1"><na f="o/Osorio:Victor_Reyes" pid="203/1530">Victor Reyes Osorio</na></co>
<co c="0"><na f="p/Pathria:Ribhu" pid="344/4496">Ribhu Pathria</na></co>
<co c="0"><na f="p/Pehlevan:Cengiz" pid="145/3480">Cengiz Pehlevan</na></co>
<co c="1"><na f="r/Ragobar:Adrian" pid="232/3270">Adrian Ragobar</na></co>
<co c="0"><na f="s/Soboleva:Daria" pid="277/0944">Daria Soboleva</na></co>
<co c="0"><na f="s/Steeves:Jacob_Robert" pid="357/3716">Jacob Robert Steeves</na></co>
<co c="1"><na f="t/Taylor:Graham_W=" pid="17/1633">Graham W. Taylor</na></co>
<co c="1"><na f="t/Taylor:J=_Eric" pid="278/2473">J. Eric Taylor</na></co>
<co c="0"><na f="t/Tom:Marvin" pid="85/3406">Marvin Tom</na></co>
<co c="1"><na f="t/Tripp:Bryan_P=" pid="52/8763">Bryan P. Tripp</na></co>
<co c="0"><na f="v/Vassilieva:Natalia" pid="97/1208">Natalia Vassilieva</na></co>
<co c="1"><na f="w/Wong:Alexander" pid="52/4401">Alexander Wong</na></co>
<co c="0"><na f="y/Yang:Bowen" pid="176/3974">Bowen Yang</na></co>
<co c="1"><na f="y/Yao:Xueyang" pid="263/9392">Xueyang Yao</na></co>
<co c="0"><na f="z/Zhang:Bin_Claire" pid="407/6648">Bin Claire Zhang</na></co>
</coauthors>
</dblpperson>

