<?xml version="1.0"?>
<dblpperson name="Dhawal Gupta" pid="231/0618" n="24">
<person key="homepages/231/0618" mdate="2018-12-03">
<author pid="231/0618">Dhawal Gupta</author>
</person>
<r><article publtype="informal" key="journals/corr/abs-2503-06810" mdate="2025-04-11">
<author pid="231/0618">Dhawal Gupta</author>
<author pid="182/2211">Adam Fisch</author>
<author pid="117/5869">Christoph Dann</author>
<author pid="24/4383">Alekh Agarwal</author>
<title>Mitigating Preference Hacking in Policy Optimization with Pessimism.</title>
<year>2025</year>
<month>March</month>
<volume>abs/2503.06810</volume>
<journal>CoRR</journal>
<ee type="oa">https://doi.org/10.48550/arXiv.2503.06810</ee>
<url>db/journals/corr/corr2503.html#abs-2503-06810</url>
<stream>streams/journals/corr</stream>
</article>
</r>
<r><article key="conf/rlc/AyoubSZCGSS24" mdate="2025-03-26">
<author pid="266/8071">Alex Ayoub</author>
<author pid="191/6739">David Szepesvari</author>
<author pid="48/7129">Francesco Zanini</author>
<author pid="45/2096">Bryan Chan</author>
<author pid="231/0618">Dhawal Gupta</author>
<author pid="75/3139">Bruno Castro da Silva</author>
<author pid="96/6660">Dale Schuurmans</author>
<title>Mitigating the Curse of Horizon in Monte-Carlo Returns.</title>
<pages>563-572</pages>
<year>2024</year>
<journal>RLJ</journal>
<volume>2</volume>
<url>db/journals/rlj/rlj2.html#AyoubSZCGSS24</url>
<ee type="oa">https://rlj.cs.umass.edu/2024/papers/Paper80.html</ee>
</article>
</r>
<r><article key="conf/rlc/ChoudharyGT24" mdate="2025-03-26">
<author pid="380/3172">Kartik Choudhary</author>
<author pid="231/0618">Dhawal Gupta</author>
<author pid="46/11107">Philip S. Thomas</author>
<title>ICU-Sepsis: A Benchmark MDP Built from Real Medical Data.</title>
<pages>1546-1566</pages>
<year>2024</year>
<journal>RLJ</journal>
<volume>4</volume>
<url>db/journals/rlj/rlj4.html#ChoudharyGT24</url>
<ee type="oa">https://rlj.cs.umass.edu/2024/papers/Paper194.html</ee>
</article>
</r>
<r><article key="journals/access/WeqarMGU24" mdate="2025-01-19">
<author orcid="0009-0008-2046-1521" pid="367/1231">Mehwash Weqar</author>
<author orcid="0000-0002-5451-6964" pid="26/6280">Shabana Mehfuz</author>
<author pid="231/0618">Dhawal Gupta</author>
<author orcid="0000-0002-1477-8759" pid="82/10092">Shabana Urooj</author>
<title>Adaptive Switching Based Data-Communication Model for Internet of Healthcare Things Networks.</title>
<pages>11530-11548</pages>
<year>2024</year>
<volume>12</volume>
<journal>IEEE Access</journal>
<ee type="oa">https://doi.org/10.1109/ACCESS.2024.3354722</ee>
<ee>https://www.wikidata.org/entity/Q130050779</ee>
<url>db/journals/access/access12.html#WeqarMGU24</url>
</article>
</r>
<r><inproceedings key="conf/aaai/GuptaJCLTS24" mdate="2024-08-01">
<author pid="231/0618">Dhawal Gupta</author>
<author pid="222/1982">Scott M. Jordan</author>
<author pid="209/9835">Shreyas Chaudhari</author>
<author pid="58/2670-6">Bo Liu 0006</author>
<author pid="46/11107">Philip S. Thomas</author>
<author pid="75/3139">Bruno Castro da Silva</author>
<title>From Past to Future: Rethinking Eligibility Traces.</title>
<pages>12253-12260</pages>
<year>2024</year>
<booktitle>AAAI</booktitle>
<ee type="oa">https://doi.org/10.1609/aaai.v38i11.29115</ee>
<crossref>conf/aaai/2024</crossref>
<url>db/conf/aaai/aaai2024.html#GuptaJCLTS24</url>
</inproceedings>
</r>
<r><inproceedings key="conf/icccnt/WeqarMG24" mdate="2025-06-27">
<author pid="367/1231">Mehwash Weqar</author>
<author pid="26/6280">Shabana Mehfuz</author>
<author pid="231/0618">Dhawal Gupta</author>
<title>Authentication in IoT Networks via Machine Learning and Deep Learning: A Review.</title>
<pages>1-6</pages>
<year>2024</year>
<booktitle>ICCCNT</booktitle>
<ee>https://doi.org/10.1109/ICCCNT61001.2024.10724010</ee>
<crossref>conf/icccnt/2024</crossref>
<url>db/conf/icccnt/icccnt2024.html#WeqarMG24</url>
</inproceedings>
</r>
<r><article publtype="informal" key="journals/corr/abs-2406-05646" mdate="2024-07-13">
<author pid="380/3172">Kartik Choudhary</author>
<author pid="231/0618">Dhawal Gupta</author>
<author pid="46/11107">Philip S. Thomas</author>
<title>ICU-Sepsis: A Benchmark MDP Built from Real Medical Data.</title>
<year>2024</year>
<volume>abs/2406.05646</volume>
<journal>CoRR</journal>
<ee type="oa">https://doi.org/10.48550/arXiv.2406.05646</ee>
<url>db/journals/corr/corr2406.html#abs-2406-05646</url>
</article>
</r>
<r><article publtype="informal" key="journals/corr/abs-2408-00997" mdate="2024-09-09">
<author pid="384/7165">Erfan Entezami</author>
<author pid="263/6980">Mahsa Sahebdel</author>
<author pid="231/0618">Dhawal Gupta</author>
<title>A Safe Exploration Strategy for Model-free Task Adaptation in Safety-constrained Grid Environments.</title>
<year>2024</year>
<volume>abs/2408.00997</volume>
<journal>CoRR</journal>
<ee type="oa">https://doi.org/10.48550/arXiv.2408.00997</ee>
<url>db/journals/corr/corr2408.html#abs-2408-00997</url>
<stream>streams/journals/corr</stream>
</article>
</r>
<r><inproceedings key="conf/iclr/ChowTNGRGB23" mdate="2024-07-24">
<author pid="146/7869">Yinlam Chow</author>
<author pid="321/9887">Aza Tulepbergenov</author>
<author pid="190/7259">Ofir Nachum</author>
<author pid="231/0618">Dhawal Gupta</author>
<author pid="67/8693">Moonkyung Ryu</author>
<author pid="88/6389">Mohammad Ghavamzadeh</author>
<author pid="10/3411">Craig Boutilier</author>
<title>A Mixture-of-Expert Approach to RL-based Dialogue Management.</title>
<year>2023</year>
<booktitle>ICLR</booktitle>
<ee type="oa">https://openreview.net/forum?id=4FBUihxz5nm</ee>
<crossref>conf/iclr/2023</crossref>
<url>db/conf/iclr/iclr2023.html#ChowTNGRGB23</url>
</inproceedings>
</r>
<r><inproceedings key="conf/nips/GuptaCJT023" mdate="2024-03-01">
<author pid="231/0618">Dhawal Gupta</author>
<author pid="168/8450">Yash Chandak</author>
<author pid="222/1982">Scott M. Jordan</author>
<author pid="46/11107">Philip S. Thomas</author>
<author pid="75/3139">Bruno C. da Silva 0001</author>
<title>Behavior Alignment via Reward Function Optimization.</title>
<year>2023</year>
<booktitle>NeurIPS</booktitle>
<ee type="oa">http://papers.nips.cc/paper_files/paper/2023/hash/a5357781c204d4412e44ed9cbcdb08d5-Abstract-Conference.html</ee>
<crossref>conf/nips/2023</crossref>
<url>db/conf/nips/neurips2023.html#GuptaCJT023</url>
</inproceedings>
</r>
<r><inproceedings key="conf/nips/GuptaCTGB23" mdate="2024-03-01">
<author pid="231/0618">Dhawal Gupta</author>
<author pid="146/7869">Yinlam Chow</author>
<author pid="321/9887">Azamat Tulepbergenov</author>
<author pid="88/6389">Mohammad Ghavamzadeh</author>
<author pid="10/3411">Craig Boutilier</author>
<title>Offline Reinforcement Learning for Mixture-of-Expert Dialogue Management.</title>
<year>2023</year>
<booktitle>NeurIPS</booktitle>
<ee type="oa">http://papers.nips.cc/paper_files/paper/2023/hash/12bcf58a1c09a0fcb5310f3589291ab4-Abstract-Conference.html</ee>
<crossref>conf/nips/2023</crossref>
<url>db/conf/nips/neurips2023.html#GuptaCTGB23</url>
</inproceedings>
</r>
<r><article publtype="informal" key="journals/corr/abs-2302-10850" mdate="2023-02-24">
<author pid="231/0618">Dhawal Gupta</author>
<author pid="146/7869">Yinlam Chow</author>
<author pid="88/6389">Mohammad Ghavamzadeh</author>
<author pid="10/3411">Craig Boutilier</author>
<title>Offline Reinforcement Learning for Mixture-of-Expert Dialogue Management.</title>
<year>2023</year>
<volume>abs/2302.10850</volume>
<journal>CoRR</journal>
<ee type="oa">https://doi.org/10.48550/arXiv.2302.10850</ee>
<url>db/journals/corr/corr2302.html#abs-2302-10850</url>
</article>
</r>
<r><article publtype="informal" key="journals/corr/abs-2305-09838" mdate="2023-05-24">
<author pid="236/4207">James E. Kostas</author>
<author pid="222/1982">Scott M. Jordan</author>
<author pid="168/8450">Yash Chandak</author>
<author pid="90/508">Georgios Theocharous</author>
<author pid="231/0618">Dhawal Gupta</author>
<author pid="60/7057">Martha White</author>
<author pid="75/3139">Bruno Castro da Silva</author>
<author pid="46/11107">Philip S. Thomas</author>
<title>Coagent Networks: Generalized and Scaled.</title>
<year>2023</year>
<volume>abs/2305.09838</volume>
<journal>CoRR</journal>
<ee type="oa">https://doi.org/10.48550/arXiv.2305.09838</ee>
<url>db/journals/corr/corr2305.html#abs-2305-09838</url>
</article>
</r>
<r><article publtype="informal" key="journals/corr/abs-2309-09055" mdate="2023-09-22">
<author pid="204/8261">Simeng Sun</author>
<author pid="231/0618">Dhawal Gupta</author>
<author pid="148/9178">Mohit Iyyer</author>
<title>Exploring the impact of low-rank adaptation on the performance, efficiency, and regularization of RLHF.</title>
<year>2023</year>
<volume>abs/2309.09055</volume>
<journal>CoRR</journal>
<ee type="oa">https://doi.org/10.48550/arXiv.2309.09055</ee>
<url>db/journals/corr/corr2309.html#abs-2309-09055</url>
</article>
</r>
<r><article publtype="informal" key="journals/corr/abs-2310-19007" mdate="2023-11-02">
<author pid="231/0618">Dhawal Gupta</author>
<author pid="168/8450">Yash Chandak</author>
<author pid="222/1982">Scott M. Jordan</author>
<author pid="46/11107">Philip S. Thomas</author>
<author pid="75/3139">Bruno Castro da Silva</author>
<title>Behavior Alignment via Reward Function Optimization.</title>
<year>2023</year>
<volume>abs/2310.19007</volume>
<journal>CoRR</journal>
<ee type="oa">https://doi.org/10.48550/arXiv.2310.19007</ee>
<url>db/journals/corr/corr2310.html#abs-2310-19007</url>
</article>
</r>
<r><article publtype="informal" key="journals/corr/abs-2312-12972" mdate="2024-08-01">
<author pid="231/0618">Dhawal Gupta</author>
<author pid="222/1982">Scott M. Jordan</author>
<author pid="209/9835">Shreyas Chaudhari</author>
<author pid="58/2670-6">Bo Liu 0006</author>
<author pid="46/11107">Philip S. Thomas</author>
<author pid="75/3139">Bruno Castro da Silva</author>
<title>From Past to Future: Rethinking Eligibility Traces.</title>
<year>2023</year>
<volume>abs/2312.12972</volume>
<journal>CoRR</journal>
<ee type="oa">https://doi.org/10.48550/arXiv.2312.12972</ee>
<url>db/journals/corr/corr2312.html#abs-2312-12972</url>
</article>
</r>
<r><article key="journals/cogcom/SahaGSB21" mdate="2025-01-19">
<author orcid="0000-0002-3252-0997" pid="230/8625">Tulika Saha</author>
<author pid="231/0618">Dhawal Gupta</author>
<author pid="27/1664-1">Sriparna Saha 0001</author>
<author pid="p/PushpakBhattacharyya">Pushpak Bhattacharyya</author>
<title>Emotion Aided Dialogue Act Classification for Task-Independent Conversations in a Multi-modal Framework.</title>
<pages>277-289</pages>
<year>2021</year>
<volume>13</volume>
<journal>Cogn. Comput.</journal>
<number>2</number>
<ee>https://doi.org/10.1007/s12559-019-09704-5</ee>
<ee>https://www.wikidata.org/entity/Q126301585</ee>
<url>db/journals/cogcom/cogcom13.html#SahaGSB21</url>
</article>
</r>
<r><article key="journals/mta/SahaGSB21" mdate="2024-05-07">
<author orcid="0000-0002-3252-0997" pid="230/8625">Tulika Saha</author>
<author pid="231/0618">Dhawal Gupta</author>
<author pid="27/1664-1">Sriparna Saha 0001</author>
<author pid="p/PushpakBhattacharyya">Pushpak Bhattacharyya</author>
<title>A hierarchical approach for efficient multi-intent dialogue policy learning.</title>
<pages>35025-35050</pages>
<year>2021</year>
<volume>80</volume>
<journal>Multim. Tools Appl.</journal>
<number>28-29</number>
<ee>https://doi.org/10.1007/s11042-020-09070-7</ee>
<url>db/journals/mta/mta80.html#SahaGSB21</url>
</article>
</r>
<r><article key="journals/talip/SahaGSB21" mdate="2021-11-15">
<author pid="230/8625">Tulika Saha</author>
<author pid="231/0618">Dhawal Gupta</author>
<author pid="27/1664-1">Sriparna Saha 0001</author>
<author pid="p/PushpakBhattacharyya">Pushpak Bhattacharyya</author>
<title>A Unified Dialogue Management Strategy for Multi-intent Dialogue Conversations in Multiple Languages.</title>
<pages>99:1-99:22</pages>
<year>2021</year>
<volume>20</volume>
<journal>ACM Trans. Asian Low Resour. Lang. Inf. Process.</journal>
<number>6</number>
<ee>https://doi.org/10.1145/3461763</ee>
<url>db/journals/talip/talip20.html#SahaGSB21</url>
</article>
</r>
<r><inproceedings key="conf/nips/GuptaMSKTW21" mdate="2022-05-03">
<author pid="231/0618">Dhawal Gupta</author>
<author pid="319/4766">Gabor Mihucz</author>
<author pid="203/4463">Matthew Schlegel</author>
<author pid="236/4207">James E. Kostas</author>
<author pid="46/11107">Philip S. Thomas</author>
<author pid="60/7057">Martha White</author>
<title>Structural Credit Assignment in Neural Networks using Reinforcement Learning.</title>
<pages>30257-30270</pages>
<year>2021</year>
<booktitle>NeurIPS</booktitle>
<ee type="oa">https://proceedings.neurips.cc/paper/2021/hash/fe1f9c70bdf347497e1a01b6c486bdb9-Abstract.html</ee>
<crossref>conf/nips/2021</crossref>
<url>db/conf/nips/neurips2021.html#GuptaMSKTW21</url>
</inproceedings>
</r>
<r><article key="journals/eswa/SahaGSB20" mdate="2020-11-14">
<author orcid="0000-0002-3252-0997" pid="230/8625">Tulika Saha</author>
<author orcid="0000-0002-2486-866X" pid="231/0618">Dhawal Gupta</author>
<author orcid="0000-0001-5458-9381" pid="27/1664-1">Sriparna Saha 0001</author>
<author pid="p/PushpakBhattacharyya">Pushpak Bhattacharyya</author>
<title>Towards integrated dialogue policy learning for multiple domains and intents using Hierarchical Deep Reinforcement Learning.</title>
<pages>113650</pages>
<year>2020</year>
<volume>162</volume>
<journal>Expert Syst. Appl.</journal>
<ee>https://doi.org/10.1016/j.eswa.2020.113650</ee>
<url>db/journals/eswa/eswa162.html#SahaGSB20</url>
</article>
</r>
<r><inproceedings key="conf/icml/GhiassianP0GWW20" mdate="2022-04-21">
<author pid="200/7870">Sina Ghiassian</author>
<author pid="41/467">Andrew Patterson</author>
<author pid="268/8270">Shivam Garg 0006</author>
<author pid="231/0618">Dhawal Gupta</author>
<author pid="05/3110">Adam White 0001</author>
<author pid="60/7057">Martha White</author>
<title>Gradient Temporal-Difference Learning with Regularized Corrections.</title>
<pages>3524-3534</pages>
<year>2020</year>
<booktitle>ICML</booktitle>
<ee type="oa">http://proceedings.mlr.press/v119/ghiassian20a.html</ee>
<crossref>conf/icml/2020</crossref>
<url>db/conf/icml/icml2020.html#GhiassianP0GWW20</url>
</inproceedings>
</r>
<r><article publtype="informal" key="journals/corr/abs-2007-00611" mdate="2022-04-21">
<author pid="200/7870">Sina Ghiassian</author>
<author pid="41/467">Andrew Patterson</author>
<author pid="268/8270">Shivam Garg 0006</author>
<author pid="231/0618">Dhawal Gupta</author>
<author pid="05/3110">Adam White 0001</author>
<author pid="60/7057">Martha White</author>
<title>Gradient Temporal-Difference Learning with Regularized Corrections.</title>
<year>2020</year>
<volume>abs/2007.00611</volume>
<journal>CoRR</journal>
<ee type="oa">https://arxiv.org/abs/2007.00611</ee>
<url>db/journals/corr/corr2007.html#abs-2007-00611</url>
</article>
</r>
<r><inproceedings key="conf/iconip/SahaGSB18" mdate="2024-05-07">
<author orcid="0000-0002-3252-0997" pid="230/8625">Tulika Saha</author>
<author pid="231/0618">Dhawal Gupta</author>
<author pid="27/1664-1">Sriparna Saha 0001</author>
<author pid="p/PushpakBhattacharyya">Pushpak Bhattacharyya</author>
<title>Reinforcement Learning Based Dialogue Management Strategy.</title>
<pages>359-372</pages>
<year>2018</year>
<booktitle>ICONIP (3)</booktitle>
<ee>https://doi.org/10.1007/978-3-030-04182-3_32</ee>
<crossref>conf/iconip/2018-3</crossref>
<url>db/conf/iconip/iconip2018-3.html#SahaGSB18</url>
</inproceedings>
</r>
<coauthors n="40" nc="5">
<co c="0"><na f="a/Agarwal:Alekh" pid="24/4383">Alekh Agarwal</na></co>
<co c="0"><na f="a/Ayoub:Alex" pid="266/8071">Alex Ayoub</na></co>
<co c="1"><na f="b/Bhattacharyya:Pushpak" pid="p/PushpakBhattacharyya">Pushpak Bhattacharyya</na></co>
<co c="0"><na f="b/Boutilier:Craig" pid="10/3411">Craig Boutilier</na></co>
<co c="0"><na f="c/Chan:Bryan" pid="45/2096">Bryan Chan</na></co>
<co c="0"><na f="c/Chandak:Yash" pid="168/8450">Yash Chandak</na></co>
<co c="0"><na f="c/Chaudhari:Shreyas" pid="209/9835">Shreyas Chaudhari</na></co>
<co c="0"><na f="c/Choudhary:Kartik" pid="380/3172">Kartik Choudhary</na></co>
<co c="0"><na f="c/Chow:Yinlam" pid="146/7869">Yinlam Chow</na></co>
<co c="0"><na f="d/Dann:Christoph" pid="117/5869">Christoph Dann</na></co>
<co c="4"><na f="e/Entezami:Erfan" pid="384/7165">Erfan Entezami</na></co>
<co c="0"><na f="f/Fisch:Adam" pid="182/2211">Adam Fisch</na></co>
<co c="0"><na f="g/Garg_0006:Shivam" pid="268/8270">Shivam Garg 0006</na></co>
<co c="0"><na f="g/Ghavamzadeh:Mohammad" pid="88/6389">Mohammad Ghavamzadeh</na></co>
<co c="0"><na f="g/Ghiassian:Sina" pid="200/7870">Sina Ghiassian</na></co>
<co c="3"><na f="i/Iyyer:Mohit" pid="148/9178">Mohit Iyyer</na></co>
<co c="0"><na f="j/Jordan:Scott_M=" pid="222/1982">Scott M. Jordan</na></co>
<co c="0"><na f="k/Kostas:James_E=" pid="236/4207">James E. Kostas</na></co>
<co c="0"><na f="l/Liu_0006:Bo" pid="58/2670-6">Bo Liu 0006</na></co>
<co c="2"><na f="m/Mehfuz:Shabana" pid="26/6280">Shabana Mehfuz</na></co>
<co c="0"><na f="m/Mihucz:Gabor" pid="319/4766">Gabor Mihucz</na></co>
<co c="0"><na f="n/Nachum:Ofir" pid="190/7259">Ofir Nachum</na></co>
<co c="0"><na f="p/Patterson:Andrew" pid="41/467">Andrew Patterson</na></co>
<co c="0"><na f="r/Ryu:Moonkyung" pid="67/8693">Moonkyung Ryu</na></co>
<co c="1"><na f="s/Saha_0001:Sriparna" pid="27/1664-1">Sriparna Saha 0001</na></co>
<co c="1"><na f="s/Saha:Tulika" pid="230/8625">Tulika Saha</na></co>
<co c="4"><na f="s/Sahebdel:Mahsa" pid="263/6980">Mahsa Sahebdel</na></co>
<co c="0"><na f="s/Schlegel:Matthew" pid="203/4463">Matthew Schlegel</na></co>
<co c="0"><na f="s/Schuurmans:Dale" pid="96/6660">Dale Schuurmans</na></co>
<co c="0" n="2"><na f="s/Silva_0001:Bruno_C=_da" pid="75/3139">Bruno C. da Silva 0001</na><na>Bruno Castro da Silva</na></co>
<co c="3"><na f="s/Sun:Simeng" pid="204/8261">Simeng Sun</na></co>
<co c="0"><na f="s/Szepesvari:David" pid="191/6739">David Szepesvari</na></co>
<co c="0"><na f="t/Theocharous:Georgios" pid="90/508">Georgios Theocharous</na></co>
<co c="0"><na f="t/Thomas:Philip_S=" pid="46/11107">Philip S. Thomas</na></co>
<co c="0" n="2"><na f="t/Tulepbergenov:Aza" pid="321/9887">Aza Tulepbergenov</na><na>Azamat Tulepbergenov</na></co>
<co c="2"><na f="u/Urooj:Shabana" pid="82/10092">Shabana Urooj</na></co>
<co c="2"><na f="w/Weqar:Mehwash" pid="367/1231">Mehwash Weqar</na></co>
<co c="0"><na f="w/White_0001:Adam" pid="05/3110">Adam White 0001</na></co>
<co c="0"><na f="w/White:Martha" pid="60/7057">Martha White</na></co>
<co c="0"><na f="z/Zanini:Francesco" pid="48/7129">Francesco Zanini</na></co>
</coauthors>
</dblpperson>

