


% BibTeX records for Hanyang Zhao (exported from the dblp computer science bibliography).
% Journal version of the survey; the preprint record with the same title and
% authors is DBLP:journals/corr/abs-2409-11564.
@article{DBLP:journals/jair/WinataZDTYZS25,
  author       = {Genta Indra Winata and
                  Hanyang Zhao and
                  Anirban Das and
                  Wenpin Tang and
                  David D. Yao and
                  Shi{-}Xiong Zhang and
                  Sambit Sahu},
  title        = {Preference Tuning with Human Feedback on Language, Speech, and Vision
                  Tasks: {A} Survey},
  journal      = {Journal of Artificial Intelligence Research},
  volume       = {82},
  pages        = {2595--2661},
  year         = {2025},
  url          = {https://doi.org/10.1613/jair.1.17541},
  doi          = {10.1613/JAIR.1.17541},
  timestamp    = {Wed, 14 May 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jair/WinataZDTYZS25.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICLR 2025 paper. The record DBLP:journals/corr/abs-2405-14953 lists the same
% authors under the title "Mallows-DPO: ..." and is presumably its preprint.
@inproceedings{DBLP:conf/iclr/0002ZLYT25,
  author       = {Haoxian Chen and
                  Hanyang Zhao and
                  Henry Lam and
                  David D. Yao and
                  Wenpin Tang},
  title        = {{MallowsPO}: Fine-Tune Your {LLM} with Preference Dispersions},
  booktitle    = {The Thirteenth International Conference on Learning Representations,
                  {ICLR} 2025, Singapore, April 24-28, 2025},
  publisher    = {OpenReview.net},
  year         = {2025},
  url          = {https://openreview.net/forum?id=d8cnezVcaW},
  timestamp    = {Thu, 15 May 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/0002ZLYT25.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICLR 2025 paper; the preprint record with the same title and authors is
% DBLP:journals/corr/abs-2410-04203.
@inproceedings{DBLP:conf/iclr/ZhaoWDZYTS25,
  author       = {Hanyang Zhao and
                  Genta Indra Winata and
                  Anirban Das and
                  Shi{-}Xiong Zhang and
                  David D. Yao and
                  Wenpin Tang and
                  Sambit Sahu},
  title        = {{RainbowPO}: {A} Unified Framework for Combining Improvements in Preference
                  Optimization},
  booktitle    = {The Thirteenth International Conference on Learning Representations,
                  {ICLR} 2025, Singapore, April 24-28, 2025},
  publisher    = {OpenReview.net},
  year         = {2025},
  url          = {https://openreview.net/forum?id=trKee5pIFv},
  timestamp    = {Thu, 15 May 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/ZhaoWDZYTS25.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2025 paper. The volume number is taken from the PMLR URL of this entry
% (".../v267/..."); page numbers are not available in this record.
@inproceedings{DBLP:conf/icml/Zhao0ZYT25,
  author       = {Hanyang Zhao and
                  Haoxian Chen and
                  Ji Zhang and
                  David D. Yao and
                  Wenpin Tang},
  editor       = {Aarti Singh and
                  Maryam Fazel and
                  Daniel Hsu and
                  Simon Lacoste{-}Julien and
                  Felix Berkenkamp and
                  Tegan Maharaj and
                  Kiri Wagstaff and
                  Jerry Zhu},
  title        = {Score as Action: Fine Tuning Diffusion Generative Models by Continuous-time
                  Reinforcement Learning},
  booktitle    = {Forty-second International Conference on Machine Learning, {ICML}
                  2025, Vancouver, BC, Canada, July 13-19, 2025},
  series       = {Proceedings of Machine Learning Research},
  volume       = {267},
  publisher    = {{PMLR} / OpenReview.net},
  year         = {2025},
  url          = {https://proceedings.mlr.press/v267/zhao25f.html},
  timestamp    = {Wed, 04 Feb 2026 16:54:16 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/Zhao0ZYT25.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NAACL 2025 paper; the preprint record is DBLP:journals/corr/abs-2410-12705.
% NOTE(review): two author names differ from the preprint record -- "Cheng Ching Lam"
% here vs. "Ching Lam Cheng" there (different surname after BibTeX name parsing), and
% "Natasha Christabelle Santosa" here vs. "Natasha Santosa" there. Confirm against
% the paper and harmonise both records.
@inproceedings{DBLP:conf/naacl/WinataHIAPWNPOARDPAWMLA25,
  author       = {Genta Indra Winata and
                  Frederikus Hudi and
                  Patrick Amadeus Irawan and
                  David Anugraha and
                  Rifki Afina Putri and
                  Yutong Wang and
                  Adam Nohejl and
                  Ubaidillah Ariq Prathama and
                  Nedjma Ousidhoum and
                  Afifa Amriani and
                  Anar Rzayev and
                  Anirban Das and
                  Ashmari Pramodya and
                  Aulia Adila and
                  Bryan Wilie and
                  Candy Olivia Mawalim and
                  Cheng Ching Lam and
                  Daud Abolade and
                  Emmanuele Chersoni and
                  Enrico Santus and
                  Fariz Ikhwantri and
                  Garry Kuwanto and
                  Hanyang Zhao and
                  Haryo Akbarianto Wibowo and
                  Holy Lovenia and
                  Jan Christian Blaise Cruz and
                  Jan Wira Gotama Putra and
                  Junho Myung and
                  Lucky Susanto and
                  Maria Angelica Riera Machin and
                  Marina Zhukova and
                  Michael Anugraha and
                  Muhammad Farid Adilazuarda and
                  Natasha Christabelle Santosa and
                  Peerat Limkonchotiwat and
                  Raj Dabre and
                  Rio Alexander Audino and
                  Samuel Cahyawijaya and
                  Shi{-}Xiong Zhang and
                  Stephanie Yulia Salim and
                  Yi Zhou and
                  Yinxuan Gui and
                  David Ifeoluwa Adelani and
                  En{-}Shiun Annie Lee and
                  Shogo Okada and
                  Ayu Purwarianti and
                  Alham Fikri Aji and
                  Taro Watanabe and
                  Derry Tanti Wijaya and
                  Alice Oh and
                  Chong{-}Wah Ngo},
  editor       = {Luis Chiruzzo and
                  Alan Ritter and
                  Lu Wang},
  title        = {{WorldCuisines}: {A} Massive-Scale Benchmark for Multilingual and Multicultural
                  Visual Question Answering on Global Cuisines},
  booktitle    = {Proceedings of the 2025 Conference of the Nations of the Americas
                  Chapter of the Association for Computational Linguistics: Human Language
                  Technologies, {NAACL} 2025 - Volume 1: Long Papers, Albuquerque, New
                  Mexico, USA, April 29 - May 4, 2025},
  pages        = {3242--3264},
  publisher    = {Association for Computational Linguistics},
  year         = {2025},
  url          = {https://doi.org/10.18653/v1/2025.naacl-long.167},
  doi          = {10.18653/V1/2025.NAACL-LONG.167},
  timestamp    = {Tue, 24 Mar 2026 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/naacl/WinataHIAPWNPOARDPAWMLA25.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint; the ICML 2025 record DBLP:conf/icml/Zhao0ZYT25 lists the same
% authors and a near-identical title.
@article{DBLP:journals/corr/abs-2502-01819,
  author       = {Hanyang Zhao and
                  Haoxian Chen and
                  Ji Zhang and
                  David D. Yao and
                  Wenpin Tang},
  title        = {Score as Action: Fine-Tuning Diffusion Generative Models by Continuous-time
                  Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/2502.01819},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2502.01819},
  doi          = {10.48550/ARXIV.2502.01819},
  eprinttype   = {arXiv},
  eprint       = {2502.01819},
  timestamp    = {Mon, 10 Mar 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2502-01819.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2503-11720,
  author       = {Hanyang Zhao and
                  Haoxian Chen and
                  Yucheng Guo and
                  Genta Indra Winata and
                  Tingting Ou and
                  Ziyu Huang and
                  David D. Yao and
                  Wenpin Tang},
  title        = {Fine-Tuning Diffusion Generative Models via Rich Preference Optimization},
  journal      = {CoRR},
  volume       = {abs/2503.11720},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2503.11720},
  doi          = {10.48550/ARXIV.2503.11720},
  eprinttype   = {arXiv},
  eprint       = {2503.11720},
  timestamp    = {Sun, 13 Apr 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2503-11720.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NOTE(review): "Derry Wijaya" is listed as "Derry Tanti Wijaya" in other entries of
% this file (e.g. DBLP:journals/corr/abs-2506-01789) -- presumably the same person;
% confirm and harmonise.
@article{DBLP:journals/corr/abs-2505-13388,
  author       = {David Anugraha and
                  Zilu Tang and
                  Lester James V. Miranda and
                  Hanyang Zhao and
                  Mohammad Rifqi Farhansyah and
                  Garry Kuwanto and
                  Derry Wijaya and
                  Genta Indra Winata},
  title        = {{R3:} Robust Rubric-Agnostic Reward Models},
  journal      = {CoRR},
  volume       = {abs/2505.13388},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2505.13388},
  doi          = {10.48550/ARXIV.2505.13388},
  eprinttype   = {arXiv},
  eprint       = {2505.13388},
  timestamp    = {Wed, 25 Jun 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2505-13388.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint. "DataRubrics" is brace-protected so sentence-casing styles keep
% its internal capitals.
@article{DBLP:journals/corr/abs-2506-01789,
  author       = {Genta Indra Winata and
                  David Anugraha and
                  Emmy Liu and
                  Alham Fikri Aji and
                  Shou{-}Yi Hung and
                  Aditya Parashar and
                  Patrick Amadeus Irawan and
                  Ruochen Zhang and
                  Zheng{-}Xin Yong and
                  Jan Christian Blaise Cruz and
                  Niklas Muennighoff and
                  Seungone Kim and
                  Hanyang Zhao and
                  Sudipta Kar and
                  Kezia Erina Suryoraharjo and
                  Muhammad Farid Adilazuarda and
                  En{-}Shiun Annie Lee and
                  Ayu Purwarianti and
                  Derry Tanti Wijaya and
                  Monojit Choudhury},
  title        = {Datasheets Aren't Enough: {DataRubrics} for Automated Quality Metrics
                  and Accountability},
  journal      = {CoRR},
  volume       = {abs/2506.01789},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2506.01789},
  doi          = {10.48550/ARXIV.2506.01789},
  eprinttype   = {arXiv},
  eprint       = {2506.01789},
  timestamp    = {Fri, 26 Sep 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2506-01789.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint. "DiFFPO" and "LLMs" are brace-protected so sentence-casing styles
% do not lowercase them.
@article{DBLP:journals/corr/abs-2510-02212,
  author       = {Hanyang Zhao and
                  Dawen Liang and
                  Wenpin Tang and
                  David D. Yao and
                  Nathan Kallus},
  title        = {{DiFFPO}: Training Diffusion {LLMs} to Reason Fast and Furious via Reinforcement
                  Learning},
  journal      = {CoRR},
  volume       = {abs/2510.02212},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2510.02212},
  doi          = {10.48550/ARXIV.2510.02212},
  eprinttype   = {arXiv},
  eprint       = {2510.02212},
  timestamp    = {Sun, 09 Nov 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2510-02212.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2510-10767,
  author       = {Jiayuan Sheng and
                  Hanyang Zhao and
                  Haoxian Chen and
                  David D. Yao and
                  Wenpin Tang},
  title        = {Understanding Sampler Stochasticity in Training Diffusion Models for
                  {RLHF}},
  journal      = {CoRR},
  volume       = {abs/2510.10767},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2510.10767},
  doi          = {10.48550/ARXIV.2510.10767},
  eprinttype   = {arXiv},
  eprint       = {2510.10767},
  timestamp    = {Wed, 12 Nov 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2510-10767.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2511-00685,
  author       = {Haoting Zhang and
                  Haoxian Chen and
                  Donglin Zhan and
                  Hanyang Zhao and
                  Henry Lam and
                  Wenpin Tang and
                  David D. Yao and
                  Zeyu Zheng},
  title        = {{SOCRATES:} Simulation Optimization with Correlated Replicas and Adaptive
                  Trajectory Evaluations},
  journal      = {CoRR},
  volume       = {abs/2511.00685},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2511.00685},
  doi          = {10.48550/ARXIV.2511.00685},
  eprinttype   = {arXiv},
  eprint       = {2511.00685},
  timestamp    = {Sat, 03 Jan 2026 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2511-00685.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2401-13115,
  author       = {Wenpin Tang and
                  Hanyang Zhao},
  title        = {Contractive Diffusion Probabilistic Models},
  journal      = {CoRR},
  volume       = {abs/2401.13115},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2401.13115},
  doi          = {10.48550/ARXIV.2401.13115},
  eprinttype   = {arXiv},
  eprint       = {2401.13115},
  timestamp    = {Mon, 05 Feb 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2401-13115.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2402-07487,
  author       = {Wenpin Tang and
                  Hanyang Zhao},
  title        = {Score-based Diffusion Models via Stochastic Differential Equations
                  - a Technical Tutorial},
  journal      = {CoRR},
  volume       = {abs/2402.07487},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2402.07487},
  doi          = {10.48550/ARXIV.2402.07487},
  eprinttype   = {arXiv},
  eprint       = {2402.07487},
  timestamp    = {Fri, 16 Feb 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2402-07487.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint. The ICLR 2025 record DBLP:conf/iclr/0002ZLYT25 lists the same
% authors under the title "MallowsPO: ..." and is presumably the published version.
@article{DBLP:journals/corr/abs-2405-14953,
  author       = {Haoxian Chen and
                  Hanyang Zhao and
                  Henry Lam and
                  David D. Yao and
                  Wenpin Tang},
  title        = {{Mallows-DPO}: Fine-Tune Your {LLM} with Preference Dispersions},
  journal      = {CoRR},
  volume       = {abs/2405.14953},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2405.14953},
  doi          = {10.48550/ARXIV.2405.14953},
  eprinttype   = {arXiv},
  eprint       = {2405.14953},
  timestamp    = {Wed, 19 Jun 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2405-14953.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
@article{DBLP:journals/corr/abs-2409-08400,
  author       = {Hanyang Zhao and
                  Haoxian Chen and
                  Ji Zhang and
                  David D. Yao and
                  Wenpin Tang},
  title        = {Scores as Actions: a framework of fine-tuning diffusion models by
                  continuous-time reinforcement learning},
  journal      = {CoRR},
  volume       = {abs/2409.08400},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2409.08400},
  doi          = {10.48550/ARXIV.2409.08400},
  eprinttype   = {arXiv},
  eprint       = {2409.08400},
  timestamp    = {Mon, 14 Oct 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2409-08400.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint; the journal record with the same title and authors is
% DBLP:journals/jair/WinataZDTYZS25.
@article{DBLP:journals/corr/abs-2409-11564,
  author       = {Genta Indra Winata and
                  Hanyang Zhao and
                  Anirban Das and
                  Wenpin Tang and
                  David D. Yao and
                  Shi{-}Xiong Zhang and
                  Sambit Sahu},
  title        = {Preference Tuning with Human Feedback on Language, Speech, and Vision
                  Tasks: {A} Survey},
  journal      = {CoRR},
  volume       = {abs/2409.11564},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2409.11564},
  doi          = {10.48550/ARXIV.2409.11564},
  eprinttype   = {arXiv},
  eprint       = {2409.11564},
  timestamp    = {Mon, 21 Oct 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2409-11564.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint; the ICLR 2025 record with the same title and authors is
% DBLP:conf/iclr/ZhaoWDZYTS25.
@article{DBLP:journals/corr/abs-2410-04203,
  author       = {Hanyang Zhao and
                  Genta Indra Winata and
                  Anirban Das and
                  Shi{-}Xiong Zhang and
                  David D. Yao and
                  Wenpin Tang and
                  Sambit Sahu},
  title        = {{RainbowPO}: {A} Unified Framework for Combining Improvements in Preference
                  Optimization},
  journal      = {CoRR},
  volume       = {abs/2410.04203},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2410.04203},
  doi          = {10.48550/ARXIV.2410.04203},
  eprinttype   = {arXiv},
  eprint       = {2410.04203},
  timestamp    = {Tue, 12 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2410-04203.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint; the NAACL 2025 record is DBLP:conf/naacl/WinataHIAPWNPOARDPAWMLA25.
% NOTE(review): two author names differ from the NAACL record -- "Ching Lam Cheng"
% here vs. "Cheng Ching Lam" there (different surname after BibTeX name parsing), and
% "Natasha Santosa" here vs. "Natasha Christabelle Santosa" there. Confirm against
% the paper and harmonise both records.
@article{DBLP:journals/corr/abs-2410-12705,
  author       = {Genta Indra Winata and
                  Frederikus Hudi and
                  Patrick Amadeus Irawan and
                  David Anugraha and
                  Rifki Afina Putri and
                  Yutong Wang and
                  Adam Nohejl and
                  Ubaidillah Ariq Prathama and
                  Nedjma Ousidhoum and
                  Afifa Amriani and
                  Anar Rzayev and
                  Anirban Das and
                  Ashmari Pramodya and
                  Aulia Adila and
                  Bryan Wilie and
                  Candy Olivia Mawalim and
                  Ching Lam Cheng and
                  Daud Abolade and
                  Emmanuele Chersoni and
                  Enrico Santus and
                  Fariz Ikhwantri and
                  Garry Kuwanto and
                  Hanyang Zhao and
                  Haryo Akbarianto Wibowo and
                  Holy Lovenia and
                  Jan Christian Blaise Cruz and
                  Jan Wira Gotama Putra and
                  Junho Myung and
                  Lucky Susanto and
                  Maria Angelica Riera Machin and
                  Marina Zhukova and
                  Michael Anugraha and
                  Muhammad Farid Adilazuarda and
                  Natasha Santosa and
                  Peerat Limkonchotiwat and
                  Raj Dabre and
                  Rio Alexander Audino and
                  Samuel Cahyawijaya and
                  Shi{-}Xiong Zhang and
                  Stephanie Yulia Salim and
                  Yi Zhou and
                  Yinxuan Gui and
                  David Ifeoluwa Adelani and
                  En{-}Shiun Annie Lee and
                  Shogo Okada and
                  Ayu Purwarianti and
                  Alham Fikri Aji and
                  Taro Watanabe and
                  Derry Tanti Wijaya and
                  Alice Oh and
                  Chong{-}Wah Ngo},
  title        = {{WorldCuisines}: {A} Massive-Scale Benchmark for Multilingual and Multicultural
                  Visual Question Answering on Global Cuisines},
  journal      = {CoRR},
  volume       = {abs/2410.12705},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2410.12705},
  doi          = {10.48550/ARXIV.2410.12705},
  eprinttype   = {arXiv},
  eprint       = {2410.12705},
  timestamp    = {Sat, 31 May 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2410-12705.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2023 paper; the preprint record with the same title and authors is
% DBLP:journals/corr/abs-2305-18901.
% The url is stored raw (no LaTeX escaping of "_") so that url-aware styles, which
% typeset the field verbatim, produce a working link.
@inproceedings{DBLP:conf/nips/ZhaoTY23,
  author       = {Hanyang Zhao and
                  Wenpin Tang and
                  David D. Yao},
  editor       = {Alice Oh and
                  Tristan Naumann and
                  Amir Globerson and
                  Kate Saenko and
                  Moritz Hardt and
                  Sergey Levine},
  title        = {Policy Optimization for Continuous Reinforcement Learning},
  booktitle    = {Advances in Neural Information Processing Systems 36: Annual Conference
                  on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans,
                  LA, USA, December 10 - 16, 2023},
  year         = {2023},
  url          = {http://papers.nips.cc/paper_files/paper/2023/hash/2c53bc01e30711a08f6ac86919193022-Abstract-Conference.html},
  timestamp    = {Fri, 01 Mar 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/ZhaoTY23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint; the NeurIPS 2023 record with the same title and authors is
% DBLP:conf/nips/ZhaoTY23.
@article{DBLP:journals/corr/abs-2305-18901,
  author       = {Hanyang Zhao and
                  Wenpin Tang and
                  David D. Yao},
  title        = {Policy Optimization for Continuous Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/2305.18901},
  year         = {2023},
  url          = {https://doi.org/10.48550/arXiv.2305.18901},
  doi          = {10.48550/ARXIV.2305.18901},
  eprinttype   = {arXiv},
  eprint       = {2305.18901},
  timestamp    = {Wed, 07 Jun 2023 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2305-18901.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}

manage site settings
To protect your privacy, all features that rely on external API calls from your browser are turned off by default. You need to opt-in for them to become active. All settings here will be stored as cookies with your web browser. For more information see our F.A.Q.


Google
Google Scholar
Semantic Scholar
Internet Archive Scholar
CiteSeerX
ORCID













