BibTeX records: Hanyang Zhao

download as .bib file

% Journal record of the preference-tuning survey. The CoRR record
% DBLP:journals/corr/abs-2409-11564 in this file has the same title and authors.
% The journal name is stored in full so the bibliography style (or a string
% macro) decides whether and how to abbreviate it.
@article{DBLP:journals/jair/WinataZDTYZS25,
  author       = {Genta Indra Winata and
                  Hanyang Zhao and
                  Anirban Das and
                  Wenpin Tang and
                  David D. Yao and
                  Shi{-}Xiong Zhang and
                  Sambit Sahu},
  title        = {Preference Tuning with Human Feedback on Language, Speech, and Vision
                  Tasks: {A} Survey},
  journal      = {Journal of Artificial Intelligence Research},
  volume       = {82},
  pages        = {2595--2661},
  year         = {2025},
  url          = {https://doi.org/10.1613/jair.1.17541},
  doi          = {10.1613/JAIR.1.17541},
  timestamp    = {Wed, 14 May 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/jair/WinataZDTYZS25.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICLR 2025 conference record. The CoRR record DBLP:journals/corr/abs-2405-14953
% has the same authors and subtitle under the name "Mallows-DPO" -- presumably
% the preprint of this paper; confirm before citing both.
% The method name is brace-protected so sentence-casing styles keep its capitals.
@inproceedings{DBLP:conf/iclr/0002ZLYT25,
  author       = {Haoxian Chen and
                  Hanyang Zhao and
                  Henry Lam and
                  David D. Yao and
                  Wenpin Tang},
  title        = {{MallowsPO}: Fine-Tune Your {LLM} with Preference Dispersions},
  booktitle    = {The Thirteenth International Conference on Learning Representations,
                  {ICLR} 2025, Singapore, April 24-28, 2025},
  publisher    = {OpenReview.net},
  year         = {2025},
  url          = {https://openreview.net/forum?id=d8cnezVcaW},
  timestamp    = {Thu, 15 May 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/0002ZLYT25.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICLR 2025 conference record. The CoRR record DBLP:journals/corr/abs-2410-04203
% in this file has the same title and authors.
% The method name is brace-protected so sentence-casing styles keep its capitals.
@inproceedings{DBLP:conf/iclr/ZhaoWDZYTS25,
  author       = {Hanyang Zhao and
                  Genta Indra Winata and
                  Anirban Das and
                  Shi{-}Xiong Zhang and
                  David D. Yao and
                  Wenpin Tang and
                  Sambit Sahu},
  title        = {{RainbowPO}: {A} Unified Framework for Combining Improvements in Preference
                  Optimization},
  booktitle    = {The Thirteenth International Conference on Learning Representations,
                  {ICLR} 2025, Singapore, April 24-28, 2025},
  publisher    = {OpenReview.net},
  year         = {2025},
  url          = {https://openreview.net/forum?id=trKee5pIFv},
  timestamp    = {Thu, 15 May 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/conf/iclr/ZhaoWDZYTS25.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% ICML 2025 conference record. The CoRR record DBLP:journals/corr/abs-2502-01819
% has the same authors and the same title apart from the hyphen in "Fine-Tuning".
% The series volume (267) is taken from the "v267" path segment of the PMLR URL,
% so that the series field is not left without its volume number.
@inproceedings{DBLP:conf/icml/Zhao0ZYT25,
  author       = {Hanyang Zhao and
                  Haoxian Chen and
                  Ji Zhang and
                  David D. Yao and
                  Wenpin Tang},
  editor       = {Aarti Singh and
                  Maryam Fazel and
                  Daniel Hsu and
                  Simon Lacoste{-}Julien and
                  Felix Berkenkamp and
                  Tegan Maharaj and
                  Kiri Wagstaff and
                  Jerry Zhu},
  title        = {Score as Action: Fine Tuning Diffusion Generative Models by Continuous-time
                  Reinforcement Learning},
  booktitle    = {Forty-second International Conference on Machine Learning, {ICML}
                  2025, Vancouver, BC, Canada, July 13-19, 2025},
  series       = {Proceedings of Machine Learning Research},
  volume       = {267},
  publisher    = {{PMLR} / OpenReview.net},
  year         = {2025},
  url          = {https://proceedings.mlr.press/v267/zhao25f.html},
  timestamp    = {Wed, 04 Feb 2026 16:54:16 +0100},
  biburl       = {https://dblp.org/rec/conf/icml/Zhao0ZYT25.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NAACL 2025 (long papers) record. The CoRR record
% DBLP:journals/corr/abs-2410-12705 in this file has the same title.
% The benchmark name is brace-protected so sentence-casing styles keep its capitals.
% NOTE(review): two author names differ from the CoRR record of the same title:
%   "Cheng Ching Lam" here vs "Ching Lam Cheng" there -- BibTeX takes the last
%   token as the family name, so the two records sort/label this author
%   differently; confirm the family name and switch to "Last, First" form.
%   "Natasha Christabelle Santosa" here vs "Natasha Santosa" there.
@inproceedings{DBLP:conf/naacl/WinataHIAPWNPOARDPAWMLA25,
  author       = {Genta Indra Winata and
                  Frederikus Hudi and
                  Patrick Amadeus Irawan and
                  David Anugraha and
                  Rifki Afina Putri and
                  Yutong Wang and
                  Adam Nohejl and
                  Ubaidillah Ariq Prathama and
                  Nedjma Ousidhoum and
                  Afifa Amriani and
                  Anar Rzayev and
                  Anirban Das and
                  Ashmari Pramodya and
                  Aulia Adila and
                  Bryan Wilie and
                  Candy Olivia Mawalim and
                  Cheng Ching Lam and
                  Daud Abolade and
                  Emmanuele Chersoni and
                  Enrico Santus and
                  Fariz Ikhwantri and
                  Garry Kuwanto and
                  Hanyang Zhao and
                  Haryo Akbarianto Wibowo and
                  Holy Lovenia and
                  Jan Christian Blaise Cruz and
                  Jan Wira Gotama Putra and
                  Junho Myung and
                  Lucky Susanto and
                  Maria Angelica Riera Machin and
                  Marina Zhukova and
                  Michael Anugraha and
                  Muhammad Farid Adilazuarda and
                  Natasha Christabelle Santosa and
                  Peerat Limkonchotiwat and
                  Raj Dabre and
                  Rio Alexander Audino and
                  Samuel Cahyawijaya and
                  Shi{-}Xiong Zhang and
                  Stephanie Yulia Salim and
                  Yi Zhou and
                  Yinxuan Gui and
                  David Ifeoluwa Adelani and
                  En{-}Shiun Annie Lee and
                  Shogo Okada and
                  Ayu Purwarianti and
                  Alham Fikri Aji and
                  Taro Watanabe and
                  Derry Tanti Wijaya and
                  Alice Oh and
                  Chong{-}Wah Ngo},
  editor       = {Luis Chiruzzo and
                  Alan Ritter and
                  Lu Wang},
  title        = {{WorldCuisines}: {A} Massive-Scale Benchmark for Multilingual and Multicultural
                  Visual Question Answering on Global Cuisines},
  booktitle    = {Proceedings of the 2025 Conference of the Nations of the Americas
                  Chapter of the Association for Computational Linguistics: Human Language
                  Technologies, {NAACL} 2025 - Volume 1: Long Papers, Albuquerque, New
                  Mexico, USA, April 29 - May 4, 2025},
  pages        = {3242--3264},
  publisher    = {Association for Computational Linguistics},
  year         = {2025},
  url          = {https://doi.org/10.18653/v1/2025.naacl-long.167},
  doi          = {10.18653/V1/2025.NAACL-LONG.167},
  timestamp    = {Tue, 24 Mar 2026 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/naacl/WinataHIAPWNPOARDPAWMLA25.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record 2502.01819. The ICML record DBLP:conf/icml/Zhao0ZYT25 in
% this file has the same authors and the same title apart from the hyphen in
% "Fine-Tuning".
@article{DBLP:journals/corr/abs-2502-01819,
  author     = {Hanyang Zhao and Haoxian Chen and Ji Zhang and David D. Yao and
                Wenpin Tang},
  title      = {Score as Action: Fine-Tuning Diffusion Generative Models by
                Continuous-time Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2502.01819},
  year       = {2025},
  eprinttype = {arXiv},
  eprint     = {2502.01819},
  doi        = {10.48550/ARXIV.2502.01819},
  url        = {https://doi.org/10.48550/arXiv.2502.01819},
  timestamp  = {Mon, 10 Mar 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2502-01819.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv (CoRR) record 2503.11720; no other record in this file shares its title.
@article{DBLP:journals/corr/abs-2503-11720,
  author     = {Hanyang Zhao and Haoxian Chen and Yucheng Guo and
                Genta Indra Winata and Tingting Ou and Ziyu Huang and
                David D. Yao and Wenpin Tang},
  title      = {Fine-Tuning Diffusion Generative Models via Rich Preference Optimization},
  journal    = {CoRR},
  volume     = {abs/2503.11720},
  year       = {2025},
  eprinttype = {arXiv},
  eprint     = {2503.11720},
  doi        = {10.48550/ARXIV.2503.11720},
  url        = {https://doi.org/10.48550/arXiv.2503.11720},
  timestamp  = {Sun, 13 Apr 2025 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2503-11720.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv (CoRR) record 2505.13388.
% Only the name "R3" is brace-protected; the colon is kept outside the braces so
% BibTeX's case conversion still sees it and keeps the capital on the first word
% of the subtitle.
@article{DBLP:journals/corr/abs-2505-13388,
  author       = {David Anugraha and
                  Zilu Tang and
                  Lester James V. Miranda and
                  Hanyang Zhao and
                  Mohammad Rifqi Farhansyah and
                  Garry Kuwanto and
                  Derry Wijaya and
                  Genta Indra Winata},
  title        = {{R3}: Robust Rubric-Agnostic Reward Models},
  journal      = {CoRR},
  volume       = {abs/2505.13388},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2505.13388},
  doi          = {10.48550/ARXIV.2505.13388},
  eprinttype   = {arXiv},
  eprint       = {2505.13388},
  timestamp    = {Wed, 25 Jun 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2505-13388.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record 2506.01789.
% The camelCase name "DataRubrics" is brace-protected so sentence-casing styles
% keep its capitals.
@article{DBLP:journals/corr/abs-2506-01789,
  author       = {Genta Indra Winata and
                  David Anugraha and
                  Emmy Liu and
                  Alham Fikri Aji and
                  Shou{-}Yi Hung and
                  Aditya Parashar and
                  Patrick Amadeus Irawan and
                  Ruochen Zhang and
                  Zheng{-}Xin Yong and
                  Jan Christian Blaise Cruz and
                  Niklas Muennighoff and
                  Seungone Kim and
                  Hanyang Zhao and
                  Sudipta Kar and
                  Kezia Erina Suryoraharjo and
                  Muhammad Farid Adilazuarda and
                  En{-}Shiun Annie Lee and
                  Ayu Purwarianti and
                  Derry Tanti Wijaya and
                  Monojit Choudhury},
  title        = {Datasheets Aren't Enough: {DataRubrics} for Automated Quality Metrics
                  and Accountability},
  journal      = {CoRR},
  volume       = {abs/2506.01789},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2506.01789},
  doi          = {10.48550/ARXIV.2506.01789},
  eprinttype   = {arXiv},
  eprint       = {2506.01789},
  timestamp    = {Fri, 26 Sep 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2506-01789.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record 2510.02212.
% The mixed-case name "DiFFPO" and the acronym "LLMs" are brace-protected so
% sentence-casing styles keep their capitals.
@article{DBLP:journals/corr/abs-2510-02212,
  author       = {Hanyang Zhao and
                  Dawen Liang and
                  Wenpin Tang and
                  David D. Yao and
                  Nathan Kallus},
  title        = {{DiFFPO}: Training Diffusion {LLMs} to Reason Fast and Furious via Reinforcement
                  Learning},
  journal      = {CoRR},
  volume       = {abs/2510.02212},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2510.02212},
  doi          = {10.48550/ARXIV.2510.02212},
  eprinttype   = {arXiv},
  eprint       = {2510.02212},
  timestamp    = {Sun, 09 Nov 2025 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2510-02212.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record 2510.10767; no other record in this file shares its title.
@article{DBLP:journals/corr/abs-2510-10767,
  author     = {Jiayuan Sheng and Hanyang Zhao and Haoxian Chen and David D. Yao and
                Wenpin Tang},
  title      = {Understanding Sampler Stochasticity in Training Diffusion Models
                for {RLHF}},
  journal    = {CoRR},
  volume     = {abs/2510.10767},
  year       = {2025},
  eprinttype = {arXiv},
  eprint     = {2510.10767},
  doi        = {10.48550/ARXIV.2510.10767},
  url        = {https://doi.org/10.48550/arXiv.2510.10767},
  timestamp  = {Wed, 12 Nov 2025 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2510-10767.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv (CoRR) record 2511.00685.
% Only the acronym "SOCRATES" is brace-protected; the colon is kept outside the
% braces so BibTeX's case conversion still sees it and keeps the capital on the
% first word of the subtitle.
@article{DBLP:journals/corr/abs-2511-00685,
  author       = {Haoting Zhang and
                  Haoxian Chen and
                  Donglin Zhan and
                  Hanyang Zhao and
                  Henry Lam and
                  Wenpin Tang and
                  David D. Yao and
                  Zeyu Zheng},
  title        = {{SOCRATES}: Simulation Optimization with Correlated Replicas and Adaptive
                  Trajectory Evaluations},
  journal      = {CoRR},
  volume       = {abs/2511.00685},
  year         = {2025},
  url          = {https://doi.org/10.48550/arXiv.2511.00685},
  doi          = {10.48550/ARXIV.2511.00685},
  eprinttype   = {arXiv},
  eprint       = {2511.00685},
  timestamp    = {Sat, 03 Jan 2026 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2511-00685.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record 2401.13115; no other record in this file shares its title.
@article{DBLP:journals/corr/abs-2401-13115,
  author     = {Wenpin Tang and Hanyang Zhao},
  title      = {Contractive Diffusion Probabilistic Models},
  journal    = {CoRR},
  volume     = {abs/2401.13115},
  year       = {2024},
  eprinttype = {arXiv},
  eprint     = {2401.13115},
  doi        = {10.48550/ARXIV.2401.13115},
  url        = {https://doi.org/10.48550/arXiv.2401.13115},
  timestamp  = {Mon, 05 Feb 2024 00:00:00 +0100},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2401-13115.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv (CoRR) record 2402.07487.
% The separator before the subtitle is written as "--" (en dash in LaTeX)
% instead of a single spaced hyphen, which LaTeX would typeset as a hyphen.
@article{DBLP:journals/corr/abs-2402-07487,
  author       = {Wenpin Tang and
                  Hanyang Zhao},
  title        = {Score-based Diffusion Models via Stochastic Differential Equations
                  -- a Technical Tutorial},
  journal      = {CoRR},
  volume       = {abs/2402.07487},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2402.07487},
  doi          = {10.48550/ARXIV.2402.07487},
  eprinttype   = {arXiv},
  eprint       = {2402.07487},
  timestamp    = {Fri, 16 Feb 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2402-07487.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record 2405.14953. The ICLR record DBLP:conf/iclr/0002ZLYT25 has
% the same authors and subtitle under the name "MallowsPO" -- presumably the
% published version of this preprint; confirm before citing both.
% The method name is brace-protected so sentence-casing styles keep its capitals.
@article{DBLP:journals/corr/abs-2405-14953,
  author       = {Haoxian Chen and
                  Hanyang Zhao and
                  Henry Lam and
                  David D. Yao and
                  Wenpin Tang},
  title        = {{Mallows-DPO}: Fine-Tune Your {LLM} with Preference Dispersions},
  journal      = {CoRR},
  volume       = {abs/2405.14953},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2405.14953},
  doi          = {10.48550/ARXIV.2405.14953},
  eprinttype   = {arXiv},
  eprint       = {2405.14953},
  timestamp    = {Wed, 19 Jun 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2405-14953.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record 2409.08400. It has the same authors as the records
% DBLP:journals/corr/abs-2502-01819 and DBLP:conf/icml/Zhao0ZYT25 ("Score as
% Action: ...") -- presumably an earlier version of that work; confirm.
% The title is stored in Title Case like the other records in this file, so
% that styles which do not recase titles render it consistently; the word after
% the colon is brace-protected following the convention used elsewhere here.
@article{DBLP:journals/corr/abs-2409-08400,
  author       = {Hanyang Zhao and
                  Haoxian Chen and
                  Ji Zhang and
                  David D. Yao and
                  Wenpin Tang},
  title        = {Scores as Actions: {A} Framework of Fine-Tuning Diffusion Models by
                  Continuous-time Reinforcement Learning},
  journal      = {CoRR},
  volume       = {abs/2409.08400},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2409.08400},
  doi          = {10.48550/ARXIV.2409.08400},
  eprinttype   = {arXiv},
  eprint       = {2409.08400},
  timestamp    = {Mon, 14 Oct 2024 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2409-08400.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record 2409.11564. The journal record
% DBLP:journals/jair/WinataZDTYZS25 in this file has the same title and authors.
@article{DBLP:journals/corr/abs-2409-11564,
  author     = {Genta Indra Winata and Hanyang Zhao and Anirban Das and
                Wenpin Tang and David D. Yao and Shi{-}Xiong Zhang and
                Sambit Sahu},
  title      = {Preference Tuning with Human Feedback on Language, Speech, and
                Vision Tasks: {A} Survey},
  journal    = {CoRR},
  volume     = {abs/2409.11564},
  year       = {2024},
  eprinttype = {arXiv},
  eprint     = {2409.11564},
  doi        = {10.48550/ARXIV.2409.11564},
  url        = {https://doi.org/10.48550/arXiv.2409.11564},
  timestamp  = {Mon, 21 Oct 2024 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2409-11564.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}
% arXiv (CoRR) record 2410.04203. The ICLR record DBLP:conf/iclr/ZhaoWDZYTS25 in
% this file has the same title and authors.
% The method name is brace-protected so sentence-casing styles keep its capitals.
@article{DBLP:journals/corr/abs-2410-04203,
  author       = {Hanyang Zhao and
                  Genta Indra Winata and
                  Anirban Das and
                  Shi{-}Xiong Zhang and
                  David D. Yao and
                  Wenpin Tang and
                  Sambit Sahu},
  title        = {{RainbowPO}: {A} Unified Framework for Combining Improvements in Preference
                  Optimization},
  journal      = {CoRR},
  volume       = {abs/2410.04203},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2410.04203},
  doi          = {10.48550/ARXIV.2410.04203},
  eprinttype   = {arXiv},
  eprint       = {2410.04203},
  timestamp    = {Tue, 12 Nov 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2410-04203.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record 2410.12705. The NAACL record
% DBLP:conf/naacl/WinataHIAPWNPOARDPAWMLA25 in this file has the same title.
% The benchmark name is brace-protected so sentence-casing styles keep its capitals.
% NOTE(review): two author names differ from the NAACL record of the same title:
%   "Ching Lam Cheng" here vs "Cheng Ching Lam" there -- BibTeX takes the last
%   token as the family name, so the two records sort/label this author
%   differently; confirm the family name.
%   "Natasha Santosa" here vs "Natasha Christabelle Santosa" there.
@article{DBLP:journals/corr/abs-2410-12705,
  author       = {Genta Indra Winata and
                  Frederikus Hudi and
                  Patrick Amadeus Irawan and
                  David Anugraha and
                  Rifki Afina Putri and
                  Yutong Wang and
                  Adam Nohejl and
                  Ubaidillah Ariq Prathama and
                  Nedjma Ousidhoum and
                  Afifa Amriani and
                  Anar Rzayev and
                  Anirban Das and
                  Ashmari Pramodya and
                  Aulia Adila and
                  Bryan Wilie and
                  Candy Olivia Mawalim and
                  Ching Lam Cheng and
                  Daud Abolade and
                  Emmanuele Chersoni and
                  Enrico Santus and
                  Fariz Ikhwantri and
                  Garry Kuwanto and
                  Hanyang Zhao and
                  Haryo Akbarianto Wibowo and
                  Holy Lovenia and
                  Jan Christian Blaise Cruz and
                  Jan Wira Gotama Putra and
                  Junho Myung and
                  Lucky Susanto and
                  Maria Angelica Riera Machin and
                  Marina Zhukova and
                  Michael Anugraha and
                  Muhammad Farid Adilazuarda and
                  Natasha Santosa and
                  Peerat Limkonchotiwat and
                  Raj Dabre and
                  Rio Alexander Audino and
                  Samuel Cahyawijaya and
                  Shi{-}Xiong Zhang and
                  Stephanie Yulia Salim and
                  Yi Zhou and
                  Yinxuan Gui and
                  David Ifeoluwa Adelani and
                  En{-}Shiun Annie Lee and
                  Shogo Okada and
                  Ayu Purwarianti and
                  Alham Fikri Aji and
                  Taro Watanabe and
                  Derry Tanti Wijaya and
                  Alice Oh and
                  Chong{-}Wah Ngo},
  title        = {{WorldCuisines}: {A} Massive-Scale Benchmark for Multilingual and Multicultural
                  Visual Question Answering on Global Cuisines},
  journal      = {CoRR},
  volume       = {abs/2410.12705},
  year         = {2024},
  url          = {https://doi.org/10.48550/arXiv.2410.12705},
  doi          = {10.48550/ARXIV.2410.12705},
  eprinttype   = {arXiv},
  eprint       = {2410.12705},
  timestamp    = {Sat, 31 May 2025 01:00:00 +0200},
  biburl       = {https://dblp.org/rec/journals/corr/abs-2410-12705.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% NeurIPS 2023 conference record. The CoRR record
% DBLP:journals/corr/abs-2305-18901 in this file has the same title and authors.
% The URL is stored raw (plain underscore, no LaTeX escape): url-aware styles
% and biblatex treat the field verbatim, so an escaped underscore would put a
% stray backslash into the link. If a style prints the url field without
% wrapping it in \url, load the url or hyperref package rather than re-escaping.
% The venue acronym is brace-protected like the acronyms in the other booktitles.
@inproceedings{DBLP:conf/nips/ZhaoTY23,
  author       = {Hanyang Zhao and
                  Wenpin Tang and
                  David D. Yao},
  editor       = {Alice Oh and
                  Tristan Naumann and
                  Amir Globerson and
                  Kate Saenko and
                  Moritz Hardt and
                  Sergey Levine},
  title        = {Policy Optimization for Continuous Reinforcement Learning},
  booktitle    = {Advances in Neural Information Processing Systems 36: Annual Conference
                  on Neural Information Processing Systems 2023, {NeurIPS} 2023, New Orleans,
                  LA, USA, December 10 - 16, 2023},
  year         = {2023},
  url          = {http://papers.nips.cc/paper_files/paper/2023/hash/2c53bc01e30711a08f6ac86919193022-Abstract-Conference.html},
  timestamp    = {Fri, 01 Mar 2024 00:00:00 +0100},
  biburl       = {https://dblp.org/rec/conf/nips/ZhaoTY23.bib},
  bibsource    = {dblp computer science bibliography, https://dblp.org}
}
% arXiv (CoRR) record 2305.18901. The NeurIPS record DBLP:conf/nips/ZhaoTY23 in
% this file has the same title and authors.
@article{DBLP:journals/corr/abs-2305-18901,
  author     = {Hanyang Zhao and Wenpin Tang and David D. Yao},
  title      = {Policy Optimization for Continuous Reinforcement Learning},
  journal    = {CoRR},
  volume     = {abs/2305.18901},
  year       = {2023},
  eprinttype = {arXiv},
  eprint     = {2305.18901},
  doi        = {10.48550/ARXIV.2305.18901},
  url        = {https://doi.org/10.48550/arXiv.2305.18901},
  timestamp  = {Wed, 07 Jun 2023 01:00:00 +0200},
  biburl     = {https://dblp.org/rec/journals/corr/abs-2305-18901.bib},
  bibsource  = {dblp computer science bibliography, https://dblp.org},
}