If you use the HAL Reliability Evaluation in your research, please cite:
% arXiv preprint: the identifier is stored in eprint/archiveprefix (not in a journal name) so the bibliography style decides how to print it.
@misc{rabanser2026towards,
  author        = {Rabanser, Stephan and Kapoor, Sayash and Kirgis, Peter and Liu, Kangheng and Utpala, Saiteja and Narayanan, Arvind},
  title         = {Towards a Science of {AI} Agent Reliability},
  year          = {2026},
  eprint        = {2602.16666},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2602.16666},
}
% Software/repository reference: the raw link lives in the url field; braces in the title protect words that must keep their capitals under sentence-casing styles.
@misc{hal,
  author = {Kapoor, Sayash and Stroebl, Benedikt and Kirgis, Peter and Nadgir, Nitya and
            Siegel, Zachary S. and Wei, Boyi and Xue, Tianci and Chen, Ziru and
            Chen, Felix and Utpala, Saiteja and Ndzomga, Franck and Oruganty, Dheeraj and
            Luskin, Sophie and Liu, Kangheng and Yu, Botao and Arora, Amit and
            Hahm, Dongyoon and Trivedi, Harsh and Sun, Huan and Lee, Juyong and
            Jin, Tengjun and Mai, Yifan and Zhou, Yifei and Zhu, Yuxuan and
            Bommasani, Rishi and Kang, Daniel and Song, Dawn and Henderson, Peter and
            Su, Yu and Liang, Percy and Narayanan, Arvind},
  title  = {{Holistic Agent Leaderboard}: The Missing Infrastructure for {AI} Agent Evaluation},
  year   = {2025},
  url    = {https://github.com/princeton-pli/hal-harness},
}