% TAP (Yang et al., 2021). Braced words in the title and booktitle keep their
% capitalisation under styles that apply sentence casing.
@inproceedings{yang2021tap,
  author    = {Yang, Zhengyuan and Lu, Yijuan and Wang, Jianfeng and Yin, Xi and Florencio, Dinei and Wang, Lijuan and Zhang, Cha and Zhang, Lei and Luo, Jiebo},
  title     = {{TAP}: Text-Aware Pre-training for {Text-VQA} and {Text-Caption}},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition},
  pages     = {8751--8761},
  year      = {2021},
}