% Scopus export for a NUSYS 2020 conference paper published in Lecture Notes in
% Electrical Engineering, vol. 770. The DOI suffix "_64" matches the "%5f64" in the url field.
@article{scholars17841,
  author    = {Tran, D. C. and Nguyen, D. L. and Ha, H. S. and Hassan, M. F.},
  title     = {Speech Recognizing Comparisons Between {Web Speech API} and {FPT.AI} {API}},
  journal   = {Lecture Notes in Electrical Engineering},
  volume    = {770},
  pages     = {853--865},
  year      = {2022},
  publisher = {Springer Science and Business Media Deutschland GmbH},
  doi       = {10.1007/978-981-16-2406-3_64},
  issn      = {1876-1100},
  isbn      = {9789811624056},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85116480522&doi=10.1007\%2f978-981-16-2406-3\%5f64&partnerID=40&md5=a1ffad457cfbb1bb01be650287375c0d},
  note      = {cited By 0; Conference of 12th National Technical Seminar on Unmanned System Technology, NUSYS 2020 ; Conference Date: 24 November 2020 Through 25 November 2020; Conference Code:266059},
  keywords  = {Application programming interfaces (API); Character recognition; Machine learning; Speech; Statistical tests, Applications programming interfaces; Audio files; Daily lives; Foreign language; FPT.; Processing time; Speech-to-text; Vietnamese; Vietnamese speech; Web speech, Speech recognition},
  abstract  = {Nowadays, people use speech recognition services for many purposes in their daily lives, such as learning foreign languages, communicating, etc. Therefore, they need to decide which ones to use. High accuracy and short processing time speech recognition service will help improve the work effectively as the time to re-check output results and the delay time between recognition tasks. For Vietnamese speech recognition, Web Speech API and FPT.AI API are popular. Web Speech API supports multiple languages, while FPT.AI API focuses on Vietnamese as FPT.AI's products are developed exclusively for the Vietnamese market. In order to assist people in choosing a suitable Vietnamese speech recognition service, in this paper, the speech recognizing accuracy and processing time between Web Speech API and FPT.AI API has been compared.
307 audio files containing Vietnamese speeches which are obtained from FPT Open Speech Dataset were chosen to test the accuracy and the processing time of both APIs. For the accuracy test, FPT.AI API was 0.57 more precise than Web Speech API. However, in the processing time test, Web Speech API was 50.99 faster than FPT.AI API. For Web Speech API, it was mostly accurate to process 12--14-second-long audio files, while FPT.AI API did best when process 2--4-second-long audio files. The audio files with duration values between 2 and 8~seconds are optimal for both APIs to proceed with STT conversions. {\copyright} 2022, The Author(s), under exclusive license to Springer Nature Singapore Pte Ltd.},
}