@inproceedings{scholars17439,
  year = {2022},
  journal = {2022 3rd International Conference on Artificial Intelligence and Data Sciences: Championing Innovations in Artificial Intelligence and Data Sciences for Sustainable Future, AiDAS 2022 - Proceedings},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  pages = {19--24},
  doi = {10.1109/AiDAS56890.2022.9918749},
  note = {Cited by 2; Conference of 3rd International Conference on Artificial Intelligence and Data Sciences, AiDAS 2022; Conference Date: 7 September 2022 through 8 September 2022; Conference Code: 183782},
  title = {Lexicon-based Non-Compositional Multiword Augmentation Enriching Tweet Sentiment Analysis},
  author = {Tahayna, B. and Ayyasamy, R. K. and Akbar, R. and Subri, N. F. B. and Sangodiah, A.},
  keywords = {Knowledge based systems; Augmentation; Fine tuning; Idiomatics; Knowledge-base; Lexicon-based; Multi-word; Performance; Sentiment analysis; Training sample; Twitter},
  isbn = {9781665491648},
  url = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85141771877&doi=10.1109\%2fAiDAS56890.2022.9918749&partnerID=40&md5=e2a53df9b9720718c9e29c33ebde0767},
  abstract = {One of the benefits of recognizing a slang term, an idiom, or an abbreviation in a tweet is that it helps identify the sentiment in a concise and understandable manner. However, a lack of adequate annotated 'idiomatic tweets' makes classification challenging. We propose a pliable augmentation technique to improve the classification of idiomatic tweets with tiny training samples. For classification, we evaluate the performance of a fine-tuned version of a pre-trained embedding model in different flavors. During the augmentation process, we deduce the intrinsic propositional meaning of the idiomatic expression from IBM's SLIDE (Sentiment Lexicon of IDiomatic Expressions) and another lexicon we built. The empirical results show that the proposed method is beneficial in concealing the actual intent of the tweet and advantageous in tackling the problem of overfitting caused by smaller training sets. The experiment shows that using data augmentation of the idiomatic expressions reduced the classification error rate by 16. {\copyright} 2022 IEEE.}
}