% scholars3124: PKAW 2012 workshop paper, published in LNCS volume 7457.
% Entry type and journal field are kept as delivered by the export;
% the bare DOI below matches the percent-encoded DOI embedded in the url field.
@article{scholars3124,
  author    = {Zamin, N. and Oxley, A. and Bakar, Z. A. and Farhan, S. A.},
  title     = {A lazy man's way to part-of-speech tagging},
  journal   = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  volume    = {7457},
  pages     = {106--117},
  year      = {2012},
  publisher = {Springer Verlag},
  doi       = {10.1007/978-3-642-32541-0_9},
  issn      = {03029743},
  isbn      = {9783642325403},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84893005114&doi=10.1007\%2f978-3-642-32541-0\%5f9&partnerID=40&md5=6398989ce94cbfadf1367af131ac7120},
  keywords  = {Intelligent systems; Knowledge management; Mergers and acquisitions; Natural resources; Text processing; Unsupervised learning, Dice coefficient; Linguistic information; Part of speech tagging; Part-of-speech tags; Resource-Rich; Similarity functions; Unsupervised learning method; Word alignment, Computational linguistics},
  note      = {cited By 4; Conference of 12th International Workshop on Knowledge Management and Acquisition for Intelligent Systems, PKAW 2012, held in conjunction with 12th Pacific Rim International Conference on Artificial Intelligence, PRICAI 2012 ; Conference Date: 5 September 2012 Through 6 September 2012; Conference Code:194509},
  abstract  = {A statistical-based approach to word alignment involving automatically projecting part-of-speech (POS) tags is presented. The approach is referred to as the ``lazy man's way'' because it improves POS assignment for a resource-poor language by exploiting its similarity to a resource-rich one. This unsupervised learning method combines the N-gram and Dice Coefficient similarity functions in order to align English texts with Malay texts thus projecting the POS tags from English to Malay. It is a quick method that does not require the laborious effort needed to annotate the Malay dataset. A case study, an experiment done on 25 terrorism news articles written in Malay, has shown that leveraging pre-existing resources from a resource-rich language, i.e. English, to supplement a resource-poor language, i.e. Malay, is feasible and avoids building new text-processing tools from scratch. The system was tested on the Malay corpus, consisting of 5413 word tokens. The results reached values of 86.87 for precision, 72.56 for recall and 79.07 for F1-Score. This shows that the ``lazy man's way'', where a resource-poor language just exploits the rich linguistic information available in English, increases bitext projection accuracy significantly. {\copyright} Springer-Verlag Berlin Heidelberg 2012.},
}