% Conference paper from ICCSII 2012 (Bandung). Metadata appears to originate
% from a Scopus export (see the url field) -- verify against the publisher record.
@inproceedings{scholars2476,
  author    = {Zamin, N. and Oxley, A. and Bakar, Z. A. and Farhan, S. A.},
  title     = {Characteristics of a {Malay} journalistic corpus},
  booktitle = {Proceedings of 2012 IEEE Conference on Control, Systems and Industrial Informatics, ICCSII 2012},
  pages     = {214--218},
  address   = {Bandung},
  year      = {2012},
  isbn      = {9781467310239},
  doi       = {10.1109/CCSII.2012.6470503},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84875103783&doi=10.1109\%2fCCSII.2012.6470503&partnerID=40&md5=6776ad8b5bae336d40307290eb6d1aff},
  keywords  = {corpus; Empirical analysis; Information Extraction; Linguistic patterns; Malaysia; Named entities; Natural language processing; word distribution, Natural language processing systems; Terrorism, Linguistics},
  abstract  = {This paper presents in detail a linguistics study of a journalistic corpus of Malay describing Indonesian terrorism. The initial raw text was manually annotated for its parts-of-speech. It is the first corpus of its nature ever established in Malaysia. The objective of this research is to conduct an empirical analysis of the actual patterns of use in journalistic texts. This paper presents the characteristics of Malay terrorism corpus which include the properties, word classes, named entities and word occurrences. The results of this work are given purely in terms of the characteristics of a Malay terrorism corpus. The results are highly useful for solving larger tasks in the Natural Language Processing area, such as Information Retrieval and Information Extraction, in the area of terrorism. {\copyright} 2012 IEEE.},
  note      = {cited By 1; Conference of 2012 1st IEEE Conference on Control, Systems and Industrial Informatics, ICCSII 2012 ; Conference Date: 23 September 2012 Through 26 September 2012; Conference Code:96023},
}