% Journal article on TsF-kNN based confidentiality classification of file data
% (Cluster Computing 19(1), 2016). Record originates from a Scopus export.
@article{scholars7144,
  author    = {Zardari, M. A. and Jung, L. T.},
  title     = {Data security rules/regulations based classification of file data using {TsF-kNN} algorithm},
  journal   = {Cluster Computing},
  year      = {2016},
  volume    = {19},
  number    = {1},
  pages     = {349--368},
  publisher = {Springer New York LLC},
  issn      = {1386-7857},
  doi       = {10.1007/s10586-016-0539-z},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-84961724182&doi=10.1007\%2fs10586-016-0539-z&partnerID=40&md5=c1943bb6bb047e33eebe359c8bd5ac6c},
  note      = {cited By 10},
  keywords  = {Algorithms; Complex networks; Cryptography; Digital storage; Learning algorithms; Nearest neighbor search; Pattern recognition; Security of data; Security systems; Cloud service providers; Corporate objectives; Data classification; Efficient managements; File attributes; Security policy; Security requirements; TsF-kNN; Classification (of information)},
  abstract  = {Personal and organizational data are getting larger in volume with respect to time. Due to the importance of data for organisations, effective and efficient management and categorization of data need a special focus. Understanding and applying data security policies to the appropriate data types therefore is one of the core concerns in large organisations such as cloud service providers. With data classification, the identification of security requirements for the data can be accomplished without manual intervention where the encryption process is applied only to the confidential data thus saving encryption time, decryption time, storage and processing power. The proposed data classification approach is to reduce the network traffic, the additional data movement, the overload, and the storage place for confidential data can be decided where security requirements of the confidential data are fulfilled.
In this paper, an intelligent data classification approach is presented for predicting the confidentiality/sensitivity level of the data in a file based on the corporate objective and government policies/rules. An enhanced version of the k-NN algorithm is also proposed to reduce the computational complexity of the traditional k-NN algorithm at data classification phase. The proposed algorithm is called Training dataset Filtration-kNN (TsF-kNN). The experimental results show that data in a file can be classified into confidential and non-confidential classes and TsF-kNN algorithm has better performance against the traditional k-NN and Na{\"\i}ve Bayes algorithm. {\copyright} 2016, Springer Science+Business Media New York.},
}