@comment{scholars15414: conference paper (ICSC 2021). The proceedings title is
stored in booktitle, as required for inproceedings entries. The note field keeps
the citation count and conference details supplied with the original record.}
@inproceedings{scholars15414,
  author    = {Reddy, K. V. V. and Elamvazuthi, I. and Aziz, A. A. and Paramasivam, S. and Chua, H. N. and Pranavanand, S.},
  title     = {Prediction of Heart Disease Risk Using Machine Learning with Correlation-based Feature Selection and Optimization Techniques},
  booktitle = {2021 7th International Conference on Signal Processing and Communication, {ICSC} 2021},
  year      = {2021},
  pages     = {228--233},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  isbn      = {9781665427395},
  doi       = {10.1109/ICSC53193.2021.9673490},
  url       = {https://www.scopus.com/inward/record.uri?eid=2-s2.0-85125068781&doi=10.1109\%2fICSC53193.2021.9673490&partnerID=40&md5=c6430c4b0192dd475406f68913cddada},
  note      = {cited By 6; Conference of 7th International Conference on Signal Processing and Communication, ICSC 2021; Conference Date: 25 November 2021 Through 27 November 2021; Conference Code:176413},
  keywords  = {Cardiology; Data handling; Data mining; Decision trees; Discriminant analysis; Diseases; Forecasting; Heart; Logistic regression; Motion compensation; Nearest neighbor search; Risk assessment; Support vector machines; Data preprocessing; Disease risks; Features optimizations; Features selection; Heart disease; Machine-learning; Optimal sets; Optimisations; Prediction of heart disease; Selection techniques; Feature extraction},
  abstract  = {Heart disease, one type of cardiovascular illness, is the leading cause of mortality for many individuals around the world. Early prediction of heart disease can help people to endure appropriate medical treatment and to save lives. Recent studies have focused on the use of data mining and machine learning in the detection of diseases based on specific features of a person. In this research, prepared an integrated heart dataset of 1190 observations from the Cleveland, Hungarian, Long Beach VA, Switzerland, and Statlog. Numerous machine learning classifiers, Decision Tree, Discriminant Analysis, Logistic Regression, Na{\"i}ve Bayes, Support Vector Machines, k-Nearest Neighbors, Bagged Trees, Optimizable Tree, and Optimizable k-Nearest Neighbors are trained using 10-fold cross-validation for efficient heart disease risk prediction on the Correlation-based Feature Selection optimal set of the integrated heart dataset. Finally, performed a comparative analysis with and without feature selection. The Optimizable k-Nearest Neighbors algorithm achieved an utmost accuracy of 95.04, area under the ROC curve of 0.99 on the Correlation-based Feature Selection optimal set, and that of 90.34, 0.96 respectively, on full features. {\copyright} 2021 IEEE.},
}