% Thesis record for digilib.uin-suka.ac.id eprint 72441.
% - The export's "NIM.: 21106050047" prefix is kept out of `author` (it would
%   otherwise be parsed as part of the given name and printed) and is preserved
%   in the non-standard `nim` field, which standard styles ignore.
% - `title` and `school` are stored in normal capitalisation instead of ALL CAPS
%   so that the bibliography style controls the final casing.
% - NOTE(review): `note` holds only a person's name, presumably the thesis
%   supervisor -- confirm and label it, since styles print `note` verbatim.
% - NOTE(review): the entry type comes from the repository export; confirm the
%   degree level and add a `type` field if this is not a doctoral thesis.
@phdthesis{digilib72441,
  author   = {Humam, Ibnu Raju},
  title    = {Pengaruh Karakteristik Data dan Optimalisasi Model terhadap Kinerja Algoritma Klasifikasi Ensemble},
  school   = {UIN Sunan Kalijaga Yogyakarta},
  year     = {2025},
  month    = jul,
  note     = {Dr. Shofwatul Uyun, S.T., M.Kom.},
  url      = {https://digilib.uin-suka.ac.id/id/eprint/72441/},
  keywords = {Klasifikasi, Ensemble Learning, Boosting, Bagging, LASSO, Hyperparameter Tuning},
  nim      = {21106050047},
  abstract = {Data classification is a fundamental task in machine learning for recognizing patterns. However, the diversity of data types, such as numerical, categorical, and mixed, poses a challenge in selecting the optimal model. Tree-based algorithms, such as Decision Trees, are frequently used, including ensemble techniques like Random Forest and boosting methods like AdaBoost, Gradient Boosting, LightGBM, and XGBoost. This study aims to evaluate the impact of data type on the performance of these classification algorithms. Additionally, the study assesses the effectiveness of feature selection using LASSO and hyperparameter tuning optimization. The research methodology involves comparing models across three scenarios: (1) a baseline model using all features, (2) a model with LASSO feature selection, and (3) a model with LASSO optimized through hyperparameter tuning. The results show that ensemble boosting algorithms (Gradient Boosting, LightGBM, XGBoost) consistently perform best on numerical and mixed datasets. On the other hand, the effectiveness of optimization through LASSO and tuning showed varying results. However, it has the potential to improve both the F1-Score and computational efficiency, as there is often a trade-off between the two. Evaluation of purely categorical data faces limitations due to the difficulty in finding suitable datasets.},
}