% Thesis record for eprint 64967 of the digilib.uin-suka.ac.id repository (see the url field).
% - The student ID (NIM) is stored in the non-standard `nim` field, which standard
%   styles ignore, so that the author field holds only the personal name.
% - Acronyms in the title are brace-protected so sentence-casing styles keep them uppercase.
% - NOTE(review): the record does not state the degree level; confirm that @phdthesis
%   (rather than @mastersthesis or another type) is correct for this work.
% - NOTE(review): in the abstract, "-X4" replaces a mis-encoded character ("?X4");
%   presumably a minus sign marking X4 leaving the LARS path -- verify against the thesis.
@phdthesis{digilib64967,
  author   = {Ula, Anisatul},
  title    = {Optimisasi Analisis Regresi {LASSO} untuk Pendugaan Data Penyakit Infeksi Menular Seksual ({IMS}) dengan Algoritma {LARS} Menggunakan {GCV}},
  school   = {{UIN} Sunan Kalijaga Yogyakarta},
  year     = {2024},
  month    = mar,
  note     = {Pembimbing: Sri Utami Zuliana, S.Si., M.Sc., Ph.D.},
  nim      = {20106010005},
  keywords = {Multikolinearitas, LASSO, LARS, GCV, IMS},
  url      = {https://digilib.uin-suka.ac.id/id/eprint/64967/},
  abstract = {Multicollinearity is a condition where there is a relationship between independent variables. In a regression, it can be seen whether there is a problem of multicollinearity or ups and downs, namely by looking at the Variance Inflation Factor (VIF) value or by the correlation coefficient. There is a multicollinearity problem which results in large variance and contains bias and the coefficient becomes insignificant. Therefore, it is necessary to handle multicollinearity problems, such as using Least Absolute Shrinkage and Selection Operator (LASSO) regression. LASSO regression will shrink the coefficients of independent variables that have a high relationship so that they are close to zero or exactly zero. LASSO regression coefficients are searched using the Least Angle Regression (LARS) algorithm. In this research, LASSO regression will shrink the variable coefficients in Sexually Transmitted Infection (STI) data in Central Java in 2021 which contains multicollinearity problems by looking at the VIF value. It was found that there were two variables, namely the variables X4 (the number of people who received SKTM) and X5 (the number of school facilities) containing multicollinearity. There are nine stages of independent variables entering the model, namely the first stage is zero variable, then X4,X1,X7,X2,X5,X3,X9,X6,-X4,X8,X4. There are several lambda values and 20 lambda values are taken and then the optimal lambda will be taken using the Generalized Cross Validation (GCV) method.
The optimum lambda was obtained, namely 2.027 with a minimum GCV value of 30796.68. With seven independent variables included in the model, namely variables X1,X2,X3,X5,X6,X7,X9. And the two variables selected are variables X4 and X8.},
}