{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "title",
   "metadata": {},
   "source": [
    "# PCA & LDA untuk Deteksi Kanker Payudara\n",
    "\n",
    "Notebook awal untuk memahami reduksi dimensi pada dataset breast cancer. Materi ini hanya untuk edukasi machine learning, bukan alat diagnosis medis."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "imports",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "from sklearn.decomposition import PCA\n",
    "from sklearn.discriminant_analysis import LinearDiscriminantAnalysis\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.metrics import accuracy_score, f1_score\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.preprocessing import LabelEncoder, StandardScaler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "config",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Everything a reader might want to tune lives in this one cell.\n",
    "DATA_URL = \"https://autotrain.app/opendata92bd.html?file=breast-cancer.csv\"\n",
    "TARGET_CANDIDATES = [\"diagnosis\", \"target\", \"class\", \"label\"]  # tried in this order\n",
    "SEED = 42  # shared by the split, PCA and the classifiers for reproducibility\n",
    "TEST_SIZE = 0.2\n",
    "N_PCA_COMPONENTS = 2"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "load-header",
   "metadata": {},
   "source": [
    "## Memuat data\n",
    "\n",
    "Dataset dibaca sebagai CSV dari `DATA_URL`; lima baris pertama ditampilkan untuk pemeriksaan cepat."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "load-data",
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv(DATA_URL)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "prepare-header",
   "metadata": {},
   "source": [
    "## Menyiapkan fitur dan target\n",
    "\n",
    "Kolom pengenal (`id`) dan kolom kosong hasil ekspor (`Unnamed: ...`) dibuang terlebih dahulu, baru kemudian kolom target ditentukan. Urutan ini penting: jika tidak ada nama kandidat yang cocok, kolom terakhir dipakai sebagai target, dan kolom tersebut tidak boleh berupa kolom yang sudah dibuang."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "prepare-helpers",
   "metadata": {},
   "outputs": [],
   "source": [
    "def drop_junk_columns(frame):\n",
    "    \"\"\"Return ``frame`` without identifier and placeholder columns.\n",
    "\n",
    "    Removes every column whose name is ``id`` or contains ``unnamed``\n",
    "    (both compared case-insensitively). ``frame`` itself is not modified.\n",
    "    \"\"\"\n",
    "    junk = [\n",
    "        col\n",
    "        for col in frame.columns\n",
    "        if \"unnamed\" in str(col).lower() or str(col).lower() == \"id\"\n",
    "    ]\n",
    "    return frame.drop(columns=junk)\n",
    "\n",
    "\n",
    "def find_target_column(frame, candidates):\n",
    "    \"\"\"Return the name of the label column of ``frame``.\n",
    "\n",
    "    Candidates are tried in order; an exact column-name match is preferred,\n",
    "    then a case-insensitive one. If no candidate matches, the last column\n",
    "    of ``frame`` is returned.\n",
    "    \"\"\"\n",
    "    by_lower = {}\n",
    "    for col in frame.columns:\n",
    "        # setdefault keeps the first column when two names differ only by case.\n",
    "        by_lower.setdefault(str(col).lower(), col)\n",
    "    for name in candidates:\n",
    "        if name in frame.columns:\n",
    "            return name\n",
    "        if name.lower() in by_lower:\n",
    "            return by_lower[name.lower()]\n",
    "    return frame.columns[-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "prepare-data",
   "metadata": {},
   "outputs": [],
   "source": [
    "df_clean = drop_junk_columns(df)\n",
    "# Resolve the target on the cleaned frame: the last-column fallback must\n",
    "# never point at a column that was just dropped (that raised KeyError below).\n",
    "target_col = find_target_column(df_clean, TARGET_CANDIDATES)\n",
    "\n",
    "X = df_clean.drop(columns=[target_col]).select_dtypes(include=[\"int64\", \"float64\"])\n",
    "label_encoder = LabelEncoder()\n",
    "y = label_encoder.fit_transform(df_clean[target_col].astype(str))\n",
    "\n",
    "# Fail early with a readable message instead of deep inside scikit-learn.\n",
    "assert X.shape[1] >= N_PCA_COMPONENTS, \"not enough numeric feature columns for PCA\"\n",
    "assert not X.isna().any().any(), \"features contain missing values; impute or drop them first\"\n",
    "assert len(label_encoder.classes_) >= 2, \"target must contain at least two classes\"\n",
    "\n",
    "# Encoded label of each original class; with two classes F1 treats label 1 as positive.\n",
    "{str(name): code for code, name in enumerate(label_encoder.classes_)}"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "split-header",
   "metadata": {},
   "source": [
    "## Membagi dan menstandarkan data\n",
    "\n",
    "Scaler hanya di-*fit* pada data latih supaya statistik data uji tidak bocor ke proses pelatihan."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "split-scale",
   "metadata": {},
   "outputs": [],
   "source": [
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=TEST_SIZE, random_state=SEED, stratify=y\n",
    ")\n",
    "scaler = StandardScaler()\n",
    "X_train_scaled = scaler.fit_transform(X_train)\n",
    "X_test_scaled = scaler.transform(X_test)\n",
    "X_train_scaled.shape, X_test_scaled.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "eval-header",
   "metadata": {},
   "source": [
    "## Fungsi evaluasi\n",
    "\n",
    "Klasifikator dan metrik yang sama dipakai untuk PCA maupun LDA agar hasilnya dapat dibandingkan secara adil."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "eval-helper",
   "metadata": {},
   "outputs": [],
   "source": [
    "# f1_score's default average=\"binary\" raises for more than two classes,\n",
    "# so fall back to the macro average in that case.\n",
    "F1_AVERAGE = \"binary\" if len(label_encoder.classes_) == 2 else \"macro\"\n",
    "\n",
    "\n",
    "def fit_and_score(X_tr, X_te, y_tr, y_te):\n",
    "    \"\"\"Fit logistic regression on reduced features and score it on the test split.\n",
    "\n",
    "    Returns a ``(model, predictions, scores)`` tuple, where ``scores`` is a\n",
    "    dict with the keys ``accuracy`` and ``f1``.\n",
    "    \"\"\"\n",
    "    clf = LogisticRegression(max_iter=1000, random_state=SEED)\n",
    "    clf.fit(X_tr, y_tr)\n",
    "    y_hat = clf.predict(X_te)\n",
    "    scores = {\n",
    "        \"accuracy\": accuracy_score(y_te, y_hat),\n",
    "        \"f1\": f1_score(y_te, y_hat, average=F1_AVERAGE, zero_division=0),\n",
    "    }\n",
    "    return clf, y_hat, scores"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "pca-header",
   "metadata": {},
   "source": [
    "## PCA + regresi logistik\n",
    "\n",
    "PCA tidak memakai label (tanpa pengawasan); di sini dipertahankan `N_PCA_COMPONENTS` komponen utama."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "pca-fit",
   "metadata": {},
   "outputs": [],
   "source": [
    "pca = PCA(n_components=N_PCA_COMPONENTS, random_state=SEED)\n",
    "X_train_pca = pca.fit_transform(X_train_scaled)\n",
    "X_test_pca = pca.transform(X_test_scaled)\n",
    "# Share of the total variance kept by each retained component.\n",
    "pca.explained_variance_ratio_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "pca-score",
   "metadata": {},
   "outputs": [],
   "source": [
    "model, preds, pca_scores = fit_and_score(X_train_pca, X_test_pca, y_train, y_test)\n",
    "print(pca_scores)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "lda-header",
   "metadata": {},
   "source": [
    "## LDA + regresi logistik\n",
    "\n",
    "LDA memakai label kelas (terawasi) dan menghasilkan paling banyak `jumlah_kelas - 1` komponen, jadi hanya satu komponen untuk masalah dua kelas."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "lda-fit",
   "metadata": {},
   "outputs": [],
   "source": [
    "# n_components is left at its default, min(n_classes - 1, n_features).\n",
    "lda = LinearDiscriminantAnalysis()\n",
    "X_train_lda = lda.fit_transform(X_train_scaled, y_train)\n",
    "X_test_lda = lda.transform(X_test_scaled)\n",
    "X_train_lda.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "lda-score",
   "metadata": {},
   "outputs": [],
   "source": [
    "lda_model, lda_preds, lda_scores = fit_and_score(X_train_lda, X_test_lda, y_train, y_test)\n",
    "# One row per reduction method, one column per metric.\n",
    "pd.DataFrame({\"PCA\": pca_scores, \"LDA\": lda_scores}).T"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "pygments_lexer": "ipython3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}