{ "cells": [ { "cell_type": "markdown", "id": "144dde29", "metadata": {}, "source": [ "# Régression logistique\n", "\n", "Implémenter et appliquer la régression logistique aux données sur les cellules cancéreuses." ] }, { "cell_type": "code", "execution_count": 1, "id": "8eb3ad1a", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "\n", "from matplotlib import pyplot as plt \n", "from sklearn.model_selection import train_test_split\n" ] }, { "cell_type": "markdown", "id": "45cf6552", "metadata": {}, "source": [ "Lecture des données" ] }, { "cell_type": "code", "execution_count": 2, "id": "5c152473", "metadata": {}, "outputs": [], "source": [ "df = pd.read_csv(\"./data.csv\", index_col=0)\n" ] }, { "cell_type": "code", "execution_count": 3, "id": "79046efc", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | radius_mean | \n", "texture_mean | \n", "perimeter_mean | \n", "area_mean | \n", "smoothness_mean | \n", "compactness_mean | \n", "concavity_mean | \n", "concave points_mean | \n", "symmetry_mean | \n", "fractal_dimension_mean | \n", "... | \n", "radius_worst | \n", "texture_worst | \n", "perimeter_worst | \n", "area_worst | \n", "smoothness_worst | \n", "compactness_worst | \n", "concavity_worst | \n", "concave points_worst | \n", "symmetry_worst | \n", "fractal_dimension_worst | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "... | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "
mean | \n", "14.127292 | \n", "19.289649 | \n", "91.969033 | \n", "654.889104 | \n", "0.096360 | \n", "0.104341 | \n", "0.088799 | \n", "0.048919 | \n", "0.181162 | \n", "0.062798 | \n", "... | \n", "16.269190 | \n", "25.677223 | \n", "107.261213 | \n", "880.583128 | \n", "0.132369 | \n", "0.254265 | \n", "0.272188 | \n", "0.114606 | \n", "0.290076 | \n", "0.083946 | \n", "
std | \n", "3.524049 | \n", "4.301036 | \n", "24.298981 | \n", "351.914129 | \n", "0.014064 | \n", "0.052813 | \n", "0.079720 | \n", "0.038803 | \n", "0.027414 | \n", "0.007060 | \n", "... | \n", "4.833242 | \n", "6.146258 | \n", "33.602542 | \n", "569.356993 | \n", "0.022832 | \n", "0.157336 | \n", "0.208624 | \n", "0.065732 | \n", "0.061867 | \n", "0.018061 | \n", "
min | \n", "6.981000 | \n", "9.710000 | \n", "43.790000 | \n", "143.500000 | \n", "0.052630 | \n", "0.019380 | \n", "0.000000 | \n", "0.000000 | \n", "0.106000 | \n", "0.049960 | \n", "... | \n", "7.930000 | \n", "12.020000 | \n", "50.410000 | \n", "185.200000 | \n", "0.071170 | \n", "0.027290 | \n", "0.000000 | \n", "0.000000 | \n", "0.156500 | \n", "0.055040 | \n", "
25% | \n", "11.700000 | \n", "16.170000 | \n", "75.170000 | \n", "420.300000 | \n", "0.086370 | \n", "0.064920 | \n", "0.029560 | \n", "0.020310 | \n", "0.161900 | \n", "0.057700 | \n", "... | \n", "13.010000 | \n", "21.080000 | \n", "84.110000 | \n", "515.300000 | \n", "0.116600 | \n", "0.147200 | \n", "0.114500 | \n", "0.064930 | \n", "0.250400 | \n", "0.071460 | \n", "
50% | \n", "13.370000 | \n", "18.840000 | \n", "86.240000 | \n", "551.100000 | \n", "0.095870 | \n", "0.092630 | \n", "0.061540 | \n", "0.033500 | \n", "0.179200 | \n", "0.061540 | \n", "... | \n", "14.970000 | \n", "25.410000 | \n", "97.660000 | \n", "686.500000 | \n", "0.131300 | \n", "0.211900 | \n", "0.226700 | \n", "0.099930 | \n", "0.282200 | \n", "0.080040 | \n", "
75% | \n", "15.780000 | \n", "21.800000 | \n", "104.100000 | \n", "782.700000 | \n", "0.105300 | \n", "0.130400 | \n", "0.130700 | \n", "0.074000 | \n", "0.195700 | \n", "0.066120 | \n", "... | \n", "18.790000 | \n", "29.720000 | \n", "125.400000 | \n", "1084.000000 | \n", "0.146000 | \n", "0.339100 | \n", "0.382900 | \n", "0.161400 | \n", "0.317900 | \n", "0.092080 | \n", "
max | \n", "28.110000 | \n", "39.280000 | \n", "188.500000 | \n", "2501.000000 | \n", "0.163400 | \n", "0.345400 | \n", "0.426800 | \n", "0.201200 | \n", "0.304000 | \n", "0.097440 | \n", "... | \n", "36.040000 | \n", "49.540000 | \n", "251.200000 | \n", "4254.000000 | \n", "0.222600 | \n", "1.058000 | \n", "1.252000 | \n", "0.291000 | \n", "0.663800 | \n", "0.207500 | \n", "
8 rows × 30 columns
\n", "\n", " | diagnosis | \n", "radius_mean | \n", "texture_mean | \n", "perimeter_mean | \n", "area_mean | \n", "smoothness_mean | \n", "compactness_mean | \n", "concavity_mean | \n", "concave points_mean | \n", "symmetry_mean | \n", "... | \n", "radius_worst | \n", "texture_worst | \n", "perimeter_worst | \n", "area_worst | \n", "smoothness_worst | \n", "compactness_worst | \n", "concavity_worst | \n", "concave points_worst | \n", "symmetry_worst | \n", "fractal_dimension_worst | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "... | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "569.000000 | \n", "
mean | \n", "0.372583 | \n", "14.127292 | \n", "19.289649 | \n", "91.969033 | \n", "654.889104 | \n", "0.096360 | \n", "0.104341 | \n", "0.088799 | \n", "0.048919 | \n", "0.181162 | \n", "... | \n", "16.269190 | \n", "25.677223 | \n", "107.261213 | \n", "880.583128 | \n", "0.132369 | \n", "0.254265 | \n", "0.272188 | \n", "0.114606 | \n", "0.290076 | \n", "0.083946 | \n", "
std | \n", "0.483918 | \n", "3.524049 | \n", "4.301036 | \n", "24.298981 | \n", "351.914129 | \n", "0.014064 | \n", "0.052813 | \n", "0.079720 | \n", "0.038803 | \n", "0.027414 | \n", "... | \n", "4.833242 | \n", "6.146258 | \n", "33.602542 | \n", "569.356993 | \n", "0.022832 | \n", "0.157336 | \n", "0.208624 | \n", "0.065732 | \n", "0.061867 | \n", "0.018061 | \n", "
min | \n", "0.000000 | \n", "6.981000 | \n", "9.710000 | \n", "43.790000 | \n", "143.500000 | \n", "0.052630 | \n", "0.019380 | \n", "0.000000 | \n", "0.000000 | \n", "0.106000 | \n", "... | \n", "7.930000 | \n", "12.020000 | \n", "50.410000 | \n", "185.200000 | \n", "0.071170 | \n", "0.027290 | \n", "0.000000 | \n", "0.000000 | \n", "0.156500 | \n", "0.055040 | \n", "
25% | \n", "0.000000 | \n", "11.700000 | \n", "16.170000 | \n", "75.170000 | \n", "420.300000 | \n", "0.086370 | \n", "0.064920 | \n", "0.029560 | \n", "0.020310 | \n", "0.161900 | \n", "... | \n", "13.010000 | \n", "21.080000 | \n", "84.110000 | \n", "515.300000 | \n", "0.116600 | \n", "0.147200 | \n", "0.114500 | \n", "0.064930 | \n", "0.250400 | \n", "0.071460 | \n", "
50% | \n", "0.000000 | \n", "13.370000 | \n", "18.840000 | \n", "86.240000 | \n", "551.100000 | \n", "0.095870 | \n", "0.092630 | \n", "0.061540 | \n", "0.033500 | \n", "0.179200 | \n", "... | \n", "14.970000 | \n", "25.410000 | \n", "97.660000 | \n", "686.500000 | \n", "0.131300 | \n", "0.211900 | \n", "0.226700 | \n", "0.099930 | \n", "0.282200 | \n", "0.080040 | \n", "
75% | \n", "1.000000 | \n", "15.780000 | \n", "21.800000 | \n", "104.100000 | \n", "782.700000 | \n", "0.105300 | \n", "0.130400 | \n", "0.130700 | \n", "0.074000 | \n", "0.195700 | \n", "... | \n", "18.790000 | \n", "29.720000 | \n", "125.400000 | \n", "1084.000000 | \n", "0.146000 | \n", "0.339100 | \n", "0.382900 | \n", "0.161400 | \n", "0.317900 | \n", "0.092080 | \n", "
max | \n", "1.000000 | \n", "28.110000 | \n", "39.280000 | \n", "188.500000 | \n", "2501.000000 | \n", "0.163400 | \n", "0.345400 | \n", "0.426800 | \n", "0.201200 | \n", "0.304000 | \n", "... | \n", "36.040000 | \n", "49.540000 | \n", "251.200000 | \n", "4254.000000 | \n", "0.222600 | \n", "1.058000 | \n", "1.252000 | \n", "0.291000 | \n", "0.663800 | \n", "0.207500 | \n", "
8 rows × 31 columns
\n", "\n", " | radius_mean | \n", "texture_mean | \n", "perimeter_mean | \n", "area_mean | \n", "smoothness_mean | \n", "compactness_mean | \n", "concavity_mean | \n", "concave points_mean | \n", "symmetry_mean | \n", "fractal_dimension_mean | \n", "... | \n", "radius_worst | \n", "texture_worst | \n", "perimeter_worst | \n", "area_worst | \n", "smoothness_worst | \n", "compactness_worst | \n", "concavity_worst | \n", "concave points_worst | \n", "symmetry_worst | \n", "fractal_dimension_worst | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | \n", "398.000000 | \n", "398.000000 | \n", "398.000000 | \n", "398.000000 | \n", "398.000000 | \n", "398.000000 | \n", "398.000000 | \n", "398.000000 | \n", "398.000000 | \n", "398.000000 | \n", "... | \n", "398.000000 | \n", "398.000000 | \n", "398.000000 | \n", "398.000000 | \n", "398.000000 | \n", "398.000000 | \n", "398.000000 | \n", "398.000000 | \n", "398.000000 | \n", "398.000000 | \n", "
mean | \n", "14.159553 | \n", "19.395302 | \n", "92.207462 | \n", "661.267839 | \n", "0.096531 | \n", "0.105192 | \n", "0.089203 | \n", "0.049322 | \n", "0.180714 | \n", "0.062893 | \n", "... | \n", "16.355161 | \n", "25.677211 | \n", "107.871658 | \n", "897.275628 | \n", "0.132427 | \n", "0.255870 | \n", "0.270084 | \n", "0.114959 | \n", "0.286689 | \n", "0.083917 | \n", "
std | \n", "3.676611 | \n", "4.414834 | \n", "25.340346 | \n", "369.700011 | \n", "0.013931 | \n", "0.051376 | \n", "0.080270 | \n", "0.039574 | \n", "0.027345 | \n", "0.006900 | \n", "... | \n", "5.083506 | \n", "6.310161 | \n", "35.252618 | \n", "605.661312 | \n", "0.022508 | \n", "0.153518 | \n", "0.200286 | \n", "0.067138 | \n", "0.056616 | \n", "0.016859 | \n", "
min | \n", "6.981000 | \n", "9.710000 | \n", "43.790000 | \n", "143.500000 | \n", "0.052630 | \n", "0.019380 | \n", "0.000000 | \n", "0.000000 | \n", "0.106000 | \n", "0.050240 | \n", "... | \n", "7.930000 | \n", "12.020000 | \n", "50.410000 | \n", "185.200000 | \n", "0.071170 | \n", "0.027290 | \n", "0.000000 | \n", "0.000000 | \n", "0.156600 | \n", "0.055210 | \n", "
25% | \n", "11.622500 | \n", "16.152500 | \n", "74.667500 | \n", "412.525000 | \n", "0.086720 | \n", "0.066090 | \n", "0.029053 | \n", "0.020195 | \n", "0.162025 | \n", "0.058112 | \n", "... | \n", "12.840000 | \n", "21.037500 | \n", "83.007500 | \n", "498.925000 | \n", "0.117650 | \n", "0.147550 | \n", "0.106875 | \n", "0.065063 | \n", "0.247725 | \n", "0.072220 | \n", "
50% | \n", "13.290000 | \n", "18.895000 | \n", "86.290000 | \n", "545.650000 | \n", "0.096515 | \n", "0.097535 | \n", "0.063610 | \n", "0.034190 | \n", "0.179350 | \n", "0.061890 | \n", "... | \n", "14.875000 | \n", "25.100000 | \n", "97.260000 | \n", "679.250000 | \n", "0.131900 | \n", "0.223300 | \n", "0.235500 | \n", "0.099425 | \n", "0.280950 | \n", "0.080210 | \n", "
75% | \n", "16.100000 | \n", "21.932500 | \n", "105.550000 | \n", "802.325000 | \n", "0.105400 | \n", "0.130500 | \n", "0.132250 | \n", "0.074632 | \n", "0.194750 | \n", "0.066142 | \n", "... | \n", "19.130000 | \n", "29.927500 | \n", "126.075000 | \n", "1111.750000 | \n", "0.145175 | \n", "0.342575 | \n", "0.384700 | \n", "0.166200 | \n", "0.315100 | \n", "0.092135 | \n", "
max | \n", "28.110000 | \n", "39.280000 | \n", "188.500000 | \n", "2501.000000 | \n", "0.163400 | \n", "0.286700 | \n", "0.426800 | \n", "0.201200 | \n", "0.304000 | \n", "0.095750 | \n", "... | \n", "36.040000 | \n", "49.540000 | \n", "251.200000 | \n", "4254.000000 | \n", "0.222600 | \n", "0.937900 | \n", "0.960800 | \n", "0.291000 | \n", "0.555800 | \n", "0.148600 | \n", "
8 rows × 30 columns
\n", "\n", " | diagnosis | \n", "
---|---|
count | \n", "398.000000 | \n", "
mean | \n", "0.374372 | \n", "
std | \n", "0.484569 | \n", "
min | \n", "0.000000 | \n", "
25% | \n", "0.000000 | \n", "
50% | \n", "0.000000 | \n", "
75% | \n", "1.000000 | \n", "
max | \n", "1.000000 | \n", "