Code
import numpy as np
import pandas as pd
import zipfile
# Load the Titanic data from the zip archive already saved in another folder.
# NOTE(review): the archive handle is left open on purpose, since code outside
# this section may still read from `zf`; close it once it is no longer needed.
zf = zipfile.ZipFile("../2022-10-12-day-6-of-50daysofkaggle/titanic.zip")
train = pd.read_csv(zf.open("train.csv"))
test = pd.read_csv(zf.open("test.csv"))

# Select only the numerical columns.
num_col = train.select_dtypes(include=np.number).columns.tolist()

# Deselect passenger ID and 'Survived' by dropping the first two numeric
# columns. This relies on their position in the frame.
# (.remove() can remove only one item, so a slice delete is used instead.)
del num_col[0:2]

# `select_col` is an alias of `num_col`, not a copy: the `extend` below
# therefore grows both names.
select_col = num_col

# Remaining (non-numeric / target) columns to keep.
str_col = ["Sex", "Embarked", "Survived"]

# Add several elements to a list using `extend` and not `append`.
select_col.extend(str_col)

# Keep only the selected columns; `intersection` is called on the frame's own
# column index with the selection list as its argument.
train_eda = train[train.columns.intersection(select_col)]
train_eda
 | Survived | Pclass | Sex | Age | SibSp | Parch | Fare | Embarked |
---|---|---|---|---|---|---|---|---|
0 | 0 | 3 | male | 22.0 | 1 | 0 | 7.2500 | S |
1 | 1 | 1 | female | 38.0 | 1 | 0 | 71.2833 | C |
2 | 1 | 3 | female | 26.0 | 0 | 0 | 7.9250 | S |
3 | 1 | 1 | female | 35.0 | 1 | 0 | 53.1000 | S |
4 | 0 | 3 | male | 35.0 | 0 | 0 | 8.0500 | S |
... | ... | ... | ... | ... | ... | ... | ... | ... |
886 | 0 | 2 | male | 27.0 | 0 | 0 | 13.0000 | S |
887 | 1 | 1 | female | 19.0 | 0 | 0 | 30.0000 | S |
888 | 0 | 3 | female | NaN | 1 | 2 | 23.4500 | S |
889 | 1 | 1 | male | 26.0 | 0 | 0 | 30.0000 | C |
890 | 0 | 3 | male | 32.0 | 0 | 0 | 7.7500 | Q |
891 rows × 8 columns