# bci_larionov_logisticregression.py

# -*- coding: utf-8 -*-
"""BCI_Larionov_LogisticRegression.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1zxAj1xN_FfTZHv_t0t1RgY1pKiSAaE3f

### Logistic Regression Classifier in Python


Logistic regression fits a linear model and then passes the linear output through a sigmoid function, so the result can be read as a class probability between 0 and 1.

#### Sigmoid  / Logistic Function

$p = 1 / (1 + e^{-y})$
"""

# Commented out IPython magic to ensure Python compatibility.
# Import libraries, features and settings (not all of these are needed so pull what you need)

# Plotting and numerics.
import matplotlib.pyplot as plt
import numpy as np
# NOTE(review): datasets, linear_model, mean_squared_error and r2_score are not
# referenced anywhere in the visible part of this script.
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd
# io wraps the decoded CSV text in a file-like object for pandas.
import io
# %matplotlib inline
# Repeated import of the same module; kept as exported from the notebook.
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
# NOTE(review): preprocessing is not referenced in the visible part of this script.
from sklearn import preprocessing
# Font size applied to every matplotlib figure created after this point.
plt.rc("font", size = 14)
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
# Two consecutive style calls; presumably the later "whitegrid" call is the
# one that ends up in effect -- TODO confirm against the seaborn docs.
sns.set(style="white")
sns.set(style="whitegrid", color_codes = True)

# Colab-only step: ask the user to upload a local file.

from google.colab import files

# Mapping of uploaded file name -> file content.
uploaded = files.upload()

# Report every uploaded file together with its size.
for fn in uploaded:
    print(f'User uploaded file "{fn}" with length {len(uploaded[fn])} bytes')

# Explore data

# Name the CSV is expected to have been uploaded under.
data_filename = 'data_dimthree.csv'
if data_filename not in uploaded:
    # The file may have been stored under another name (for example a renamed
    # copy). Accept a lone upload; otherwise fail with a message that lists
    # what was actually uploaded instead of a bare KeyError.
    if len(uploaded) != 1:
        raise KeyError(
            'Expected an uploaded file named "{}", got: {}'.format(
                data_filename, sorted(uploaded)))
    data_filename = next(iter(uploaded))

# The upload is raw bytes: decode it as UTF-8 and let pandas parse the text.
df = pd.read_csv(io.StringIO(uploaded[data_filename].decode('utf-8')))
# Keep the three feature columns plus the 'metric' label, in a fixed order.
df = df[['mean','std','max','metric']]

df.head()

# Split data into train test sets

from sklearn.model_selection import train_test_split
# Hold out 30% of the rows for testing. The fixed seed makes the split, and
# every score computed from it, reproducible from run to run.
trainingSet, testSet = train_test_split(df, test_size=0.3, random_state=42)

# Creating the dataframes for training and test datasets
train_df = trainingSet
test_df = testSet

# No missing-value handling is done here. If the CSV can contain NaNs, drop
# them (train_df.dropna() / test_df.dropna()) before building X and y.

# Select features and label by column name so both sets use the same columns.
feature_columns = ['mean', 'std', 'max']

X_train = train_df[feature_columns]
y_train = train_df["metric"]

X_test = test_df[feature_columns]
y_test = test_df["metric"]

y_test.head()

"""### Data Exploration"""

y_train.value_counts()

# Class balance of the training set. In the 'metric' column, label 1 is
# reported as "Hand" and label 2 as "Feet".
count_hand = int((train_df['metric'] == 1).sum())
count_feet = int((train_df['metric'] == 2).sum())
print("Hand", count_hand)
print("Feet", count_feet)

# Percentages are taken over the rows carrying one of the two labels only.
labelled_total = count_hand + count_feet
if labelled_total:
    pct_of_hand = count_hand / labelled_total
    print("Percentage of hand movement", pct_of_hand*100)
    pct_of_feet = count_feet / labelled_total
    print("Percentage of feet movement", pct_of_feet*100)
else:
    # Avoid a ZeroDivisionError when neither label occurs in the training set.
    print("No samples labelled 1 (hand) or 2 (feet) in the training set")

# Per-class mean of every feature.
train_df.groupby('metric').mean()

"""### Model Development and Prediction
Import the scikit-learn LogisticRegression class.
Fit the model on the training set using fit(), then predict on the test set using predict().
"""

from sklearn.linear_model import LogisticRegression
# fit() hands back the fitted estimator, so building and training are chained.
logreg = LogisticRegression().fit(X_train, y_train)
# Predicted class label for every row of the held-out test set.
y_pred = logreg.predict(X_test)

print("Coefficients: \n", logreg.coef_)

"""### Evaluate model using confusion matrix
This is basically looking at how well your model did on predictions
"""

from sklearn import metrics
# Counts of actual (y_test) versus predicted (y_pred) class labels.
cnf_matrix = metrics.confusion_matrix(y_true=y_test, y_pred=y_pred)
cnf_matrix

"""### Visualize CFM (confusion matrix) using a heatmap"""

# Commented out IPython magic to ensure Python compatibility.
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.utils.multiclass import unique_labels
# %matplotlib inline

# Use the labels that actually occur in the data instead of a hard-coded
# [0, 1]: sorted unique labels of y_test and y_pred, which is the ordering
# confusion_matrix uses when no labels are passed.
class_names = unique_labels(y_test, y_pred)
fig, ax = plt.subplots()
# create heatmap; the DataFrame index/columns become the tick labels, so the
# axes show the real class labels rather than positional 0/1.
sns.heatmap(pd.DataFrame(cnf_matrix, index=class_names, columns=class_names),
            annot=True, cmap="YlGnBu", fmt='g', ax=ax)
ax.xaxis.set_label_position("top")
ax.set_title('Confusion matrix', y=1.1)
ax.set_ylabel('Actual label')
ax.set_xlabel('Predicted label')
# Lay out the figure last so the title and axis labels are taken into account.
fig.tight_layout()

"""### CFM evaluation metrics Accuracy, Precision, Recall"""

# Precision and recall are computed for the positive class; pos_label=1 is
# scikit-learn's default, written out here so the choice is visible.
accuracy = metrics.accuracy_score(y_test, y_pred)
precision = metrics.precision_score(y_test, y_pred, pos_label=1)
recall = metrics.recall_score(y_test, y_pred, pos_label=1)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)