Students Marks Predictor

This is a machine learning model that predicts students' marks on the basis of their previous scores and attendance. The model uses linear regression to make its predictions, and it is served through a web application built with the Flask framework.

ML Code of our Model

#Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Direct-download URL of the CSV dataset (hosted on Google Drive).
path = r"https://drive.google.com/uc?export=download&id=13ZTYmL3E8S0nz-UKl4aaTZJaI3DVBGHM"
df  = pd.read_csv(path)  # load the CSV into a DataFrame
df  # display the DataFrame; the notebook output is reproduced below
study_hours student_marks
0 6.83 78.50
1 6.56 76.74
2 NaN 78.68
3 5.67 71.82
4 8.67 84.19
... ... ...
195 7.53 81.67
196 8.56 84.68
197 8.94 86.75
198 6.60 78.05
199 8.35 83.50
200 rows × 2 columns

df.head()
study_hours student_marks
0 6.83 78.50
1 6.56 76.74
2 NaN 78.68
3 5.67 71.82
4 8.67 84.19
df.tail()
study_hours student_marks
195 7.53 81.67
196 8.56 84.68
197 8.94 86.75
198 6.60 78.05
199 8.35 83.50
df.describe()
study_hours student_marks
count 195.000000 200.00000
mean 6.995949 77.93375
std 1.253060 4.92570
min 5.010000 68.57000
25% 5.775000 73.38500
50% 7.120000 77.71000
75% 8.085000 82.32000
max 8.990000 86.99000
plt.scatter(x =df.study_hours, y = df.student_marks)
plt.xlabel("Students Study Hours")
plt.ylabel("Students marks")
plt.title("Scatter Plot of Students Study Hours vs Students marks")
plt.show()#plotting graph

df.isnull().sum()
study_hours      5
student_marks    0
dtype: int64
df.mean()#finding mean of the values
study_hours       6.995949
student_marks    77.933750
dtype: float64
# Fill every missing value with the mean of its own column (mean imputation).
df2 = df.fillna(df.mean())
df2.isnull().sum()  # verify that no null values remain after the fill
study_hours      0
student_marks    0
dtype: int64
# Feature matrix: everything except the target column (here only study_hours).
X = df2.drop("student_marks", axis = "columns")
# Target: everything except the feature column (here only student_marks).
# Both X and y stay two-dimensional DataFrames, as the printed shapes show.
y = df2.drop("study_hours", axis = "columns")
print("shape of X = ", X.shape)
print("shape of y = ", y.shape)  # inspect the shapes before splitting
shape of X =  (200, 1)
shape of y =  (200, 1)
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed random_state makes the
# split reproducible between runs.
X_train, X_test,y_train,y_test = train_test_split(X,y, test_size = 0.2, random_state=51)
print("shape of X_train = ", X_train.shape)
print("shape of y_train = ", y_train.shape)
print("shape of X_test = ", X_test.shape)
print("shape of y_test = ", y_test.shape)  # confirm the sizes of the training and test sets
shape of X_train =  (160, 1)
shape of y_train =  (160, 1)
shape of X_test =  (40, 1)
shape of y_test =  (40, 1)
# Fit a straight line y = m * x + c, where x is the study hours and y the marks;
# after fitting, m is available as lr.coef_ and c as lr.intercept_.
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(X_train,y_train)  # train the linear regression on the training split only
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)
lr.coef_
array([[3.93571802]])
y_pred  = lr.predict(X_test)
y_pred
lr.intercept_
array([50.44735504])
pd.DataFrame(np.c_[X_test, y_test, y_pred], columns = ["study_hours", "student_marks_original","student_marks_predicted"])
study_hours student_marks_original student_marks_predicted
0 8.300000 82.02 83.113815
1 7.230000 77.55 78.902596
2 8.670000 84.19 84.570030
3 8.990000 85.46 85.829460
4 8.710000 84.03 84.727459
5 7.700000 80.81 80.752384
6 5.690000 73.61 72.841591
7 5.390000 70.90 71.660875
8 5.790000 73.14 73.235162
9 5.390000 73.02 71.660875
10 5.850000 75.02 73.471305
11 6.590000 75.37 76.383737
12 5.790000 74.44 73.235162
13 5.880000 73.40 73.589377
14 8.260000 81.70 82.956386
15 5.070000 69.27 70.401445
16 5.790000 73.64 73.235162
17 7.190000 77.63 78.745168
18 6.380000 77.01 75.557236
19 8.190000 83.08 82.680886
20 6.660000 76.63 76.659237
21 5.090000 72.22 70.480160
22 6.180000 72.96 74.770092
23 6.995949 76.14 77.981436
24 8.930000 85.96 85.593317
25 8.160000 83.36 82.562814
26 6.600000 78.05 76.423094
27 8.790000 84.60 85.042316
28 7.100000 76.76 78.390953
29 7.860000 81.24 81.382099
30 7.950000 80.86 81.736313
31 8.310000 82.69 83.153172
32 8.070000 82.30 82.208599
33 7.790000 79.17 81.106598
34 5.880000 73.34 73.589377
35 5.260000 71.86 71.149232
36 5.450000 70.06 71.897018
37 7.900000 80.76 81.539527
38 5.630000 72.87 72.605447
39 5.460000 71.10 71.936375
lr.score(X_test,y_test)  # R^2 (coefficient of determination) on the held-out test set, not a classification accuracy
0.9514124242154464
plt.scatter(X_train,y_train)
plt.xlabel("Students Study Hours")
plt.ylabel("Students marks")
plt.title("Scatter Plot of Students Study Hours vs Students marks")
plt.show()

plt.scatter(X_test, y_test)
plt.plot(X_train, lr.predict(X_train), color = "r")
plt.xlabel("Students Study Hours")
plt.ylabel("Students marks")
plt.title("Scatter Plot of Students Study Hours vs Students marks")
Text(0.5, 1.0, 'Scatter Plot of Students Study Hours vs Students marks')

import joblib
joblib.dump(lr, "student_mark_predictor.pkl")
['student_mark_predictor.pkl']
# Reload the model from disk to check that the saved file round-trips.
model = joblib.load("student_mark_predictor.pkl")
model.predict([[0]])[0][0]  # pass the hours studied as [[hours]] to get the predicted marks; 0 hours returns the intercept
50.44735503694244


Flask Code for Consuming our Model :

# -*- coding: utf-8 -*-

import numpy as np
import pandas as pd
from flask import Flask, request, render_template
import joblib

# Flask application object; the route decorators in this file register views on it.
app = Flask(__name__)

# Model loaded once at import time; the relative path is resolved against the
# current working directory.
# NOTE(review): joblib files are pickle-based, so only load a model file you trust.
model = joblib.load("student_mark_predictor.pkl")

# In-memory record of the inputs and predictions handled so far. It starts
# empty every time the process starts.
df = pd.DataFrame()

@app.route('/')
def home():
    """Serve the landing page by rendering the ``index.html`` template."""
    landing_page = render_template('index.html')
    return landing_page

@app.route('/predict',methods=['POST'])
def predict():
    """Predict marks for the study hours submitted through the HTML form.

    Every value of the posted form is parsed as a number and passed to the
    model as one feature row. The first value is treated as the daily study
    hours and must lie between 0 and 12 inclusive; fractional hours such as
    ``6.5`` are accepted.

    Returns:
        The rendered ``index.html`` template. Its ``prediction_text`` holds
        the predicted percentage, or a validation message when the input is
        missing, non-numeric, or out of range.

    Side effects:
        Appends the input and the prediction to the module-level ``df``,
        prints it, and rewrites ``smp_data_from_app.csv`` with its contents.
    """
    global df

    # The message now states the range the check really enforces (0 is accepted).
    invalid_hours_text = 'Please enter valid hours between 0 to 12'

    # Parse inside try/except: a blank or non-numeric field would otherwise
    # raise ValueError and surface as an HTTP 500 instead of a form message.
    # float() is used because the model is trained on fractional hours.
    try:
        input_features = [float(x) for x in request.form.values()]
    except ValueError:
        return render_template('index.html', prediction_text=invalid_hours_text)

    #validate input hours
    # The emptiness check covers a POST without any form field. The chained
    # comparison also rejects NaN, for which every comparison is False.
    if not input_features or not 0 <= input_features[0] <= 12:
        return render_template('index.html', prediction_text=invalid_hours_text)

    features_value = np.array(input_features)
    output = model.predict([features_value])[0][0].round(2)

    # input and predicted value store in df then save in csv file
    # NOTE(review): df starts empty on every process start and to_csv
    # overwrites the file, so rows written by an earlier run are lost.
    df = pd.concat(
        [df, pd.DataFrame({'Study Hours': input_features, 'Predicted Output': [output]})],
        ignore_index=True,
    )
    print(df)
    df.to_csv('smp_data_from_app.csv')

    # '{:g}' shows whole hours without a trailing '.0' (6.0 -> '6') and keeps
    # the fractional part otherwise, instead of truncating it with int().
    return render_template(
        'index.html',
        prediction_text='You will get {}% marks, when you do study {:g} hours per day '.format(output, features_value[0]),
    )


if __name__ == "__main__":
    # Start Flask's development server only when this file is executed
    # directly, not when it is imported.
    server_options = {'host': 'localhost', 'port': 8080}
    app.run(**server_options)
    

Comments