This is a Machine Learning model made to predict the marks of students on the basis of their previous scores and attendance. This model uses the concept of linear regression to predict the marks of students, and it is implemented using the Flask framework.
                
    
ML Code of our Model
#Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Direct-download URL of the CSV dataset (hosted on Google Drive).
path = r"https://drive.google.com/uc?export=download&id=13ZTYmL3E8S0nz-UKl4aaTZJaI3DVBGHM"
# Read the CSV at that URL into a DataFrame.
df  = pd.read_csv(path)
# Bare expression: in a notebook cell this displays the DataFrame.
df
study_hours	student_marks
0	6.83	78.50
1	6.56	76.74
2	NaN	78.68
3	5.67	71.82
4	8.67	84.19
...	...	...
195	7.53	81.67
196	8.56	84.68
197	8.94	86.75
198	6.60	78.05
199	8.35	83.50
200 rows × 2 columns
df.head()
study_hours	student_marks
0	6.83	78.50
1	6.56	76.74
2	NaN	78.68
3	5.67	71.82
4	8.67	84.19
df.tail()
study_hours	student_marks
195	7.53	81.67
196	8.56	84.68
197	8.94	86.75
198	6.60	78.05
199	8.35	83.50
df.describe()
study_hours	student_marks
count	195.000000	200.00000
mean	6.995949	77.93375
std	1.253060	4.92570
min	5.010000	68.57000
25%	5.775000	73.38500
50%	7.120000	77.71000
75%	8.085000	82.32000
max	8.990000	86.99000
# Scatter plot of the raw data: study hours on the x-axis, marks on the y-axis.
plt.scatter(x =df.study_hours, y = df.student_marks)
plt.xlabel("Students Study Hours")
plt.ylabel("Students marks")
plt.title("Scatter Plot of Students Study Hours vs Students marks")
plt.show()  # render the figure
# Count the missing values in each column.
df.isnull().sum()
study_hours      5
student_marks    0
dtype: int64
df.mean()  # per-column mean; the same call supplies the fill values for the missing entries below
study_hours       6.995949
student_marks    77.933750
dtype: float64
# Replace every missing value with the mean of its own column.
df2 = df.fillna(df.mean())
df2.isnull().sum()  # verify that no missing values remain after the fill
study_hours      0
student_marks    0
dtype: int64
# Features: everything except the target column (leaves only study_hours).
X = df2.drop("student_marks", axis = "columns")
# Target: everything except study_hours (leaves only student_marks, kept as a one-column DataFrame).
y = df2.drop("study_hours", axis = "columns")
print("shape of X = ", X.shape)
print("shape of y = ", y.shape)  # check that X and y have the same number of rows
shape of X =  (200, 1)
shape of y =  (200, 1)
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as a test set; random_state pins the shuffle so the split is reproducible.
X_train, X_test,y_train,y_test = train_test_split(X,y, test_size = 0.2, random_state=51)
print("shape of X_train = ", X_train.shape)
print("shape of y_train = ", y_train.shape)
print("shape of X_test = ", X_test.shape)
print("shape of y_test = ", y_test.shape)  # report the sizes of the train and test partitions
shape of X_train =  (160, 1)
shape of y_train =  (160, 1)
shape of X_test =  (40, 1)
shape of y_test =  (40, 1)
# The model is a straight line: y = m * x + c
# (x: study hours, y: marks, m: slope, c: intercept).
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(X_train,y_train)  # fit the line on the training split only
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)
lr.coef_  # fitted slope m (marks gained per additional study hour)
array([[3.93571802]])
# Predict marks for the held-out test inputs.
y_pred  = lr.predict(X_test)
y_pred
lr.intercept_  # fitted intercept c (predicted marks at zero study hours)
array([50.44735504])
# Side-by-side table of the test inputs, the actual marks and the predicted marks.
pd.DataFrame(np.c_[X_test, y_test, y_pred], columns = ["study_hours", "student_marks_original","student_marks_predicted"])
study_hours	student_marks_original	student_marks_predicted
0	8.300000	82.02	83.113815
1	7.230000	77.55	78.902596
2	8.670000	84.19	84.570030
3	8.990000	85.46	85.829460
4	8.710000	84.03	84.727459
5	7.700000	80.81	80.752384
6	5.690000	73.61	72.841591
7	5.390000	70.90	71.660875
8	5.790000	73.14	73.235162
9	5.390000	73.02	71.660875
10	5.850000	75.02	73.471305
11	6.590000	75.37	76.383737
12	5.790000	74.44	73.235162
13	5.880000	73.40	73.589377
14	8.260000	81.70	82.956386
15	5.070000	69.27	70.401445
16	5.790000	73.64	73.235162
17	7.190000	77.63	78.745168
18	6.380000	77.01	75.557236
19	8.190000	83.08	82.680886
20	6.660000	76.63	76.659237
21	5.090000	72.22	70.480160
22	6.180000	72.96	74.770092
23	6.995949	76.14	77.981436
24	8.930000	85.96	85.593317
25	8.160000	83.36	82.562814
26	6.600000	78.05	76.423094
27	8.790000	84.60	85.042316
28	7.100000	76.76	78.390953
29	7.860000	81.24	81.382099
30	7.950000	80.86	81.736313
31	8.310000	82.69	83.153172
32	8.070000	82.30	82.208599
33	7.790000	79.17	81.106598
34	5.880000	73.34	73.589377
35	5.260000	71.86	71.149232
36	5.450000	70.06	71.897018
37	7.900000	80.76	81.539527
38	5.630000	72.87	72.605447
39	5.460000	71.10	71.936375
lr.score(X_test,y_test)  # R^2 (coefficient of determination) on the test split, not a classification accuracy
0.9514124242154464
# Scatter plot of the training split only.
plt.scatter(X_train,y_train)
plt.xlabel("Students Study Hours")
plt.ylabel("Students marks")
plt.title("Scatter Plot of Students Study Hours vs Students marks")
plt.show()
# Test points as a scatter, with the fitted line (predictions on the training inputs) drawn in red.
plt.scatter(X_test, y_test)
plt.plot(X_train, lr.predict(X_train), color = "r")
plt.xlabel("Students Study Hours")
plt.ylabel("Students marks")
plt.title("Scatter Plot of Students Study Hours vs Students marks")
Text(0.5, 1.0, 'Scatter Plot of Students Study Hours vs Students marks')
import joblib
# Persist the fitted model to disk so the web app can load it without retraining.
joblib.dump(lr, "student_mark_predictor.pkl")
['student_mark_predictor.pkl']
# Reload the saved model to confirm that the file round-trips.
model = joblib.load("student_mark_predictor.pkl")
model.predict([[0]])[0][0]  # pass the hours studied as [[hours]] to get the predicted marks; 0 hours returns the intercept
50.44735503694244
Flask Code for Consuming our Model :
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
from flask import Flask, request, render_template
import joblib
# Flask application object; the route decorators below register their handlers on it.
app = Flask(__name__)
# Load the regression model from the file written by the training code.
# NOTE(review): joblib.load unpickles the file, so it must only ever be pointed at a trusted file.
model = joblib.load("student_mark_predictor.pkl")
# In-memory log of (input, prediction) rows; starts empty and is extended by predict().
df = pd.DataFrame()
@app.route('/')
def home():
    """Serve the landing page by rendering the ``index.html`` template."""
    landing_page = render_template('index.html')
    return landing_page
@app.route('/predict',methods=['POST'])
def predict():
    """Predict marks for the submitted study hours and render the result page.

    Reads every value of the posted form as an integer, validates the first
    one as the number of study hours (0 to 12 inclusive), asks the loaded
    model for a prediction, appends the input/prediction pair to the
    module-level ``df`` log, rewrites ``smp_data_from_app.csv`` and renders
    ``index.html`` with the prediction text.

    Returns:
        The rendered ``index.html`` page. ``prediction_text`` holds either the
        prediction sentence or the validation message when the input is
        missing, not an integer, or out of range.
    """
    global df
    invalid_hours_text = 'Please enter valid hours between 1 to 12'

    # Form values arrive as raw text. int() raises ValueError for blank,
    # non-numeric or decimal strings (e.g. "6.5"); answer with the validation
    # message rather than letting the request fail with a server error.
    try:
        input_features = [int(x) for x in request.form.values()]
    except ValueError:
        return render_template('index.html', prediction_text=invalid_hours_text)

    # An empty form would otherwise raise IndexError on input_features[0].
    if not input_features:
        return render_template('index.html', prediction_text=invalid_hours_text)

    hours = input_features[0]
    # Validate input hours.
    # NOTE(review): the message says "1 to 12" but 0 is accepted here, as in
    # the original check - confirm which lower bound is intended.
    if hours < 0 or hours > 12:
        return render_template('index.html', prediction_text=invalid_hours_text)

    features_value = np.array(input_features)
    # The model returns a 2-D array; take the single predicted value and
    # round it to two decimals for display.
    output = model.predict([features_value])[0][0].round(2)

    # Append this request to the in-memory log and persist the whole log to CSV.
    new_row = pd.DataFrame({'Study Hours':input_features,'Predicted Output':[output]})
    df = pd.concat([df, new_row], ignore_index=True)
    print(df)
    df.to_csv('smp_data_from_app.csv')
    return render_template('index.html', prediction_text='You will get {}% marks, when you do study {} hours per day '.format(output, int(features_value[0])))
# Start the server only when this file is executed directly (not when it is imported).
if __name__ == "__main__":
    app.run(host='localhost', port=8080)  # serve on localhost, port 8080
Comments
Post a Comment