Students Marks Predictor

This is a machine learning model that predicts students' marks on the basis of their previous scores and attendance. The model uses linear regression to predict the marks, and it is served through the Flask framework.

ML Code of our Model

# Import libraries

import numpy as np

import pandas as pd

import matplotlib.pyplot as plt

# Direct-download link to the CSV dataset hosted on Google Drive.
path = r"https://drive.google.com/uc?export=download&id=13ZTYmL3E8S0nz-UKl4aaTZJaI3DVBGHM"

# pandas reads the CSV straight from the URL; the resulting frame has two
# columns, study_hours and student_marks.
df = pd.read_csv(path) # loading data

study_hours student_marks

0 6.83 78.50

1 6.56 76.74

2 NaN 78.68

3 5.67 71.82

4 8.67 84.19

... ... ...

195 7.53 81.67

196 8.56 84.68

197 8.94 86.75

198 6.60 78.05

199 8.35 83.50

200 rows × 2 columns

df.head()

study_hours student_marks

0 6.83 78.50

1 6.56 76.74

2 NaN 78.68

3 5.67 71.82

4 8.67 84.19

df.tail()

study_hours student_marks

195 7.53 81.67

196 8.56 84.68

197 8.94 86.75

198 6.60 78.05

199 8.35 83.50

df.describe()

study_hours student_marks

count 195.000000 200.00000

mean 6.995949 77.93375

std 1.253060 4.92570

min 5.010000 68.57000

25% 5.775000 73.38500

50% 7.120000 77.71000

75% 8.085000 82.32000

max 8.990000 86.99000

# Raw data: study hours on the x-axis, marks on the y-axis.
plt.scatter(x =df.study_hours, y = df.student_marks)

plt.xlabel("Students Study Hours")

plt.ylabel("Students marks")

plt.title("Scatter Plot of Students Study Hours vs Students marks")

plt.show() # render the scatter plot

df.isnull().sum()

study_hours 5

student_marks 0

dtype: int64

df.mean()#finding mean of the values

study_hours 6.995949

student_marks 77.933750

dtype: float64

# Fill the null values with each column's mean (only study_hours has nulls, 5 of them).
df2 = df.fillna(df.mean())

df2.isnull().sum() # verify that no null values remain after filling

study_hours 0

student_marks 0

dtype: int64

# Features: everything except the target column (leaves only study_hours).
X = df2.drop("student_marks", axis = "columns")

# Target: everything except the feature column (leaves only student_marks).
# y is deliberately kept as a one-column DataFrame, not a Series, so the fitted
# model's coefficients and predictions come back 2-D.
y = df2.drop("study_hours", axis = "columns")

print("shape of X = ", X.shape)

print("shape of y = ", y.shape) # both should be (n_rows, 1)

shape of X = (200, 1)

shape of y = (200, 1)

from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set; random_state fixes the shuffle so the
# split is reproducible.
X_train, X_test,y_train,y_test = train_test_split(X,y, test_size = 0.2, random_state=51)

print("shape of X_train = ", X_train.shape)

print("shape of y_train = ", y_train.shape)

print("shape of X_test = ", X_test.shape)

print("shape of y_test = ", y_test.shape) # confirm the split sizes (160 training rows, 40 test rows)

shape of X_train = (160, 1)

shape of y_train = (160, 1)

shape of X_test = (40, 1)

shape of y_test = (40, 1)

# y = m * x + c

from sklearn.linear_model import LinearRegression

lr = LinearRegression()

# Fit on the training split only; the learned m and c are then available as
# lr.coef_ and lr.intercept_.
lr.fit(X_train,y_train) # fit the linear regression

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

lr.coef_

array([[3.93571802]])

y_pred = lr.predict(X_test)

y_pred

lr.intercept_

array([50.44735504])

pd.DataFrame(np.c_[X_test, y_test, y_pred], columns = ["study_hours", "student_marks_original","student_marks_predicted"])

study_hours student_marks_original student_marks_predicted

0 8.300000 82.02 83.113815

1 7.230000 77.55 78.902596

2 8.670000 84.19 84.570030

3 8.990000 85.46 85.829460

4 8.710000 84.03 84.727459

5 7.700000 80.81 80.752384

6 5.690000 73.61 72.841591

7 5.390000 70.90 71.660875

8 5.790000 73.14 73.235162

9 5.390000 73.02 71.660875

10 5.850000 75.02 73.471305

11 6.590000 75.37 76.383737

12 5.790000 74.44 73.235162

13 5.880000 73.40 73.589377

14 8.260000 81.70 82.956386

15 5.070000 69.27 70.401445

16 5.790000 73.64 73.235162

17 7.190000 77.63 78.745168

18 6.380000 77.01 75.557236

19 8.190000 83.08 82.680886

20 6.660000 76.63 76.659237

21 5.090000 72.22 70.480160

22 6.180000 72.96 74.770092

23 6.995949 76.14 77.981436

24 8.930000 85.96 85.593317

25 8.160000 83.36 82.562814

26 6.600000 78.05 76.423094

27 8.790000 84.60 85.042316

28 7.100000 76.76 78.390953

29 7.860000 81.24 81.382099

30 7.950000 80.86 81.736313

31 8.310000 82.69 83.153172

32 8.070000 82.30 82.208599

33 7.790000 79.17 81.106598

34 5.880000 73.34 73.589377

35 5.260000 71.86 71.149232

36 5.450000 70.06 71.897018

37 7.900000 80.76 81.539527

38 5.630000 72.87 72.605447

39 5.460000 71.10 71.936375

lr.score(X_test,y_test) # R^2 (coefficient of determination) on the held-out test set, not a classification accuracy

0.9514124242154464

plt.scatter(X_train,y_train)

plt.xlabel("Students Study Hours")

plt.ylabel("Students marks")

plt.title("Scatter Plot of Students Study Hours vs Students marks")

plt.show()

# Test-set points, overlaid with the fitted regression line.
plt.scatter(X_test, y_test)

# The line is drawn through the model's predictions for the training inputs.
plt.plot(X_train, lr.predict(X_train), color = "r")

plt.xlabel("Students Study Hours")

plt.ylabel("Students marks")

plt.title("Scatter Plot of Students Study Hours vs Students marks")

Text(0.5, 1.0, 'Scatter Plot of Students Study Hours vs Students marks')

import joblib

# Persist the fitted model to disk so the Flask app can load it without retraining.
joblib.dump(lr, "student_mark_predictor.pkl")

['student_mark_predictor.pkl']

# Reload the saved model to check that the file round-trips.
model = joblib.load("student_mark_predictor.pkl")

model.predict([[0]])[0][0] # Pass the hours studied as [[hours]]; [0][0] unwraps the 2-D result into the predicted marks

50.44735503694244

Flask Code for Consuming our Model :

# -*- coding: utf-8 -*-

import numpy as np

import pandas as pd

from flask import Flask, request, render_template

import joblib

# Flask application object; the route decorators in this file register views on it.
app = Flask(__name__)

# Trained regression model, loaded once when the module is imported.
# NOTE: joblib.load unpickles arbitrary objects -- only load model files you trust.
model = joblib.load("student_mark_predictor.pkl")

# Module-level frame that accumulates one row per served prediction.
df = pd.DataFrame()

@app.route('/')
def home():
    """Render the index.html template as the landing page."""
    landing_page = render_template('index.html')
    return landing_page

@app.route('/predict',methods=['POST'])
def predict():
    """Predict marks from the submitted study hours and re-render the page.

    Reads every value of the POSTed form as a number, validates the first one
    as the study hours, runs the loaded model on the values, appends the
    (input, prediction) pair to the module-level ``df`` log, rewrites
    ``smp_data_from_app.csv`` from that log, and renders ``index.html`` with
    the result text.

    Returns:
        The rendered ``index.html`` page. On missing, non-numeric, NaN or
        out-of-range input the page carries the validation message instead of
        a prediction, and nothing is logged.
    """
    global df

    invalid_hours_text = 'Please enter valid hours between 1 to 12'

    # Parse as float, not int: the model was trained on fractional hours, and
    # int("6.5") would raise and surface as an HTTP 500.
    try:
        input_features = [float(x) for x in request.form.values()]
    except ValueError:
        return render_template('index.html', prediction_text=invalid_hours_text)

    # validate input hours
    # An empty form has no first value to check. The chained comparison is
    # also False for NaN, so NaN is rejected here rather than inside the model.
    # NOTE(review): the check accepts 0 although the message says "1 to 12";
    # left as is to keep accepting the inputs the original accepted.
    if not input_features or not (0 <= input_features[0] <= 12):
        return render_template('index.html', prediction_text=invalid_hours_text)

    features_value = np.array(input_features)

    # The model returns a 2-D array; [0][0] extracts the single predicted value.
    output = model.predict([features_value])[0][0].round(2)

    # input and predicted value store in df then save in csv file
    df = pd.concat(
        [df, pd.DataFrame({'Study Hours': input_features, 'Predicted Output': [output]})],
        ignore_index=True,
    )
    print(df)
    df.to_csv('smp_data_from_app.csv')

    # {:g} prints whole hours without a trailing ".0" (6.0 -> "6") while still
    # showing fractional input (6.5 -> "6.5") instead of truncating it.
    return render_template(
        'index.html',
        prediction_text='You will get {}% marks, when you do study {:g} hours per day '.format(
            output, input_features[0]
        ),
    )

if __name__ == "__main__":
    # Start Flask's built-in server on localhost:8080 only when this file is
    # executed as a script, not when it is imported.
    app.run(port=8080, host='localhost')

Code with Ayush

Search This Blog

Students Marks Predictor

Comments

Post a Comment