import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


# Load the IT, CE and ETC tables from their *_Normalized.csv files; the paths
# are relative, so they resolve against the current working directory.
df_it, df_ce, df_etc = map(
    pd.read_csv,
    ('IT_Normalized.csv', 'CE_Normalized.csv', 'ETC_Normalized.csv'),
)


# One scatter panel per branch: SEM_3_SGPA on x against SEM_4_SGPA on y.
fig, axis = plt.subplots(1, 3, figsize = (18, 5))
sgpa_panels = (
    (df_it, "IT", "blue"),
    (df_ce, "CE", "green"),
    (df_etc, "ETC", "red"),
)
for panel, (frame, branch, shade) in zip(axis, sgpa_panels):
    panel.scatter(frame['SEM_3_SGPA'], frame['SEM_4_SGPA'], color = shade)
    panel.set_title(branch)
    # x is windowed to 0.5..1 while y keeps the full 0..1 range
    panel.set_xlim([0.5, 1])
    panel.set_ylim([0, 1])
    panel.set_xlabel("SEM 3")
    panel.set_ylabel("SEM 4")

plt.suptitle("Marks Obtained by students in Semester 3 vs Semester 4", fontsize = 14)
plt.show()


# All three branches overlaid on a single SEM_3_SGPA vs SEM_4_SGPA scatter,
# told apart by colour and legend entry.
fig, axis = plt.subplots(1, 1, figsize=(8,8))
for frame, shade, branch in (
    (df_it, "blue", "IT"),
    (df_ce, "green", "CE"),
    (df_etc, "red", "ETC"),
):
    axis.scatter(frame['SEM_3_SGPA'], frame['SEM_4_SGPA'], color = shade, label = branch)
axis.set_title("Combined")
axis.set_xlabel("SEM 3")
axis.set_ylabel("SEM 4")
axis.legend()
plt.show()


# One scatter panel per branch: SEM_3_INTERNAL on x against SEM_4_INTERNAL on y,
# both axes fixed to 0..1.
fig, axis = plt.subplots(1, 3, figsize = (18, 5))
internal_panels = [
    ("IT", df_it, "blue"),
    ("CE", df_ce, "green"),
    ("ETC", df_etc, "red"),
]
for position, (branch, frame, shade) in enumerate(internal_panels):
    panel = axis[position]
    panel.scatter(frame['SEM_3_INTERNAL'], frame['SEM_4_INTERNAL'], color = shade)
    panel.set_title(branch)
    panel.set_xlim([0, 1])
    panel.set_ylim([0, 1])
    panel.set_xlabel("SEM 3")
    panel.set_ylabel("SEM 4")

plt.suptitle("Internal marks obtained by students in semister 3 vs semister 4", fontsize = 14)
plt.show()


# One scatter panel per branch: SEM_4_INTERNAL on x against SEM_4_EXTERNAL on y,
# both axes fixed to 0..1.
fig, axis = plt.subplots(1, 3, figsize = (18, 5))
sem4_panels = {
    "IT": (df_it, "blue"),
    "CE": (df_ce, "green"),
    "ETC": (df_etc, "red"),
}
for panel, (branch, (frame, shade)) in zip(axis, sem4_panels.items()):
    panel.scatter(frame['SEM_4_INTERNAL'], frame['SEM_4_EXTERNAL'], color = shade)
    panel.set_title(branch)
    panel.set_xlim([0, 1])
    panel.set_ylim([0, 1])
    panel.set_xlabel("SEM 4 Internal")
    panel.set_ylabel("SEM 4 External")

plt.suptitle("Internal marks obtained by students vs External marks in semister 4", fontsize = 14)
plt.show()


def countStundents(df, nbins, subnames):
    """Count, per subject, how many values fall into each of ``nbins`` ranges.

    The interval 0..1 is split into ``nbins`` equal-width ranges.  Every range
    is half-open ``[lower, upper)`` except the last one, which has no upper
    bound so that a value of exactly 1 (or above) is still counted.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding one numeric column per entry of ``subnames``.
    nbins : int
        Number of equal-width ranges.
    subnames : list of str
        Column names of ``df`` to count.

    Returns
    -------
    pandas.DataFrame
        One row per subject and one column per range (labelled
        ``"lower, upper"``); each cell is the number of non-missing values of
        that subject inside that range.
    """
    diff = 1 / nbins
    # Build the lower edges here instead of reading a module-level `filters`,
    # so the result depends only on the arguments.  Multiplying an integer
    # range yields exactly `nbins` edges with the values 0, diff, 2*diff, ...
    filters = np.arange(nbins) * diff
    tmp = pd.DataFrame(index = subnames, columns = [f"{x}, {x +diff}" for x in filters])

    last = len(filters) - 1
    for i, lower in enumerate(filters):
        for j, name in enumerate(subnames):
            marks = df[name]
            # NaN compares False, so missing marks are never counted.
            in_range = marks >= lower
            if i != last:
                # Use the next lower edge as the upper bound so neighbouring
                # ranges share one boundary value and cannot overlap or leave
                # a gap through floating-point rounding.
                in_range = in_range & (marks < filters[i + 1])
            tmp.iloc[j, i] = int(in_range.sum())

    return tmp


def displayPieChart(df, row, col, filters, subnames, title):
    """Draw one pie chart per marks range on a ``row`` x ``col`` grid.

    Parameters
    ----------
    df : pandas.DataFrame
        Count table with one row per subject and one column per range;
        column ``k`` is drawn under the heading built from ``filters[k]``.
    row, col : int
        Grid shape handed to ``plt.subplots``.
    filters : sequence of float
        Lower edge of every range; its length is the number of pies drawn.
    subnames : list of str
        Accepted for call compatibility; slice labels come from ``df.index``.
    title : str
        Figure-level title.

    Subjects with a zero count are left out of a pie; a range in which every
    subject is zero gets a blank panel, as do grid cells beyond the last range.
    """
    nbins = len(filters)
    # Ranges are taken to be equal-width over 0..1, so the width follows from
    # the number of edges rather than from a module-level variable.
    diff = 1 / nbins if nbins else 0
    # squeeze=False keeps `axis` two-dimensional for one-row/one-column grids.
    fig, axis = plt.subplots(row, col, figsize=(col * 9, row * 6), squeeze=False)

    # `axis.flat` walks the grid row by row, the same order as nested i/j loops.
    for cur, panel in enumerate(axis.flat):
        if cur >= nbins:
            # Spare grid cell with no range to show.
            panel.set_axis_off()
            continue

        series = df.iloc[:, cur].sort_values()
        series = series[series != 0]
        if len(series) == 0:
            panel.set_axis_off()
            continue

        panel.pie(series, labels = series.index, autopct='%1.1f%%')
        panel.set_title(f'Range {filters[cur] *100}% to {(filters[cur] +diff) *100}%')

    plt.suptitle(title, fontsize = 18)
    plt.show()


# Binning shared by every branch: five equal-width ranges over 0..1.
nbins = 5
diff = 1 / nbins
filters = np.arange(0, 1, diff)

# (marks table, subject columns, figure title) for IT, CE and ETC, in the
# order the figures are shown.
branch_subjects = (
    (
        df_it,
        ['ENGINEERING MATHEMATICS-III','DATABASE MANAGEMENT SYSTEM','PROCESSOR ARCHITECTURE','COMPUTER GRAPHICS','SOFTWARE ENGINEERING'],
        "Count of Students getting marks in ranges - IT",
    ),
    (
        df_ce,
        ['ENGINEERING MATHEMATICS III','DATA STRUCTURES & ALGO.','SOFTWARE ENGINEERING','MICROPROCESSOR','PRINCIPLES OF PROG. LANG.'],
        "Count of Students getting marks in ranges - CE",
    ),
    (
        df_etc,
        ['ENGINEERING MATHEMATICS III','SIGNALS & SYSTEMS','CONTROL SYSTEMS','PRINCIPLES OF COMMU. SYSTEMS','OBJECT ORIENTED PROGRAMMING'],
        "Count of Students getting marks in ranges - ETC",
    ),
)

for frame, subnames, chart_title in branch_subjects:
    # Counting number of students in ranges
    tmp = countStundents(frame, nbins, subnames)

    # Displaying
    displayPieChart(tmp, 3, 2, filters, subnames, chart_title)


from sklearn.cluster import KMeans


# IT

# Elbow analysis: for each pair of columns, fit KMeans for k = 1..9 and plot
# the resulting SSE (the model's `inertia_`) against k.
fig, axis = plt.subplots(3, 1, figsize=(9, 6 * 3))

elbow_panels = (
    (['SEM_3_INTERNAL', 'SEM_4_INTERNAL'], "Sem 3 internal vs Sem 4 internal"),
    (['SEM_3_SGPA', 'SEM_4_SGPA'], "Sem 3 vs Sem 4"),
    (['SEM_4_INTERNAL', 'SEM_4_EXTERNAL'], "Sem 4 internal vs Sem 4 External"),
)

sse_curves = []
for panel, (columns, panel_title) in zip(axis, elbow_panels):
    X = df_it[columns]
    sse = {}
    for k in range(1, 10):
        model = KMeans(n_clusters=k, max_iter=50)
        model.fit(X)
        sse[k] = model.inertia_
    sse_curves.append([list(sse.keys()), list(sse.values())])

    panel.set_xlim(0, 10)
    panel.set_ylim(0, 10)
    # The curve is SSE over the number of clusters, so the axes are labelled
    # with those quantities; the clustered columns are named in the title.
    panel.set_xlabel('Number of clusters (k)')
    panel.set_ylabel('SSE')
    panel.set_title(panel_title, fontsize = 14)
    panel.plot(sse_curves[-1][0], sse_curves[-1][1])

# Keep the per-panel [k values, SSE values] pairs under their original names.
sse_list_1, sse_list_2, sse_list_3 = sse_curves

plt.suptitle('SSE of IT on range of 10 clusters using KMeans', fontsize = 18)
plt.show()


# ETC

# Elbow analysis: for each pair of columns, fit KMeans for k = 1..9 and plot
# the resulting SSE (the model's `inertia_`) against k.
fig, axis = plt.subplots(3, 1, figsize=(9, 6 * 3))

elbow_panels = (
    (['SEM_3_INTERNAL', 'SEM_4_INTERNAL'], "Sem 3 internal vs Sem 4 internal"),
    (['SEM_3_SGPA', 'SEM_4_SGPA'], "Sem 3 vs Sem 4"),
    (['SEM_4_INTERNAL', 'SEM_4_EXTERNAL'], "Sem 4 internal vs Sem 4 External"),
)

sse_curves = []
for panel, (columns, panel_title) in zip(axis, elbow_panels):
    X = df_etc[columns]
    sse = {}
    for k in range(1, 10):
        model = KMeans(n_clusters=k, max_iter=50)
        model.fit(X)
        sse[k] = model.inertia_
    sse_curves.append([list(sse.keys()), list(sse.values())])

    panel.set_xlim(0, 10)
    panel.set_ylim(0, 10)
    # The curve is SSE over the number of clusters, so the axes are labelled
    # with those quantities; the clustered columns are named in the title.
    panel.set_xlabel('Number of clusters (k)')
    panel.set_ylabel('SSE')
    panel.set_title(panel_title, fontsize = 14)
    panel.plot(sse_curves[-1][0], sse_curves[-1][1])

# Keep the per-panel [k values, SSE values] pairs under their original names.
sse_list_1, sse_list_2, sse_list_3 = sse_curves

plt.suptitle('SSE of ETC on range of 10 clusters using KMeans', fontsize = 18)
plt.show()


# CE

# Elbow analysis: for each pair of columns, fit KMeans for k = 1..9 and plot
# the resulting SSE (the model's `inertia_`) against k.
fig, axis = plt.subplots(3, 1, figsize=(9, 6 * 3))

elbow_panels = (
    (['SEM_3_INTERNAL', 'SEM_4_INTERNAL'], "Sem 3 internal vs Sem 4 internal"),
    (['SEM_3_SGPA', 'SEM_4_SGPA'], "Sem 3 vs Sem 4"),
    (['SEM_4_INTERNAL', 'SEM_4_EXTERNAL'], "Sem 4 internal vs Sem 4 External"),
)

sse_curves = []
for panel, (columns, panel_title) in zip(axis, elbow_panels):
    X = df_ce[columns]
    sse = {}
    for k in range(1, 10):
        model = KMeans(n_clusters=k, max_iter=50)
        model.fit(X)
        sse[k] = model.inertia_
    sse_curves.append([list(sse.keys()), list(sse.values())])

    panel.set_xlim(0, 10)
    panel.set_ylim(0, 10)
    # The curve is SSE over the number of clusters, so the axes are labelled
    # with those quantities; the clustered columns are named in the title.
    panel.set_xlabel('Number of clusters (k)')
    panel.set_ylabel('SSE')
    panel.set_title(panel_title, fontsize = 14)
    panel.plot(sse_curves[-1][0], sse_curves[-1][1])

# Keep the per-panel [k values, SSE values] pairs under their original names.
sse_list_1, sse_list_2, sse_list_3 = sse_curves

plt.suptitle('SSE of CE on range of 10 clusters using KMeans', fontsize = 18)
plt.show()


def to_color(clusters):
    """Translate cluster indices into matplotlib colour names.

    Parameters
    ----------
    clusters : iterable of int
        Cluster index of every point; only the indices 0..4 are mapped.

    Returns
    -------
    list of str
        One colour per entry of ``clusters``, in the same order.

    Raises
    ------
    KeyError
        If an index outside 0..4 is encountered.
    """
    LABEL_COLOR_MAP = {
                        0 : 'r',
                        1 : 'g',
                        2 : 'b',
                        3 : 'y',
                        # 'v' is not a colour matplotlib understands; use the
                        # named colour so a fifth cluster can be drawn.
                        4 : 'violet',
                      }
    return [LABEL_COLOR_MAP[c] for c in clusters]


# Module-level store that the script fills with cluster sizes.
elementCluster = {}
def count_elements(labels, n):
    """Count the members of the first clusters, keyed by colour letter.

    ``labels`` must offer ``tolist()`` (e.g. a numpy array of cluster
    indices).  The result always has ``'r'`` (index 0) and ``'g'`` (index 1);
    ``'b'`` (index 2) is added only when ``n`` is exactly 3.
    """
    members = labels.tolist()
    letters = ('r', 'g', 'b') if n == 3 else ('r', 'g')
    return {letter: members.count(index) for index, letter in enumerate(letters)}


# KMeans clustering per question (Q1-Q3) followed by pies comparing how many
# students of each branch fall in the best / average / lowest cluster.
#
# KMeans numbers its clusters arbitrarily and no random_state is set, so a
# given label (and therefore a given colour key) does not mean the same thing
# from run to run or from branch to branch.  Clusters are therefore renumbered
# by centroid before counting: rank 0 ('r') is always the highest-scoring
# cluster, rank 1 ('g') the next and rank 2 ('b') the lowest.

# Branch tables in the order they are drawn and reported.
_BRANCHES = (('IT', df_it), ('CE', df_ce), ('ETC', df_etc))
# Pie heading for every cluster rank and the key count_elements uses for it.
_RANK_TITLES = ('Students Scoring Best', 'Students Scoring Average', 'Students Scoring Less')
_RANK_KEYS = ('r', 'g', 'b')


def _ranked_labels(model):
    """Return the fitted model's labels renumbered so that 0 is the best cluster.

    Clusters are ordered by the mean of their centroid coordinates, highest
    first.  NOTE(review): "best" is taken to mean the highest combined marks
    on both plotted columns - confirm this matches the intended reading.
    """
    best_first = np.argsort(-model.cluster_centers_.mean(axis=1))
    rank_of = np.empty(len(best_first), dtype=int)
    rank_of[best_first] = np.arange(len(best_first))
    return rank_of[model.labels_]


def _cluster_branches(columns, num_clusters, xlabel, ylabel, title):
    """Cluster every branch on ``columns``, draw the scatters, return cluster sizes.

    Returns a dict mapping branch name to the ``count_elements`` result for
    that branch's ranked labels.
    """
    count = {}
    fig, axis = plt.subplots(1, 3, figsize=(9 * 3, 6 * 1))

    for panel, (branch, frame) in zip(axis, _BRANCHES):
        X = frame[columns]
        model = KMeans(n_clusters=num_clusters, max_iter=50)
        model.fit(X)
        clusters = _ranked_labels(model)
        count[branch] = count_elements(clusters, num_clusters)

        panel.set_xlim(0, 1)
        panel.set_ylim(0, 1)
        panel.set_xlabel(xlabel)
        panel.set_ylabel(ylabel)
        panel.set_title(branch, fontsize = 14)
        panel.scatter(X.iloc[:, 0], X.iloc[:, 1], c=to_color(clusters))

    plt.suptitle(title, fontsize = 18)
    plt.show()
    return count


def _rank_pies(count, row, col, title):
    """Draw one pie per cluster rank showing each branch's share of that rank."""
    # squeeze=False keeps `axis` two-dimensional for the one-row layouts too.
    fig, axis = plt.subplots(row, col, figsize = (9 * col, 6 * row), squeeze=False)
    branches = [branch for branch, _ in _BRANCHES]
    num_ranks = len(count[branches[0]])

    for rank, panel in enumerate(axis.flat):
        if rank >= num_ranks:
            # Spare grid cell (the 2x2 layout only holds three pies).
            panel.set_axis_off()
            continue
        key = _RANK_KEYS[rank]
        tmp = pd.Series([count[branch][key] for branch in branches], index = branches)
        panel.pie(tmp, labels = tmp.index, autopct='%1.1f%%')
        panel.legend()
        panel.set_title(_RANK_TITLES[rank])

    plt.suptitle(title, fontsize = 18)
    plt.show()


# Sem 3 Internal vs Sem 4 Internal
elementCluster['Q1'] = _cluster_branches(
    ['SEM_3_INTERNAL', 'SEM_4_INTERNAL'], 2,
    'Sem 3 Internal', 'Sem 4 Internal',
    'KMeans on Semister 3 Internal vs Semister 4 Internal Marks',
)

# Sem 3 vs Sem 4
elementCluster['Q2'] = _cluster_branches(
    ['SEM_3_SGPA', 'SEM_4_SGPA'], 2,
    'Sem 3', 'Sem 4',
    'KMeans on Semister 3 vs Semister 4 Marks',
)

# Sem 4 Internal vs Sem 4 External
elementCluster['Q3'] = _cluster_branches(
    ['SEM_4_INTERNAL', 'SEM_4_EXTERNAL'], 3,
    'Sem 4 Internal', 'Sem 4 External',
    'KMeans on Semister 4 internal vs Semister 4 External Marks',
)

# Branch shares per cluster rank, one figure per question.
_rank_pies(elementCluster['Q1'], 1, 2, 'Semister 3 Internal vs Semister 4 Internal')
_rank_pies(elementCluster['Q2'], 1, 2, 'Semister 3 vs Semister 4')
_rank_pies(elementCluster['Q3'], 2, 2, 'Semister 4 Internal vs Semister 4 External')


# SEM_4_INTERNAL of every branch drawn as a line over the table's index,
# all three on one wide axes.
fig, axis = plt.subplots(1, 1, figsize = (9 * 3, 6))
for frame, shade, branch in (
    (df_it, 'gold', 'IT'),
    (df_ce, 'blue', 'CE'),
    (df_etc, 'green', 'ETC'),
):
    axis.plot(frame['SEM_4_INTERNAL'], c = shade, label = branch)
axis.legend()

plt.show()


# Mean SEM_4_INTERNAL of every branch as a horizontal line; the y axis is
# zoomed to 0.80..0.90.
fig, axis = plt.subplots(1, 1, figsize = (9 * 3, 6))
for frame, shade, branch in (
    (df_it, 'gold', 'IT'),
    (df_ce, 'blue', 'CE'),
    (df_etc, 'green', 'ETC'),
):
    axis.axhline(frame['SEM_4_INTERNAL'].mean(), c = shade, label = branch)
axis.set_xlim(0, 1)
axis.set_ylim(0.80, 0.90)
axis.legend()

plt.show()

# Importing Required Modules and Loading the Normalized Datasets

# Visualizing the Datasets

# Scoring Subjects

# Finding out the Number of Clusters to be Formed

# Q1 : How did Students Score in Semester 3 Internal and Semester 4 Internal?

# Q2 : How did Students Score Overall in Semester 3 and Semester 4?

# Q3 : How did Students Perform in Semester 4?

# Q4 : Which Branch gives the Highest Average Internal Marks?