import numpy as nm #importing required libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
df = pd.read_csv("CE_SE_2021.csv")  # load the Computer Engineering (CE) department dataset
df.head()  # preview the first 5 rows
DISCRETE MATHEMATICS | FUND. OF DATA STRUCTURES | OBJECT ORIENTED PROGRAMMING | COMPUTER GRAPHICS | DIGITAL ELEC. & LOGIC DESIGN | DATA STUCTURES LABORATORY | OOP & COMP. GRAPHICS LAB. | DIGITAL ELEC. LABORATORY | BUSINESS COMMUNICATION SKILLS | HUMANITY & SOCIAL SCIENCE | ... | MICROPROCESSOR LABORATORY | PROJECT BASED LEARNING II | CODE OF CONDUCT | AUDIT COURSE.1 | SEM_3_GPA | SEM_3_SGPA | SEM_4_GPA | SEM_4_SGPA | TOTAL_CREDITS_EARNED | CGPA | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 30 | 30 | 30 | 30 | 30 | 18 | 16 | 10 | 8 | 9 | ... | 6 | 18 | 9 | 0 | 211.0 | 9.591 | 124.0 | 5.636 | 39 | 0.00 |
1 | 30 | 30 | 30 | 30 | 30 | 18 | 18 | 9 | 9 | 7 | ... | 9 | 20 | 9 | 0 | 211.0 | 9.591 | 197.0 | 8.955 | 44 | 9.27 |
2 | 30 | 30 | 30 | 30 | 30 | 20 | 20 | 10 | 10 | 10 | ... | 10 | 20 | 9 | 0 | 220.0 | 10.000 | 178.0 | 8.091 | 44 | 9.05 |
3 | 30 | 30 | 30 | 30 | 30 | 16 | 16 | 9 | 9 | 8 | ... | 0 | 18 | 7 | 0 | 208.0 | 9.455 | 131.0 | 5.955 | 41 | 0.00 |
4 | 30 | 30 | 30 | 30 | 30 | 18 | 20 | 9 | 9 | 9 | ... | 9 | 18 | 10 | 0 | 215.0 | 9.773 | 176.0 | 8.000 | 44 | 8.89 |
5 rows × 28 columns
df.tail()  # preview the last 5 rows
DISCRETE MATHEMATICS | FUND. OF DATA STRUCTURES | OBJECT ORIENTED PROGRAMMING | COMPUTER GRAPHICS | DIGITAL ELEC. & LOGIC DESIGN | DATA STUCTURES LABORATORY | OOP & COMP. GRAPHICS LAB. | DIGITAL ELEC. LABORATORY | BUSINESS COMMUNICATION SKILLS | HUMANITY & SOCIAL SCIENCE | ... | MICROPROCESSOR LABORATORY | PROJECT BASED LEARNING II | CODE OF CONDUCT | AUDIT COURSE.1 | SEM_3_GPA | SEM_3_SGPA | SEM_4_GPA | SEM_4_SGPA | TOTAL_CREDITS_EARNED | CGPA | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
328 | 30 | 30 | 30 | 30 | 30 | 18 | 20 | 9 | 9 | 9 | ... | 9 | 20 | 10 | 0 | 215.0 | 9.773 | 184.0 | 8.364 | 44 | 9.07 |
329 | 30 | 30 | 27 | 30 | 30 | 18 | 18 | 9 | 9 | 9 | ... | 8 | 20 | 9 | 0 | 210.0 | 9.545 | 181.0 | 8.227 | 44 | 8.89 |
330 | 27 | 30 | 24 | 24 | 30 | 18 | 16 | 9 | 9 | 9 | ... | 7 | 18 | 10 | 0 | 196.0 | 8.909 | 57.0 | 2.591 | 29 | 0.00 |
331 | 30 | 30 | 30 | 27 | 30 | 18 | 20 | 10 | 10 | 9 | ... | 9 | 18 | 10 | 0 | 214.0 | 9.727 | 189.0 | 8.591 | 44 | 9.16 |
332 | 30 | 30 | 30 | 27 | 30 | 18 | 16 | 9 | 9 | 9 | ... | 8 | 18 | 8 | 0 | 208.0 | 9.455 | 151.0 | 6.864 | 44 | 8.16 |
5 rows × 28 columns
df.shape  # (number of rows, number of columns)
(333, 28)
df.isnull().sum()  # count of missing values per column
DISCRETE MATHEMATICS 0 FUND. OF DATA STRUCTURES 0 OBJECT ORIENTED PROGRAMMING 0 COMPUTER GRAPHICS 0 DIGITAL ELEC. & LOGIC DESIGN 0 DATA STUCTURES LABORATORY 0 OOP & COMP. GRAPHICS LAB. 0 DIGITAL ELEC. LABORATORY 0 BUSINESS COMMUNICATION SKILLS 0 HUMANITY & SOCIAL SCIENCE 0 AUDIT COURSE 0 ENGINEERING MATHEMATICS III 0 ENGINEERING MATHEMATICS III.1 0 DATA STRUCTURES & ALGO. 0 SOFTWARE ENGINEERING 0 MICROPROCESSOR 0 PRINCIPLES OF PROG. LANG. 0 DATA STRUCTURES & ALGO. LAB. 0 MICROPROCESSOR LABORATORY 0 PROJECT BASED LEARNING II 0 CODE OF CONDUCT 0 AUDIT COURSE.1 0 SEM_3_GPA 0 SEM_3_SGPA 0 SEM_4_GPA 0 SEM_4_SGPA 0 TOTAL_CREDITS_EARNED 0 CGPA 0 dtype: int64
# Per-column null counts summed again to get one grand total for the frame.
total_missing = df.isnull().sum().sum()
print("Total missing values: ", total_missing)
Total missing values: 0
df.info()  # summary: index range, per-column non-null counts and dtypes, memory usage
<class 'pandas.core.frame.DataFrame'> RangeIndex: 333 entries, 0 to 332 Data columns (total 28 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 DISCRETE MATHEMATICS 333 non-null int64 1 FUND. OF DATA STRUCTURES 333 non-null int64 2 OBJECT ORIENTED PROGRAMMING 333 non-null int64 3 COMPUTER GRAPHICS 333 non-null int64 4 DIGITAL ELEC. & LOGIC DESIGN 333 non-null int64 5 DATA STUCTURES LABORATORY 333 non-null int64 6 OOP & COMP. GRAPHICS LAB. 333 non-null int64 7 DIGITAL ELEC. LABORATORY 333 non-null int64 8 BUSINESS COMMUNICATION SKILLS 333 non-null int64 9 HUMANITY & SOCIAL SCIENCE 333 non-null int64 10 AUDIT COURSE 333 non-null int64 11 ENGINEERING MATHEMATICS III 333 non-null int64 12 ENGINEERING MATHEMATICS III.1 333 non-null int64 13 DATA STRUCTURES & ALGO. 333 non-null int64 14 SOFTWARE ENGINEERING 333 non-null int64 15 MICROPROCESSOR 333 non-null int64 16 PRINCIPLES OF PROG. LANG. 333 non-null int64 17 DATA STRUCTURES & ALGO. LAB. 333 non-null int64 18 MICROPROCESSOR LABORATORY 333 non-null int64 19 PROJECT BASED LEARNING II 333 non-null int64 20 CODE OF CONDUCT 333 non-null int64 21 AUDIT COURSE.1 333 non-null int64 22 SEM_3_GPA 333 non-null float64 23 SEM_3_SGPA 333 non-null float64 24 SEM_4_GPA 333 non-null float64 25 SEM_4_SGPA 333 non-null float64 26 TOTAL_CREDITS_EARNED 333 non-null int64 27 CGPA 333 non-null float64 dtypes: float64(5), int64(23) memory usage: 73.0 KB
df.columns  # list every column label in the dataset
Index(['DISCRETE MATHEMATICS', 'FUND. OF DATA STRUCTURES', 'OBJECT ORIENTED PROGRAMMING', 'COMPUTER GRAPHICS', 'DIGITAL ELEC. & LOGIC DESIGN', 'DATA STUCTURES LABORATORY', 'OOP & COMP. GRAPHICS LAB.', 'DIGITAL ELEC. LABORATORY', 'BUSINESS COMMUNICATION SKILLS', 'HUMANITY & SOCIAL SCIENCE', 'AUDIT COURSE', 'ENGINEERING MATHEMATICS III', 'ENGINEERING MATHEMATICS III.1', 'DATA STRUCTURES & ALGO.', 'SOFTWARE ENGINEERING', 'MICROPROCESSOR', 'PRINCIPLES OF PROG. LANG.', 'DATA STRUCTURES & ALGO. LAB.', 'MICROPROCESSOR LABORATORY', 'PROJECT BASED LEARNING II', 'CODE OF CONDUCT', 'AUDIT COURSE.1', 'SEM_3_GPA', 'SEM_3_SGPA', 'SEM_4_GPA', 'SEM_4_SGPA', 'TOTAL_CREDITS_EARNED', 'CGPA'], dtype='object')
df.dtypes  # data type of each column
DISCRETE MATHEMATICS int64 FUND. OF DATA STRUCTURES int64 OBJECT ORIENTED PROGRAMMING int64 COMPUTER GRAPHICS int64 DIGITAL ELEC. & LOGIC DESIGN int64 DATA STUCTURES LABORATORY int64 OOP & COMP. GRAPHICS LAB. int64 DIGITAL ELEC. LABORATORY int64 BUSINESS COMMUNICATION SKILLS int64 HUMANITY & SOCIAL SCIENCE int64 AUDIT COURSE int64 ENGINEERING MATHEMATICS III int64 ENGINEERING MATHEMATICS III.1 int64 DATA STRUCTURES & ALGO. int64 SOFTWARE ENGINEERING int64 MICROPROCESSOR int64 PRINCIPLES OF PROG. LANG. int64 DATA STRUCTURES & ALGO. LAB. int64 MICROPROCESSOR LABORATORY int64 PROJECT BASED LEARNING II int64 CODE OF CONDUCT int64 AUDIT COURSE.1 int64 SEM_3_GPA float64 SEM_3_SGPA float64 SEM_4_GPA float64 SEM_4_SGPA float64 TOTAL_CREDITS_EARNED int64 CGPA float64 dtype: object
# Make sure CGPA is numeric: with errors='coerce', pd.to_numeric turns any entry it cannot parse into NaN.
# NOTE(review): the dtypes listing above already reports CGPA as float64, so this looks like a no-op here - confirm.
df['CGPA'] = pd.to_numeric(df['CGPA'], errors='coerce')  # the result is a float column, not an int column
df.dtypes  # re-check the data type of each column after the conversion
DISCRETE MATHEMATICS int64 FUND. OF DATA STRUCTURES int64 OBJECT ORIENTED PROGRAMMING int64 COMPUTER GRAPHICS int64 DIGITAL ELEC. & LOGIC DESIGN int64 DATA STUCTURES LABORATORY int64 OOP & COMP. GRAPHICS LAB. int64 DIGITAL ELEC. LABORATORY int64 BUSINESS COMMUNICATION SKILLS int64 HUMANITY & SOCIAL SCIENCE int64 AUDIT COURSE int64 ENGINEERING MATHEMATICS III int64 ENGINEERING MATHEMATICS III.1 int64 DATA STRUCTURES & ALGO. int64 SOFTWARE ENGINEERING int64 MICROPROCESSOR int64 PRINCIPLES OF PROG. LANG. int64 DATA STRUCTURES & ALGO. LAB. int64 MICROPROCESSOR LABORATORY int64 PROJECT BASED LEARNING II int64 CODE OF CONDUCT int64 AUDIT COURSE.1 int64 SEM_3_GPA float64 SEM_3_SGPA float64 SEM_4_GPA float64 SEM_4_SGPA float64 TOTAL_CREDITS_EARNED int64 CGPA float64 dtype: object
# Keep only the semester-wise SGPA columns and the CGPA column for a compact view.
gpa_columns = ['SEM_3_SGPA', 'SEM_4_SGPA', 'CGPA']
df2 = df.filter(items=gpa_columns)
df2
SEM_3_SGPA | SEM_4_SGPA | CGPA | |
---|---|---|---|
0 | 9.591 | 5.636 | 0.00 |
1 | 9.591 | 8.955 | 9.27 |
2 | 10.000 | 8.091 | 9.05 |
3 | 9.455 | 5.955 | 0.00 |
4 | 9.773 | 8.000 | 8.89 |
... | ... | ... | ... |
328 | 9.773 | 8.364 | 9.07 |
329 | 9.545 | 8.227 | 8.89 |
330 | 8.909 | 2.591 | 0.00 |
331 | 9.727 | 8.591 | 9.16 |
332 | 9.455 | 6.864 | 8.16 |
333 rows × 3 columns
df.head()  # preview the first 5 rows of the full dataset again
DISCRETE MATHEMATICS | FUND. OF DATA STRUCTURES | OBJECT ORIENTED PROGRAMMING | COMPUTER GRAPHICS | DIGITAL ELEC. & LOGIC DESIGN | DATA STUCTURES LABORATORY | OOP & COMP. GRAPHICS LAB. | DIGITAL ELEC. LABORATORY | BUSINESS COMMUNICATION SKILLS | HUMANITY & SOCIAL SCIENCE | ... | MICROPROCESSOR LABORATORY | PROJECT BASED LEARNING II | CODE OF CONDUCT | AUDIT COURSE.1 | SEM_3_GPA | SEM_3_SGPA | SEM_4_GPA | SEM_4_SGPA | TOTAL_CREDITS_EARNED | CGPA | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 30 | 30 | 30 | 30 | 30 | 18 | 16 | 10 | 8 | 9 | ... | 6 | 18 | 9 | 0 | 211.0 | 9.591 | 124.0 | 5.636 | 39 | 0.00 |
1 | 30 | 30 | 30 | 30 | 30 | 18 | 18 | 9 | 9 | 7 | ... | 9 | 20 | 9 | 0 | 211.0 | 9.591 | 197.0 | 8.955 | 44 | 9.27 |
2 | 30 | 30 | 30 | 30 | 30 | 20 | 20 | 10 | 10 | 10 | ... | 10 | 20 | 9 | 0 | 220.0 | 10.000 | 178.0 | 8.091 | 44 | 9.05 |
3 | 30 | 30 | 30 | 30 | 30 | 16 | 16 | 9 | 9 | 8 | ... | 0 | 18 | 7 | 0 | 208.0 | 9.455 | 131.0 | 5.955 | 41 | 0.00 |
4 | 30 | 30 | 30 | 30 | 30 | 18 | 20 | 9 | 9 | 9 | ... | 9 | 18 | 10 | 0 | 215.0 | 9.773 | 176.0 | 8.000 | 44 | 8.89 |
5 rows × 28 columns
# Columns in which a missing entry is replaced by 0.
zero_fill_columns = [
    'AUDIT COURSE',
    'AUDIT COURSE.1',
    'ENGINEERING MATHEMATICS III',
    'DATA STRUCTURES & ALGO.',
    'SOFTWARE ENGINEERING',
    'MICROPROCESSOR',
    'PRINCIPLES OF PROG. LANG.',
    'DATA STRUCTURES & ALGO. LAB.',
    'MICROPROCESSOR LABORATORY',
    'CGPA',
]
for column in zero_fill_columns:
    df[column] = df[column].fillna(0)

# Null count per column after the replacement.
df.isnull().sum()
DISCRETE MATHEMATICS 0 FUND. OF DATA STRUCTURES 0 OBJECT ORIENTED PROGRAMMING 0 COMPUTER GRAPHICS 0 DIGITAL ELEC. & LOGIC DESIGN 0 DATA STUCTURES LABORATORY 0 OOP & COMP. GRAPHICS LAB. 0 DIGITAL ELEC. LABORATORY 0 BUSINESS COMMUNICATION SKILLS 0 HUMANITY & SOCIAL SCIENCE 0 AUDIT COURSE 0 ENGINEERING MATHEMATICS III 0 ENGINEERING MATHEMATICS III.1 0 DATA STRUCTURES & ALGO. 0 SOFTWARE ENGINEERING 0 MICROPROCESSOR 0 PRINCIPLES OF PROG. LANG. 0 DATA STRUCTURES & ALGO. LAB. 0 MICROPROCESSOR LABORATORY 0 PROJECT BASED LEARNING II 0 CODE OF CONDUCT 0 AUDIT COURSE.1 0 SEM_3_GPA 0 SEM_3_SGPA 0 SEM_4_GPA 0 SEM_4_SGPA 0 TOTAL_CREDITS_EARNED 0 CGPA 0 dtype: int64
mean_sgpa_sem3 = df['SEM_3_SGPA'].mean()  # arithmetic mean of the semester 3 SGPA column
mean_sgpa_sem3  # display the mean
9.436123123123124
median_sgpa_sem3 = df['SEM_3_SGPA'].median()  # median of the semester 3 SGPA column
median_sgpa_sem3  # display the median
9.5
std_sgpa_sem3 = df['SEM_3_SGPA'].std()  # sample standard deviation (pandas default ddof=1) of the semester 3 SGPA
std_sgpa_sem3  # display the standard deviation
0.35803710159749813
mean_sgpa_sem4 = df['SEM_4_SGPA'].mean()  # arithmetic mean of the semester 4 SGPA column
mean_sgpa_sem4  # display the mean
7.408681681681683
median_sgpa_sem4 = df['SEM_4_SGPA'].median()  # median of the semester 4 SGPA column
median_sgpa_sem4  # display the median
7.636
std_sgpa_sem4 = df['SEM_4_SGPA'].std()  # sample standard deviation (pandas default ddof=1) of the semester 4 SGPA
std_sgpa_sem4  # display the standard deviation
1.2676948274236515
# Columns added together to form each semester's "internal" total.
sem3_internal_columns = [
    'DATA STUCTURES LABORATORY',
    'OOP & COMP. GRAPHICS LAB.',
    'DIGITAL ELEC. LABORATORY',
    'BUSINESS COMMUNICATION SKILLS',
    'HUMANITY & SOCIAL SCIENCE',
    'AUDIT COURSE',
]
sem4_internal_columns = [
    'ENGINEERING MATHEMATICS III.1',
    'DATA STRUCTURES & ALGO. LAB.',
    'MICROPROCESSOR LABORATORY',
    'PROJECT BASED LEARNING II',
    'CODE OF CONDUCT',
    'AUDIT COURSE.1',
]

# Row-wise sums give the internal totals.
df["SEM_3_INTERNAL"] = df[sem3_internal_columns].sum(axis='columns')
df["SEM_4_INTERNAL"] = df[sem4_internal_columns].sum(axis='columns')

# The external part is whatever remains of the semester total once the internal part is removed.
sem3_total = df['SEM_3_GPA']
sem4_total = df['SEM_4_GPA']
df['SEM_3_EXTERNAL'] = sem3_total - df['SEM_3_INTERNAL']
df['SEM_4_EXTERNAL'] = sem4_total - df['SEM_4_INTERNAL']

# First 5 rows, still un-normalised, now including the four derived columns.
df.head(5)
DISCRETE MATHEMATICS | FUND. OF DATA STRUCTURES | OBJECT ORIENTED PROGRAMMING | COMPUTER GRAPHICS | DIGITAL ELEC. & LOGIC DESIGN | DATA STUCTURES LABORATORY | OOP & COMP. GRAPHICS LAB. | DIGITAL ELEC. LABORATORY | BUSINESS COMMUNICATION SKILLS | HUMANITY & SOCIAL SCIENCE | ... | SEM_3_GPA | SEM_3_SGPA | SEM_4_GPA | SEM_4_SGPA | TOTAL_CREDITS_EARNED | CGPA | SEM_3_INTERNAL | SEM_4_INTERNAL | SEM_3_EXTERNAL | SEM_4_EXTERNAL | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 30 | 30 | 30 | 30 | 30 | 18 | 16 | 10 | 8 | 9 | ... | 211.0 | 9.591 | 124.0 | 5.636 | 39 | 0.00 | 61 | 40 | 150.0 | 84.0 |
1 | 30 | 30 | 30 | 30 | 30 | 18 | 18 | 9 | 9 | 7 | ... | 211.0 | 9.591 | 197.0 | 8.955 | 44 | 9.27 | 61 | 65 | 150.0 | 132.0 |
2 | 30 | 30 | 30 | 30 | 30 | 20 | 20 | 10 | 10 | 10 | ... | 220.0 | 10.000 | 178.0 | 8.091 | 44 | 9.05 | 70 | 67 | 150.0 | 111.0 |
3 | 30 | 30 | 30 | 30 | 30 | 16 | 16 | 9 | 9 | 8 | ... | 208.0 | 9.455 | 131.0 | 5.955 | 41 | 0.00 | 58 | 32 | 150.0 | 99.0 |
4 | 30 | 30 | 30 | 30 | 30 | 18 | 20 | 9 | 9 | 9 | ... | 215.0 | 9.773 | 176.0 | 8.000 | 44 | 8.89 | 65 | 65 | 150.0 | 111.0 |
5 rows × 32 columns
# Divisor applied to each column to rescale it.
# NOTE(review): the divisors look like the maximum attainable value of each column
# (which would map every column onto 0..1) - confirm against the marking scheme.
# AUDIT COURSE, AUDIT COURSE.1, SEM_3_GPA and SEM_4_GPA are not rescaled.
ce_divisors = {
    'DISCRETE MATHEMATICS': 30,
    'FUND. OF DATA STRUCTURES': 30,
    'OBJECT ORIENTED PROGRAMMING': 30,
    'COMPUTER GRAPHICS': 30,
    'DIGITAL ELEC. & LOGIC DESIGN': 30,
    'DATA STUCTURES LABORATORY': 20,
    'OOP & COMP. GRAPHICS LAB.': 20,
    'DIGITAL ELEC. LABORATORY': 10,
    'BUSINESS COMMUNICATION SKILLS': 10,
    'HUMANITY & SOCIAL SCIENCE': 10,
    'ENGINEERING MATHEMATICS III': 30,
    'ENGINEERING MATHEMATICS III.1': 10,
    'DATA STRUCTURES & ALGO.': 30,
    'SOFTWARE ENGINEERING': 30,
    'MICROPROCESSOR': 30,
    'PRINCIPLES OF PROG. LANG.': 30,
    'DATA STRUCTURES & ALGO. LAB.': 20,
    'MICROPROCESSOR LABORATORY': 10,
    'PROJECT BASED LEARNING II': 20,
    'CODE OF CONDUCT': 10,
    'SEM_3_SGPA': 10,
    'SEM_4_SGPA': 10,
    'TOTAL_CREDITS_EARNED': 44,
    'CGPA': 10,
    'SEM_3_INTERNAL': 70,
    'SEM_4_INTERNAL': 70,
    'SEM_3_EXTERNAL': 150,
    'SEM_4_EXTERNAL': 150,
}
for column, divisor in ce_divisors.items():
    df[column] = df[column] / divisor

# Keep CGPA numeric; with errors='coerce' any unparsable entry becomes NaN.
df['CGPA'] = pd.to_numeric(df['CGPA'], errors='coerce')

# Show every column when the normalised frame is displayed.
pd.set_option('display.max_columns', None)
df
DISCRETE MATHEMATICS | FUND. OF DATA STRUCTURES | OBJECT ORIENTED PROGRAMMING | COMPUTER GRAPHICS | DIGITAL ELEC. & LOGIC DESIGN | DATA STUCTURES LABORATORY | OOP & COMP. GRAPHICS LAB. | DIGITAL ELEC. LABORATORY | BUSINESS COMMUNICATION SKILLS | HUMANITY & SOCIAL SCIENCE | AUDIT COURSE | ENGINEERING MATHEMATICS III | ENGINEERING MATHEMATICS III.1 | DATA STRUCTURES & ALGO. | SOFTWARE ENGINEERING | MICROPROCESSOR | PRINCIPLES OF PROG. LANG. | DATA STRUCTURES & ALGO. LAB. | MICROPROCESSOR LABORATORY | PROJECT BASED LEARNING II | CODE OF CONDUCT | AUDIT COURSE.1 | SEM_3_GPA | SEM_3_SGPA | SEM_4_GPA | SEM_4_SGPA | TOTAL_CREDITS_EARNED | CGPA | SEM_3_INTERNAL | SEM_4_INTERNAL | SEM_3_EXTERNAL | SEM_4_EXTERNAL | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.9 | 0.8 | 1.0 | 0.8 | 0.9 | 0 | 0.8 | 0.7 | 0.7 | 0.7 | 0.0 | 0.6 | 0.0 | 0.6 | 0.9 | 0.9 | 0 | 211.0 | 0.9591 | 124.0 | 0.5636 | 0.886364 | 0.000 | 0.871429 | 0.571429 | 1.00 | 0.56 |
1 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.9 | 0.9 | 0.9 | 0.9 | 0.7 | 0 | 1.0 | 0.9 | 0.9 | 0.8 | 0.8 | 0.9 | 0.9 | 0.9 | 1.0 | 0.9 | 0 | 211.0 | 0.9591 | 197.0 | 0.8955 | 1.000000 | 0.927 | 0.871429 | 0.928571 | 1.00 | 0.88 |
2 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0 | 1.0 | 0.8 | 0.6 | 0.5 | 0.9 | 0.7 | 1.0 | 1.0 | 1.0 | 0.9 | 0 | 220.0 | 1.0000 | 178.0 | 0.8091 | 1.000000 | 0.905 | 1.000000 | 0.957143 | 1.00 | 0.74 |
3 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.8 | 0.8 | 0.9 | 0.9 | 0.8 | 0 | 0.7 | 0.7 | 0.6 | 0.6 | 0.7 | 0.7 | 0.0 | 0.0 | 0.9 | 0.7 | 0 | 208.0 | 0.9455 | 131.0 | 0.5955 | 0.931818 | 0.000 | 0.828571 | 0.457143 | 1.00 | 0.66 |
4 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.9 | 1.0 | 0.9 | 0.9 | 0.9 | 0 | 0.7 | 1.0 | 0.8 | 0.7 | 0.8 | 0.7 | 0.9 | 0.9 | 0.9 | 1.0 | 0 | 215.0 | 0.9773 | 176.0 | 0.8000 | 1.000000 | 0.889 | 0.928571 | 0.928571 | 1.00 | 0.74 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
328 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.9 | 1.0 | 0.9 | 0.9 | 0.9 | 0 | 0.9 | 0.7 | 0.8 | 0.8 | 0.8 | 0.7 | 0.9 | 0.9 | 1.0 | 1.0 | 0 | 215.0 | 0.9773 | 184.0 | 0.8364 | 1.000000 | 0.907 | 0.928571 | 0.914286 | 1.00 | 0.80 |
329 | 1.0 | 1.0 | 0.9 | 1.0 | 1.0 | 0.9 | 0.9 | 0.9 | 0.9 | 0.9 | 0 | 1.0 | 0.9 | 0.6 | 0.7 | 0.8 | 0.8 | 0.9 | 0.8 | 1.0 | 0.9 | 0 | 210.0 | 0.9545 | 181.0 | 0.8227 | 1.000000 | 0.889 | 0.900000 | 0.914286 | 0.98 | 0.78 |
330 | 0.9 | 1.0 | 0.8 | 0.8 | 1.0 | 0.9 | 0.8 | 0.9 | 0.9 | 0.9 | 0 | 0.0 | 0.8 | 0.0 | 0.0 | 0.0 | 0.0 | 0.7 | 0.7 | 0.9 | 1.0 | 0 | 196.0 | 0.8909 | 57.0 | 0.2591 | 0.659091 | 0.000 | 0.871429 | 0.814286 | 0.90 | 0.00 |
331 | 1.0 | 1.0 | 1.0 | 0.9 | 1.0 | 0.9 | 1.0 | 1.0 | 1.0 | 0.9 | 0 | 1.0 | 0.9 | 0.7 | 0.7 | 0.9 | 0.8 | 1.0 | 0.9 | 0.9 | 1.0 | 0 | 214.0 | 0.9727 | 189.0 | 0.8591 | 1.000000 | 0.916 | 0.957143 | 0.942857 | 0.98 | 0.82 |
332 | 1.0 | 1.0 | 1.0 | 0.9 | 1.0 | 0.9 | 0.8 | 0.9 | 0.9 | 0.9 | 0 | 0.6 | 0.7 | 0.7 | 0.7 | 0.5 | 0.7 | 0.7 | 0.8 | 0.9 | 0.8 | 0 | 208.0 | 0.9455 | 151.0 | 0.6864 | 1.000000 | 0.816 | 0.871429 | 0.785714 | 0.98 | 0.64 |
333 rows × 32 columns
# Persist the normalised CE frame.
# NOTE(review): index is left at its default, so the row index is written as an extra
# unnamed first column - pass index=False if that column is not wanted.
df.to_csv('CE_Normalized.csv')
df_ce = df  # keep a reference to the CE frame before `df` is rebound below
df = pd.read_csv("ETC_SE_2021.csv")  # load the E&TC engineering department dataset
df.head()  # preview the first 5 rows
ELECTRONIC CIRCUITS | DIGITAL CIRCUITS | ELECTRICAL CIRCUITS | DATA STRUCTURES | ELECTRONIC CIRCUIT LAB | DIGITAL CIRCUITS LAB | ELECTRICAL CIRCUIT LAB | DATA STRUCTURES LAB | ELECTRONIC SKILL DEVELOPMENT | AUDIT COURSE | ENGINEERING MATHEMATICS III | ENGINEERING MATHEMATICS III.1 | SIGNALS & SYSTEMS | SIGNALS & SYSTEMS.1 | CONTROL SYSTEMS | PRINCIPLES OF COMMU. SYSTEMS | OBJECT ORIENTED PROGRAMMING | SIGNALS & CONTROL SYSTEM LAB | PRINCIPLE OF COMMU. SYS. LAB | OOPS LAB | DATA ANALYTICS LAB | EMPLOYABILITY SKILL DEV. | PROJECT BASED LEARNING | AUDIT COURSE.1 | SEM_3_GPA | SEM_3_SGPA | SEM_4_GPA | SEM_4_SGPA | TOTAL_CREDITS_EARNED | CGPA | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 27 | 30 | 30 | 30 | 4 | 9 | 9 | 9 | 8 | 0 | 40 | 7 | 21 | 10 | 18 | 24 | 18 | 8 | 9 | 9 | 10 | 24 | 18 | 0 | 203.0 | 9.227 | 169.0 | 7.682 | 44 | 8.45 |
1 | 27 | 30 | 30 | 30 | 9 | 10 | 9 | 10 | 9 | 0 | 40 | 7 | 24 | 9 | 21 | 24 | 18 | 8 | 10 | 9 | 9 | 21 | 18 | 0 | 211.0 | 9.591 | 171.0 | 7.773 | 44 | 8.68 |
2 | 27 | 30 | 30 | 30 | 6 | 10 | 9 | 7 | 9 | 0 | 40 | 9 | 18 | 9 | 12 | 21 | 12 | 8 | 9 | 9 | 9 | 24 | 16 | 0 | 207.0 | 9.409 | 147.0 | 6.682 | 44 | 8.05 |
3 | 27 | 30 | 30 | 30 | 4 | 7 | 6 | 8 | 8 | 0 | 40 | 8 | 24 | 9 | 27 | 21 | 18 | 8 | 8 | 8 | 9 | 27 | 14 | 0 | 198.0 | 9.000 | 173.0 | 7.864 | 44 | 8.43 |
4 | 30 | 30 | 30 | 30 | 6 | 8 | 10 | 9 | 9 | 0 | 40 | 10 | 21 | 10 | 21 | 24 | 18 | 9 | 8 | 9 | 8 | 27 | 18 | 0 | 212.0 | 9.636 | 173.0 | 7.864 | 44 | 8.75 |
df.tail()  # preview the last 5 rows
ELECTRONIC CIRCUITS | DIGITAL CIRCUITS | ELECTRICAL CIRCUITS | DATA STRUCTURES | ELECTRONIC CIRCUIT LAB | DIGITAL CIRCUITS LAB | ELECTRICAL CIRCUIT LAB | DATA STRUCTURES LAB | ELECTRONIC SKILL DEVELOPMENT | AUDIT COURSE | ENGINEERING MATHEMATICS III | ENGINEERING MATHEMATICS III.1 | SIGNALS & SYSTEMS | SIGNALS & SYSTEMS.1 | CONTROL SYSTEMS | PRINCIPLES OF COMMU. SYSTEMS | OBJECT ORIENTED PROGRAMMING | SIGNALS & CONTROL SYSTEM LAB | PRINCIPLE OF COMMU. SYS. LAB | OOPS LAB | DATA ANALYTICS LAB | EMPLOYABILITY SKILL DEV. | PROJECT BASED LEARNING | AUDIT COURSE.1 | SEM_3_GPA | SEM_3_SGPA | SEM_4_GPA | SEM_4_SGPA | TOTAL_CREDITS_EARNED | CGPA | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
304 | 27 | 30 | 30 | 30 | 5 | 10 | 9 | 7 | 9 | 0 | 40 | 9 | 12 | 9 | 12 | 12 | 18 | 8 | 8 | 6 | 8 | 27 | 14 | 0 | 206.0 | 9.364 | 134.0 | 6.091 | 44 | 7.73 |
305 | 24 | 30 | 30 | 30 | 9 | 8 | 9 | 7 | 9 | 0 | 28 | 9 | 21 | 9 | 15 | 18 | 15 | 8 | 8 | 8 | 8 | 24 | 16 | 0 | 193.0 | 8.773 | 150.0 | 6.818 | 44 | 7.80 |
306 | 24 | 30 | 30 | 30 | 7 | 9 | 10 | 8 | 9 | 0 | 40 | 9 | 24 | 10 | 27 | 24 | 18 | 9 | 9 | 9 | 7 | 27 | 16 | 0 | 206.0 | 9.364 | 180.0 | 8.182 | 44 | 8.77 |
307 | 27 | 30 | 30 | 30 | 4 | 9 | 10 | 9 | 9 | 0 | 40 | 9 | 24 | 9 | 24 | 21 | 18 | 8 | 6 | 9 | 9 | 24 | 18 | 0 | 207.0 | 9.409 | 170.0 | 7.727 | 44 | 8.57 |
308 | 21 | 30 | 30 | 27 | 4 | 9 | 9 | 4 | 9 | 0 | 40 | 9 | 12 | 8 | 12 | 12 | 12 | 7 | 8 | 8 | 7 | 21 | 14 | 0 | 192.0 | 8.727 | 121.0 | 5.500 | 44 | 7.11 |
df.shape  # (number of rows, number of columns)
(309, 30)
df.isnull().sum()  # count of missing values per column
ELECTRONIC CIRCUITS 0 DIGITAL CIRCUITS 0 ELECTRICAL CIRCUITS 0 DATA STRUCTURES 0 ELECTRONIC CIRCUIT LAB 0 DIGITAL CIRCUITS LAB 0 ELECTRICAL CIRCUIT LAB 0 DATA STRUCTURES LAB 0 ELECTRONIC SKILL DEVELOPMENT 0 AUDIT COURSE 0 ENGINEERING MATHEMATICS III 0 ENGINEERING MATHEMATICS III.1 0 SIGNALS & SYSTEMS 0 SIGNALS & SYSTEMS.1 0 CONTROL SYSTEMS 0 PRINCIPLES OF COMMU. SYSTEMS 0 OBJECT ORIENTED PROGRAMMING 0 SIGNALS & CONTROL SYSTEM LAB 0 PRINCIPLE OF COMMU. SYS. LAB 0 OOPS LAB 0 DATA ANALYTICS LAB 0 EMPLOYABILITY SKILL DEV. 0 PROJECT BASED LEARNING 0 AUDIT COURSE.1 0 SEM_3_GPA 0 SEM_3_SGPA 0 SEM_4_GPA 0 SEM_4_SGPA 0 TOTAL_CREDITS_EARNED 0 CGPA 0 dtype: int64
# Per-column null counts summed again to get one grand total for the frame.
total_missing = df.isnull().sum().sum()
print("Total missing values: ", total_missing)
Total missing values: 0
df.info()  # summary: index range, per-column non-null counts and dtypes, memory usage
<class 'pandas.core.frame.DataFrame'> RangeIndex: 309 entries, 0 to 308 Data columns (total 30 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 ELECTRONIC CIRCUITS 309 non-null int64 1 DIGITAL CIRCUITS 309 non-null int64 2 ELECTRICAL CIRCUITS 309 non-null int64 3 DATA STRUCTURES 309 non-null int64 4 ELECTRONIC CIRCUIT LAB 309 non-null int64 5 DIGITAL CIRCUITS LAB 309 non-null int64 6 ELECTRICAL CIRCUIT LAB 309 non-null int64 7 DATA STRUCTURES LAB 309 non-null int64 8 ELECTRONIC SKILL DEVELOPMENT 309 non-null int64 9 AUDIT COURSE 309 non-null int64 10 ENGINEERING MATHEMATICS III 309 non-null int64 11 ENGINEERING MATHEMATICS III.1 309 non-null int64 12 SIGNALS & SYSTEMS 309 non-null int64 13 SIGNALS & SYSTEMS.1 309 non-null int64 14 CONTROL SYSTEMS 309 non-null int64 15 PRINCIPLES OF COMMU. SYSTEMS 309 non-null int64 16 OBJECT ORIENTED PROGRAMMING 309 non-null int64 17 SIGNALS & CONTROL SYSTEM LAB 309 non-null int64 18 PRINCIPLE OF COMMU. SYS. LAB 309 non-null int64 19 OOPS LAB 309 non-null int64 20 DATA ANALYTICS LAB 309 non-null int64 21 EMPLOYABILITY SKILL DEV. 309 non-null int64 22 PROJECT BASED LEARNING 309 non-null int64 23 AUDIT COURSE.1 309 non-null int64 24 SEM_3_GPA 309 non-null float64 25 SEM_3_SGPA 309 non-null float64 26 SEM_4_GPA 309 non-null float64 27 SEM_4_SGPA 309 non-null float64 28 TOTAL_CREDITS_EARNED 309 non-null int64 29 CGPA 309 non-null float64 dtypes: float64(5), int64(25) memory usage: 72.5 KB
df.dtypes  # data type of each column
ELECTRONIC CIRCUITS int64 DIGITAL CIRCUITS int64 ELECTRICAL CIRCUITS int64 DATA STRUCTURES int64 ELECTRONIC CIRCUIT LAB int64 DIGITAL CIRCUITS LAB int64 ELECTRICAL CIRCUIT LAB int64 DATA STRUCTURES LAB int64 ELECTRONIC SKILL DEVELOPMENT int64 AUDIT COURSE int64 ENGINEERING MATHEMATICS III int64 ENGINEERING MATHEMATICS III.1 int64 SIGNALS & SYSTEMS int64 SIGNALS & SYSTEMS.1 int64 CONTROL SYSTEMS int64 PRINCIPLES OF COMMU. SYSTEMS int64 OBJECT ORIENTED PROGRAMMING int64 SIGNALS & CONTROL SYSTEM LAB int64 PRINCIPLE OF COMMU. SYS. LAB int64 OOPS LAB int64 DATA ANALYTICS LAB int64 EMPLOYABILITY SKILL DEV. int64 PROJECT BASED LEARNING int64 AUDIT COURSE.1 int64 SEM_3_GPA float64 SEM_3_SGPA float64 SEM_4_GPA float64 SEM_4_SGPA float64 TOTAL_CREDITS_EARNED int64 CGPA float64 dtype: object
# Make sure CGPA is numeric: with errors='coerce', pd.to_numeric turns any entry it cannot parse into NaN.
# NOTE(review): the dtypes listing above already reports CGPA as float64, so this looks like a no-op here - confirm.
df['CGPA'] = pd.to_numeric(df['CGPA'], errors='coerce')  # the result is a float column, not an int column
df.dtypes  # re-check the data type of each column after the conversion
ELECTRONIC CIRCUITS int64 DIGITAL CIRCUITS int64 ELECTRICAL CIRCUITS int64 DATA STRUCTURES int64 ELECTRONIC CIRCUIT LAB int64 DIGITAL CIRCUITS LAB int64 ELECTRICAL CIRCUIT LAB int64 DATA STRUCTURES LAB int64 ELECTRONIC SKILL DEVELOPMENT int64 AUDIT COURSE int64 ENGINEERING MATHEMATICS III int64 ENGINEERING MATHEMATICS III.1 int64 SIGNALS & SYSTEMS int64 SIGNALS & SYSTEMS.1 int64 CONTROL SYSTEMS int64 PRINCIPLES OF COMMU. SYSTEMS int64 OBJECT ORIENTED PROGRAMMING int64 SIGNALS & CONTROL SYSTEM LAB int64 PRINCIPLE OF COMMU. SYS. LAB int64 OOPS LAB int64 DATA ANALYTICS LAB int64 EMPLOYABILITY SKILL DEV. int64 PROJECT BASED LEARNING int64 AUDIT COURSE.1 int64 SEM_3_GPA float64 SEM_3_SGPA float64 SEM_4_GPA float64 SEM_4_SGPA float64 TOTAL_CREDITS_EARNED int64 CGPA float64 dtype: object
# Keep only the semester-wise SGPA columns and the CGPA column for a compact view.
gpa_columns = ['SEM_3_SGPA', 'SEM_4_SGPA', 'CGPA']
df2 = df.filter(items=gpa_columns)
df2
SEM_3_SGPA | SEM_4_SGPA | CGPA | |
---|---|---|---|
0 | 9.227 | 7.682 | 8.45 |
1 | 9.591 | 7.773 | 8.68 |
2 | 9.409 | 6.682 | 8.05 |
3 | 9.000 | 7.864 | 8.43 |
4 | 9.636 | 7.864 | 8.75 |
... | ... | ... | ... |
304 | 9.364 | 6.091 | 7.73 |
305 | 8.773 | 6.818 | 7.80 |
306 | 9.364 | 8.182 | 8.77 |
307 | 9.409 | 7.727 | 8.57 |
308 | 8.727 | 5.500 | 7.11 |
309 rows × 3 columns
# Columns in which a missing entry is replaced by 0.
# 'SIGNALS & SYSTEMS' needs to appear only once: filling it a second time changes nothing.
zero_fill_columns = [
    'AUDIT COURSE',
    'AUDIT COURSE.1',
    'ENGINEERING MATHEMATICS III',
    'DATA STRUCTURES LAB',
    'ELECTRONIC CIRCUIT LAB',
    'ELECTRICAL CIRCUIT LAB',
    'OOPS LAB',
    'DATA ANALYTICS LAB',
    'SIGNALS & SYSTEMS',
    'CONTROL SYSTEMS',
    'PRINCIPLES OF COMMU. SYSTEMS',
    'OBJECT ORIENTED PROGRAMMING',
    'CGPA',
]
for column in zero_fill_columns:
    df[column] = df[column].fillna(0)

# Null count per column after the replacement.
df.isnull().sum()
ELECTRONIC CIRCUITS 0 DIGITAL CIRCUITS 0 ELECTRICAL CIRCUITS 0 DATA STRUCTURES 0 ELECTRONIC CIRCUIT LAB 0 DIGITAL CIRCUITS LAB 0 ELECTRICAL CIRCUIT LAB 0 DATA STRUCTURES LAB 0 ELECTRONIC SKILL DEVELOPMENT 0 AUDIT COURSE 0 ENGINEERING MATHEMATICS III 0 ENGINEERING MATHEMATICS III.1 0 SIGNALS & SYSTEMS 0 SIGNALS & SYSTEMS.1 0 CONTROL SYSTEMS 0 PRINCIPLES OF COMMU. SYSTEMS 0 OBJECT ORIENTED PROGRAMMING 0 SIGNALS & CONTROL SYSTEM LAB 0 PRINCIPLE OF COMMU. SYS. LAB 0 OOPS LAB 0 DATA ANALYTICS LAB 0 EMPLOYABILITY SKILL DEV. 0 PROJECT BASED LEARNING 0 AUDIT COURSE.1 0 SEM_3_GPA 0 SEM_3_SGPA 0 SEM_4_GPA 0 SEM_4_SGPA 0 TOTAL_CREDITS_EARNED 0 CGPA 0 dtype: int64
# NOTE: the statistic variable names are reused from the CE section, so the CE values are overwritten from here on.
mean_sgpa_sem3 = df['SEM_3_SGPA'].mean()  # arithmetic mean of the semester 3 SGPA column
mean_sgpa_sem3  # display the mean
9.310245954692556
median_sgpa_sem3 = df['SEM_3_SGPA'].median()  # median of the semester 3 SGPA column
median_sgpa_sem3  # display the median
9.409
std_sgpa_sem3 = df['SEM_3_SGPA'].std()  # sample standard deviation (pandas default ddof=1) of the semester 3 SGPA
std_sgpa_sem3  # display the standard deviation
0.49598066084602294
mean_sgpa_sem4 = df['SEM_4_SGPA'].mean()  # arithmetic mean of the semester 4 SGPA column
mean_sgpa_sem4  # display the mean
6.91982200647249
median_sgpa_sem4 = df['SEM_4_SGPA'].median()  # median of the semester 4 SGPA column
median_sgpa_sem4  # display the median
7.227
std_sgpa_sem4 = df['SEM_4_SGPA'].std()  # sample standard deviation (pandas default ddof=1) of the semester 4 SGPA
std_sgpa_sem4  # display the standard deviation
1.320573892868979
# Columns added together to form each semester's "internal" total.
sem3_internal_columns = [
    'ENGINEERING MATHEMATICS III.1',
    'ELECTRONIC CIRCUIT LAB',
    'DIGITAL CIRCUITS LAB',
    'ELECTRICAL CIRCUIT LAB',
    'DATA STRUCTURES LAB',
    'ELECTRONIC SKILL DEVELOPMENT',
    'AUDIT COURSE',
]
sem4_internal_columns = [
    'SIGNALS & SYSTEMS.1',
    'SIGNALS & CONTROL SYSTEM LAB',
    'PRINCIPLE OF COMMU. SYS. LAB',
    'OOPS LAB',
    'DATA ANALYTICS LAB',
    'PROJECT BASED LEARNING',
    'AUDIT COURSE.1',
]

# Row-wise sums give the internal totals.
df["SEM_3_INTERNAL"] = df[sem3_internal_columns].sum(axis='columns')
df["SEM_4_INTERNAL"] = df[sem4_internal_columns].sum(axis='columns')

# The external part is whatever remains of the semester total once the internal part is removed.
sem3_total = df['SEM_3_GPA']
sem4_total = df['SEM_4_GPA']
df['SEM_3_EXTERNAL'] = sem3_total - df['SEM_3_INTERNAL']
df['SEM_4_EXTERNAL'] = sem4_total - df['SEM_4_INTERNAL']

# First 5 rows, still un-normalised, now including the four derived columns.
df.head()
ELECTRONIC CIRCUITS | DIGITAL CIRCUITS | ELECTRICAL CIRCUITS | DATA STRUCTURES | ELECTRONIC CIRCUIT LAB | DIGITAL CIRCUITS LAB | ELECTRICAL CIRCUIT LAB | DATA STRUCTURES LAB | ELECTRONIC SKILL DEVELOPMENT | AUDIT COURSE | ENGINEERING MATHEMATICS III | ENGINEERING MATHEMATICS III.1 | SIGNALS & SYSTEMS | SIGNALS & SYSTEMS.1 | CONTROL SYSTEMS | PRINCIPLES OF COMMU. SYSTEMS | OBJECT ORIENTED PROGRAMMING | SIGNALS & CONTROL SYSTEM LAB | PRINCIPLE OF COMMU. SYS. LAB | OOPS LAB | DATA ANALYTICS LAB | EMPLOYABILITY SKILL DEV. | PROJECT BASED LEARNING | AUDIT COURSE.1 | SEM_3_GPA | SEM_3_SGPA | SEM_4_GPA | SEM_4_SGPA | TOTAL_CREDITS_EARNED | CGPA | SEM_3_INTERNAL | SEM_4_INTERNAL | SEM_3_EXTERNAL | SEM_4_EXTERNAL | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 27 | 30 | 30 | 30 | 4 | 9 | 9 | 9 | 8 | 0 | 40 | 7 | 21 | 10 | 18 | 24 | 18 | 8 | 9 | 9 | 10 | 24 | 18 | 0 | 203.0 | 9.227 | 169.0 | 7.682 | 44 | 8.45 | 46 | 64 | 157.0 | 105.0 |
1 | 27 | 30 | 30 | 30 | 9 | 10 | 9 | 10 | 9 | 0 | 40 | 7 | 24 | 9 | 21 | 24 | 18 | 8 | 10 | 9 | 9 | 21 | 18 | 0 | 211.0 | 9.591 | 171.0 | 7.773 | 44 | 8.68 | 54 | 63 | 157.0 | 108.0 |
2 | 27 | 30 | 30 | 30 | 6 | 10 | 9 | 7 | 9 | 0 | 40 | 9 | 18 | 9 | 12 | 21 | 12 | 8 | 9 | 9 | 9 | 24 | 16 | 0 | 207.0 | 9.409 | 147.0 | 6.682 | 44 | 8.05 | 50 | 60 | 157.0 | 87.0 |
3 | 27 | 30 | 30 | 30 | 4 | 7 | 6 | 8 | 8 | 0 | 40 | 8 | 24 | 9 | 27 | 21 | 18 | 8 | 8 | 8 | 9 | 27 | 14 | 0 | 198.0 | 9.000 | 173.0 | 7.864 | 44 | 8.43 | 41 | 56 | 157.0 | 117.0 |
4 | 30 | 30 | 30 | 30 | 6 | 8 | 10 | 9 | 9 | 0 | 40 | 10 | 21 | 10 | 21 | 24 | 18 | 9 | 8 | 9 | 8 | 27 | 18 | 0 | 212.0 | 9.636 | 173.0 | 7.864 | 44 | 8.75 | 52 | 62 | 160.0 | 111.0 |
# Divisor applied to each column to rescale it.
# NOTE(review): the divisors look like the maximum attainable value of each column
# (which would map every column onto 0..1) - confirm against the marking scheme.
# AUDIT COURSE, AUDIT COURSE.1, SEM_3_GPA and SEM_4_GPA are not rescaled.
etc_divisors = {
    'ELECTRONIC CIRCUITS': 30,
    'DIGITAL CIRCUITS': 30,
    'ELECTRICAL CIRCUITS': 30,
    'DATA STRUCTURES': 30,
    'ENGINEERING MATHEMATICS III': 40,
    'ELECTRONIC CIRCUIT LAB': 10,
    'DIGITAL CIRCUITS LAB': 10,
    'ELECTRICAL CIRCUIT LAB': 10,
    'DATA STRUCTURES LAB': 10,
    'ELECTRONIC SKILL DEVELOPMENT': 10,
    'ENGINEERING MATHEMATICS III.1': 10,
    'SIGNALS & SYSTEMS': 30,
    'SIGNALS & SYSTEMS.1': 10,
    'CONTROL SYSTEMS': 30,
    'PRINCIPLES OF COMMU. SYSTEMS': 30,
    'OBJECT ORIENTED PROGRAMMING': 30,
    'EMPLOYABILITY SKILL DEV.': 30,
    'SIGNALS & CONTROL SYSTEM LAB': 10,
    'PRINCIPLE OF COMMU. SYS. LAB': 10,
    'OOPS LAB': 10,
    'DATA ANALYTICS LAB': 10,
    'PROJECT BASED LEARNING': 20,
    'SEM_3_SGPA': 10,
    'SEM_4_SGPA': 10,
    'TOTAL_CREDITS_EARNED': 44,
    'CGPA': 10,
    'SEM_3_INTERNAL': 60,
    'SEM_4_INTERNAL': 70,
    'SEM_3_EXTERNAL': 160,
    'SEM_4_EXTERNAL': 150,
}
for column, divisor in etc_divisors.items():
    df[column] = df[column] / divisor

# Show every column when the normalised frame is displayed.
pd.set_option('display.max_columns', None)
df
ELECTRONIC CIRCUITS | DIGITAL CIRCUITS | ELECTRICAL CIRCUITS | DATA STRUCTURES | ELECTRONIC CIRCUIT LAB | DIGITAL CIRCUITS LAB | ELECTRICAL CIRCUIT LAB | DATA STRUCTURES LAB | ELECTRONIC SKILL DEVELOPMENT | AUDIT COURSE | ENGINEERING MATHEMATICS III | ENGINEERING MATHEMATICS III.1 | SIGNALS & SYSTEMS | SIGNALS & SYSTEMS.1 | CONTROL SYSTEMS | PRINCIPLES OF COMMU. SYSTEMS | OBJECT ORIENTED PROGRAMMING | SIGNALS & CONTROL SYSTEM LAB | PRINCIPLE OF COMMU. SYS. LAB | OOPS LAB | DATA ANALYTICS LAB | EMPLOYABILITY SKILL DEV. | PROJECT BASED LEARNING | AUDIT COURSE.1 | SEM_3_GPA | SEM_3_SGPA | SEM_4_GPA | SEM_4_SGPA | TOTAL_CREDITS_EARNED | CGPA | SEM_3_INTERNAL | SEM_4_INTERNAL | SEM_3_EXTERNAL | SEM_4_EXTERNAL | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 0.9 | 1.0 | 1.0 | 1.0 | 0.4 | 0.9 | 0.9 | 0.9 | 0.8 | 0 | 1.0 | 0.7 | 0.7 | 1.0 | 0.6 | 0.8 | 0.6 | 0.8 | 0.9 | 0.9 | 1.0 | 0.8 | 0.9 | 0 | 203.0 | 0.9227 | 169.0 | 0.7682 | 1.0 | 0.845 | 0.766667 | 0.914286 | 0.98125 | 0.70 |
1 | 0.9 | 1.0 | 1.0 | 1.0 | 0.9 | 1.0 | 0.9 | 1.0 | 0.9 | 0 | 1.0 | 0.7 | 0.8 | 0.9 | 0.7 | 0.8 | 0.6 | 0.8 | 1.0 | 0.9 | 0.9 | 0.7 | 0.9 | 0 | 211.0 | 0.9591 | 171.0 | 0.7773 | 1.0 | 0.868 | 0.900000 | 0.900000 | 0.98125 | 0.72 |
2 | 0.9 | 1.0 | 1.0 | 1.0 | 0.6 | 1.0 | 0.9 | 0.7 | 0.9 | 0 | 1.0 | 0.9 | 0.6 | 0.9 | 0.4 | 0.7 | 0.4 | 0.8 | 0.9 | 0.9 | 0.9 | 0.8 | 0.8 | 0 | 207.0 | 0.9409 | 147.0 | 0.6682 | 1.0 | 0.805 | 0.833333 | 0.857143 | 0.98125 | 0.58 |
3 | 0.9 | 1.0 | 1.0 | 1.0 | 0.4 | 0.7 | 0.6 | 0.8 | 0.8 | 0 | 1.0 | 0.8 | 0.8 | 0.9 | 0.9 | 0.7 | 0.6 | 0.8 | 0.8 | 0.8 | 0.9 | 0.9 | 0.7 | 0 | 198.0 | 0.9000 | 173.0 | 0.7864 | 1.0 | 0.843 | 0.683333 | 0.800000 | 0.98125 | 0.78 |
4 | 1.0 | 1.0 | 1.0 | 1.0 | 0.6 | 0.8 | 1.0 | 0.9 | 0.9 | 0 | 1.0 | 1.0 | 0.7 | 1.0 | 0.7 | 0.8 | 0.6 | 0.9 | 0.8 | 0.9 | 0.8 | 0.9 | 0.9 | 0 | 212.0 | 0.9636 | 173.0 | 0.7864 | 1.0 | 0.875 | 0.866667 | 0.885714 | 1.00000 | 0.74 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
304 | 0.9 | 1.0 | 1.0 | 1.0 | 0.5 | 1.0 | 0.9 | 0.7 | 0.9 | 0 | 1.0 | 0.9 | 0.4 | 0.9 | 0.4 | 0.4 | 0.6 | 0.8 | 0.8 | 0.6 | 0.8 | 0.9 | 0.7 | 0 | 206.0 | 0.9364 | 134.0 | 0.6091 | 1.0 | 0.773 | 0.816667 | 0.757143 | 0.98125 | 0.54 |
305 | 0.8 | 1.0 | 1.0 | 1.0 | 0.9 | 0.8 | 0.9 | 0.7 | 0.9 | 0 | 0.7 | 0.9 | 0.7 | 0.9 | 0.5 | 0.6 | 0.5 | 0.8 | 0.8 | 0.8 | 0.8 | 0.8 | 0.8 | 0 | 193.0 | 0.8773 | 150.0 | 0.6818 | 1.0 | 0.780 | 0.850000 | 0.814286 | 0.88750 | 0.62 |
306 | 0.8 | 1.0 | 1.0 | 1.0 | 0.7 | 0.9 | 1.0 | 0.8 | 0.9 | 0 | 1.0 | 0.9 | 0.8 | 1.0 | 0.9 | 0.8 | 0.6 | 0.9 | 0.9 | 0.9 | 0.7 | 0.9 | 0.8 | 0 | 206.0 | 0.9364 | 180.0 | 0.8182 | 1.0 | 0.877 | 0.866667 | 0.857143 | 0.96250 | 0.80 |
307 | 0.9 | 1.0 | 1.0 | 1.0 | 0.4 | 0.9 | 1.0 | 0.9 | 0.9 | 0 | 1.0 | 0.9 | 0.8 | 0.9 | 0.8 | 0.7 | 0.6 | 0.8 | 0.6 | 0.9 | 0.9 | 0.8 | 0.9 | 0 | 207.0 | 0.9409 | 170.0 | 0.7727 | 1.0 | 0.857 | 0.833333 | 0.842857 | 0.98125 | 0.74 |
308 | 0.7 | 1.0 | 1.0 | 0.9 | 0.4 | 0.9 | 0.9 | 0.4 | 0.9 | 0 | 1.0 | 0.9 | 0.4 | 0.8 | 0.4 | 0.4 | 0.4 | 0.7 | 0.8 | 0.8 | 0.7 | 0.7 | 0.7 | 0 | 192.0 | 0.8727 | 121.0 | 0.5500 | 1.0 | 0.711 | 0.733333 | 0.742857 | 0.92500 | 0.46 |
309 rows × 34 columns
# Persist the normalised ETC frame. to_csv defaults to index=True, so the row
# index is written as the first (unnamed) column of the file.
df.to_csv('ETC_Normalized.csv')
# Keep the ETC frame reachable under its own name (same object, not a copy);
# `df` is rebound to the next dataset afterwards.
df_etc = df
# Load the IT department dataset; this rebinds `df` to the new frame.
df = pd.read_csv("IT_SE_2021.csv")
# Preview the first 5 rows (DataFrame.head defaults to n=5).
df.head()
DISCRETE MATHEMATICS | DISCRETE MATHEMATICS.1 | LOGIC DESIGN & COMP. ORG. | DATA STRUCTURES & ALGO. | OBJECT ORIENTED PROGRAMMING | BASIC OF COMPUTER NETWORK | LOGIC DESIGN COMP. ORG. LAB | DATA STRUCTURES & ALGO. LAB | OBJECT ORIENTED PROG. LAB | SOFT SKILL LAB | AUDIT COURSE | ENGINEERING MATHEMATICS-III | ENGINEERING MATHEMATICS-III.1 | PROCESSOR ARCHITECTURE | DATABASE MANAGEMENT SYSTEM | COMPUTER GRAPHICS | SOFTWARE ENGINEERING | PROG. SKILL DEVELOPMENT LAB | DATABASE MGMT. SYSTEM LAB | COMPUTER GRAPHICS LAB | PROJECT BASED LEARNING | AUDIT COURSE.1 | SEM_3_GPA | SEM_3_SGPA | SEM_4_GPA | SEM_4_SGPA | TOTAL_CREDITS_EARNED | CGPA | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 30 | 10 | 30 | 30 | 30 | 30 | 10 | 20 | 18 | 9 | 0 | 21 | 10 | 21 | 21 | 24 | 21 | 7 | 18 | 9 | 18 | 0 | 217.0 | 9.864 | 170.0 | 7.727 | 44 | 8.80 |
1 | 24 | 8 | 27 | 27 | 30 | 27 | 7 | 14 | 14 | 8 | 0 | 24 | 10 | 24 | 24 | 24 | 21 | 6 | 18 | 7 | 18 | 0 | 186.0 | 8.455 | 176.0 | 8.000 | 44 | 8.23 |
2 | 30 | 10 | 30 | 30 | 30 | 30 | 9 | 18 | 18 | 9 | 0 | 24 | 10 | 0 | 0 | 21 | 12 | 7 | 16 | 7 | 18 | 0 | 214.0 | 9.727 | 115.0 | 5.227 | 38 | 0.00 |
3 | 30 | 9 | 27 | 30 | 30 | 30 | 10 | 20 | 18 | 9 | 0 | 30 | 10 | 18 | 21 | 27 | 21 | 10 | 18 | 9 | 20 | 0 | 213.0 | 9.682 | 184.0 | 8.364 | 44 | 9.02 |
4 | 27 | 10 | 30 | 27 | 30 | 30 | 9 | 18 | 16 | 9 | 0 | 21 | 10 | 18 | 12 | 21 | 15 | 7 | 18 | 5 | 18 | 0 | 206.0 | 9.364 | 145.0 | 6.591 | 44 | 7.98 |
# Preview the last 5 rows (DataFrame.tail defaults to n=5).
df.tail()
DISCRETE MATHEMATICS | DISCRETE MATHEMATICS.1 | LOGIC DESIGN & COMP. ORG. | DATA STRUCTURES & ALGO. | OBJECT ORIENTED PROGRAMMING | BASIC OF COMPUTER NETWORK | LOGIC DESIGN COMP. ORG. LAB | DATA STRUCTURES & ALGO. LAB | OBJECT ORIENTED PROG. LAB | SOFT SKILL LAB | AUDIT COURSE | ENGINEERING MATHEMATICS-III | ENGINEERING MATHEMATICS-III.1 | PROCESSOR ARCHITECTURE | DATABASE MANAGEMENT SYSTEM | COMPUTER GRAPHICS | SOFTWARE ENGINEERING | PROG. SKILL DEVELOPMENT LAB | DATABASE MGMT. SYSTEM LAB | COMPUTER GRAPHICS LAB | PROJECT BASED LEARNING | AUDIT COURSE.1 | SEM_3_GPA | SEM_3_SGPA | SEM_4_GPA | SEM_4_SGPA | TOTAL_CREDITS_EARNED | CGPA | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
241 | 30 | 10 | 30 | 30 | 30 | 30 | 9 | 18 | 18 | 9 | 0 | 24 | 10 | 18 | 24 | 21 | 18 | 8 | 18 | 9 | 18 | 0 | 214.0 | 9.727 | 168.0 | 7.636 | 44 | 8.68 |
242 | 30 | 10 | 30 | 30 | 30 | 30 | 9 | 20 | 20 | 10 | 0 | 15 | 10 | 21 | 21 | 21 | 15 | 8 | 20 | 9 | 20 | 0 | 219.0 | 9.955 | 160.0 | 7.273 | 44 | 8.61 |
243 | 30 | 10 | 30 | 30 | 30 | 30 | 10 | 20 | 20 | 9 | 0 | 30 | 10 | 21 | 18 | 24 | 21 | 10 | 20 | 10 | 20 | 0 | 219.0 | 9.955 | 184.0 | 8.364 | 44 | 9.16 |
244 | 30 | 10 | 30 | 30 | 30 | 30 | 9 | 20 | 16 | 9 | 0 | 24 | 10 | 18 | 24 | 24 | 15 | 7 | 18 | 9 | 18 | 0 | 214.0 | 9.727 | 167.0 | 7.591 | 44 | 8.66 |
245 | 30 | 10 | 27 | 30 | 30 | 30 | 10 | 18 | 16 | 9 | 0 | 24 | 10 | 21 | 18 | 21 | 15 | 7 | 18 | 8 | 18 | 0 | 210.0 | 9.545 | 160.0 | 7.273 | 44 | 8.41 |
# (number of rows, number of columns) of the loaded frame.
df.shape
(246, 28)
# Number of missing (null) values in each column.
df.isnull().sum()
DISCRETE MATHEMATICS 0 DISCRETE MATHEMATICS.1 0 LOGIC DESIGN & COMP. ORG. 0 DATA STRUCTURES & ALGO. 0 OBJECT ORIENTED PROGRAMMING 0 BASIC OF COMPUTER NETWORK 0 LOGIC DESIGN COMP. ORG. LAB 0 DATA STRUCTURES & ALGO. LAB 0 OBJECT ORIENTED PROG. LAB 0 SOFT SKILL LAB 0 AUDIT COURSE 0 ENGINEERING MATHEMATICS-III 0 ENGINEERING MATHEMATICS-III.1 0 PROCESSOR ARCHITECTURE 0 DATABASE MANAGEMENT SYSTEM 0 COMPUTER GRAPHICS 0 SOFTWARE ENGINEERING 0 PROG. SKILL DEVELOPMENT LAB 0 DATABASE MGMT. SYSTEM LAB 0 COMPUTER GRAPHICS LAB 0 PROJECT BASED LEARNING 0 AUDIT COURSE.1 0 SEM_3_GPA 0 SEM_3_SGPA 0 SEM_4_GPA 0 SEM_4_SGPA 0 TOTAL_CREDITS_EARNED 0 CGPA 0 dtype: int64
# Grand total of missing values: per-column counts summed over all columns.
print("Total missing values: ", df.isnull().sum().sum())
Total missing values: 0
# Print a summary of the frame: index range, per-column non-null counts and
# dtypes, and memory usage.
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 246 entries, 0 to 245 Data columns (total 28 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 DISCRETE MATHEMATICS 246 non-null int64 1 DISCRETE MATHEMATICS.1 246 non-null int64 2 LOGIC DESIGN & COMP. ORG. 246 non-null int64 3 DATA STRUCTURES & ALGO. 246 non-null int64 4 OBJECT ORIENTED PROGRAMMING 246 non-null int64 5 BASIC OF COMPUTER NETWORK 246 non-null int64 6 LOGIC DESIGN COMP. ORG. LAB 246 non-null int64 7 DATA STRUCTURES & ALGO. LAB 246 non-null int64 8 OBJECT ORIENTED PROG. LAB 246 non-null int64 9 SOFT SKILL LAB 246 non-null int64 10 AUDIT COURSE 246 non-null int64 11 ENGINEERING MATHEMATICS-III 246 non-null int64 12 ENGINEERING MATHEMATICS-III.1 246 non-null int64 13 PROCESSOR ARCHITECTURE 246 non-null int64 14 DATABASE MANAGEMENT SYSTEM 246 non-null int64 15 COMPUTER GRAPHICS 246 non-null int64 16 SOFTWARE ENGINEERING 246 non-null int64 17 PROG. SKILL DEVELOPMENT LAB 246 non-null int64 18 DATABASE MGMT. SYSTEM LAB 246 non-null int64 19 COMPUTER GRAPHICS LAB 246 non-null int64 20 PROJECT BASED LEARNING 246 non-null int64 21 AUDIT COURSE.1 246 non-null int64 22 SEM_3_GPA 246 non-null float64 23 SEM_3_SGPA 246 non-null float64 24 SEM_4_GPA 246 non-null float64 25 SEM_4_SGPA 246 non-null float64 26 TOTAL_CREDITS_EARNED 246 non-null int64 27 CGPA 246 non-null float64 dtypes: float64(5), int64(23) memory usage: 53.9 KB
# Data type of each column.
df.dtypes
DISCRETE MATHEMATICS int64 DISCRETE MATHEMATICS.1 int64 LOGIC DESIGN & COMP. ORG. int64 DATA STRUCTURES & ALGO. int64 OBJECT ORIENTED PROGRAMMING int64 BASIC OF COMPUTER NETWORK int64 LOGIC DESIGN COMP. ORG. LAB int64 DATA STRUCTURES & ALGO. LAB int64 OBJECT ORIENTED PROG. LAB int64 SOFT SKILL LAB int64 AUDIT COURSE int64 ENGINEERING MATHEMATICS-III int64 ENGINEERING MATHEMATICS-III.1 int64 PROCESSOR ARCHITECTURE int64 DATABASE MANAGEMENT SYSTEM int64 COMPUTER GRAPHICS int64 SOFTWARE ENGINEERING int64 PROG. SKILL DEVELOPMENT LAB int64 DATABASE MGMT. SYSTEM LAB int64 COMPUTER GRAPHICS LAB int64 PROJECT BASED LEARNING int64 AUDIT COURSE.1 int64 SEM_3_GPA float64 SEM_3_SGPA float64 SEM_4_GPA float64 SEM_4_SGPA float64 TOTAL_CREDITS_EARNED int64 CGPA float64 dtype: object
# Force CGPA to a numeric dtype. errors='coerce' turns any entry that cannot
# be parsed as a number into NaN instead of raising.
# NOTE(review): the dtypes listing for this file already reports CGPA as
# float64, so this looks like a defensive no-op here; the result is float,
# not int.
df['CGPA'] = pd.to_numeric(df['CGPA'], errors='coerce')
# Re-check the column dtypes after the conversion.
df.dtypes
DISCRETE MATHEMATICS int64 DISCRETE MATHEMATICS.1 int64 LOGIC DESIGN & COMP. ORG. int64 DATA STRUCTURES & ALGO. int64 OBJECT ORIENTED PROGRAMMING int64 BASIC OF COMPUTER NETWORK int64 LOGIC DESIGN COMP. ORG. LAB int64 DATA STRUCTURES & ALGO. LAB int64 OBJECT ORIENTED PROG. LAB int64 SOFT SKILL LAB int64 AUDIT COURSE int64 ENGINEERING MATHEMATICS-III int64 ENGINEERING MATHEMATICS-III.1 int64 PROCESSOR ARCHITECTURE int64 DATABASE MANAGEMENT SYSTEM int64 COMPUTER GRAPHICS int64 SOFTWARE ENGINEERING int64 PROG. SKILL DEVELOPMENT LAB int64 DATABASE MGMT. SYSTEM LAB int64 COMPUTER GRAPHICS LAB int64 PROJECT BASED LEARNING int64 AUDIT COURSE.1 int64 SEM_3_GPA float64 SEM_3_SGPA float64 SEM_4_GPA float64 SEM_4_SGPA float64 TOTAL_CREDITS_EARNED int64 CGPA float64 dtype: object
# Keep only the two semester SGPA columns and the CGPA column.
# DataFrame.filter with a list of labels silently skips labels that are absent.
df2 = df.filter(['SEM_3_SGPA', 'SEM_4_SGPA', 'CGPA'])
df2
SEM_3_SGPA | SEM_4_SGPA | CGPA | |
---|---|---|---|
0 | 9.864 | 7.727 | 8.80 |
1 | 8.455 | 8.000 | 8.23 |
2 | 9.727 | 5.227 | 0.00 |
3 | 9.682 | 8.364 | 9.02 |
4 | 9.364 | 6.591 | 7.98 |
... | ... | ... | ... |
241 | 9.727 | 7.636 | 8.68 |
242 | 9.955 | 7.273 | 8.61 |
243 | 9.955 | 8.364 | 9.16 |
244 | 9.727 | 7.591 | 8.66 |
245 | 9.545 | 7.273 | 8.41 |
246 rows × 3 columns
# Replace missing values with 0 in the selected columns only; columns not
# listed here are left as they are.
_zero_fill_columns = [
    'LOGIC DESIGN & COMP. ORG.',
    'LOGIC DESIGN COMP. ORG. LAB',
    'AUDIT COURSE',
    'ENGINEERING MATHEMATICS-III',
    'PROCESSOR ARCHITECTURE',
    'DATABASE MANAGEMENT SYSTEM',
    'COMPUTER GRAPHICS',
    'SOFTWARE ENGINEERING',
    'PROG. SKILL DEVELOPMENT LAB',
    'DATABASE MGMT. SYSTEM LAB',
    'COMPUTER GRAPHICS LAB',
    'AUDIT COURSE.1',
    'CGPA',
]
for _col in _zero_fill_columns:
    df[_col] = df[_col].fillna(0)

# Per-column missing-value counts after the fill.
df.isnull().sum()
DISCRETE MATHEMATICS 0 DISCRETE MATHEMATICS.1 0 LOGIC DESIGN & COMP. ORG. 0 DATA STRUCTURES & ALGO. 0 OBJECT ORIENTED PROGRAMMING 0 BASIC OF COMPUTER NETWORK 0 LOGIC DESIGN COMP. ORG. LAB 0 DATA STRUCTURES & ALGO. LAB 0 OBJECT ORIENTED PROG. LAB 0 SOFT SKILL LAB 0 AUDIT COURSE 0 ENGINEERING MATHEMATICS-III 0 ENGINEERING MATHEMATICS-III.1 0 PROCESSOR ARCHITECTURE 0 DATABASE MANAGEMENT SYSTEM 0 COMPUTER GRAPHICS 0 SOFTWARE ENGINEERING 0 PROG. SKILL DEVELOPMENT LAB 0 DATABASE MGMT. SYSTEM LAB 0 COMPUTER GRAPHICS LAB 0 PROJECT BASED LEARNING 0 AUDIT COURSE.1 0 SEM_3_GPA 0 SEM_3_SGPA 0 SEM_4_GPA 0 SEM_4_SGPA 0 TOTAL_CREDITS_EARNED 0 CGPA 0 dtype: int64
# Arithmetic mean of the semester-3 SGPA, taken on the original scale
# (the column is divided by 10 only further down in the notebook).
mean_sgpa_sem3 = df['SEM_3_SGPA'].mean()
mean_sgpa_sem3
9.439211382113822
# Median of the semester-3 SGPA, taken on the original scale.
median_sgpa_sem3 = df['SEM_3_SGPA'].median()
median_sgpa_sem3
9.545
# Sample standard deviation of the semester-3 SGPA (Series.std defaults to
# ddof=1), taken on the original scale.
std_sgpa_sem3 = df['SEM_3_SGPA'].std()
std_sgpa_sem3
0.4374280629283696
# Arithmetic mean of the semester-4 SGPA, taken on the original scale
# (the column is divided by 10 only further down in the notebook).
mean_sgpa_sem4 = df['SEM_4_SGPA'].mean()
mean_sgpa_sem4
7.00459349593496
# Median of the semester-4 SGPA, taken on the original scale.
median_sgpa_sem4 = df['SEM_4_SGPA'].median()
median_sgpa_sem4
7.273
# Sample standard deviation of the semester-4 SGPA (Series.std defaults to
# ddof=1), taken on the original scale.
std_sgpa_sem4 = df['SEM_4_SGPA'].std()
std_sgpa_sem4
1.3395733334583166
# Derive four per-student aggregate columns. Each *_INTERNAL column is the
# row-wise sum of the component columns listed for it; each *_EXTERNAL column
# is the semester GPA total minus that internal sum.
# These read the component columns on their original scale, so this must run
# before those columns are rescaled.
_internal_components = {
    "SEM_3_INTERNAL": [
        'DISCRETE MATHEMATICS.1',
        'LOGIC DESIGN COMP. ORG. LAB',
        'DATA STRUCTURES & ALGO. LAB',
        'OBJECT ORIENTED PROG. LAB',
        'AUDIT COURSE',
        'SOFT SKILL LAB',
    ],
    "SEM_4_INTERNAL": [
        'ENGINEERING MATHEMATICS-III.1',
        'PROG. SKILL DEVELOPMENT LAB',
        'DATABASE MGMT. SYSTEM LAB',
        'COMPUTER GRAPHICS LAB',
        'PROJECT BASED LEARNING',
        'AUDIT COURSE.1',
    ],
}
for _target, _parts in _internal_components.items():
    df[_target] = df[_parts].sum(axis=1)

# Externals are added after both internals so the new columns keep the order
# SEM_3_INTERNAL, SEM_4_INTERNAL, SEM_3_EXTERNAL, SEM_4_EXTERNAL.
df['SEM_3_EXTERNAL'] = df['SEM_3_GPA'].sub(df['SEM_3_INTERNAL'])
df['SEM_4_EXTERNAL'] = df['SEM_4_GPA'].sub(df['SEM_4_INTERNAL'])
# Rescale the IT columns in place: each listed column is divided by the
# divisor paired with it below.
# NOTE(review): the divisors look like the maximum attainable value of each
# column (30 for theory papers, 10 or 20 for labs) -- confirm against the
# marking scheme.
# SEM_3_GPA, SEM_4_GPA, AUDIT COURSE and AUDIT COURSE.1 are not listed and are
# therefore left on their original scale.
_it_divisors = {
    'DISCRETE MATHEMATICS': 30,
    'DISCRETE MATHEMATICS.1': 10,
    'LOGIC DESIGN & COMP. ORG.': 30,
    'DATA STRUCTURES & ALGO.': 30,
    'OBJECT ORIENTED PROGRAMMING': 30,
    'BASIC OF COMPUTER NETWORK': 30,
    'LOGIC DESIGN COMP. ORG. LAB': 10,
    'DATA STRUCTURES & ALGO. LAB': 20,
    'OBJECT ORIENTED PROG. LAB': 20,
    'SOFT SKILL LAB': 10,
    'ENGINEERING MATHEMATICS-III': 30,
    'ENGINEERING MATHEMATICS-III.1': 10,
    'PROCESSOR ARCHITECTURE': 30,
    'DATABASE MANAGEMENT SYSTEM': 30,
    'COMPUTER GRAPHICS': 30,
    'SOFTWARE ENGINEERING': 30,
    'PROG. SKILL DEVELOPMENT LAB': 10,
    'DATABASE MGMT. SYSTEM LAB': 20,
    'COMPUTER GRAPHICS LAB': 10,
    'PROJECT BASED LEARNING': 20,
    'TOTAL_CREDITS_EARNED': 44,
    'SEM_3_SGPA': 10,
    'SEM_4_SGPA': 10,
    'CGPA': 10,
    'SEM_3_INTERNAL': 70,
    'SEM_4_INTERNAL': 70,
    'SEM_3_EXTERNAL': 150,
    'SEM_4_EXTERNAL': 150,
}
for _col, _divisor in _it_divisors.items():
    df[_col] = df[_col] / _divisor

# Lift the column-display limit so the whole normalised frame is rendered.
pd.set_option('display.max_columns', None)
df
DISCRETE MATHEMATICS | DISCRETE MATHEMATICS.1 | LOGIC DESIGN & COMP. ORG. | DATA STRUCTURES & ALGO. | OBJECT ORIENTED PROGRAMMING | BASIC OF COMPUTER NETWORK | LOGIC DESIGN COMP. ORG. LAB | DATA STRUCTURES & ALGO. LAB | OBJECT ORIENTED PROG. LAB | SOFT SKILL LAB | AUDIT COURSE | ENGINEERING MATHEMATICS-III | ENGINEERING MATHEMATICS-III.1 | PROCESSOR ARCHITECTURE | DATABASE MANAGEMENT SYSTEM | COMPUTER GRAPHICS | SOFTWARE ENGINEERING | PROG. SKILL DEVELOPMENT LAB | DATABASE MGMT. SYSTEM LAB | COMPUTER GRAPHICS LAB | PROJECT BASED LEARNING | AUDIT COURSE.1 | SEM_3_GPA | SEM_3_SGPA | SEM_4_GPA | SEM_4_SGPA | TOTAL_CREDITS_EARNED | CGPA | SEM_3_INTERNAL | SEM_4_INTERNAL | SEM_3_EXTERNAL | SEM_4_EXTERNAL | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.9 | 0.9 | 0 | 0.7 | 1.0 | 0.7 | 0.7 | 0.8 | 0.7 | 0.7 | 0.9 | 0.9 | 0.9 | 0 | 217.0 | 0.9864 | 170.0 | 0.7727 | 1.000000 | 0.880 | 0.957143 | 0.885714 | 1.00 | 0.72 |
1 | 0.8 | 0.8 | 0.9 | 0.9 | 1.0 | 0.9 | 0.7 | 0.7 | 0.7 | 0.8 | 0 | 0.8 | 1.0 | 0.8 | 0.8 | 0.8 | 0.7 | 0.6 | 0.9 | 0.7 | 0.9 | 0 | 186.0 | 0.8455 | 176.0 | 0.8000 | 1.000000 | 0.823 | 0.728571 | 0.842857 | 0.90 | 0.78 |
2 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.9 | 0.9 | 0.9 | 0.9 | 0 | 0.8 | 1.0 | 0.0 | 0.0 | 0.7 | 0.4 | 0.7 | 0.8 | 0.7 | 0.9 | 0 | 214.0 | 0.9727 | 115.0 | 0.5227 | 0.863636 | 0.000 | 0.914286 | 0.828571 | 1.00 | 0.38 |
3 | 1.0 | 0.9 | 0.9 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.9 | 0.9 | 0 | 1.0 | 1.0 | 0.6 | 0.7 | 0.9 | 0.7 | 1.0 | 0.9 | 0.9 | 1.0 | 0 | 213.0 | 0.9682 | 184.0 | 0.8364 | 1.000000 | 0.902 | 0.942857 | 0.957143 | 0.98 | 0.78 |
4 | 0.9 | 1.0 | 1.0 | 0.9 | 1.0 | 1.0 | 0.9 | 0.9 | 0.8 | 0.9 | 0 | 0.7 | 1.0 | 0.6 | 0.4 | 0.7 | 0.5 | 0.7 | 0.9 | 0.5 | 0.9 | 0 | 206.0 | 0.9364 | 145.0 | 0.6591 | 1.000000 | 0.798 | 0.885714 | 0.828571 | 0.96 | 0.58 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
241 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.9 | 0.9 | 0.9 | 0.9 | 0 | 0.8 | 1.0 | 0.6 | 0.8 | 0.7 | 0.6 | 0.8 | 0.9 | 0.9 | 0.9 | 0 | 214.0 | 0.9727 | 168.0 | 0.7636 | 1.000000 | 0.868 | 0.914286 | 0.900000 | 1.00 | 0.70 |
242 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.9 | 1.0 | 1.0 | 1.0 | 0 | 0.5 | 1.0 | 0.7 | 0.7 | 0.7 | 0.5 | 0.8 | 1.0 | 0.9 | 1.0 | 0 | 219.0 | 0.9955 | 160.0 | 0.7273 | 1.000000 | 0.861 | 0.985714 | 0.957143 | 1.00 | 0.62 |
243 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.9 | 0 | 1.0 | 1.0 | 0.7 | 0.6 | 0.8 | 0.7 | 1.0 | 1.0 | 1.0 | 1.0 | 0 | 219.0 | 0.9955 | 184.0 | 0.8364 | 1.000000 | 0.916 | 0.985714 | 1.000000 | 1.00 | 0.76 |
244 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 1.0 | 0.9 | 1.0 | 0.8 | 0.9 | 0 | 0.8 | 1.0 | 0.6 | 0.8 | 0.8 | 0.5 | 0.7 | 0.9 | 0.9 | 0.9 | 0 | 214.0 | 0.9727 | 167.0 | 0.7591 | 1.000000 | 0.866 | 0.914286 | 0.885714 | 1.00 | 0.70 |
245 | 1.0 | 1.0 | 0.9 | 1.0 | 1.0 | 1.0 | 1.0 | 0.9 | 0.8 | 0.9 | 0 | 0.8 | 1.0 | 0.7 | 0.6 | 0.7 | 0.5 | 0.7 | 0.9 | 0.8 | 0.9 | 0 | 210.0 | 0.9545 | 160.0 | 0.7273 | 1.000000 | 0.841 | 0.900000 | 0.871429 | 0.98 | 0.66 |
246 rows × 32 columns
# Persist the normalised IT frame. to_csv defaults to index=True, so the row
# index is written as the first (unnamed) column of the file.
df.to_csv('IT_Normalized.csv')
# Keep the IT frame reachable under its own name (same object, not a copy).
df_it = df