Importing required libraries

In [1]:
import numpy as nm #importing required libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

Results of Computer Engg. Department

Importing Comp. Dept Dataset

In [2]:
df = pd.read_csv("CE_SE_2021.csv")  # load the Computer Engg. department dataset; the relative path is resolved against the current working directory
In [3]:
df.head()  # preview the first 5 rows of the dataset
Out[3]:
DISCRETE MATHEMATICS FUND. OF DATA STRUCTURES OBJECT ORIENTED PROGRAMMING COMPUTER GRAPHICS DIGITAL ELEC. & LOGIC DESIGN DATA STUCTURES LABORATORY OOP & COMP. GRAPHICS LAB. DIGITAL ELEC. LABORATORY BUSINESS COMMUNICATION SKILLS HUMANITY & SOCIAL SCIENCE ... MICROPROCESSOR LABORATORY PROJECT BASED LEARNING II CODE OF CONDUCT AUDIT COURSE.1 SEM_3_GPA SEM_3_SGPA SEM_4_GPA SEM_4_SGPA TOTAL_CREDITS_EARNED CGPA
0 30 30 30 30 30 18 16 10 8 9 ... 6 18 9 0 211.0 9.591 124.0 5.636 39 0.00
1 30 30 30 30 30 18 18 9 9 7 ... 9 20 9 0 211.0 9.591 197.0 8.955 44 9.27
2 30 30 30 30 30 20 20 10 10 10 ... 10 20 9 0 220.0 10.000 178.0 8.091 44 9.05
3 30 30 30 30 30 16 16 9 9 8 ... 0 18 7 0 208.0 9.455 131.0 5.955 41 0.00
4 30 30 30 30 30 18 20 9 9 9 ... 9 18 10 0 215.0 9.773 176.0 8.000 44 8.89

5 rows × 28 columns

In [4]:
df.tail()  # preview the last 5 rows of the dataset
Out[4]:
DISCRETE MATHEMATICS FUND. OF DATA STRUCTURES OBJECT ORIENTED PROGRAMMING COMPUTER GRAPHICS DIGITAL ELEC. & LOGIC DESIGN DATA STUCTURES LABORATORY OOP & COMP. GRAPHICS LAB. DIGITAL ELEC. LABORATORY BUSINESS COMMUNICATION SKILLS HUMANITY & SOCIAL SCIENCE ... MICROPROCESSOR LABORATORY PROJECT BASED LEARNING II CODE OF CONDUCT AUDIT COURSE.1 SEM_3_GPA SEM_3_SGPA SEM_4_GPA SEM_4_SGPA TOTAL_CREDITS_EARNED CGPA
328 30 30 30 30 30 18 20 9 9 9 ... 9 20 10 0 215.0 9.773 184.0 8.364 44 9.07
329 30 30 27 30 30 18 18 9 9 9 ... 8 20 9 0 210.0 9.545 181.0 8.227 44 8.89
330 27 30 24 24 30 18 16 9 9 9 ... 7 18 10 0 196.0 8.909 57.0 2.591 29 0.00
331 30 30 30 27 30 18 20 10 10 9 ... 9 18 10 0 214.0 9.727 189.0 8.591 44 9.16
332 30 30 30 27 30 18 16 9 9 9 ... 8 18 8 0 208.0 9.455 151.0 6.864 44 8.16

5 rows × 28 columns

In [5]:
df.shape  # (number of rows, number of columns)
Out[5]:
(333, 28)
In [6]:
df.isnull().sum()  # count of missing (NaN) values in each column
Out[6]:
DISCRETE MATHEMATICS             0
FUND. OF DATA STRUCTURES         0
OBJECT ORIENTED PROGRAMMING      0
COMPUTER GRAPHICS                0
DIGITAL ELEC. & LOGIC DESIGN     0
DATA STUCTURES LABORATORY        0
OOP & COMP. GRAPHICS LAB.        0
DIGITAL ELEC. LABORATORY         0
BUSINESS COMMUNICATION SKILLS    0
HUMANITY & SOCIAL SCIENCE        0
AUDIT COURSE                     0
ENGINEERING MATHEMATICS III      0
ENGINEERING MATHEMATICS III.1    0
DATA STRUCTURES & ALGO.          0
SOFTWARE ENGINEERING             0
MICROPROCESSOR                   0
PRINCIPLES OF PROG. LANG.        0
DATA STRUCTURES & ALGO. LAB.     0
MICROPROCESSOR LABORATORY        0
PROJECT BASED LEARNING II        0
CODE OF CONDUCT                  0
AUDIT COURSE.1                   0
SEM_3_GPA                        0
SEM_3_SGPA                       0
SEM_4_GPA                        0
SEM_4_SGPA                       0
TOTAL_CREDITS_EARNED             0
CGPA                             0
dtype: int64
In [7]:
print("Total missing values: ", df.isnull().sum().sum())  # per-column null counts summed into one grand total
Total missing values:  0
In [8]:
df.info()  # index range, column names, non-null counts, dtypes and memory usage
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 333 entries, 0 to 332
Data columns (total 28 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   DISCRETE MATHEMATICS           333 non-null    int64  
 1   FUND. OF DATA STRUCTURES       333 non-null    int64  
 2   OBJECT ORIENTED PROGRAMMING    333 non-null    int64  
 3   COMPUTER GRAPHICS              333 non-null    int64  
 4   DIGITAL ELEC. & LOGIC DESIGN   333 non-null    int64  
 5   DATA STUCTURES LABORATORY      333 non-null    int64  
 6   OOP & COMP. GRAPHICS LAB.      333 non-null    int64  
 7   DIGITAL ELEC. LABORATORY       333 non-null    int64  
 8   BUSINESS COMMUNICATION SKILLS  333 non-null    int64  
 9   HUMANITY & SOCIAL SCIENCE      333 non-null    int64  
 10  AUDIT COURSE                   333 non-null    int64  
 11  ENGINEERING MATHEMATICS III    333 non-null    int64  
 12  ENGINEERING MATHEMATICS III.1  333 non-null    int64  
 13  DATA STRUCTURES & ALGO.        333 non-null    int64  
 14  SOFTWARE ENGINEERING           333 non-null    int64  
 15  MICROPROCESSOR                 333 non-null    int64  
 16  PRINCIPLES OF PROG. LANG.      333 non-null    int64  
 17  DATA STRUCTURES & ALGO. LAB.   333 non-null    int64  
 18  MICROPROCESSOR LABORATORY      333 non-null    int64  
 19  PROJECT BASED LEARNING II      333 non-null    int64  
 20  CODE OF CONDUCT                333 non-null    int64  
 21  AUDIT COURSE.1                 333 non-null    int64  
 22  SEM_3_GPA                      333 non-null    float64
 23  SEM_3_SGPA                     333 non-null    float64
 24  SEM_4_GPA                      333 non-null    float64
 25  SEM_4_SGPA                     333 non-null    float64
 26  TOTAL_CREDITS_EARNED           333 non-null    int64  
 27  CGPA                           333 non-null    float64
dtypes: float64(5), int64(23)
memory usage: 73.0 KB
In [9]:
df.columns  # Index holding every column label in the dataset
Out[9]:
Index(['DISCRETE MATHEMATICS', 'FUND. OF DATA STRUCTURES',
       'OBJECT ORIENTED PROGRAMMING', 'COMPUTER GRAPHICS',
       'DIGITAL ELEC. & LOGIC DESIGN', 'DATA STUCTURES LABORATORY',
       'OOP & COMP. GRAPHICS LAB.', 'DIGITAL ELEC. LABORATORY',
       'BUSINESS COMMUNICATION SKILLS', 'HUMANITY & SOCIAL SCIENCE',
       'AUDIT COURSE', 'ENGINEERING MATHEMATICS III',
       'ENGINEERING MATHEMATICS III.1', 'DATA STRUCTURES & ALGO.',
       'SOFTWARE ENGINEERING', 'MICROPROCESSOR', 'PRINCIPLES OF PROG. LANG.',
       'DATA STRUCTURES & ALGO. LAB.', 'MICROPROCESSOR LABORATORY',
       'PROJECT BASED LEARNING II', 'CODE OF CONDUCT', 'AUDIT COURSE.1',
       'SEM_3_GPA', 'SEM_3_SGPA', 'SEM_4_GPA', 'SEM_4_SGPA',
       'TOTAL_CREDITS_EARNED', 'CGPA'],
      dtype='object')
In [10]:
df.dtypes  # data type of each column
Out[10]:
DISCRETE MATHEMATICS               int64
FUND. OF DATA STRUCTURES           int64
OBJECT ORIENTED PROGRAMMING        int64
COMPUTER GRAPHICS                  int64
DIGITAL ELEC. & LOGIC DESIGN       int64
DATA STUCTURES LABORATORY          int64
OOP & COMP. GRAPHICS LAB.          int64
DIGITAL ELEC. LABORATORY           int64
BUSINESS COMMUNICATION SKILLS      int64
HUMANITY & SOCIAL SCIENCE          int64
AUDIT COURSE                       int64
ENGINEERING MATHEMATICS III        int64
ENGINEERING MATHEMATICS III.1      int64
DATA STRUCTURES & ALGO.            int64
SOFTWARE ENGINEERING               int64
MICROPROCESSOR                     int64
PRINCIPLES OF PROG. LANG.          int64
DATA STRUCTURES & ALGO. LAB.       int64
MICROPROCESSOR LABORATORY          int64
PROJECT BASED LEARNING II          int64
CODE OF CONDUCT                    int64
AUDIT COURSE.1                     int64
SEM_3_GPA                        float64
SEM_3_SGPA                       float64
SEM_4_GPA                        float64
SEM_4_SGPA                       float64
TOTAL_CREDITS_EARNED               int64
CGPA                             float64
dtype: object
In [11]:
# Coerce CGPA to a numeric dtype; errors='coerce' turns any unparseable entry into NaN
# instead of raising. In the run shown here CGPA is already float64 (see the dtypes
# listings before and after this cell), so the conversion leaves the column unchanged.
df['CGPA'] = pd.to_numeric(df['CGPA'], errors='coerce')
In [12]:
df.dtypes  # re-check the column dtypes after the conversion
Out[12]:
DISCRETE MATHEMATICS               int64
FUND. OF DATA STRUCTURES           int64
OBJECT ORIENTED PROGRAMMING        int64
COMPUTER GRAPHICS                  int64
DIGITAL ELEC. & LOGIC DESIGN       int64
DATA STUCTURES LABORATORY          int64
OOP & COMP. GRAPHICS LAB.          int64
DIGITAL ELEC. LABORATORY           int64
BUSINESS COMMUNICATION SKILLS      int64
HUMANITY & SOCIAL SCIENCE          int64
AUDIT COURSE                       int64
ENGINEERING MATHEMATICS III        int64
ENGINEERING MATHEMATICS III.1      int64
DATA STRUCTURES & ALGO.            int64
SOFTWARE ENGINEERING               int64
MICROPROCESSOR                     int64
PRINCIPLES OF PROG. LANG.          int64
DATA STRUCTURES & ALGO. LAB.       int64
MICROPROCESSOR LABORATORY          int64
PROJECT BASED LEARNING II          int64
CODE OF CONDUCT                    int64
AUDIT COURSE.1                     int64
SEM_3_GPA                        float64
SEM_3_SGPA                       float64
SEM_4_GPA                        float64
SEM_4_SGPA                       float64
TOTAL_CREDITS_EARNED               int64
CGPA                             float64
dtype: object
In [13]:
# Narrow the frame to the two semester SGPA columns plus the overall CGPA.
df2 = df.filter(
    items=['SEM_3_SGPA', 'SEM_4_SGPA', 'CGPA'],
)
df2
Out[13]:
SEM_3_SGPA SEM_4_SGPA CGPA
0 9.591 5.636 0.00
1 9.591 8.955 9.27
2 10.000 8.091 9.05
3 9.455 5.955 0.00
4 9.773 8.000 8.89
... ... ... ...
328 9.773 8.364 9.07
329 9.545 8.227 8.89
330 8.909 2.591 0.00
331 9.727 8.591 9.16
332 9.455 6.864 8.16

333 rows × 3 columns

In [14]:
df.head()  # preview the first 5 rows of the full dataset again
Out[14]:
DISCRETE MATHEMATICS FUND. OF DATA STRUCTURES OBJECT ORIENTED PROGRAMMING COMPUTER GRAPHICS DIGITAL ELEC. & LOGIC DESIGN DATA STUCTURES LABORATORY OOP & COMP. GRAPHICS LAB. DIGITAL ELEC. LABORATORY BUSINESS COMMUNICATION SKILLS HUMANITY & SOCIAL SCIENCE ... MICROPROCESSOR LABORATORY PROJECT BASED LEARNING II CODE OF CONDUCT AUDIT COURSE.1 SEM_3_GPA SEM_3_SGPA SEM_4_GPA SEM_4_SGPA TOTAL_CREDITS_EARNED CGPA
0 30 30 30 30 30 18 16 10 8 9 ... 6 18 9 0 211.0 9.591 124.0 5.636 39 0.00
1 30 30 30 30 30 18 18 9 9 7 ... 9 20 9 0 211.0 9.591 197.0 8.955 44 9.27
2 30 30 30 30 30 20 20 10 10 10 ... 10 20 9 0 220.0 10.000 178.0 8.091 44 9.05
3 30 30 30 30 30 16 16 9 9 8 ... 0 18 7 0 208.0 9.455 131.0 5.955 41 0.00
4 30 30 30 30 30 18 20 9 9 9 ... 9 18 10 0 215.0 9.773 176.0 8.000 44 8.89

5 rows × 28 columns

Replacing null (NaN) values with 0

In [15]:
# Columns that get a defensive NaN -> 0 fill. The null check earlier in the notebook
# reports zero missing values for this CSV, so on the data shown the fill changes
# nothing; it only matters if the input file later contains blanks in these columns.
ce_zero_fill_columns = [
    'AUDIT COURSE',
    'AUDIT COURSE.1',
    'ENGINEERING MATHEMATICS III',
    'DATA STRUCTURES & ALGO.',
    'SOFTWARE ENGINEERING',
    'MICROPROCESSOR',
    'PRINCIPLES OF PROG. LANG.',
    'DATA STRUCTURES & ALGO. LAB.',
    'MICROPROCESSOR LABORATORY',
    'CGPA',
]
# Fill all listed columns in one vectorised call; df is updated in place (same object).
df[ce_zero_fill_columns] = df[ce_zero_fill_columns].fillna(0)
In [25]:
df.isnull().sum()  # verification: per-column null counts after the fill
Out[25]:
DISCRETE MATHEMATICS             0
FUND. OF DATA STRUCTURES         0
OBJECT ORIENTED PROGRAMMING      0
COMPUTER GRAPHICS                0
DIGITAL ELEC. & LOGIC DESIGN     0
DATA STUCTURES LABORATORY        0
OOP & COMP. GRAPHICS LAB.        0
DIGITAL ELEC. LABORATORY         0
BUSINESS COMMUNICATION SKILLS    0
HUMANITY & SOCIAL SCIENCE        0
AUDIT COURSE                     0
ENGINEERING MATHEMATICS III      0
ENGINEERING MATHEMATICS III.1    0
DATA STRUCTURES & ALGO.          0
SOFTWARE ENGINEERING             0
MICROPROCESSOR                   0
PRINCIPLES OF PROG. LANG.        0
DATA STRUCTURES & ALGO. LAB.     0
MICROPROCESSOR LABORATORY        0
PROJECT BASED LEARNING II        0
CODE OF CONDUCT                  0
AUDIT COURSE.1                   0
SEM_3_GPA                        0
SEM_3_SGPA                       0
SEM_4_GPA                        0
SEM_4_SGPA                       0
TOTAL_CREDITS_EARNED             0
CGPA                             0
dtype: int64

Displaying Mean, Median and Standard Deviation of SEM 3,4 SGPA

In [26]:
# Summary statistics are taken on the raw SGPA values, before the normalisation
# section further down rescales these columns.
mean_sgpa_sem3 = df['SEM_3_SGPA'].mean()  # arithmetic mean of SEM 3 SGPA
mean_sgpa_sem3  # display the mean
Out[26]:
9.436123123123124
In [27]:
median_sgpa_sem3 = df['SEM_3_SGPA'].median()  # median of SEM 3 SGPA
median_sgpa_sem3  # display the median
Out[27]:
9.5
In [28]:
std_sgpa_sem3 = df['SEM_3_SGPA'].std()  # sample standard deviation of SEM 3 SGPA (pandas default ddof=1)
std_sgpa_sem3  # display the standard deviation
Out[28]:
0.35803710159749813
In [29]:
mean_sgpa_sem4 = df['SEM_4_SGPA'].mean()  # arithmetic mean of SEM 4 SGPA
mean_sgpa_sem4  # display the mean
Out[29]:
7.408681681681683
In [30]:
median_sgpa_sem4 = df['SEM_4_SGPA'].median()  # median of SEM 4 SGPA
median_sgpa_sem4  # display the median
Out[30]:
7.636
In [31]:
std_sgpa_sem4 = df['SEM_4_SGPA'].std()  # sample standard deviation of SEM 4 SGPA (pandas default ddof=1)
std_sgpa_sem4  # display the standard deviation
Out[31]:
1.2676948274236515

Calculating Internal and External Marks in Dataset

In [32]:
# Internal marks per semester: row-wise total of the listed lab / coursework / audit
# columns. Column labels are spelled exactly as in the CSV header (including
# 'DATA STUCTURES LABORATORY').
df["SEM_3_INTERNAL"] = df[[
    'DATA STUCTURES LABORATORY',
    'OOP & COMP. GRAPHICS LAB.',
    'DIGITAL ELEC. LABORATORY',
    'BUSINESS COMMUNICATION SKILLS',
    'HUMANITY & SOCIAL SCIENCE',
    'AUDIT COURSE',
]].sum(axis="columns")
df["SEM_4_INTERNAL"] = df[[
    'ENGINEERING MATHEMATICS III.1',
    'DATA STRUCTURES & ALGO. LAB.',
    'MICROPROCESSOR LABORATORY',
    'PROJECT BASED LEARNING II',
    'CODE OF CONDUCT',
    'AUDIT COURSE.1',
]].sum(axis="columns")
# External marks: whatever remains of the semester total once internals are removed.
df['SEM_3_EXTERNAL'] = df['SEM_3_GPA'].sub(df['SEM_3_INTERNAL'])
df['SEM_4_EXTERNAL'] = df['SEM_4_GPA'].sub(df['SEM_4_INTERNAL'])

Normalising Dataset

In [33]:
df.head(5)  # values before normalisation, now including the four derived INTERNAL/EXTERNAL columns
Out[33]:
DISCRETE MATHEMATICS FUND. OF DATA STRUCTURES OBJECT ORIENTED PROGRAMMING COMPUTER GRAPHICS DIGITAL ELEC. & LOGIC DESIGN DATA STUCTURES LABORATORY OOP & COMP. GRAPHICS LAB. DIGITAL ELEC. LABORATORY BUSINESS COMMUNICATION SKILLS HUMANITY & SOCIAL SCIENCE ... SEM_3_GPA SEM_3_SGPA SEM_4_GPA SEM_4_SGPA TOTAL_CREDITS_EARNED CGPA SEM_3_INTERNAL SEM_4_INTERNAL SEM_3_EXTERNAL SEM_4_EXTERNAL
0 30 30 30 30 30 18 16 10 8 9 ... 211.0 9.591 124.0 5.636 39 0.00 61 40 150.0 84.0
1 30 30 30 30 30 18 18 9 9 7 ... 211.0 9.591 197.0 8.955 44 9.27 61 65 150.0 132.0
2 30 30 30 30 30 20 20 10 10 10 ... 220.0 10.000 178.0 8.091 44 9.05 70 67 150.0 111.0
3 30 30 30 30 30 16 16 9 9 8 ... 208.0 9.455 131.0 5.955 41 0.00 58 32 150.0 99.0
4 30 30 30 30 30 18 20 9 9 9 ... 215.0 9.773 176.0 8.000 44 8.89 65 65 150.0 111.0

5 rows × 32 columns

In [34]:
# Divisor applied to each column to rescale it. Subject columns use 30, 20 or 10
# (presumably each subject's maximum marks — confirm against the marking scheme),
# SGPA/CGPA use 10, credits use 44, and the derived internal/external totals use
# 70 and 150 respectively.
ce_scale_divisors = {
    'DISCRETE MATHEMATICS': 30,
    'FUND. OF DATA STRUCTURES': 30,
    'OBJECT ORIENTED PROGRAMMING': 30,
    'COMPUTER GRAPHICS': 30,
    'DIGITAL ELEC. & LOGIC DESIGN': 30,
    'DATA STUCTURES LABORATORY': 20,
    'OOP & COMP. GRAPHICS LAB.': 20,
    'DIGITAL ELEC. LABORATORY': 10,
    'BUSINESS COMMUNICATION SKILLS': 10,
    'HUMANITY & SOCIAL SCIENCE': 10,
    'ENGINEERING MATHEMATICS III': 30,
    'ENGINEERING MATHEMATICS III.1': 10,
    'DATA STRUCTURES & ALGO.': 30,
    'SOFTWARE ENGINEERING': 30,
    'MICROPROCESSOR': 30,
    'PRINCIPLES OF PROG. LANG.': 30,
    'DATA STRUCTURES & ALGO. LAB.': 20,
    'MICROPROCESSOR LABORATORY': 10,
    'PROJECT BASED LEARNING II': 20,
    'CODE OF CONDUCT': 10,
    'SEM_3_SGPA': 10,
    'SEM_4_SGPA': 10,
    'TOTAL_CREDITS_EARNED': 44,
    'CGPA': 10,
    'SEM_3_INTERNAL': 70,
    'SEM_4_INTERNAL': 70,
    'SEM_3_EXTERNAL': 150,
    'SEM_4_EXTERNAL': 150,
}
# NOTE(review): 'AUDIT COURSE', 'AUDIT COURSE.1', 'SEM_3_GPA' and 'SEM_4_GPA' are not
# rescaled and keep their raw values — confirm that is intended.
# NOTE: df is modified in place, so executing this cell twice divides twice; re-run
# from the read_csv cell if the normalisation has to be repeated.
# CGPA is already float64 at this point (coerced with pd.to_numeric and NaN-filled in
# earlier cells), so no further type conversion is needed after the division.
for column_name, divisor in ce_scale_divisors.items():
    df[column_name] = df[column_name] / divisor

Normalised Dataset

In [60]:
#df.head() #Normalised dataset
In [61]:
# Lift the column-display limit so no columns are elided when a frame is rendered.
# pd.set_option changes the setting globally, so it also applies to every DataFrame
# displayed after this cell.
pd.set_option('display.max_columns', None)
df  # normalised dataset
Out[61]:
DISCRETE MATHEMATICS FUND. OF DATA STRUCTURES OBJECT ORIENTED PROGRAMMING COMPUTER GRAPHICS DIGITAL ELEC. & LOGIC DESIGN DATA STUCTURES LABORATORY OOP & COMP. GRAPHICS LAB. DIGITAL ELEC. LABORATORY BUSINESS COMMUNICATION SKILLS HUMANITY & SOCIAL SCIENCE AUDIT COURSE ENGINEERING MATHEMATICS III ENGINEERING MATHEMATICS III.1 DATA STRUCTURES & ALGO. SOFTWARE ENGINEERING MICROPROCESSOR PRINCIPLES OF PROG. LANG. DATA STRUCTURES & ALGO. LAB. MICROPROCESSOR LABORATORY PROJECT BASED LEARNING II CODE OF CONDUCT AUDIT COURSE.1 SEM_3_GPA SEM_3_SGPA SEM_4_GPA SEM_4_SGPA TOTAL_CREDITS_EARNED CGPA SEM_3_INTERNAL SEM_4_INTERNAL SEM_3_EXTERNAL SEM_4_EXTERNAL
0 1.0 1.0 1.0 1.0 1.0 0.9 0.8 1.0 0.8 0.9 0 0.8 0.7 0.7 0.7 0.0 0.6 0.0 0.6 0.9 0.9 0 211.0 0.9591 124.0 0.5636 0.886364 0.000 0.871429 0.571429 1.00 0.56
1 1.0 1.0 1.0 1.0 1.0 0.9 0.9 0.9 0.9 0.7 0 1.0 0.9 0.9 0.8 0.8 0.9 0.9 0.9 1.0 0.9 0 211.0 0.9591 197.0 0.8955 1.000000 0.927 0.871429 0.928571 1.00 0.88
2 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 0 1.0 0.8 0.6 0.5 0.9 0.7 1.0 1.0 1.0 0.9 0 220.0 1.0000 178.0 0.8091 1.000000 0.905 1.000000 0.957143 1.00 0.74
3 1.0 1.0 1.0 1.0 1.0 0.8 0.8 0.9 0.9 0.8 0 0.7 0.7 0.6 0.6 0.7 0.7 0.0 0.0 0.9 0.7 0 208.0 0.9455 131.0 0.5955 0.931818 0.000 0.828571 0.457143 1.00 0.66
4 1.0 1.0 1.0 1.0 1.0 0.9 1.0 0.9 0.9 0.9 0 0.7 1.0 0.8 0.7 0.8 0.7 0.9 0.9 0.9 1.0 0 215.0 0.9773 176.0 0.8000 1.000000 0.889 0.928571 0.928571 1.00 0.74
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
328 1.0 1.0 1.0 1.0 1.0 0.9 1.0 0.9 0.9 0.9 0 0.9 0.7 0.8 0.8 0.8 0.7 0.9 0.9 1.0 1.0 0 215.0 0.9773 184.0 0.8364 1.000000 0.907 0.928571 0.914286 1.00 0.80
329 1.0 1.0 0.9 1.0 1.0 0.9 0.9 0.9 0.9 0.9 0 1.0 0.9 0.6 0.7 0.8 0.8 0.9 0.8 1.0 0.9 0 210.0 0.9545 181.0 0.8227 1.000000 0.889 0.900000 0.914286 0.98 0.78
330 0.9 1.0 0.8 0.8 1.0 0.9 0.8 0.9 0.9 0.9 0 0.0 0.8 0.0 0.0 0.0 0.0 0.7 0.7 0.9 1.0 0 196.0 0.8909 57.0 0.2591 0.659091 0.000 0.871429 0.814286 0.90 0.00
331 1.0 1.0 1.0 0.9 1.0 0.9 1.0 1.0 1.0 0.9 0 1.0 0.9 0.7 0.7 0.9 0.8 1.0 0.9 0.9 1.0 0 214.0 0.9727 189.0 0.8591 1.000000 0.916 0.957143 0.942857 0.98 0.82
332 1.0 1.0 1.0 0.9 1.0 0.9 0.8 0.9 0.9 0.9 0 0.6 0.7 0.7 0.7 0.5 0.7 0.7 0.8 0.9 0.8 0 208.0 0.9455 151.0 0.6864 1.000000 0.816 0.871429 0.785714 0.98 0.64

333 rows × 32 columns

Saving Normalized Dataset

In [62]:
# Keep a second name for the normalised Computer Engg. frame (same object as df, not
# a copy), then write it out as CSV.
# NOTE(review): to_csv is called without index=..., so the row index is written as an
# extra unnamed leading column — confirm that readers of this file expect it.
df_ce = df
df_ce.to_csv('CE_Normalized.csv')

Results of EnTC Department

Importing EnTC Dept Dataset

In [63]:
df = pd.read_csv("ETC_SE_2021.csv")  # load the EnTC department dataset; this rebinds df (the Computer Engg. frame stays reachable as df_ce)
In [64]:
df.head()  # preview the first 5 rows of the dataset
Out[64]:
ELECTRONIC CIRCUITS DIGITAL CIRCUITS ELECTRICAL CIRCUITS DATA STRUCTURES ELECTRONIC CIRCUIT LAB DIGITAL CIRCUITS LAB ELECTRICAL CIRCUIT LAB DATA STRUCTURES LAB ELECTRONIC SKILL DEVELOPMENT AUDIT COURSE ENGINEERING MATHEMATICS III ENGINEERING MATHEMATICS III.1 SIGNALS & SYSTEMS SIGNALS & SYSTEMS.1 CONTROL SYSTEMS PRINCIPLES OF COMMU. SYSTEMS OBJECT ORIENTED PROGRAMMING SIGNALS & CONTROL SYSTEM LAB PRINCIPLE OF COMMU. SYS. LAB OOPS LAB DATA ANALYTICS LAB EMPLOYABILITY SKILL DEV. PROJECT BASED LEARNING AUDIT COURSE.1 SEM_3_GPA SEM_3_SGPA SEM_4_GPA SEM_4_SGPA TOTAL_CREDITS_EARNED CGPA
0 27 30 30 30 4 9 9 9 8 0 40 7 21 10 18 24 18 8 9 9 10 24 18 0 203.0 9.227 169.0 7.682 44 8.45
1 27 30 30 30 9 10 9 10 9 0 40 7 24 9 21 24 18 8 10 9 9 21 18 0 211.0 9.591 171.0 7.773 44 8.68
2 27 30 30 30 6 10 9 7 9 0 40 9 18 9 12 21 12 8 9 9 9 24 16 0 207.0 9.409 147.0 6.682 44 8.05
3 27 30 30 30 4 7 6 8 8 0 40 8 24 9 27 21 18 8 8 8 9 27 14 0 198.0 9.000 173.0 7.864 44 8.43
4 30 30 30 30 6 8 10 9 9 0 40 10 21 10 21 24 18 9 8 9 8 27 18 0 212.0 9.636 173.0 7.864 44 8.75
In [65]:
df.tail()  # preview the last 5 rows of the dataset
Out[65]:
ELECTRONIC CIRCUITS DIGITAL CIRCUITS ELECTRICAL CIRCUITS DATA STRUCTURES ELECTRONIC CIRCUIT LAB DIGITAL CIRCUITS LAB ELECTRICAL CIRCUIT LAB DATA STRUCTURES LAB ELECTRONIC SKILL DEVELOPMENT AUDIT COURSE ENGINEERING MATHEMATICS III ENGINEERING MATHEMATICS III.1 SIGNALS & SYSTEMS SIGNALS & SYSTEMS.1 CONTROL SYSTEMS PRINCIPLES OF COMMU. SYSTEMS OBJECT ORIENTED PROGRAMMING SIGNALS & CONTROL SYSTEM LAB PRINCIPLE OF COMMU. SYS. LAB OOPS LAB DATA ANALYTICS LAB EMPLOYABILITY SKILL DEV. PROJECT BASED LEARNING AUDIT COURSE.1 SEM_3_GPA SEM_3_SGPA SEM_4_GPA SEM_4_SGPA TOTAL_CREDITS_EARNED CGPA
304 27 30 30 30 5 10 9 7 9 0 40 9 12 9 12 12 18 8 8 6 8 27 14 0 206.0 9.364 134.0 6.091 44 7.73
305 24 30 30 30 9 8 9 7 9 0 28 9 21 9 15 18 15 8 8 8 8 24 16 0 193.0 8.773 150.0 6.818 44 7.80
306 24 30 30 30 7 9 10 8 9 0 40 9 24 10 27 24 18 9 9 9 7 27 16 0 206.0 9.364 180.0 8.182 44 8.77
307 27 30 30 30 4 9 10 9 9 0 40 9 24 9 24 21 18 8 6 9 9 24 18 0 207.0 9.409 170.0 7.727 44 8.57
308 21 30 30 27 4 9 9 4 9 0 40 9 12 8 12 12 12 7 8 8 7 21 14 0 192.0 8.727 121.0 5.500 44 7.11
In [66]:
df.shape  # (number of rows, number of columns)
Out[66]:
(309, 30)
In [67]:
df.isnull().sum()  # count of missing (NaN) values in each column
Out[67]:
ELECTRONIC CIRCUITS              0
DIGITAL CIRCUITS                 0
ELECTRICAL CIRCUITS              0
DATA STRUCTURES                  0
ELECTRONIC CIRCUIT LAB           0
DIGITAL CIRCUITS LAB             0
ELECTRICAL CIRCUIT LAB           0
DATA STRUCTURES LAB              0
ELECTRONIC SKILL DEVELOPMENT     0
AUDIT COURSE                     0
ENGINEERING MATHEMATICS III      0
ENGINEERING MATHEMATICS III.1    0
SIGNALS & SYSTEMS                0
SIGNALS & SYSTEMS.1              0
CONTROL SYSTEMS                  0
PRINCIPLES OF COMMU. SYSTEMS     0
OBJECT ORIENTED PROGRAMMING      0
SIGNALS & CONTROL SYSTEM LAB     0
PRINCIPLE OF COMMU. SYS. LAB     0
OOPS LAB                         0
DATA ANALYTICS LAB               0
EMPLOYABILITY SKILL DEV.         0
PROJECT BASED LEARNING           0
AUDIT COURSE.1                   0
SEM_3_GPA                        0
SEM_3_SGPA                       0
SEM_4_GPA                        0
SEM_4_SGPA                       0
TOTAL_CREDITS_EARNED             0
CGPA                             0
dtype: int64
In [68]:
print("Total missing values: ", df.isnull().sum().sum())  # per-column null counts summed into one grand total
Total missing values:  0
In [69]:
df.info()  # index range, column names, non-null counts, dtypes and memory usage
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 309 entries, 0 to 308
Data columns (total 30 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   ELECTRONIC CIRCUITS            309 non-null    int64  
 1   DIGITAL CIRCUITS               309 non-null    int64  
 2   ELECTRICAL CIRCUITS            309 non-null    int64  
 3   DATA STRUCTURES                309 non-null    int64  
 4   ELECTRONIC CIRCUIT LAB         309 non-null    int64  
 5   DIGITAL CIRCUITS LAB           309 non-null    int64  
 6   ELECTRICAL CIRCUIT LAB         309 non-null    int64  
 7   DATA STRUCTURES LAB            309 non-null    int64  
 8   ELECTRONIC SKILL DEVELOPMENT   309 non-null    int64  
 9   AUDIT COURSE                   309 non-null    int64  
 10  ENGINEERING MATHEMATICS III    309 non-null    int64  
 11  ENGINEERING MATHEMATICS III.1  309 non-null    int64  
 12  SIGNALS & SYSTEMS              309 non-null    int64  
 13  SIGNALS & SYSTEMS.1            309 non-null    int64  
 14  CONTROL SYSTEMS                309 non-null    int64  
 15  PRINCIPLES OF COMMU. SYSTEMS   309 non-null    int64  
 16  OBJECT ORIENTED PROGRAMMING    309 non-null    int64  
 17  SIGNALS & CONTROL SYSTEM LAB   309 non-null    int64  
 18  PRINCIPLE OF COMMU. SYS. LAB   309 non-null    int64  
 19  OOPS LAB                       309 non-null    int64  
 20  DATA ANALYTICS LAB             309 non-null    int64  
 21  EMPLOYABILITY SKILL DEV.       309 non-null    int64  
 22  PROJECT BASED LEARNING         309 non-null    int64  
 23  AUDIT COURSE.1                 309 non-null    int64  
 24  SEM_3_GPA                      309 non-null    float64
 25  SEM_3_SGPA                     309 non-null    float64
 26  SEM_4_GPA                      309 non-null    float64
 27  SEM_4_SGPA                     309 non-null    float64
 28  TOTAL_CREDITS_EARNED           309 non-null    int64  
 29  CGPA                           309 non-null    float64
dtypes: float64(5), int64(25)
memory usage: 72.5 KB
In [70]:
df.dtypes  # data type of each column
Out[70]:
ELECTRONIC CIRCUITS                int64
DIGITAL CIRCUITS                   int64
ELECTRICAL CIRCUITS                int64
DATA STRUCTURES                    int64
ELECTRONIC CIRCUIT LAB             int64
DIGITAL CIRCUITS LAB               int64
ELECTRICAL CIRCUIT LAB             int64
DATA STRUCTURES LAB                int64
ELECTRONIC SKILL DEVELOPMENT       int64
AUDIT COURSE                       int64
ENGINEERING MATHEMATICS III        int64
ENGINEERING MATHEMATICS III.1      int64
SIGNALS & SYSTEMS                  int64
SIGNALS & SYSTEMS.1                int64
CONTROL SYSTEMS                    int64
PRINCIPLES OF COMMU. SYSTEMS       int64
OBJECT ORIENTED PROGRAMMING        int64
SIGNALS & CONTROL SYSTEM LAB       int64
PRINCIPLE OF COMMU. SYS. LAB       int64
OOPS LAB                           int64
DATA ANALYTICS LAB                 int64
EMPLOYABILITY SKILL DEV.           int64
PROJECT BASED LEARNING             int64
AUDIT COURSE.1                     int64
SEM_3_GPA                        float64
SEM_3_SGPA                       float64
SEM_4_GPA                        float64
SEM_4_SGPA                       float64
TOTAL_CREDITS_EARNED               int64
CGPA                             float64
dtype: object
In [71]:
# Coerce CGPA to a numeric dtype; errors='coerce' turns any unparseable entry into NaN
# instead of raising. In the run shown here CGPA is already float64 (see the dtypes
# listings before and after this cell), so the conversion leaves the column unchanged.
df['CGPA'] = pd.to_numeric(df['CGPA'], errors='coerce')
In [72]:
df.dtypes  # re-check the column dtypes after the conversion
Out[72]:
ELECTRONIC CIRCUITS                int64
DIGITAL CIRCUITS                   int64
ELECTRICAL CIRCUITS                int64
DATA STRUCTURES                    int64
ELECTRONIC CIRCUIT LAB             int64
DIGITAL CIRCUITS LAB               int64
ELECTRICAL CIRCUIT LAB             int64
DATA STRUCTURES LAB                int64
ELECTRONIC SKILL DEVELOPMENT       int64
AUDIT COURSE                       int64
ENGINEERING MATHEMATICS III        int64
ENGINEERING MATHEMATICS III.1      int64
SIGNALS & SYSTEMS                  int64
SIGNALS & SYSTEMS.1                int64
CONTROL SYSTEMS                    int64
PRINCIPLES OF COMMU. SYSTEMS       int64
OBJECT ORIENTED PROGRAMMING        int64
SIGNALS & CONTROL SYSTEM LAB       int64
PRINCIPLE OF COMMU. SYS. LAB       int64
OOPS LAB                           int64
DATA ANALYTICS LAB                 int64
EMPLOYABILITY SKILL DEV.           int64
PROJECT BASED LEARNING             int64
AUDIT COURSE.1                     int64
SEM_3_GPA                        float64
SEM_3_SGPA                       float64
SEM_4_GPA                        float64
SEM_4_SGPA                       float64
TOTAL_CREDITS_EARNED               int64
CGPA                             float64
dtype: object
In [73]:
# Narrow the frame to the two semester SGPA columns plus the overall CGPA.
df2 = df.filter(
    items=['SEM_3_SGPA', 'SEM_4_SGPA', 'CGPA'],
)
df2
Out[73]:
SEM_3_SGPA SEM_4_SGPA CGPA
0 9.227 7.682 8.45
1 9.591 7.773 8.68
2 9.409 6.682 8.05
3 9.000 7.864 8.43
4 9.636 7.864 8.75
... ... ... ...
304 9.364 6.091 7.73
305 8.773 6.818 7.80
306 9.364 8.182 8.77
307 9.409 7.727 8.57
308 8.727 5.500 7.11

309 rows × 3 columns

Replacing null (NaN) values with 0

In [74]:
# Columns that get a defensive NaN -> 0 fill. The null check earlier in this section
# reports zero missing values for this CSV, so on the data shown the fill changes
# nothing; it only matters if the input file later contains blanks in these columns.
# 'SIGNALS & SYSTEMS' is listed once; filling the same column a second time has no
# further effect.
# NOTE(review): if a second, different column (e.g. 'SIGNALS & SYSTEMS.1') was meant
# to be covered as well, add it here — confirm with the author.
entc_zero_fill_columns = [
    'AUDIT COURSE',
    'AUDIT COURSE.1',
    'ENGINEERING MATHEMATICS III',
    'DATA STRUCTURES LAB',
    'ELECTRONIC CIRCUIT LAB',
    'ELECTRICAL CIRCUIT LAB',
    'OOPS LAB',
    'DATA ANALYTICS LAB',
    'SIGNALS & SYSTEMS',
    'CONTROL SYSTEMS',
    'PRINCIPLES OF COMMU. SYSTEMS',
    'OBJECT ORIENTED PROGRAMMING',
    'CGPA',
]
# Fill all listed columns in one vectorised call; df is updated in place (same object).
df[entc_zero_fill_columns] = df[entc_zero_fill_columns].fillna(0)
In [88]:
df.isnull().sum()  # verification: per-column null counts after the fill
Out[88]:
ELECTRONIC CIRCUITS              0
DIGITAL CIRCUITS                 0
ELECTRICAL CIRCUITS              0
DATA STRUCTURES                  0
ELECTRONIC CIRCUIT LAB           0
DIGITAL CIRCUITS LAB             0
ELECTRICAL CIRCUIT LAB           0
DATA STRUCTURES LAB              0
ELECTRONIC SKILL DEVELOPMENT     0
AUDIT COURSE                     0
ENGINEERING MATHEMATICS III      0
ENGINEERING MATHEMATICS III.1    0
SIGNALS & SYSTEMS                0
SIGNALS & SYSTEMS.1              0
CONTROL SYSTEMS                  0
PRINCIPLES OF COMMU. SYSTEMS     0
OBJECT ORIENTED PROGRAMMING      0
SIGNALS & CONTROL SYSTEM LAB     0
PRINCIPLE OF COMMU. SYS. LAB     0
OOPS LAB                         0
DATA ANALYTICS LAB               0
EMPLOYABILITY SKILL DEV.         0
PROJECT BASED LEARNING           0
AUDIT COURSE.1                   0
SEM_3_GPA                        0
SEM_3_SGPA                       0
SEM_4_GPA                        0
SEM_4_SGPA                       0
TOTAL_CREDITS_EARNED             0
CGPA                             0
dtype: int64

Displaying Mean, Median and Standard Deviation of SEM 3,4 SGPA

In [89]:
# Summary statistics are taken on the raw SGPA values, before the normalisation
# section further down. These assignments reuse (overwrite) the variable names that
# held the Computer Engg. statistics earlier in the notebook.
mean_sgpa_sem3 = df['SEM_3_SGPA'].mean()  # arithmetic mean of SEM 3 SGPA
mean_sgpa_sem3  # display the mean
Out[89]:
9.310245954692556
In [90]:
median_sgpa_sem3 = df['SEM_3_SGPA'].median()  # median of SEM 3 SGPA
median_sgpa_sem3  # display the median
Out[90]:
9.409
In [91]:
std_sgpa_sem3 = df['SEM_3_SGPA'].std()  # sample standard deviation of SEM 3 SGPA (pandas default ddof=1)
std_sgpa_sem3  # display the standard deviation
Out[91]:
0.49598066084602294
In [92]:
mean_sgpa_sem4 = df['SEM_4_SGPA'].mean()  # arithmetic mean of SEM 4 SGPA
mean_sgpa_sem4  # display the mean
Out[92]:
6.91982200647249
In [93]:
median_sgpa_sem4 = df['SEM_4_SGPA'].median()  # median of SEM 4 SGPA
median_sgpa_sem4  # display the median
Out[93]:
7.227
In [94]:
std_sgpa_sem4 = df['SEM_4_SGPA'].std()  # sample standard deviation of SEM 4 SGPA (pandas default ddof=1)
std_sgpa_sem4  # display the standard deviation
Out[94]:
1.320573892868979

Calculating Internal and External Marks in Dataset

In [95]:
# Internal marks per semester: row-wise total of the listed lab / coursework / audit
# columns (labels spelled exactly as in the CSV header).
df["SEM_3_INTERNAL"] = df[[
    'ENGINEERING MATHEMATICS III.1',
    'ELECTRONIC CIRCUIT LAB',
    'DIGITAL CIRCUITS LAB',
    'ELECTRICAL CIRCUIT LAB',
    'DATA STRUCTURES LAB',
    'ELECTRONIC SKILL DEVELOPMENT',
    'AUDIT COURSE',
]].sum(axis="columns")
df["SEM_4_INTERNAL"] = df[[
    'SIGNALS & SYSTEMS.1',
    'SIGNALS & CONTROL SYSTEM LAB',
    'PRINCIPLE OF COMMU. SYS. LAB',
    'OOPS LAB',
    'DATA ANALYTICS LAB',
    'PROJECT BASED LEARNING',
    'AUDIT COURSE.1',
]].sum(axis="columns")

# External marks: whatever remains of the semester total once internals are removed.
df['SEM_3_EXTERNAL'] = df['SEM_3_GPA'].sub(df['SEM_3_INTERNAL'])
df['SEM_4_EXTERNAL'] = df['SEM_4_GPA'].sub(df['SEM_4_INTERNAL'])

Normalising Dataset

In [96]:
df.head()  # values before normalisation, now including the four derived INTERNAL/EXTERNAL columns
Out[96]:
ELECTRONIC CIRCUITS DIGITAL CIRCUITS ELECTRICAL CIRCUITS DATA STRUCTURES ELECTRONIC CIRCUIT LAB DIGITAL CIRCUITS LAB ELECTRICAL CIRCUIT LAB DATA STRUCTURES LAB ELECTRONIC SKILL DEVELOPMENT AUDIT COURSE ENGINEERING MATHEMATICS III ENGINEERING MATHEMATICS III.1 SIGNALS & SYSTEMS SIGNALS & SYSTEMS.1 CONTROL SYSTEMS PRINCIPLES OF COMMU. SYSTEMS OBJECT ORIENTED PROGRAMMING SIGNALS & CONTROL SYSTEM LAB PRINCIPLE OF COMMU. SYS. LAB OOPS LAB DATA ANALYTICS LAB EMPLOYABILITY SKILL DEV. PROJECT BASED LEARNING AUDIT COURSE.1 SEM_3_GPA SEM_3_SGPA SEM_4_GPA SEM_4_SGPA TOTAL_CREDITS_EARNED CGPA SEM_3_INTERNAL SEM_4_INTERNAL SEM_3_EXTERNAL SEM_4_EXTERNAL
0 27 30 30 30 4 9 9 9 8 0 40 7 21 10 18 24 18 8 9 9 10 24 18 0 203.0 9.227 169.0 7.682 44 8.45 46 64 157.0 105.0
1 27 30 30 30 9 10 9 10 9 0 40 7 24 9 21 24 18 8 10 9 9 21 18 0 211.0 9.591 171.0 7.773 44 8.68 54 63 157.0 108.0
2 27 30 30 30 6 10 9 7 9 0 40 9 18 9 12 21 12 8 9 9 9 24 16 0 207.0 9.409 147.0 6.682 44 8.05 50 60 157.0 87.0
3 27 30 30 30 4 7 6 8 8 0 40 8 24 9 27 21 18 8 8 8 9 27 14 0 198.0 9.000 173.0 7.864 44 8.43 41 56 157.0 117.0
4 30 30 30 30 6 8 10 9 9 0 40 10 21 10 21 24 18 9 8 9 8 27 18 0 212.0 9.636 173.0 7.864 44 8.75 52 62 160.0 111.0
In [97]:
# Rescale each listed column by dividing it by a fixed divisor.
# The divisors were previously scattered over one cell per column; they are
# collected here so they can be reviewed and changed in one place.
# (Presumably each divisor is the column's maximum attainable value - confirm.)
#
# Columns absent from the mapping (AUDIT COURSE, AUDIT COURSE.1, SEM_3_GPA,
# SEM_4_GPA) keep their raw values.
# NOTE(review): confirm that SEM_3_GPA / SEM_4_GPA are meant to stay unscaled.
#
# WARNING: this cell is not idempotent - running it a second time divides the
# already rescaled columns again. Reload the CSV before re-running.
ETC_SCALE_DIVISORS = {
    # SEM 3 subjects
    'ELECTRONIC CIRCUITS': 30,
    'DIGITAL CIRCUITS': 30,
    'ELECTRICAL CIRCUITS': 30,
    'DATA STRUCTURES': 30,
    'ENGINEERING MATHEMATICS III': 40,
    'ELECTRONIC CIRCUIT LAB': 10,
    'DIGITAL CIRCUITS LAB': 10,
    'ELECTRICAL CIRCUIT LAB': 10,
    'DATA STRUCTURES LAB': 10,
    'ELECTRONIC SKILL DEVELOPMENT': 10,
    'ENGINEERING MATHEMATICS III.1': 10,
    # SEM 4 subjects
    'SIGNALS & SYSTEMS': 30,
    'SIGNALS & SYSTEMS.1': 10,
    'CONTROL SYSTEMS': 30,
    'PRINCIPLES OF COMMU. SYSTEMS': 30,
    'OBJECT ORIENTED PROGRAMMING': 30,
    'EMPLOYABILITY SKILL DEV.': 30,
    'SIGNALS & CONTROL SYSTEM LAB': 10,
    'PRINCIPLE OF COMMU. SYS. LAB': 10,
    'OOPS LAB': 10,
    'DATA ANALYTICS LAB': 10,
    'PROJECT BASED LEARNING': 20,
    # Aggregates
    'SEM_3_SGPA': 10,
    'SEM_4_SGPA': 10,
    'TOTAL_CREDITS_EARNED': 44,
    'CGPA': 10,
    # Derived totals
    'SEM_3_INTERNAL': 60,
    'SEM_4_INTERNAL': 70,
    'SEM_3_EXTERNAL': 160,
    'SEM_4_EXTERNAL': 150,
}

for column, divisor in ETC_SCALE_DIVISORS.items():
    df[column] = df[column] / divisor

Normalised Dataset

In [124]:
# Lift the column display limit so the rendered frame shows every column.
pd.options.display.max_columns = None
df  # rescaled frame, shown as the cell output
Out[124]:
ELECTRONIC CIRCUITS DIGITAL CIRCUITS ELECTRICAL CIRCUITS DATA STRUCTURES ELECTRONIC CIRCUIT LAB DIGITAL CIRCUITS LAB ELECTRICAL CIRCUIT LAB DATA STRUCTURES LAB ELECTRONIC SKILL DEVELOPMENT AUDIT COURSE ENGINEERING MATHEMATICS III ENGINEERING MATHEMATICS III.1 SIGNALS & SYSTEMS SIGNALS & SYSTEMS.1 CONTROL SYSTEMS PRINCIPLES OF COMMU. SYSTEMS OBJECT ORIENTED PROGRAMMING SIGNALS & CONTROL SYSTEM LAB PRINCIPLE OF COMMU. SYS. LAB OOPS LAB DATA ANALYTICS LAB EMPLOYABILITY SKILL DEV. PROJECT BASED LEARNING AUDIT COURSE.1 SEM_3_GPA SEM_3_SGPA SEM_4_GPA SEM_4_SGPA TOTAL_CREDITS_EARNED CGPA SEM_3_INTERNAL SEM_4_INTERNAL SEM_3_EXTERNAL SEM_4_EXTERNAL
0 0.9 1.0 1.0 1.0 0.4 0.9 0.9 0.9 0.8 0 1.0 0.7 0.7 1.0 0.6 0.8 0.6 0.8 0.9 0.9 1.0 0.8 0.9 0 203.0 0.9227 169.0 0.7682 1.0 0.845 0.766667 0.914286 0.98125 0.70
1 0.9 1.0 1.0 1.0 0.9 1.0 0.9 1.0 0.9 0 1.0 0.7 0.8 0.9 0.7 0.8 0.6 0.8 1.0 0.9 0.9 0.7 0.9 0 211.0 0.9591 171.0 0.7773 1.0 0.868 0.900000 0.900000 0.98125 0.72
2 0.9 1.0 1.0 1.0 0.6 1.0 0.9 0.7 0.9 0 1.0 0.9 0.6 0.9 0.4 0.7 0.4 0.8 0.9 0.9 0.9 0.8 0.8 0 207.0 0.9409 147.0 0.6682 1.0 0.805 0.833333 0.857143 0.98125 0.58
3 0.9 1.0 1.0 1.0 0.4 0.7 0.6 0.8 0.8 0 1.0 0.8 0.8 0.9 0.9 0.7 0.6 0.8 0.8 0.8 0.9 0.9 0.7 0 198.0 0.9000 173.0 0.7864 1.0 0.843 0.683333 0.800000 0.98125 0.78
4 1.0 1.0 1.0 1.0 0.6 0.8 1.0 0.9 0.9 0 1.0 1.0 0.7 1.0 0.7 0.8 0.6 0.9 0.8 0.9 0.8 0.9 0.9 0 212.0 0.9636 173.0 0.7864 1.0 0.875 0.866667 0.885714 1.00000 0.74
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
304 0.9 1.0 1.0 1.0 0.5 1.0 0.9 0.7 0.9 0 1.0 0.9 0.4 0.9 0.4 0.4 0.6 0.8 0.8 0.6 0.8 0.9 0.7 0 206.0 0.9364 134.0 0.6091 1.0 0.773 0.816667 0.757143 0.98125 0.54
305 0.8 1.0 1.0 1.0 0.9 0.8 0.9 0.7 0.9 0 0.7 0.9 0.7 0.9 0.5 0.6 0.5 0.8 0.8 0.8 0.8 0.8 0.8 0 193.0 0.8773 150.0 0.6818 1.0 0.780 0.850000 0.814286 0.88750 0.62
306 0.8 1.0 1.0 1.0 0.7 0.9 1.0 0.8 0.9 0 1.0 0.9 0.8 1.0 0.9 0.8 0.6 0.9 0.9 0.9 0.7 0.9 0.8 0 206.0 0.9364 180.0 0.8182 1.0 0.877 0.866667 0.857143 0.96250 0.80
307 0.9 1.0 1.0 1.0 0.4 0.9 1.0 0.9 0.9 0 1.0 0.9 0.8 0.9 0.8 0.7 0.6 0.8 0.6 0.9 0.9 0.8 0.9 0 207.0 0.9409 170.0 0.7727 1.0 0.857 0.833333 0.842857 0.98125 0.74
308 0.7 1.0 1.0 0.9 0.4 0.9 0.9 0.4 0.9 0 1.0 0.9 0.4 0.8 0.4 0.4 0.4 0.7 0.8 0.8 0.7 0.7 0.7 0 192.0 0.8727 121.0 0.5500 1.0 0.711 0.733333 0.742857 0.92500 0.46

309 rows × 34 columns

Saving Normalized Dataset

In [125]:
# Keep a handle on this frame under its own name: `df` is rebound when the
# next dataset is loaded. `df_etc` is the same object as `df`, not a copy.
df_etc = df
# to_csv is called with its defaults, so the row index is written as well.
df_etc.to_csv("ETC_Normalized.csv")

Results of IT Department

Importing IT Department Dataset

In [126]:
# Load the IT department results; `df` now refers to this new frame.
df = pd.read_csv(filepath_or_buffer="IT_SE_2021.csv")
In [127]:
df.head() # preview the first five rows of the IT dataset
Out[127]:
DISCRETE MATHEMATICS DISCRETE MATHEMATICS.1 LOGIC DESIGN & COMP. ORG. DATA STRUCTURES & ALGO. OBJECT ORIENTED PROGRAMMING BASIC OF COMPUTER NETWORK LOGIC DESIGN COMP. ORG. LAB DATA STRUCTURES & ALGO. LAB OBJECT ORIENTED PROG. LAB SOFT SKILL LAB AUDIT COURSE ENGINEERING MATHEMATICS-III ENGINEERING MATHEMATICS-III.1 PROCESSOR ARCHITECTURE DATABASE MANAGEMENT SYSTEM COMPUTER GRAPHICS SOFTWARE ENGINEERING PROG. SKILL DEVELOPMENT LAB DATABASE MGMT. SYSTEM LAB COMPUTER GRAPHICS LAB PROJECT BASED LEARNING AUDIT COURSE.1 SEM_3_GPA SEM_3_SGPA SEM_4_GPA SEM_4_SGPA TOTAL_CREDITS_EARNED CGPA
0 30 10 30 30 30 30 10 20 18 9 0 21 10 21 21 24 21 7 18 9 18 0 217.0 9.864 170.0 7.727 44 8.80
1 24 8 27 27 30 27 7 14 14 8 0 24 10 24 24 24 21 6 18 7 18 0 186.0 8.455 176.0 8.000 44 8.23
2 30 10 30 30 30 30 9 18 18 9 0 24 10 0 0 21 12 7 16 7 18 0 214.0 9.727 115.0 5.227 38 0.00
3 30 9 27 30 30 30 10 20 18 9 0 30 10 18 21 27 21 10 18 9 20 0 213.0 9.682 184.0 8.364 44 9.02
4 27 10 30 27 30 30 9 18 16 9 0 21 10 18 12 21 15 7 18 5 18 0 206.0 9.364 145.0 6.591 44 7.98
In [128]:
df.tail() # preview the last five rows of the IT dataset
Out[128]:
DISCRETE MATHEMATICS DISCRETE MATHEMATICS.1 LOGIC DESIGN & COMP. ORG. DATA STRUCTURES & ALGO. OBJECT ORIENTED PROGRAMMING BASIC OF COMPUTER NETWORK LOGIC DESIGN COMP. ORG. LAB DATA STRUCTURES & ALGO. LAB OBJECT ORIENTED PROG. LAB SOFT SKILL LAB AUDIT COURSE ENGINEERING MATHEMATICS-III ENGINEERING MATHEMATICS-III.1 PROCESSOR ARCHITECTURE DATABASE MANAGEMENT SYSTEM COMPUTER GRAPHICS SOFTWARE ENGINEERING PROG. SKILL DEVELOPMENT LAB DATABASE MGMT. SYSTEM LAB COMPUTER GRAPHICS LAB PROJECT BASED LEARNING AUDIT COURSE.1 SEM_3_GPA SEM_3_SGPA SEM_4_GPA SEM_4_SGPA TOTAL_CREDITS_EARNED CGPA
241 30 10 30 30 30 30 9 18 18 9 0 24 10 18 24 21 18 8 18 9 18 0 214.0 9.727 168.0 7.636 44 8.68
242 30 10 30 30 30 30 9 20 20 10 0 15 10 21 21 21 15 8 20 9 20 0 219.0 9.955 160.0 7.273 44 8.61
243 30 10 30 30 30 30 10 20 20 9 0 30 10 21 18 24 21 10 20 10 20 0 219.0 9.955 184.0 8.364 44 9.16
244 30 10 30 30 30 30 9 20 16 9 0 24 10 18 24 24 15 7 18 9 18 0 214.0 9.727 167.0 7.591 44 8.66
245 30 10 27 30 30 30 10 18 16 9 0 24 10 21 18 21 15 7 18 8 18 0 210.0 9.545 160.0 7.273 44 8.41
In [129]:
df.shape # (number of rows, number of columns) of the IT dataset
Out[129]:
(246, 28)
In [130]:
df.isna().sum()  # missing-value count for each column
Out[130]:
DISCRETE MATHEMATICS             0
DISCRETE MATHEMATICS.1           0
LOGIC DESIGN & COMP. ORG.        0
DATA STRUCTURES & ALGO.          0
OBJECT ORIENTED PROGRAMMING      0
BASIC OF COMPUTER NETWORK        0
LOGIC DESIGN COMP. ORG. LAB      0
DATA STRUCTURES & ALGO. LAB      0
OBJECT ORIENTED PROG. LAB        0
SOFT SKILL LAB                   0
AUDIT COURSE                     0
ENGINEERING MATHEMATICS-III      0
ENGINEERING MATHEMATICS-III.1    0
PROCESSOR ARCHITECTURE           0
DATABASE MANAGEMENT SYSTEM       0
COMPUTER GRAPHICS                0
SOFTWARE ENGINEERING             0
PROG. SKILL DEVELOPMENT LAB      0
DATABASE MGMT. SYSTEM LAB        0
COMPUTER GRAPHICS LAB            0
PROJECT BASED LEARNING           0
AUDIT COURSE.1                   0
SEM_3_GPA                        0
SEM_3_SGPA                       0
SEM_4_GPA                        0
SEM_4_SGPA                       0
TOTAL_CREDITS_EARNED             0
CGPA                             0
dtype: int64
In [131]:
# Sum the per-column missing counts into one grand total for the whole frame.
print("Total missing values: ", df.isna().sum().sum())
Total missing values:  0
In [132]:
df.info() # summary: index range, per-column non-null counts and dtypes, memory usage
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 246 entries, 0 to 245
Data columns (total 28 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   DISCRETE MATHEMATICS           246 non-null    int64  
 1   DISCRETE MATHEMATICS.1         246 non-null    int64  
 2   LOGIC DESIGN & COMP. ORG.      246 non-null    int64  
 3   DATA STRUCTURES & ALGO.        246 non-null    int64  
 4   OBJECT ORIENTED PROGRAMMING    246 non-null    int64  
 5   BASIC OF COMPUTER NETWORK      246 non-null    int64  
 6   LOGIC DESIGN COMP. ORG. LAB    246 non-null    int64  
 7   DATA STRUCTURES & ALGO. LAB    246 non-null    int64  
 8   OBJECT ORIENTED PROG. LAB      246 non-null    int64  
 9   SOFT SKILL LAB                 246 non-null    int64  
 10  AUDIT COURSE                   246 non-null    int64  
 11  ENGINEERING MATHEMATICS-III    246 non-null    int64  
 12  ENGINEERING MATHEMATICS-III.1  246 non-null    int64  
 13  PROCESSOR ARCHITECTURE         246 non-null    int64  
 14  DATABASE MANAGEMENT SYSTEM     246 non-null    int64  
 15  COMPUTER GRAPHICS              246 non-null    int64  
 16  SOFTWARE ENGINEERING           246 non-null    int64  
 17  PROG. SKILL DEVELOPMENT LAB    246 non-null    int64  
 18  DATABASE MGMT. SYSTEM LAB      246 non-null    int64  
 19  COMPUTER GRAPHICS LAB          246 non-null    int64  
 20  PROJECT BASED LEARNING         246 non-null    int64  
 21  AUDIT COURSE.1                 246 non-null    int64  
 22  SEM_3_GPA                      246 non-null    float64
 23  SEM_3_SGPA                     246 non-null    float64
 24  SEM_4_GPA                      246 non-null    float64
 25  SEM_4_SGPA                     246 non-null    float64
 26  TOTAL_CREDITS_EARNED           246 non-null    int64  
 27  CGPA                           246 non-null    float64
dtypes: float64(5), int64(23)
memory usage: 53.9 KB
In [133]:
df.dtypes # dtype of each column before the CGPA conversion below
Out[133]:
DISCRETE MATHEMATICS               int64
DISCRETE MATHEMATICS.1             int64
LOGIC DESIGN & COMP. ORG.          int64
DATA STRUCTURES & ALGO.            int64
OBJECT ORIENTED PROGRAMMING        int64
BASIC OF COMPUTER NETWORK          int64
LOGIC DESIGN COMP. ORG. LAB        int64
DATA STRUCTURES & ALGO. LAB        int64
OBJECT ORIENTED PROG. LAB          int64
SOFT SKILL LAB                     int64
AUDIT COURSE                       int64
ENGINEERING MATHEMATICS-III        int64
ENGINEERING MATHEMATICS-III.1      int64
PROCESSOR ARCHITECTURE             int64
DATABASE MANAGEMENT SYSTEM         int64
COMPUTER GRAPHICS                  int64
SOFTWARE ENGINEERING               int64
PROG. SKILL DEVELOPMENT LAB        int64
DATABASE MGMT. SYSTEM LAB          int64
COMPUTER GRAPHICS LAB              int64
PROJECT BASED LEARNING             int64
AUDIT COURSE.1                     int64
SEM_3_GPA                        float64
SEM_3_SGPA                       float64
SEM_4_GPA                        float64
SEM_4_SGPA                       float64
TOTAL_CREDITS_EARNED               int64
CGPA                             float64
dtype: object
In [134]:
# Coerce CGPA to a numeric dtype; entries that cannot be parsed become NaN (errors='coerce').
# NOTE(review): the dtype listing above already reports CGPA as float64 for this
# dataset, so this conversion is probably a no-op here - confirm before relying on it.
df['CGPA'] = pd.to_numeric(df['CGPA'], errors='coerce')
In [135]:
df.dtypes # dtype of each column after the CGPA conversion
Out[135]:
DISCRETE MATHEMATICS               int64
DISCRETE MATHEMATICS.1             int64
LOGIC DESIGN & COMP. ORG.          int64
DATA STRUCTURES & ALGO.            int64
OBJECT ORIENTED PROGRAMMING        int64
BASIC OF COMPUTER NETWORK          int64
LOGIC DESIGN COMP. ORG. LAB        int64
DATA STRUCTURES & ALGO. LAB        int64
OBJECT ORIENTED PROG. LAB          int64
SOFT SKILL LAB                     int64
AUDIT COURSE                       int64
ENGINEERING MATHEMATICS-III        int64
ENGINEERING MATHEMATICS-III.1      int64
PROCESSOR ARCHITECTURE             int64
DATABASE MANAGEMENT SYSTEM         int64
COMPUTER GRAPHICS                  int64
SOFTWARE ENGINEERING               int64
PROG. SKILL DEVELOPMENT LAB        int64
DATABASE MGMT. SYSTEM LAB          int64
COMPUTER GRAPHICS LAB              int64
PROJECT BASED LEARNING             int64
AUDIT COURSE.1                     int64
SEM_3_GPA                        float64
SEM_3_SGPA                       float64
SEM_4_GPA                        float64
SEM_4_SGPA                       float64
TOTAL_CREDITS_EARNED               int64
CGPA                             float64
dtype: object
In [136]:
# Narrow view holding just the two semester SGPAs and the overall CGPA.
df2 = df.filter(items=["SEM_3_SGPA", "SEM_4_SGPA", "CGPA"])
df2  # shown as the cell output
Out[136]:
SEM_3_SGPA SEM_4_SGPA CGPA
0 9.864 7.727 8.80
1 8.455 8.000 8.23
2 9.727 5.227 0.00
3 9.682 8.364 9.02
4 9.364 6.591 7.98
... ... ... ...
241 9.727 7.636 8.68
242 9.955 7.273 8.61
243 9.955 8.364 9.16
244 9.727 7.591 8.66
245 9.545 7.273 8.41

246 rows × 3 columns

Replacing null (NaN) values with 0

In [137]:
# Replace missing values (NaN) with 0 in the columns listed below.
# One list plus a loop replaces thirteen near-identical cells, so the set of
# affected columns is visible and editable in one place.
# The null-count output earlier in this section reports zero missing values for
# every column, so on the data as loaded this step leaves the frame unchanged.
zero_fill_columns = [
    'LOGIC DESIGN & COMP. ORG.',
    'LOGIC DESIGN COMP. ORG. LAB',
    'AUDIT COURSE',
    'ENGINEERING MATHEMATICS-III',
    'PROCESSOR ARCHITECTURE',
    'DATABASE MANAGEMENT SYSTEM',
    'COMPUTER GRAPHICS',
    'SOFTWARE ENGINEERING',
    'PROG. SKILL DEVELOPMENT LAB',
    'DATABASE MGMT. SYSTEM LAB',
    'COMPUTER GRAPHICS LAB',
    'AUDIT COURSE.1',
    'CGPA',
]

for column in zero_fill_columns:
    df[column] = df[column].fillna(0)
In [150]:
df.isna().sum()  # per-column missing-value count after the zero fill
Out[150]:
DISCRETE MATHEMATICS             0
DISCRETE MATHEMATICS.1           0
LOGIC DESIGN & COMP. ORG.        0
DATA STRUCTURES & ALGO.          0
OBJECT ORIENTED PROGRAMMING      0
BASIC OF COMPUTER NETWORK        0
LOGIC DESIGN COMP. ORG. LAB      0
DATA STRUCTURES & ALGO. LAB      0
OBJECT ORIENTED PROG. LAB        0
SOFT SKILL LAB                   0
AUDIT COURSE                     0
ENGINEERING MATHEMATICS-III      0
ENGINEERING MATHEMATICS-III.1    0
PROCESSOR ARCHITECTURE           0
DATABASE MANAGEMENT SYSTEM       0
COMPUTER GRAPHICS                0
SOFTWARE ENGINEERING             0
PROG. SKILL DEVELOPMENT LAB      0
DATABASE MGMT. SYSTEM LAB        0
COMPUTER GRAPHICS LAB            0
PROJECT BASED LEARNING           0
AUDIT COURSE.1                   0
SEM_3_GPA                        0
SEM_3_SGPA                       0
SEM_4_GPA                        0
SEM_4_SGPA                       0
TOTAL_CREDITS_EARNED             0
CGPA                             0
dtype: int64

Displaying Mean, Median and Standard Deviation of SEM 3 and SEM 4 SGPA

In [151]:
# Arithmetic mean of the SEM 3 SGPA column.
mean_sgpa_sem3 = df.loc[:, "SEM_3_SGPA"].mean()
mean_sgpa_sem3  # shown as the cell output
Out[151]:
9.439211382113822
In [152]:
# Median of the SEM 3 SGPA column.
median_sgpa_sem3 = df.loc[:, "SEM_3_SGPA"].median()
median_sgpa_sem3  # shown as the cell output
Out[152]:
9.545
In [153]:
# Sample standard deviation of SEM 3 SGPA (ddof=1 is the pandas default, spelled out here).
std_sgpa_sem3 = df["SEM_3_SGPA"].std(ddof=1)
std_sgpa_sem3  # shown as the cell output
Out[153]:
0.4374280629283696
In [154]:
# Arithmetic mean of the SEM 4 SGPA column.
mean_sgpa_sem4 = df.loc[:, "SEM_4_SGPA"].mean()
mean_sgpa_sem4  # shown as the cell output
Out[154]:
7.00459349593496
In [155]:
# Median of the SEM 4 SGPA column.
median_sgpa_sem4 = df.loc[:, "SEM_4_SGPA"].median()
median_sgpa_sem4  # shown as the cell output
Out[155]:
7.273
In [156]:
# Sample standard deviation of SEM 4 SGPA (ddof=1 is the pandas default, spelled out here).
std_sgpa_sem4 = df["SEM_4_SGPA"].std(ddof=1)
std_sgpa_sem4  # shown as the cell output
Out[156]:
1.3395733334583166

Calculating Internal and External Marks in Dataset

In [157]:
# Columns whose row-wise sum forms each semester's *_INTERNAL total.
sem3_internal_cols = [
    'DISCRETE MATHEMATICS.1',
    'LOGIC DESIGN COMP. ORG. LAB',
    'DATA STRUCTURES & ALGO. LAB',
    'OBJECT ORIENTED PROG. LAB',
    'AUDIT COURSE',
    'SOFT SKILL LAB',
]
sem4_internal_cols = [
    'ENGINEERING MATHEMATICS-III.1',
    'PROG. SKILL DEVELOPMENT LAB',
    'DATABASE MGMT. SYSTEM LAB',
    'COMPUTER GRAPHICS LAB',
    'PROJECT BASED LEARNING',
    'AUDIT COURSE.1',
]

# Per-student internal totals (sum across the listed columns of each row).
df["SEM_3_INTERNAL"] = df[sem3_internal_cols].sum(axis="columns")
df["SEM_4_INTERNAL"] = df[sem4_internal_cols].sum(axis="columns")

# The external share is whatever remains of the semester total (SEM_x_GPA)
# after the internal total is removed.
df["SEM_3_EXTERNAL"] = df["SEM_3_GPA"].sub(df["SEM_3_INTERNAL"])
df["SEM_4_EXTERNAL"] = df["SEM_4_GPA"].sub(df["SEM_4_INTERNAL"])

Normalising Dataset

In [158]:
# Rescale each listed column by dividing it by a fixed divisor.
# The divisors were previously scattered over one cell per column; they are
# collected here so they can be reviewed and changed in one place.
# (Presumably each divisor is the column's maximum attainable value - confirm.)
#
# Columns absent from the mapping (AUDIT COURSE, AUDIT COURSE.1, SEM_3_GPA,
# SEM_4_GPA) keep their raw values.
# NOTE(review): confirm that SEM_3_GPA / SEM_4_GPA are meant to stay unscaled.
#
# WARNING: this cell is not idempotent - running it a second time divides the
# already rescaled columns again. Reload the CSV before re-running.
IT_SCALE_DIVISORS = {
    # SEM 3 subjects
    'DISCRETE MATHEMATICS': 30,
    'DISCRETE MATHEMATICS.1': 10,
    'LOGIC DESIGN & COMP. ORG.': 30,
    'DATA STRUCTURES & ALGO.': 30,
    'OBJECT ORIENTED PROGRAMMING': 30,
    'BASIC OF COMPUTER NETWORK': 30,
    'LOGIC DESIGN COMP. ORG. LAB': 10,
    'DATA STRUCTURES & ALGO. LAB': 20,
    'OBJECT ORIENTED PROG. LAB': 20,
    'SOFT SKILL LAB': 10,
    # SEM 4 subjects
    'ENGINEERING MATHEMATICS-III': 30,
    'ENGINEERING MATHEMATICS-III.1': 10,
    'PROCESSOR ARCHITECTURE': 30,
    'DATABASE MANAGEMENT SYSTEM': 30,
    'COMPUTER GRAPHICS': 30,
    'SOFTWARE ENGINEERING': 30,
    'PROG. SKILL DEVELOPMENT LAB': 10,
    'DATABASE MGMT. SYSTEM LAB': 20,
    'COMPUTER GRAPHICS LAB': 10,
    'PROJECT BASED LEARNING': 20,
    # Aggregates
    'TOTAL_CREDITS_EARNED': 44,
    'SEM_3_SGPA': 10,
    'SEM_4_SGPA': 10,
    'CGPA': 10,
    # Derived totals
    'SEM_3_INTERNAL': 70,
    'SEM_4_INTERNAL': 70,
    'SEM_3_EXTERNAL': 150,
    'SEM_4_EXTERNAL': 150,
}

for column, divisor in IT_SCALE_DIVISORS.items():
    df[column] = df[column] / divisor

Normalised Dataset

In [183]:
# Lift the column display limit so the rendered frame shows every column.
pd.options.display.max_columns = None
df  # rescaled frame, shown as the cell output
Out[183]:
DISCRETE MATHEMATICS DISCRETE MATHEMATICS.1 LOGIC DESIGN & COMP. ORG. DATA STRUCTURES & ALGO. OBJECT ORIENTED PROGRAMMING BASIC OF COMPUTER NETWORK LOGIC DESIGN COMP. ORG. LAB DATA STRUCTURES & ALGO. LAB OBJECT ORIENTED PROG. LAB SOFT SKILL LAB AUDIT COURSE ENGINEERING MATHEMATICS-III ENGINEERING MATHEMATICS-III.1 PROCESSOR ARCHITECTURE DATABASE MANAGEMENT SYSTEM COMPUTER GRAPHICS SOFTWARE ENGINEERING PROG. SKILL DEVELOPMENT LAB DATABASE MGMT. SYSTEM LAB COMPUTER GRAPHICS LAB PROJECT BASED LEARNING AUDIT COURSE.1 SEM_3_GPA SEM_3_SGPA SEM_4_GPA SEM_4_SGPA TOTAL_CREDITS_EARNED CGPA SEM_3_INTERNAL SEM_4_INTERNAL SEM_3_EXTERNAL SEM_4_EXTERNAL
0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 0.9 0.9 0 0.7 1.0 0.7 0.7 0.8 0.7 0.7 0.9 0.9 0.9 0 217.0 0.9864 170.0 0.7727 1.000000 0.880 0.957143 0.885714 1.00 0.72
1 0.8 0.8 0.9 0.9 1.0 0.9 0.7 0.7 0.7 0.8 0 0.8 1.0 0.8 0.8 0.8 0.7 0.6 0.9 0.7 0.9 0 186.0 0.8455 176.0 0.8000 1.000000 0.823 0.728571 0.842857 0.90 0.78
2 1.0 1.0 1.0 1.0 1.0 1.0 0.9 0.9 0.9 0.9 0 0.8 1.0 0.0 0.0 0.7 0.4 0.7 0.8 0.7 0.9 0 214.0 0.9727 115.0 0.5227 0.863636 0.000 0.914286 0.828571 1.00 0.38
3 1.0 0.9 0.9 1.0 1.0 1.0 1.0 1.0 0.9 0.9 0 1.0 1.0 0.6 0.7 0.9 0.7 1.0 0.9 0.9 1.0 0 213.0 0.9682 184.0 0.8364 1.000000 0.902 0.942857 0.957143 0.98 0.78
4 0.9 1.0 1.0 0.9 1.0 1.0 0.9 0.9 0.8 0.9 0 0.7 1.0 0.6 0.4 0.7 0.5 0.7 0.9 0.5 0.9 0 206.0 0.9364 145.0 0.6591 1.000000 0.798 0.885714 0.828571 0.96 0.58
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
241 1.0 1.0 1.0 1.0 1.0 1.0 0.9 0.9 0.9 0.9 0 0.8 1.0 0.6 0.8 0.7 0.6 0.8 0.9 0.9 0.9 0 214.0 0.9727 168.0 0.7636 1.000000 0.868 0.914286 0.900000 1.00 0.70
242 1.0 1.0 1.0 1.0 1.0 1.0 0.9 1.0 1.0 1.0 0 0.5 1.0 0.7 0.7 0.7 0.5 0.8 1.0 0.9 1.0 0 219.0 0.9955 160.0 0.7273 1.000000 0.861 0.985714 0.957143 1.00 0.62
243 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 1.0 0.9 0 1.0 1.0 0.7 0.6 0.8 0.7 1.0 1.0 1.0 1.0 0 219.0 0.9955 184.0 0.8364 1.000000 0.916 0.985714 1.000000 1.00 0.76
244 1.0 1.0 1.0 1.0 1.0 1.0 0.9 1.0 0.8 0.9 0 0.8 1.0 0.6 0.8 0.8 0.5 0.7 0.9 0.9 0.9 0 214.0 0.9727 167.0 0.7591 1.000000 0.866 0.914286 0.885714 1.00 0.70
245 1.0 1.0 0.9 1.0 1.0 1.0 1.0 0.9 0.8 0.9 0 0.8 1.0 0.7 0.6 0.7 0.5 0.7 0.9 0.8 0.9 0 210.0 0.9545 160.0 0.7273 1.000000 0.841 0.900000 0.871429 0.98 0.66

246 rows × 32 columns

Saving Normalized Dataset

In [184]:
# Keep a handle on the IT frame under its own name.
# `df_it` is the same object as `df`, not a copy.
df_it = df
# to_csv is called with its defaults, so the row index is written as well.
df_it.to_csv("IT_Normalized.csv")

END of Preprocessing