# 1. Create a pandas Series from a dictionary of values and an ndarray.

# Create a pandas Series from a dictionary of values and from an ndarray.

import pandas as pd
import numpy as np
# Series built from a NumPy ndarray; no index is passed, so the default
# integer index is used.
s = pd.Series(data=np.array([1, 3, 4, 7, 8, 8, 9]))
print(s)

# Series built from a dictionary: the keys become the index labels and the
# values become the data.
dictionary = dict(X=10, Y=20, Z=30)
series = pd.Series(data=dictionary)
print(series)

# 2. Write a Pandas program to perform arithmetic operations on two Pandas Series.

# Write a Pandas program to perform arithmetic operations on two Pandas Series.
import pandas as pd
# Two equally sized Series; arithmetic between them is element-wise.
ds1 = pd.Series([3, 6, 9, 12, 15])
ds2 = pd.Series([2, 4, 6, 8, 10])

# Each result is printed under its own label (the addition label was
# missing, which left the first output unexplained).
print("Add two Series:")
ds = ds1 + ds2
print(ds)
print("Subtract two Series:")
ds = ds1 - ds2
print(ds)
print("Multiply two Series:")
ds = ds1 * ds2
print(ds)
print("Divide Series1 by Series2:")
ds = ds1 / ds2
print(ds)

# 3. Write a Pandas program to add some data to an existing Series.

# Write a Pandas program to add some data to an existing Series.

import pandas as pd
s = pd.Series(['S101', 'Amjad', 'C.Sc.', 'XII – A1', '450'])
print("Original Data Series:")
print(s)
print("\nData Series after adding some data:")
# Series.append() was removed in pandas 2.0; pd.concat() is the replacement.
# As with the old append(), the index labels of both inputs are kept, so the
# two new entries carry the labels 0 and 1.
new_s = pd.concat([s, pd.Series(['90.0', 'PASS'])])
print(new_s)

# 4. Write a Pandas program to select the rows where the percentage greater than 70.

# Write a Pandas program to select the rows where the percentage greater than 70.
import pandas as pd
import numpy as np

# Exam records: student name, percentage (NaN where unknown) and qualification flag.
exam_data = {
    'name': ['Aman', 'Kamal', 'Amjad', 'Rohan', 'Amit', 'Sumit', 'Matthew', 'Kartik', 'Kavita', 'Pooja'],
    'perc': [79.5, 29, 90.5, np.nan, 32, 65, 56, np.nan, 29, 89],
    'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes'],
}
# One unique label per row. The fourth label was a duplicate 'B', which made
# the index ambiguous; it is 'D' in the A..J sequence.
labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']

df = pd.DataFrame(exam_data, index=labels)
print("Number of student whoes percentage more than 70:")
# Comparisons with NaN evaluate to False, so rows without a percentage are dropped.
print(df[df['perc'] > 70])

# 5. Write a Pandas program to select the rows the percentage is between 70 and 90 (inclusive)

# Write a Pandas program to select the rows the percentage is between 70 and 90 (inclusive)
import pandas as pd
import numpy as np

# Exam records: student name, percentage (NaN where unknown) and qualification flag.
exam_data = {
    'name': ['Aman', 'Kamal', 'Amjad', 'Rohan', 'Amit', 'Sumit', 'Matthew', 'Kartik', 'Kavita', 'Pooja'],
    'perc': [79.5, 29, 90.5, np.nan, 32, 65, 56, np.nan, 29, 89],
    'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes'],
}
# One unique label per row. The fourth label was a duplicate 'B', which made
# the index ambiguous; it is 'D' in the A..J sequence.
labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']

df = pd.DataFrame(exam_data, index=labels)
# The message previously described the "more than 70" filter of the previous
# exercise; it now matches the filter that is actually applied.
print("Students whose percentage is between 70 and 90 (inclusive):")
# Series.between() includes both bounds by default.
print(df[df['perc'].between(70, 90)])

# 6. Write a Pandas program to change the percentage in a given row by the user.

# Write a Pandas program to change the percentage in given row by user.
import pandas as pd
import numpy as np

# Exam records: student name, percentage (NaN where unknown) and qualification flag.
exam_dic = {
    'name': ['Aman', 'Kamal', 'Amjad', 'Rohan', 'Amit', 'Sumit', 'Matthew', 'Kartik', 'Kavita', 'Pooja'],
    'perc': [79.5, 29, 90.5, np.nan, 32, 65, 56, np.nan, 29, 89],
    'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes'],
}
# Labels must be unique here: with the former duplicate 'B', entering 'B'
# changed two rows at once and row 'D' could not be addressed at all.
labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J']

df = pd.DataFrame(exam_dic, index=labels)
print("\nOriginal data frame:")
print(df)
ch = input("Enter the index of row : ")
per = float(input("Enter percentage to be changed: "))
if ch in df.index:
    print('\nChange the percentage in row '+ch+ ' to',per)
    df.loc[ch, 'perc'] = per
    print(df)
else:
    # Assigning through .loc with an unknown label would silently append a
    # new row instead of changing an existing one, so reject it explicitly.
    print('\nRow ' + ch + ' does not exist; valid rows are', ', '.join(df.index))

# 7. Write a Pandas program to join the two given dataframes along rows and assign all data.

# Write a Pandas program to join the two given dataframes along rows and assign all data.
import pandas as pd
import numpy as np

# First batch of exam records (ten students, two without a percentage).
exam_dic1 = {
    'name': ['Aman', 'Kamal', 'Amjad', 'Rohan', 'Amit', 'Sumit', 'Matthew', 'Kartik', 'Kavita', 'Pooja'],
    'perc': [79.5, 29, 90.5, np.nan, 32, 65, 56, np.nan, 29, 89],
    'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes'],
}
# Second batch of exam records (five students) with the same three columns.
exam_dic2 = {
    'name': ['Parveen', 'Ahil', 'Ashaz', 'Shifin', 'Hanash'],
    'perc': [89.5, 92, 90.5, 91.5, 90],
    'qualify': ['yes', 'yes', 'yes', 'yes', 'yes'],
}

exam_data1 = pd.DataFrame(data=exam_dic1)
exam_data2 = pd.DataFrame(data=exam_dic2)

print("Original DataFrames:")
print(exam_data1)
print("-------------------------------------")
print(exam_data2)

print("\nJoin the said two dataframes along rows:")
# Stack the second frame underneath the first; each frame keeps its own
# row labels, so the labels 0..4 occur twice in the result.
result_data = pd.concat([exam_data1, exam_data2], axis=0)
print(result_data)

# 8. Write a Pandas program to join the two given dataframes along columns and assign all data.

# Write a Pandas program to join the two given dataframes along columns and assign all data.

import pandas as pd
import numpy as np

# First batch of exam records (ten students, two without a percentage).
exam_dic1 = {
    'name': ['Aman', 'Kamal', 'Amjad', 'Rohan', 'Amit', 'Sumit', 'Matthew', 'Kartik', 'Kavita', 'Pooja'],
    'perc': [79.5, 29, 90.5, np.nan, 32, 65, 56, np.nan, 29, 89],
    'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes'],
}

exam_data1 = pd.DataFrame(exam_dic1)

# Second batch of exam records (five students) with the same three columns.
exam_dic2 = {
    'name': ['Parveen', 'Ahil', 'Ashaz', 'Shifin', 'Hanash'],
    'perc': [89.5, 92, 90.5, 91.5, 90],
    'qualify': ['yes', 'yes', 'yes', 'yes', 'yes'],
}

exam_data2 = pd.DataFrame(exam_dic2)

print("Original DataFrames:")
print(exam_data1)
print("-------------------------------------")
print(exam_data2)
# The message used to say "along rows", copied from the previous exercise.
print("\nJoin the said two dataframes along columns:")
# axis=1 places the frames side by side, aligned on the row labels; rows 5..9
# have no partner in the shorter frame and are filled with NaN there.
result_data = pd.concat([exam_data1, exam_data2], axis=1)
print(result_data)

# 9. Write a Pandas program to append a list of dictionaries or series to an existing DataFrame and display the combined data.

# Write a Pandas program to append a list of dictionaries or a Series to an existing DataFrame and display the combined data.
import pandas as pd
import numpy as np

# Existing exam records that the new rows are added to.
exam_dic1 = {
    'name': ['Aman', 'Kamal', 'Amjad', 'Rohan', 'Amit', 'Sumit', 'Matthew', 'Kartik', 'Kavita', 'Pooja'],
    'perc': [79.5, 29, 90.5, np.nan, 32, 65, 56, np.nan, 29, 89],
    'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes'],
}

exam_data1 = pd.DataFrame(exam_dic1)

# One new record as a Series whose index matches the DataFrame columns.
s = pd.Series(['Sukhvir', 54, 'yes'], index=['name', 'perc', 'qualify'])

# Two more records as a list of dictionaries.
dicts = [{'name': 'Krish', 'perc': 45, 'qualify': 'yes'},
         {'name': 'Kumar', 'perc': 67, 'qualify': 'yes'}]

print("Original DataFrames:")
print(exam_data1)
print("\nDictionary:")
print(s)
# DataFrame.append() was removed in pandas 2.0, so the new rows are turned
# into small DataFrames and joined with pd.concat(). Going through a dict
# lets pandas infer a numeric dtype for 'perc' instead of object.
# ignore_index=True renumbers the rows 0..n-1, as the old append() call did.
combined_data = pd.concat([exam_data1, pd.DataFrame([s.to_dict()])],
                          ignore_index=True, sort=False)
combined_info = pd.concat([combined_data, pd.DataFrame(dicts)],
                          ignore_index=True, sort=False)
print("\nCombined Data:")
# Print Combined Data/info
print(combined_info)

# 10. Program to select or filter rows from a DataFrame based on values in columns in pandas.( Use of Relational and Logical Operators)

# Program to select or filter rows from a DataFrame based on values in columns in pandas.( Use of Relational and Logical Operators)
import pandas as pd
import numpy as np

# Exam records used to demonstrate row filtering with boolean masks.
exam_dic1 = {
    'name': ['Aman', 'Kamal', 'Amjad', 'Rohan', 'Amit', 'Sumit', 'Matthew', 'Kartik', 'Kavita', 'Pooja'],
    'perc': [79.5, 29, 90.5, np.nan, 32, 65, 56, np.nan, 29, 89],
    'qualify': ['yes', 'no', 'yes', 'no', 'no', 'yes', 'yes', 'no', 'no', 'yes'],
}
exam_data1 = pd.DataFrame(data=exam_dic1)

print("Original DataFrames:")
print(exam_data1)

# Equality: rows whose name is exactly 'Rohan'.
print("\nUse == operator\n")
print(exam_data1.loc[exam_data1['name'].eq('Rohan')])

# Less-than: rows with a percentage below 40 (NaN never matches).
print("\nUse < operator\n")
print(exam_data1.loc[exam_data1['perc'].lt(40)])

# Inequality: every row that is not marked 'no'.
print("\n Use != operator\n")
print(exam_data1.loc[exam_data1['qualify'].ne('no')])

# Logical AND of two masks: not qualified and below 40 percent.
print("\n Multiple Conditions\n")
print(exam_data1.loc[exam_data1['qualify'].ne('yes') & exam_data1['perc'].lt(40)])

# 11. Filter out rows based on different criteria such as duplicate rows

# Filter out rows based on different criteria such as duplicate rows

import pandas as pd
# Sales records in which some (Name, Sales) pairs occur more than once.
data = {
    'Name': ['Aman', 'Rohit', 'Deepika', 'Aman', 'Deepika', 'Sohit', 'Geeta'],
    'Sales': [8500, 4500, 9200, 8500, 9200, 9600, 8400],
}
sales = pd.DataFrame(data=data)

# keep=False flags every member of a duplicate group, not only the repeats,
# so each copy of a duplicated row is listed.
duplicated = sales.loc[sales.duplicated(keep=False)]
print("duplicate Row:\n", duplicated)

# 12. Importing and exporting data between pandas and CSV file. # To create and open a data frame using ‘Student_result.csv’ file using Pandas. # To display row labels, column labels data types of each  column and the dimensions # To display the shape (number of rows and columns) of the CSV file.

# Importing and exporting data between pandas and CSV file.
# To create and open a data frame using ‘Student_result.csv’ file using Pandas.
# To display row labels, column labels data types of each  column and the dimensions
# To display the shape (number of rows and columns) of the CSV file.

import pandas as pd
import csv

# Display Name of Columns
# Load the CSV into a DataFrame. This statement was missing, so `df` still
# referred to a frame built by an earlier exercise.
# NOTE(review): file name taken from the read_csv call later in this file
# ("student_result.csv"); the headings spell it 'Student_result.csv' - confirm
# the exact name on case-sensitive file systems.
df = pd.read_csv("student_result.csv")

# Display row labels
print(df.index)

# Display Name of Columns
print(df.columns)

# Display no of rows and column
print(df.shape)

# Display Column Names and their types.
# info() prints its report itself and returns None, so wrapping it in print()
# only added a stray "None" line.
df.info()

# 13. Read the ‘Student_result.csv’ to create a data frame and do the following  operation: # To display Adm_No, Gender and Percentage from ‘student_result.csv’ file. # To display the first 5 and last 5 records from ‘student_result.csv’ file.

# Read the ‘Student_result.csv’ to create a data frame and do the following  operation:
# To display Adm_No, Gender and Percentage from ‘student_result.csv’ file.
# To display the first 5 and last 5 records from ‘student_result.csv’ file.

import pandas as pd
import csv

#To display Adm_No, Gender and Percentage from ‘student_result.csv’ file.

# Read only the three requested columns. Previously the whole of a frame from
# an earlier exercise was printed under this heading.
# NOTE(review): 'GENDER' and 'PERCENTAGE' are used elsewhere in this file;
# 'ADM_NO' is an assumed spelling of the admission-number column - confirm
# against the CSV header.
df = pd.read_csv("student_result.csv", usecols=['ADM_NO', 'GENDER', 'PERCENTAGE'])
print("To display Adm_No, Gender and Percentage from ‘student_result.csv’ file.")
print(df)

#To display first 5 and last 5 records from ‘student_result.csv’ file.
# `df1` was never created here, and only tail() was printed.
df1 = pd.read_csv("student_result.csv")
print(df1.head())
print(df1.tail())

# 14. Read the ‘Student_result.csv’ to create a data frame and do the following  operation: # To display Student_result file with new column names. # To modify the Percentage of student below 40 with NaN value in dataframe.

# Read the ‘Student_result.csv’ to create a data frame and do the following  operation:
# To display Student_result file with new column names.
# To modify the Percentage of student below 40 with NaN value in dataframe.

import pandas as pd
import numpy as np
import csv

# Show the file as stored, with its original header row.
df = pd.read_csv("student_result.csv")
print(df)

#To display Student_result file with new column names.
# skiprows=1 drops the original header line so that `names` replaces it.
# NOTE(review): the start of this list was lost from the source; only
# 'Maths' .. 'Perc' survived. The first five names are reconstructed on the
# assumption that the file has 11 columns in the order admission no, gender,
# name, English, Hindi, ... - confirm against the CSV.
df1 = pd.read_csv("student_result.csv", skiprows=1,
                  names=['Adno', 'Sex', 'Name', 'Eng', 'Hin',
                         'Maths', 'Sc.', 'SSt', 'San', 'IT', 'Perc'])

print("To display Student_result file with new column names")
print(df1)

# To modify the Percentage of student below 40 with NaN value.
# A fresh copy with the original column names; `df2` was never created before.
df2 = pd.read_csv("student_result.csv")
print(df2)

print("To modify the Percentage of student below 40 with NaN value.")
# Boolean row mask plus column label: only the PERCENTAGE cells below 40 change.
df2.loc[df2['PERCENTAGE'] < 40, 'PERCENTAGE'] = np.nan
print(df2)

# 15. Read the ‘Student_result.csv’ to create a data frame and do the following  operation: # To create a duplicate file for ‘student_result.csv’ containing Adm_No, Name and Percentage. # Write the statement in Pandas to find the highest percentage and also print the student’s name and percentage.

# Read the ‘Student_result.csv’ to create a data frame and do the following  operation:
# To create a duplicate file for ‘student_result.csv’ containing Adm_No, Name and Percentage.
# Write the statement in Pandas to find the highest percentage and also print the student’s name and percentage.

import pandas as pd
import numpy as np
import csv

# To create a duplicate file for ‘student_result.csv’ containing Adm_No, Name and Percentage.
# Display Copied Dataframe
# Write the three requested columns to a second CSV and read it back.
# These statements were missing, so `df2` came from an earlier exercise.
# NOTE(review): 'ADM_NO' and the output file name 'copyStudent_result.csv'
# are assumptions - confirm the column spelling and the desired file name.
df = pd.read_csv("student_result.csv")
df.to_csv('copyStudent_result.csv',
          columns=['ADM_NO', "STUDENT'S_NAME", 'PERCENTAGE'],
          index=False)  # index=False avoids an extra unnamed index column
df2 = pd.read_csv('copyStudent_result.csv')
# Display Copied Dataframe
print(df2)

# find the highest percentage and also print the student’s name and percentage.
# The comparison used to stand alone on its own line as a list literal, so it
# had no effect and every student was printed. It is now applied as a row mask.
df1 = pd.read_csv("student_result.csv")
df1 = df1.loc[df1['PERCENTAGE'] == df1['PERCENTAGE'].max(),
              ["STUDENT'S_NAME", 'PERCENTAGE']]
print(df1)

# 16. Importing and exporting data between pandas and MySQL database

# Importing and exporting data between pandas and MySQL database

import pymysql
import pandas as pd
import mysql.connector
from sqlalchemy import types, create_engine

# Create dataframe
# Create dataframe
dic = {
    'EMPNO': [7369, 7499, 7566, 7654, 7698, 7782, 7788, 7839, 7844, 7900, 7902, 7934],
    # NOTE(review): the 'ENAME' key and its first nine values were lost from
    # the source; only 'BLAKE', 'MARTIN', 'TURNER' survived. The first nine
    # names below are placeholders so that all columns have 12 entries -
    # replace them with the real data.
    'ENAME': ['SMITH', 'ALLEN', 'JONES', 'CLARK', 'SCOTT', 'KING', 'JAMES', 'FORD', 'MILLER',
              'BLAKE', 'MARTIN', 'TURNER'],
    'JOB': ['CLERK', 'CLERK', 'ANALYST', 'MANAGER', 'MANAGER', 'PRESIDENT', 'ANALYST',
            'CLERK', 'MANAGER', 'ANALYST', 'SALESMAN', 'CLERK'],
    'MGR': [7876, 7876, 7782, 7900, 7900, 7900, 7782, 7876, 7900, 7782, 7900, 7876],
    'HIREDATE': ['2005/02/18', '2005/01/04', '2001/05/18', '2003/04/19', '2001/07/02',
                 '2006/09/21', '2007/03/13', '2005/03/06', '2007/01/12', '2009/07/19', '2009/01/05',
                 '2004/11/30'],
    'SAL': [11400, 19200, 29400, 60000, 15000, 95700, 13200, 36000, 36000, 34200, 15000, 18000],
    'COMM': [4000, 5000, 5000, 4000, 2500, 4000, 2500, 3000, 3000, 2500, 2000, 6000],
    'DEPTT': [20, 30, 20, 30, 30, 10, 20, 10, 30, 30, 20, 10],
}

data = pd.DataFrame(dic)
print('Our DataFrame is:\n',data)

tableName="employeedata"

# create sqlalchemy engine
# NOTE(review): connects as root with an empty password - acceptable only for
# a local practice database.
sqlEngine = create_engine("mysql+pymysql://root:@localhost/Company")
dbConnection = sqlEngine.connect()

try:
    # Exporting dataframe to SQL. if_exists='fail' makes to_sql raise a
    # ValueError when the table is already present instead of overwriting it.
    frame = data.to_sql(tableName, dbConnection, if_exists='fail')
except ValueError as vx:
    print(vx)
except Exception as ex:
    # Any other failure (connection, driver, SQL) is reported, not re-raised.
    print(ex)
else:
    print("Table %s created successfully.\n"%tableName)
finally:
    # Runs on success and on failure, so the connection is always released.
    dbConnection.close()

# – Read a MySQL Database Table and write into a Pandas DataFrame:

# No default schema is selected here, so the query names Company.employeedata in full.
sqlEngine = create_engine('mysql+pymysql://root:@127.0.0.1')
dbConnection = sqlEngine.connect()

try:
    dframe = pd.read_sql("select * from Company.employeedata", dbConnection)

    print("After importing data from MySql:\n")
    print(dframe)
finally:
    # Release the connection even if the query fails.
    dbConnection.close()

# 17. Find the sum of each column, or find the column with the lowest mean

# Find the sum of each column, or find the column with the lowest mean
import pandas as pd
# Pass percentage per subject (columns) and year (rows).
Pass_Perc = {
    'Phy': {'2017': 95.4, '2018': 96.4, '2019': 99.2, '2020': 97.4},
    'Che': {'2017': 96.5, '2018': 97.4, '2019': 100, '2020': 99.2},
    'Maths': {'2017': 90.2, '2018': 92.6, '2019': 97.4, '2020': 98.0},
    'Eng': {'2017': 99.2, '2018': 100, '2019': 100, '2020': 100},
    'IP': {'2017': 95.6, '2018': 100, '2019': 100, '2020': 100},
}

# A dict of dicts: outer keys become columns, inner keys become the row index.
df = pd.DataFrame(data=Pass_Perc)
print(df)

# Sum of every column (axis=0 aggregates down the rows).
print()
print('Column wise sum in datframe is :')
print(df.sum(axis=0))

# Mean of every column, rounded to one decimal place.
print()
print('Column wise mean value are:')
print(df.mean(axis=0).round(1))

# Label of the column whose mean is the smallest.
print()
print('Column with minimum mean value is:')
print(df.mean(axis=0).idxmin())

# 18. Locate the 3 largest values in a data frame.

# Locate the 3 largest values in a data frame.
import pandas as pd
# NOTE(review): the opening of this dict (the `data={'Name': [...]` line) was
# lost from the source, leaving a syntax error. The names are reconstructed
# from the sales data of the duplicate-rows exercise, which has the same
# seven-row shape - confirm.
data = {'Name': ['Aman', 'Rohit', 'Deepika', 'Aman', 'Deepika', 'Sohit', 'Geeta'],
        'Sales': [8500, 4500, 9300, 8600, 9200, 9600, 8400]}
sales = pd.DataFrame(data)
# Find the rows holding the 3 largest values of the Sales column
print(sales.nlargest(3, ['Sales']))

# 19. Subtract the mean of a row from each element of the row in a Data Frame

# Subtract the mean of a row from each element of the row in a Data Frame
import pandas as pd
# Pass percentage per subject (columns) and year (rows).
Pass_Perc = {
    'Phy': {'2017': 95.4, '2018': 96.4, '2019': 99.2, '2020': 97.4},
    'Che': {'2017': 96.5, '2018': 97.4, '2019': 100, '2020': 99.2},
    'Maths': {'2017': 90.2, '2018': 92.6, '2019': 97.4, '2020': 98.0},
    'Eng': {'2017': 99.2, '2018': 100, '2019': 100, '2020': 100},
    'IP': {'2017': 95.6, '2018': 100, '2019': 100, '2020': 100},
}

df = pd.DataFrame(Pass_Perc)
print(df)
print()

# axis=1 aggregates across the columns, giving one mean per row (year).
print('Mean of each row is:')
print(df.mean(axis=1))
print()
# The message used a backslash continuation inside the string literal, which
# glued "of" and "each" together (and carried a typo); it is now built from
# adjacent literals so the words stay separated.
print('DataFrame after subtracting mean value of '
      'each row from each element of that row is:')
# axis=0 aligns the per-row means with the row index before subtracting.
print(df.sub(df.mean(axis=1), axis=0))

# 20. Replace all negative values in a data frame with a 0.

# Replace all negative values in a data frame with a 0.

import pandas as pd

# Two sales columns that contain a few negative entries.
data = {
    'sales1': [10, 20, -4, 5, -1, 15],
    'sales2': [20, 15, 10, -1, 12, -2],
}
df = pd.DataFrame(data=data)

print("Data Frame")
print(df)

print('Display DataFrame after replacing every negative value with 0')

# Boolean-mask assignment: every cell where the mask is True is set to 0 in place.
df[df.lt(0)] = 0
print(df)

# 21. Replace all missing values in a data frame with a 999

# Replace all missing values in a data frame with a 999
import pandas as pd
import numpy as np
# Student records with missing values (NaN) scattered over every column.
Srec = {
    'sid': [101, 102, 103, 104, np.nan, 106, 107, 108, 109, 110],
    'smarks': [98, 67, np.nan, 56, 38, 98, 67, np.nan, 56, np.nan],
    'remark': ['P', 'P', 'P', 'F', np.nan, 'P', 'P', 'F', 'P', 'P'],
    'mobile': [9990009991, 9990009992, 9990009993, np.nan, 9990009995, np.nan,
               9990009997, 9990009998, np.nan, 9999010000],
}
# Convert the dictionary into DataFrame
df = pd.DataFrame(data=Srec)
print("\n- Dataframe Before Replacing NaN with 999-\n")
print(df)

# Replace every missing value with 999; fillna() returns a new frame, which is
# bound back to `df`.
print("\n-After Replacing missing value with 999-\n")
df = df.fillna(value=999)
print(df)

# 22. Given a Series, print all the elements that are above the 75th percentile.

# Given a Series, print all the elements that are above the 75th percentile.

import pandas as pd
import numpy as np
# Sample values, given in ascending order.
s = pd.Series(data=np.array([2, 4, 5, 10, 18, 20, 25]))
print(s)

# The 0.75 quantile is the 75th percentile of the Series.
res = s.quantile(0.75)
print()
print('75th Percentile of the series is::')
print(res)

# Keep only the elements strictly greater than that threshold.
print()
print('The elements that above the 75th percentile:')
print(s.loc[s.gt(res)])

# 23. Create a Data Frame quarterly sales where each row contains the item category, item name, and expenditure. Group the rows by the category and print the total expenditure per category.

# Create a Data Frame quarterly sales where each row contains the item category,
#item name, and expenditure. Group the rows by the category and print the total
#expenditure per category.

import pandas as pd

# initialize list of lists
# initialize list of lists: one [category, item name, expenditure] row per item
data = [['CAR', 'Maruti', 1000000], ['AC', 'Hitachi', 55000], ['AIRCOLLER', 'Bajaj', 12000],
        ['WASHING MACHINE', 'LG', 15000], ['CAR', 'Ford', 7000000], ['AC', 'SAMSUNG', 45000],
        ['AIRCOLLER', 'Symphony', 20000], ['WASHING MACHINE', 'Wirlpool', 25000]]

Col = ['itemcat', 'itemname', 'expenditure']
# Create the pandas DataFrame

qrtsales = pd.DataFrame(data, columns=Col)

# print dataframe.
print(qrtsales)

# Group the rows by category.
qs = qrtsales.groupby('itemcat')
print('Result after Filtering Dataframe')
# Selecting columns with a bare tuple (qs['itemcat', 'expenditure']) raises in
# pandas 2.x. The category is already the group key and becomes the index of
# the result, so only the expenditure column needs to be summed.
print(qs['expenditure'].sum())

# 24. Create a data frame based on e-commerce data and generate descriptive statistics (mean, median, mode, quartile, and variance)

# Create a data frame based on ecommerce data and generate descriptive statistics # (mean, median,mode, quartile, and variance)

import pandas as pd
sales = {'InvoiceNo': [1001,1002,1903,1004,1085,1006,1007],
'ProductName': ['LCD','AC','Deodrant','leans','Books','Shoes','Jacket'],
'Quantity': [2,1,2,1,2,1,1],
'Price':[65000,55000,500,3000,958,3000,2200]}
df=pd.DataFrame(sales)
print(df)
print("Mean price of Item:", df['Price']. mean ().round (2))
print("Median price of Item:", df['Price']. median ().round (2))
print("Mode of price:\n", df[['Price']]. mode ())
print("Quartile of price:\n",df[['Price']].quantile([.1,.25,.5,.75],axis=0))
print("Variance of Price:\n",df[['Price']].var())

# 25. Given the school result data, analyses the performance of the students on different parameters, e.g subject wise or class wise.

# Given the school result data, analyses the performance of the students on #different parameters, e.g subject wise  or class wise.
# x-axis is shows the subject and y -axis
# shows the markers in each subject

# import pandas and matplotlib
import pandas as pd
import matplotlib.pyplot as plt

# Simple Line Chart with setting of Label of X and Y axis,
# title for chart line and color of line
# Data for the chart: one mark per subject.
subject = ['Physic', 'Chemistry', 'Mathematics', 'Biology', 'Computer']
marks = [80, 75, 70, 78, 82]

# Red line ('r') with a star marker at every data point.
plt.plot(subject, marks, 'r', marker='*')

# Label of the x-axis (subjects) and of the y-axis (marks).
plt.xlabel('SUBJECT')
plt.ylabel('MARKS')

# Title of the line chart.
plt.title('Marks Scored')

plt.show()

# 26. Write a program to plot a bar chart in python to display the result of a school for five consecutive years.

#Write a program to plot a bar chart in python to display the result of a school for five consecutive years.

import matplotlib.pyplot as pl

# Five consecutive years and the pass percentage reached in each of them.
year = ['2015', '2016', '2017', '2018', '2019']
p = [98.50, 70.25, 55.20, 90.5, 61.50]
# One colour code per bar, in the same order as the years.
j = ['b', 'g', 'r', 'm', 'c']

# Draw one bar per year, 0.2 wide, using the colours above.
pl.bar(year, p, width=0.2, color=j)

# Axis labels: years on the x-axis, pass percentage on the y-axis.
pl.xlabel("year")
pl.ylabel("Pass%")

# Display the bar chart.
pl.show()

# 27. For the Data frames created above, analyze, and plot appropriate charts with title and legend. • Number of Students against Scores in all the 7 subjects • Show the Highest score of each subject

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import csv

#Number of Students against Scores in all the 7 subjects

# Load the student results; without this `df` is whatever frame an earlier
# exercise left behind, which has none of the subject columns.
# NOTE(review): file name taken from the read_csv call earlier in this file - confirm.
df = pd.read_csv("student_result.csv")

# Subject columns used for both charts, in plotting order.
subjects = ['ENG', 'HINDI', 'MATHS', 'SCIENCE', 'SSC', 'SANSK', 'CA']

# Histogram with one colour-coded data set per subject.
plt.hist([df[subject] for subject in subjects],
         color=['red', 'yellow', 'blue', 'green', 'orange', 'black', 'pink'])
plt.title('Number of Students against Scores')
plt.xlabel('Score')
plt.ylabel('Number of Students')
plt.legend(['English', 'Hindi', 'Maths', 'Science', 'S.Sc.', 'Sanskrit', 'CA'])
plt.show()

# Show the Highest score of each subject.
# The tick labels now reuse the column names (they were misspelled 'ENGG' and
# 'HINNDI'), and title and axis label say "Highest" instead of "Average",
# which had been copied from the average-score chart.
y = subjects
width = [df[subject].max() for subject in subjects]

plt.figure(figsize=(12, 2))
plt.barh(y=y, width=width)
plt.title('Highest Scores')
plt.xlabel('Highest Score')
plt.ylabel('Subjects')
for i, v in enumerate(width):
    # Write each value just right of the end of its bar.
    plt.text(v, i, " " + str(round(v, 2)), color='blue', va='center', fontweight='bold')
plt.show()

# 28. For the Data frames created above, analyze, and plot appropriate charts with title and legend. • Show the Average score of each subject

# For the Data frames created above, analyze, and plot appropriate charts with title and legend.
# • Show the Average score of each subject

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import csv

# Show the Average score of each subject
# Load the student results; without this `df` is whatever frame an earlier
# exercise left behind, which has none of the subject columns.
# NOTE(review): file name taken from the read_csv call earlier in this file - confirm.
df = pd.read_csv("student_result.csv")

# Show the Average score of each subject
# The tick labels reuse the column names (they were misspelled 'ENGG' and 'HINNDI').
y = ['ENG', 'HINDI', 'MATHS', 'SCIENCE', 'SSC', 'SANSK', 'CA']
width = [df[subject].mean() for subject in y]

plt.figure(figsize=(12, 2))
plt.barh(y=y, width=width)
plt.title('Average Scores')
plt.xlabel('Average Score')
plt.ylabel('Subjects')
for i, v in enumerate(width):
    # Write each rounded mean just right of the end of its bar.
    plt.text(v, i, " " + str(round(v, 2)), color='blue', va='center', fontweight='bold')
plt.show()

# 29. For the Data frames created above, analyze, and plot appropriate charts with title and legend. • Number of Females and Males • Average Percentage of Females and Males

# For the Data frames created above, analyze, and plot appropriate charts
# with title and legend.
# • Number of Females and Males
# • Average Percentage of Females and Males

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import csv

# Analyzing Scores based on Gender

# Load the student results; without this `df` is whatever frame an earlier
# exercise left behind, which has no GENDER or PERCENTAGE column.
# NOTE(review): file name taken from the read_csv call earlier in this file - confirm.
df = pd.read_csv("student_result.csv")

# Group the rows by the GENDER column; both charts below aggregate per group.
df_gender = df.groupby('GENDER')

#Number of Females and Males
# count() yields one value per gender; its keys are the gender labels.
y = df_gender['GENDER'].count().keys()
width = df_gender['GENDER'].count()
plt.figure(figsize=(12, 2))
plt.barh(y=y, width=width)
plt.title('No. of Females and Males')
plt.xlabel('Count')
plt.ylabel('Gender')
for i, v in enumerate(width):
    # Write each count just right of the end of its bar.
    plt.text(v, i, " " + str(v), color='blue', va='center', fontweight='bold')
plt.show()

#Average Percentage of Females and Males
y = df_gender['PERCENTAGE'].mean().keys()
width = df_gender['PERCENTAGE'].mean()
plt.figure(figsize=(12, 2))
plt.barh(y=y, width=width)
plt.title('Av Percentage of Female and Males')
plt.xlabel('Av. total Percentage ')
plt.ylabel('Gender')
for i, v in enumerate(width):
    # Write each rounded mean just right of the end of its bar.
    plt.text(v, i, " " + str(round(v, 2)), color='blue', va='center', fontweight='bold')
plt.show()