%matplotlib inline
import pandas as pd

df = pd.read_csv('data/silicon_valley_summary.csv')

# Select the rows for male managers, excluding rows whose race_ethnicity is 'All'.
is_male = df['gender'] == 'Male'
is_manager = df['job_category'] == 'Managers'
is_single_group = df['race_ethnicity'] != 'All'
male_managers = df[is_male & is_manager & is_single_group]

# Bar chart of the 'count' column, one bar per race_ethnicity value.
male_managers.plot.bar(x='race_ethnicity', y='count')

%matplotlib inline
import pandas as pd

df = pd.read_csv('data/gdp.csv', index_col=0)

# Look up the column names containing 'Korea' to find the exact spelling to plot.
column_names = pd.Series(df.columns)
column_names[column_names.str.contains('Korea')]

# Plot the selected country columns against the frame's index.
countries_to_plot = [
    "Korea_Rep",
    "United_States",
    "United_Kingdom",
    "Germany",
    "China",
    "Japan",
]
df.plot(y=countries_to_plot)

해설

  1. 같은 크기의 강의실이 필요한 과목에 대해 알파벳 순서대로 방 번호를 배정합니다. 예를 들어 Auditorium이 필요한 과목으로 “arts”, “commerce”, “science” 세 과목이 있다면, “arts”는 “Auditorium-1”, “commerce”는 “Auditorium-2”, “science”는 “Auditorium-3” 순서로 방을 배정합니다.
  2. “status”가 “not allowed”인 수강생은 “room assignment”를 그대로 “not assigned”로 둡니다.
  3. “room assignment” column의 이름을 “room number”로 바꿉니다.
import pandas as pd

df = pd.read_csv('data/enrolment_3.csv')

# Count the students per course, looking only at rows whose status is "allowed".
allowed = df["status"] == "allowed"
course_counts = df.loc[allowed, "course name"].value_counts()

# Split the courses into room-size tiers by their allowed-student head count.
auditorium_list = list(course_counts[course_counts >= 80].index)
large_room_list = list(course_counts[(course_counts >= 40) & (course_counts < 80)].index)
medium_room_list = list(course_counts[(course_counts >= 15) & (course_counts < 40)].index)
small_room_list = list(course_counts[(course_counts >= 5) & (course_counts < 15)].index)

# Number the rooms within each tier in alphabetical order of course name,
# starting at 1. Each tier is sorted once instead of on every iteration.
room_tiers = (
    ("Auditorium-", auditorium_list),
    ("Large-", large_room_list),
    ("Medium-", medium_room_list),
    ("Small-", small_room_list),
)
for prefix, courses in room_tiers:
    for number, course in enumerate(sorted(courses), start=1):
        # Only rows with status "allowed" are written; other rows keep their value.
        in_course = (df["course name"] == course) & allowed
        df.loc[in_course, "room assignment"] = prefix + str(number)

# Rename the column.
df.rename(columns={"room assignment": "room number"}, inplace=True)

 

모범 답안

import pandas as pd

df = pd.read_csv('data/enrolment_3.csv')

# Count the students per course, looking only at rows whose status is "allowed".
allowed = df["status"] == "allowed"
course_counts = df.loc[allowed, "course name"].value_counts()

# Split the courses into room-size tiers by their allowed-student head count.
auditorium_list = list(course_counts[course_counts >= 80].index)
large_room_list = list(course_counts[(course_counts >= 40) & (course_counts < 80)].index)
medium_room_list = list(course_counts[(course_counts >= 15) & (course_counts < 40)].index)
small_room_list = list(course_counts[(course_counts >= 5) & (course_counts < 15)].index)

# Number the rooms within each tier in alphabetical order of course name,
# starting at 1. Each tier is sorted once instead of on every iteration.
room_tiers = (
    ("Auditorium-", auditorium_list),
    ("Large-", large_room_list),
    ("Medium-", medium_room_list),
    ("Small-", small_room_list),
)
for prefix, courses in room_tiers:
    for number, course in enumerate(sorted(courses), start=1):
        # Only rows with status "allowed" are written; other rows keep their value.
        in_course = (df["course name"] == course) & allowed
        df.loc[in_course, "room assignment"] = prefix + str(number)

# Rename the column.
df.rename(columns={"room assignment": "room number"}, inplace=True)

# Test code
df

import pandas as pd

df = pd.read_csv('data/enrolment_2.csv')

# Start every row at 'Auditorium'; the tiers below overwrite it where they apply.
df['room assignment'] = 'Auditorium'

# Head count per course among the rows whose status is 'allowed'.
allowed = df['status'] == 'allowed'
course_counts = df.loc[allowed, 'course name'].value_counts()

# Course names falling into each room-size tier.
big = list(course_counts[course_counts >= 80].index)
large = list(course_counts[(course_counts >= 40) & (course_counts < 80)].index)
mid = list(course_counts[(course_counts >= 15) & (course_counts < 40)].index)
small = list(course_counts[(course_counts >= 5) & (course_counts < 15)].index)

# Write the room type for every row of the courses in each tier.
tiers = (
    ('Auditorium', big),
    ('Large room', large),
    ('Medium room', mid),
    ('Small room', small),
)
for room, courses in tiers:
    df.loc[df['course name'].isin(courses), 'room assignment'] = room

# Rows with status 'not allowed' are overridden last, so they end up without a room.
not_allowed = df['status'] == 'not allowed'
df.loc[not_allowed, 'room assignment'] = 'not assigned'

# Test code
df

해설

수강 인원에 따라 총 다섯 개의 조건으로 강의실을 배정합니다.

  1. 80명 이상의 학생이 수강하는 과목은 “Auditorium”에 배정합니다.
  2. 40명 이상, 80명 미만의 학생이 수강하는 과목은 “Large room”에 배정합니다.
  3. 15명 이상, 40명 미만의 학생이 수강하는 과목은 “Medium room”에 배정합니다.
  4. 5명 이상, 15명 미만의 학생이 수강하는 과목은 “Small room”에 배정합니다.
  5. 폐강 등의 이유로 status가 “not allowed”인 수강생은 room assignment 또한 “not assigned”로 지정합니다.

먼저 status가 "allowed"인 course들에 대해서 과목별 수강 인원을 구합니다.

import pandas as pd

df = pd.read_csv('data/enrolment_2.csv')

# Count the students per course, restricted to rows whose status is "allowed".
allowed = df["status"] == "allowed"
allowed_course_names = df["course name"][allowed]
course_counts = allowed_course_names.value_counts()
course_counts
arts                                         158
science                                      124
commerce                                     101
english                                       56
                                            ... 
mca                                            5
interior design                                5
building construction and mangement            5
nanotechnology                                 5
Name: course name, Length: 82, dtype: int64

 

# Group the courses into room-size tiers by their allowed-student head count.
auditorium_list = list(course_counts[course_counts >= 80].index)
large_room_list = list(course_counts[(course_counts >= 40) & (course_counts < 80)].index)
medium_room_list = list(course_counts[(course_counts >= 15) & (course_counts < 40)].index)
small_room_list = list(course_counts[(course_counts >= 5) & (course_counts < 15)].index)

# Rows with status "not allowed" get no room.
not_allowed = df["status"] == "not allowed"
df.loc[not_allowed, "room assignment"] = "not assigned"

# Label the allowed rows of every course with the room type of its tier.
room_by_tier = {
    "Auditorium": auditorium_list,
    "Large room": large_room_list,
    "Medium room": medium_room_list,
    "Small room": small_room_list,
}
for room, courses in room_by_tier.items():
    in_tier = df["course name"].isin(courses)
    df.loc[in_tier & allowed, "room assignment"] = room

 

모범 답안

import pandas as pd

df = pd.read_csv('data/enrolment_1.csv')
# Every student starts out as allowed; the rules below revoke that.
df["status"] = "allowed"

# Rule 1: year-1 students may not take "information technology".
is_info_tech = df["course name"] == "information technology"
is_first_year = df["year"] == 1
df.loc[is_info_tech & is_first_year, "status"] = "not allowed"

# Rule 2: year-4 students may not take "commerce".
is_commerce = df["course name"] == "commerce"
is_fourth_year = df["year"] == 4
df.loc[is_commerce & is_fourth_year, "status"] = "not allowed"

# Rule 3: a course with fewer than 5 still-allowed students is closed for everyone.
allowed = df["status"] == "allowed"
course_counts = df.loc[allowed, "course name"].value_counts()
closed_courses = list(course_counts[course_counts < 5].index)
df.loc[df["course name"].isin(closed_courses), "status"] = "not allowed"

# Test code
df

 

퀴즈 해설

 
# Occurrences of each value in the city column, then the shape of that result.
df['City / Urban area'].value_counts()
df['City / Urban area'].value_counts().shape
(249,)
# Shape of the per-country counts; its single entry is the number of distinct countries.
df['Country'].value_counts().shape
(61,)

퀴즈 해설

 

먼저 인구 밀도를 계산합니다.

# Population density: population divided by land area in square kilometres.
df["Density"] = df["Population"] / df["Land area (in sqKm)"]

계산한 인구 밀도에 불린 연산을 적용하여 인덱싱합니다.

# Keep only the rows whose density exceeds 10,000.
df_high_density = df[df["Density"] > 10000]

마지막으로 info() 메소드를 적용합니다.

# Print a summary of the filtered frame (index range, columns, non-null counts, dtypes).
df_high_density.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 19 entries, 32 to 129
Data columns (total 5 columns):
City / Urban area      19 non-null object
Country                19 non-null object
Population             19 non-null int64
Land area (in sqKm)    19 non-null int64
Density                19 non-null float64
dtypes: float64(1), int64(2), object(2)
memory usage: 912.0+ bytes

퀴즈 해설

 

# Population density: population divided by land area in square kilometres.
population = df["Population"]
land_area = df["Land area (in sqKm)"]
df["Density"] = population / land_area

# Order the rows from the highest density to the lowest.
density_ranks = df.sort_values("Density", ascending=False)

# City names in descending order of density.
density_ranks['City / Urban area']
75                      Mumbai
74                     Kolkata
101                    Karachi
                ...           
57                         Pau
220                    Hickory
196            Barnstable Town
Name: City / Urban area, Length: 249, dtype: object

 

퀴즈 해설

 

먼저, 각 도시의 국가 정보를 가져옵니다.

import pandas as pd

# Load the city table and show the country recorded for each row.
world_cities = pd.read_csv("data/world_cities.csv")

world_cities['Country']
0       Argentina
1       Australia
2       Australia
3       Australia
4       Australia
5       Australia
6       Australia
7         Austria
8      Azerbaijan
9         Belgium
10        Belgium
          ...    
220           USA
221           USA
# How many rows (cities) each country has in the table.
countries = world_cities['Country'].value_counts()
USA             106
France           15
Brazil           10
Canada            9
               ... 
Vietnam           1
Sudan             1
# Countries that appear exactly four times.
countries[countries == 4]
Italy    4
Name: Country, dtype: int64

 

+ Recent posts