Pandas: Counting and Summarizing

Pandas: Counting and Summarizing#

這個章節將更系統化地用實際案例來介紹以下操作：

Reading files: pd.read_csv('data.csv')或讀取JSON pd.read_json('data.json')
Filtering data: Slicing data df[0:10]、Selecting data df['col1']、Filtering data df[df['col1'] > 0]
Mutating a new variable(on column), Observing a variable, Changing data type
Summarizing data (group by columns): df.groupby('col1').mean()
Arranging (Sorting) data: df.sort_values(by = 'col1')
Concatenating data: pd.concat([df1, df2])

P1. Read data#

1.1 Read CSV: 違規藥品廣告#

違規藥品廣告資料集 https://data.nat.gov.tw/dataset/14196

import pandas as pd

# Raw CSV of the illegal drug advertisement dataset, served from GitHub.
DRUG_CSV_URL = 'https://raw.githubusercontent.com/p4css/py4css/main/data/drug_156_2.csv'

# Download the file and parse it into a DataFrame.
drug_df = pd.read_csv(DRUG_CSV_URL)

# Preview the first rows to confirm the load worked.
drug_df.head()

/Users/jirlong/opt/anaconda3/lib/python3.9/site-packages/pandas/core/computation/expressions.py:21: UserWarning: Pandas requires version '2.8.4' or newer of 'numexpr' (version '2.8.1' currently installed).
  from pandas.core.computation.check import NUMEXPR_INSTALLED
/Users/jirlong/opt/anaconda3/lib/python3.9/site-packages/pandas/core/arrays/masked.py:60: UserWarning: Pandas requires version '1.3.6' or newer of 'bottleneck' (version '1.3.4' currently installed).
  from pandas.core import (

---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Input In [1], in <cell line: 2>()
import pandas as pd
----> 2 drug_df = pd.read_csv('https://raw.githubusercontent.com/p4css/py4css/main/data/drug_156_2.csv')
# drug_df
drug_df.head()

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1026, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)
kwds_defaults = _refine_defaults_read(
   dialect,
   delimiter,
   (...)
   dtype_backend=dtype_backend,
)
kwds.update(kwds_defaults)
-> 1026 return _read(filepath_or_buffer, kwds)

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/parsers/readers.py:620, in _read(filepath_or_buffer, kwds)
_validate_names(kwds.get("names", None))
# Create the parser.
--> 620 parser = TextFileReader(filepath_or_buffer, **kwds)
if chunksize or iterator:
   return parser

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1620, in TextFileReader.__init__(self, f, engine, **kwds)
   self.options["has_index_names"] = kwds["has_index_names"]
self.handles: IOHandles | None = None
-> 1620 self._engine = self._make_engine(f, self.engine)

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/parsers/readers.py:1880, in TextFileReader._make_engine(self, f, engine)
   if "b" not in mode:
       mode += "b"
-> 1880 self.handles = get_handle(
   f,
   mode,
   encoding=self.options.get("encoding", None),
   compression=self.options.get("compression", None),
   memory_map=self.options.get("memory_map", False),
   is_text=is_text,
   errors=self.options.get("encoding_errors", "strict"),
   storage_options=self.options.get("storage_options", None),
)
assert self.handles is not None
f = self.handles.handle

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/common.py:728, in get_handle(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)
   codecs.lookup_error(errors)
# open URLs
--> 728 ioargs = _get_filepath_or_buffer(
   path_or_buf,
   encoding=encoding,
   compression=compression,
   mode=mode,
   storage_options=storage_options,
)
handle = ioargs.filepath_or_buffer
handles: list[BaseBuffer]

File ~/opt/anaconda3/lib/python3.9/site-packages/pandas/io/common.py:389, in _get_filepath_or_buffer(filepath_or_buffer, encoding, compression, mode, storage_options)
       if content_encoding == "gzip":
           # Override compression based on Content-Encoding header
           compression = {"method": "gzip"}
--> 389         reader = BytesIO(req.read())
   return IOArgs(
       filepath_or_buffer=reader,
       encoding=encoding,
   (...)
       mode=fsspec_mode,
   )
if is_fsspec_url(filepath_or_buffer):

File ~/opt/anaconda3/lib/python3.9/http/client.py:476, in HTTPResponse.read(self, amt)
else:
   try:
--> 476         s = self._safe_read(self.length)
   except IncompleteRead:
       self._close_conn()

File ~/opt/anaconda3/lib/python3.9/http/client.py:626, in HTTPResponse._safe_read(self, amt)
s = []
while amt > 0:
--> 626     chunk = self.fp.read(min(amt, MAXAMOUNT))
   if not chunk:
       raise IncompleteRead(b''.join(s), amt)

File ~/opt/anaconda3/lib/python3.9/socket.py:704, in SocketIO.readinto(self, b)
while True:
   try:
--> 704         return self._sock.recv_into(b)
   except timeout:
       self._timeout_occurred = True

File ~/opt/anaconda3/lib/python3.9/ssl.py:1241, in SSLSocket.recv_into(self, buffer, nbytes, flags)
   if flags != 0:
       raise ValueError(
         "non-zero flags not allowed in calls to recv_into() on %s" %
         self.__class__)
-> 1241     return self.read(nbytes, buffer)
else:
   return super().recv_into(buffer, nbytes, flags)

File ~/opt/anaconda3/lib/python3.9/ssl.py:1099, in SSLSocket.read(self, len, buffer)
try:
   if buffer is not None:
-> 1099         return self._sslobj.read(len, buffer)
   else:
       return self._sslobj.read(len)

KeyboardInterrupt: 

# `%who` is a magic command that lists all variables of the current session
%who

drug_df	 pd

1.2 Display and rename columns#

要大概觀察DataFrame的內容，我們可以使用.head()方法。這個方法將預設顯示DataFrame的前5行資料，以便我們快速查看資料的結構和內容。
有時候，我們可能想要更改DataFrame中欄位的名稱，以使其更具描述性或符合我們的需求。可以像下面一樣直接將所有新欄位名稱傳遞給.columns屬性，或者使用.rename()方法來將舊欄位名稱映射到新欄位名稱。例如df.rename(columns={'Name': 'Full Name', 'Age': 'Years Old'}, inplace=True)

# Check all variables of the dataframe
# (`.columns` returns an Index holding every column label)
drug_df.columns

Index(['違規產品名稱', '違規廠商名稱或負責人', '處分機關', '處分日期', '處分法條', '違規情節', '刊播日期',
       '刊播媒體類別', '刊播媒體', '查處情形'],
      dtype='object')

# Replace the original headers with short English names.
# The assignment is positional: entry i becomes the label of column i,
# so the list must hold exactly one name per existing column.
english_names = [
    'pname',         # product name (違規產品名稱)
    'cname',         # company or person in charge (違規廠商名稱或負責人)
    'agency',        # sanctioning agency (處分機關)
    'issuedate',     # sanction date (處分日期)
    'law',           # legal article applied (處分法條)
    'fact',          # nature of the violation (違規情節)
    'pubDate',       # publication / broadcast date (刊播日期)
    'pubMediaType',  # media category (刊播媒體類別)
    'pubMedia',      # media outlet (刊播媒體)
    'trace',         # handling status (查處情形)
]
drug_df.columns = english_names

# Preview the frame with its new headers.
drug_df.head()

	pname	cname	agency	issuedate	law	fact	pubDate	pubMediaType	pubMedia	trace
0	維他肝	豐怡生化科技股份有限公司/朱O	NaN	03 31 2022 12:00AM	NaN	廣告內容誇大不實	02 2 2022 12:00AM	廣播電台	噶瑪蘭廣播電台股份有限公司	NaN
1	現貨澳洲Swisse ULTIBOOST維他命D片calcium vitamin VITAM...	張O雯/張O雯	NaN	01 21 2022 12:00AM	NaN	廣告違規	11 30 2021 12:00AM	網路	蝦皮購物	輔導結案
2	✈日本代購參天製藥處方簽點眼液	蘇O涵/蘇O涵	NaN	01 25 2022 12:00AM	NaN	無照藥商	08 27 2021 12:00AM	網路	蝦皮購物	NaN
3	✈日本代購 TSUMURA 中將湯 24天包裝	蘇O涵/蘇O涵	NaN	01 25 2022 12:00AM	NaN	無照藥商	08 27 2021 12:00AM	網路	蝦皮購物	輔導結案
4	_Salty.shop 日本代購樂敦小花	曾O嫺/曾O嫺	NaN	02 17 2022 12:00AM	藥事法第27條	無照藥商	12 6 2021 12:00AM	網路	蝦皮購物	處分結案

1.3 Pandas series#

Pandas Series 是一種一維陣列結構，用於存儲同類型的資料，並且每個資料點都有一個與之相關聯的索引。與 Python 的原生 list 不同，Pandas Series 提供了更豐富的功能和操作，包括但不限於資料對齊、切片、過濾以及集成的描述性統計方法。除了這些，Pandas Series 支援更多複雜的資料類型，並且能夠更高效地進行向量化操作。簡而言之，雖然 Pandas Series 和 list 都可以用於存儲一維資料，但 Series 提供了更多專為數據分析而設計的功能和優化。

# A single DataFrame column is a pandas Series
print(type(drug_df.pubMediaType))
# list() copies the Series values into a plain Python list
pubMediaType = list(drug_df.pubMediaType)
print(type(pubMediaType))

# Wrap the list back into a one-column DataFrame
pubMediaType_df = pd.DataFrame(pubMediaType, columns=['pubMediaType'])
pubMediaType_df.head()         # Show the first 5 rows (the default for .head())
# print(set(drug_df.pubMediaType))
# drug_df

<class 'pandas.core.series.Series'>
<class 'list'>

	pubMediaType
0	廣播電台
1	網路
2	網路
3	網路
4	網路

1.4 Counting#

接下來希望計算一下每個廣告類型pubMediaType的數量，以便我們可以了解哪些類型的廣告最多。

過去我們會用collections套件的Counter方法來計算。但是現在我們可以使用.value_counts()方法來計算每個類型的廣告數量。這個方法將返回一個包含每個類型廣告數量的 Pandas Series，其中索引是廣告類型，值是廣告數量。

# Using `Counter` to count the number of each type of publication media
from collections import Counter

# Counter tallies every distinct value, including missing ones (nan)
type_dict = Counter(drug_df.pubMediaType)
print(type_dict)
# most_common(10) returns the ten most frequent (value, count) pairs
print(Counter(drug_df.fact).most_common(10))

Counter({'網路': 2609, '廣播電台': 119, '平面媒體': 117, '電視': 109, '其他': 16, nan: 2})
[('無照藥商', 1434), ('廣告違規', 248), ('無違規', 185), ('其刊登或宣播之廣告內容與原核准廣告內容不符', 134), (nan, 132), ('非藥商刊登或宣播藥物廣告', 108), ('藥品未申請查驗登記', 94), ('刊播未申請核准之廣告', 85), ('廣告內容誇大不實', 67), ('禁藥', 40)]

# pandas equivalent of the Counter tally, sorted from most to least frequent.
# value_counts() leaves out missing values by default (dropna=True), so the
# 2 nan entries that Counter reported do not appear in this result.
drug_df.pubMediaType.value_counts()
# drug_df['pubMediaType'].value_counts()

pubMediaType
網路      2609
廣播電台     119
平面媒體     117
電視       109
其他        16
Name: count, dtype: int64

P2. Read JSON : Youbike#

2.1 Convert dict of dict to list of dict, then to DataFrame#

import pandas as pd

pd.set_option('display.max_columns', None)  # show every column when a DataFrame is displayed

import requests

# Fetch the live YouBike station feed. The timeout keeps the cell from
# hanging indefinitely on a stalled connection, and raise_for_status()
# surfaces an HTTP error instead of trying to parse an error page as JSON.
response = requests.get(
    'https://tcgbusfs.blob.core.windows.net/dotapp/youbike/v2/youbike_immediate.json',
    timeout=30)
response.raise_for_status()
data = response.json()

# Convert the payload to a list of dicts (one dict per station).
# This cell was written for a dict-of-dict layout,
# {"retVal": {station_id: {...}, ...}}, where only the inner values are needed.
# Section 2.3 passes the payload of the same URL straight to pd.DataFrame,
# which implies it can also arrive as a plain list of station dicts;
# in that case it is used as-is instead of failing on data["retVal"].
if isinstance(data, dict):
    all_list = list(data["retVal"].values())
else:
    all_list = list(data)

ubike_df = pd.DataFrame(all_list)

2.2 Display part of columns#

Subsetting them by passing a list of column names inside the square brackets df[['col1', 'col2']]
Using the .drop() method to drop columns df.drop(['col1', 'col2'], axis=1)

# Option 1: keep only the listed columns. In a notebook cell only the value of
# the last expression is displayed, so this result is computed but not shown.
ubike_df[['sno', 'sna', 'tot', 'sbi', 'sarea']].head()
# Option 2: drop the listed columns and keep everything else.
# NOTE(review): tot / sbi / lat / lng are the column names seen in the 2022
# output shown below this cell; the frame built in section 2.3 uses total /
# available_rent_bikes / latitude / longitude instead. Confirm which schema
# the feed currently returns before re-running.
ubike_df.drop(['ar', 'aren', 'snaen', 'lat', 'lng'], axis=1).head()

	sno	sna	tot	sbi	sarea	mday	sareaen	bemp	act
0001	0001	捷運市政府站(3號出口)	84	74	信義區	20221030185227	Xinyi Dist.	10	1
0002	0002	捷運國父紀念館站(2號出口)	16	4	大安區	20221030185231	Daan Dist.	12	1
0004	0004	市民廣場	32	0	信義區	20221030185217	Xinyi Dist.	31	1
0005	0005	興雅國中	10	1	信義區	20221030185241	Xinyi Dist.	9	1
0006	0006	臺北南山廣場	54	11	信義區	20221030185230	Xinyi Dist.	43	1

2.3 Using pandas' built-in function#

# Let the DataFrame constructor build the table straight from the decoded
# JSON, with no manual reshaping loop.

import requests

YOUBIKE_URL = 'https://tcgbusfs.blob.core.windows.net/dotapp/youbike/v2/youbike_immediate.json'

# Download the station feed and decode the JSON body.
data = requests.get(YOUBIKE_URL).json()

# One row per station record, one column per field.
ubike_df = pd.DataFrame(data)
ubike_df.head()

	sno	sna	sarea	mday	ar	sareaen	snaen	aren	act	srcUpdateTime	updateTime	infoTime	infoDate	total	available_rent_bikes	latitude	longitude	available_return_bikes
0	500101001	YouBike2.0_捷運科技大樓站	大安區	2025-03-13 22:40:32	復興南路二段235號前	Daan Dist.	YouBike2.0_MRT Technology Bldg. Sta.	No.235， Sec. 2， Fuxing S. Rd.	1	2025-03-13 22:42:20	2025-03-13 22:42:52	2025-03-13 22:40:32	2025-03-13	28	3	25.02605	121.54360	25
1	500101002	YouBike2.0_復興南路二段273號前	大安區	2025-03-13 22:34:28	復興南路二段273號西側	Daan Dist.	YouBike2.0_No.273， Sec. 2， Fuxing S. Rd.	No.273， Sec. 2， Fuxing S. Rd. (West)	1	2025-03-13 22:42:20	2025-03-13 22:42:52	2025-03-13 22:34:28	2025-03-13	21	1	25.02565	121.54357	20
2	500101003	YouBike2.0_國北教大實小東側門	大安區	2025-03-13 22:40:32	和平東路二段96巷7號	Daan Dist.	YouBike2.0_NTUE Experiment Elementary School (...	No. 7， Ln. 96， Sec. 2， Heping E. Rd	1	2025-03-13 22:42:20	2025-03-13 22:42:52	2025-03-13 22:40:32	2025-03-13	16	4	25.02429	121.54124	12
3	500101004	YouBike2.0_和平公園東側	大安區	2025-03-13 22:33:30	和平東路二段118巷33號	Daan Dist.	YouBike2.0_Heping Park (East)	No. 33， Ln. 118， Sec. 2， Heping E. Rd	1	2025-03-13 22:42:20	2025-03-13 22:42:52	2025-03-13 22:33:30	2025-03-13	11	1	25.02351	121.54282	10
4	500101005	YouBike2.0_辛亥復興路口西北側	大安區	2025-03-13 22:41:29	復興南路二段368號	Daan Dist.	YouBike2.0_Xinhai Fuxing Rd. Intersection (Nor...	No. 368， Sec. 2， Fuxing S. Rd.	1	2025-03-13 22:42:20	2025-03-13 22:42:52	2025-03-13 22:41:29	2025-03-13	16	2	25.02153	121.54299	14

P2. Observing data#

Observing data df.info() and df.describe() 觀察各個變數的分佈 to check data type or get basic summary of data。

2.4 Overview whole df#

# Dimensions of the frame as (number of rows, number of columns)
ubike_df.shape

(1533, 18)

# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
ubike_df.describe()

	total	available_rent_bikes	latitude	longitude	available_return_bikes
count	1533.000000	1533.000000	1533.000000	1533.000000	1533.000000
mean	26.525766	8.622309	25.052833	121.544699	17.195042
std	13.711934	8.789692	0.033320	0.031682	14.619827
min	5.000000	0.000000	24.976190	121.462280	0.000000
25%	16.000000	1.000000	25.029740	121.522240	7.000000
50%	23.000000	6.000000	25.049870	121.540620	14.000000
75%	32.000000	13.000000	25.072640	121.565860	24.000000
max	99.000000	55.000000	25.145820	121.623060	99.000000

2.5 Access data by index#

# iloc is position-based: a single integer selects one whole row,
# which comes back as a Series keyed by column name.
row_pos = 2
print(ubike_df.iloc[row_pos])

# A (row position, column position) pair selects a single cell.
print(ubike_df.iloc[row_pos, 0])

sno                                                               500101003
sna                                                    YouBike2.0_國北教大實小東側門
sarea                                                                   大安區
mday                                                    2025-03-13 22:40:32
ar                                                              和平東路二段96巷7號
sareaen                                                          Daan Dist.
snaen                     YouBike2.0_NTUE Experiment Elementary School (...
aren                                    No. 7， Ln. 96， Sec. 2， Heping E. Rd
act                                                                       1
srcUpdateTime                                           2025-03-13 22:42:20
updateTime                                              2025-03-13 22:42:52
infoTime                                                2025-03-13 22:40:32
infoDate                                                         2025-03-13
total                                                                    16
available_rent_bikes                                                      4
latitude                                                           25.02429
longitude                                                         121.54124
available_return_bikes                                                   12
Name: 2, dtype: object
500101003

2.6 Select variables and slice data#

# Pick the columns of interest, then keep only the first three rows.
preview_cols = ['sno', 'sna', 'total', 'available_rent_bikes', 'sarea']
ubike_df[preview_cols].head(3)

	sno	sna	total	available_rent_bikes	sarea
0	500101001	YouBike2.0_捷運科技大樓站	28	3	大安區
1	500101002	YouBike2.0_復興南路二段273號前	21	1	大安區
2	500101003	YouBike2.0_國北教大實小東側門	16	4	大安區

2.7 Convert variable type#

Cleaning data pd.to_numeric(var) 修改變數型態 to convert data type

# ratio = available_rent_bikes / total
print(type(ubike_df['available_rent_bikes']))
# pd.to_numeric() converts each column to a numeric dtype before the division
ubike_df['ratio'] = pd.to_numeric(ubike_df['available_rent_bikes'])/pd.to_numeric(ubike_df['total'])
# Show the inputs next to the newly created column
ubike_df[['sno', 'sna', 'total', 'available_rent_bikes', 'ratio']].head()

<class 'pandas.core.series.Series'>

	sno	sna	total	available_rent_bikes	ratio
0	500101001	YouBike2.0_捷運科技大樓站	28	3	0.107143
1	500101002	YouBike2.0_復興南路二段273號前	21	1	0.047619
2	500101003	YouBike2.0_國北教大實小東側門	16	4	0.250000
3	500101004	YouBike2.0_和平公園東側	11	1	0.090909
4	500101005	YouBike2.0_辛亥復興路口西北側	16	2	0.125000

2.8 Mutate new variable#

df = df.assign(new_var = old_var1 / old_var2) 產生新的變數（方法一） to create or convert new variable. Be careful! assign() returns a new DataFrame, so you must assign the result back to df (the left-hand side) to overwrite the original df.
df["new_var"] = df.old_var1 / df.old_var2 產生新的變數（方法二）
Creating ratio = available_rent_bikes/total for ubike data
pd.to_numeric() to convert one variable type

# Step 1: make sure both count columns are numeric. assign() returns a new
# DataFrame, so the result is bound back to ubike_df.
ubike_df = ubike_df.assign(
    available_rent_bikes=lambda frame: pd.to_numeric(frame.available_rent_bikes),
    total=lambda frame: pd.to_numeric(frame.total),
)

# Step 2: available bikes as a fraction of the station's total.
ubike_df["ratio"] = ubike_df["available_rent_bikes"] / ubike_df["total"]

# Step 3: show the inputs next to the derived column.
ratio_cols = ['sno', 'sna', 'total', 'available_rent_bikes', 'ratio']
ubike_df[ratio_cols].head()

	sno	sna	total	available_rent_bikes	ratio
0	500101001	YouBike2.0_捷運科技大樓站	28	3	0.107143
1	500101002	YouBike2.0_復興南路二段273號前	21	1	0.047619
2	500101003	YouBike2.0_國北教大實小東側門	16	4	0.250000
3	500101004	YouBike2.0_和平公園東側	11	1	0.090909
4	500101005	YouBike2.0_辛亥復興路口西北側	16	2	0.125000

# Re-run the numeric summary; the new `ratio` column is now part of it
ubike_df.describe()

	total	available_rent_bikes	latitude	longitude	available_return_bikes	ratio	sbi	tot
count	1533.000000	1533.000000	1533.000000	1533.000000	1533.000000	1533.000000	1533.000000	1533.000000
mean	26.525766	8.622309	25.052833	121.544699	17.195042	0.355990	8.622309	26.525766
std	13.711934	8.789692	0.033320	0.031682	14.619827	0.311784	8.789692	13.711934
min	5.000000	0.000000	24.976190	121.462280	0.000000	0.000000	0.000000	5.000000
25%	16.000000	1.000000	25.029740	121.522240	7.000000	0.052632	1.000000	16.000000
50%	23.000000	6.000000	25.049870	121.540620	14.000000	0.300000	6.000000	23.000000
75%	32.000000	13.000000	25.072640	121.565860	24.000000	0.605263	13.000000	32.000000
max	99.000000	55.000000	25.145820	121.623060	99.000000	1.000000	55.000000	99.000000

P3. Summarizing data#

Tutorial: groupby pandas

df.groupby(col1)[col2].count() summarize col2 according to col1 by counting
df.groupby(col1)[col2].mean() summarize col2 according to col1 by calculating their average of each category
df.groupby([col1, col3])[col2].count() summarize col2 according to col1 and col3 by counting

3.1 Count single variable by `value_counts()`#

# Conventional way
from collections import Counter
# Tally of every distinct value; missing values are counted under nan
Counter(drug_df.pubMediaType)

Counter({'網路': 2609, '廣播電台': 119, '平面媒體': 117, '電視': 109, '其他': 16, nan: 2})

# Preview the first five rows of the advertisement data
drug_df.head()

	pname	cname	agency	issuedate	law	fact	pubDate	pubMediaType	pubMedia	trace
0	維他肝	豐怡生化科技股份有限公司/朱O	NaN	03 31 2022 12:00AM	NaN	廣告內容誇大不實	02 2 2022 12:00AM	廣播電台	噶瑪蘭廣播電台股份有限公司	NaN
1	現貨澳洲Swisse ULTIBOOST維他命D片calcium vitamin VITAM...	張O雯/張O雯	NaN	01 21 2022 12:00AM	NaN	廣告違規	11 30 2021 12:00AM	網路	蝦皮購物	輔導結案
2	✈日本代購參天製藥處方簽點眼液	蘇O涵/蘇O涵	NaN	01 25 2022 12:00AM	NaN	無照藥商	08 27 2021 12:00AM	網路	蝦皮購物	NaN
3	✈日本代購 TSUMURA 中將湯 24天包裝	蘇O涵/蘇O涵	NaN	01 25 2022 12:00AM	NaN	無照藥商	08 27 2021 12:00AM	網路	蝦皮購物	輔導結案
4	_Salty.shop 日本代購樂敦小花	曾O嫺/曾O嫺	NaN	02 17 2022 12:00AM	藥事法第27條	無照藥商	12 6 2021 12:00AM	網路	蝦皮購物	處分結案

# Pandas way
# value_counts() returns a Series indexed by category, most frequent first;
# missing values are left out by default, unlike the Counter result above.
drug_df.pubMediaType.value_counts()

pubMediaType
網路      2609
廣播電台     119
平面媒體     117
電視       109
其他        16
Name: count, dtype: int64

3.2 `groupby()` then `count()`#

# Group rows by media type, then count the non-missing entries of `pname`
# and `agency` in each group. count() skips NaN, which is why `agency`
# comes out as 0 for groups where that field was never filled in.
drug_df.groupby('pubMediaType')[['pname', 'agency']].count()

	pname	agency
pubMediaType
其他	16	0
平面媒體	117	117
廣播電台	119	0
網路	2609	0
電視	109	0

3.3 `groupby()` then summarize#

Format: df.groupby(col_for_group)[variable_to_group].func()

col_for_group: 要作為群組根據的variable，例如將村里彙整為鄉鎮市區資料時，鄉鎮市區資料即為group的根據
variable_to_group: 哪些要根據上述的群組來彙整，通常是值，如上例，那就是人口數、土地面積等。
func(): 要用什麼函式來計算，是要計算每組組內的加總或平均？

# The grouping key (sarea) together with the two count columns to be summarized
ubike_df[['sarea', 'total', 'available_rent_bikes']]

	sarea	total	available_rent_bikes
0	大安區	28	3
1	大安區	21	1
2	大安區	16	4
3	大安區	11	1
4	大安區	16	2
...	...	...	...
1528	臺大公館校區	36	0
1529	臺大公館校區	40	0
1530	臺大公館校區	33	0
1531	臺大公館校區	99	46
1532	臺大公館校區	67	0

1533 rows × 3 columns

# Sum `total` within each district (sarea); the result is a Series indexed by sarea

ubike_df.groupby("sarea")["total"].sum()

sarea
中山區       4342
中正區       3493
信義區       3559
內湖區       4181
北投區       2832
南港區       2723
士林區       3162
大同區       1899
大安區       5348
文山區       2478
松山區       2490
臺大公館校區    2048
萬華區       2109
Name: total, dtype: int64

# Per-district sums of both count columns.
summed = ubike_df.groupby("sarea")[["total", "available_rent_bikes"]].sum()

# reset_index() turns the group key (sarea) back into an ordinary column.
ubike_stat = summed.reset_index()
ubike_stat

	sarea	total	available_rent_bikes
0	中山區	4342	1398
1	中正區	3493	892
2	信義區	3559	1077
3	內湖區	4181	1303
4	北投區	2832	1079
5	南港區	2723	1107
6	士林區	3162	1020
7	大同區	1899	917
8	大安區	5348	1362
9	文山區	2478	986
10	松山區	2490	873
11	臺大公館校區	2048	203
12	萬華區	2109	1001

P4. Sort-by (Arrange)#

# ascending=False sorts in descending order (largest `total` first).
# sort_values() returns a sorted copy; ubike_stat itself is left unchanged.
ubike_stat.sort_values('total', ascending=False)

	sarea	total	available_rent_bikes
8	大安區	5348	1362
0	中山區	4342	1398
3	內湖區	4181	1303
2	信義區	3559	1077
1	中正區	3493	892
6	士林區	3162	1020
4	北投區	2832	1079
5	南港區	2723	1107
10	松山區	2490	873
9	文山區	2478	986
12	萬華區	2109	1001
11	臺大公館校區	2048	203
7	大同區	1899	917

# Ten stations with the highest ratio of available bikes to total.
# NOTE(review): many stations tie at 1.0, so which of them land in the top ten
# may depend on the sort algorithm rather than on the data — confirm if order matters.
ubike_df.sort_values('ratio', ascending=False)[['sno', 'sna', 'total', 'available_rent_bikes', 'ratio']].head(10)

	sno	sna	total	available_rent_bikes	ratio
1192	500111015	YouBike2.0_福山公園	30	30	1.0
1235	500111058	YouBike2.0_舊莊國小(舊莊街一段)	18	18	1.0
311	500104052	YouBike2.0_重慶北路四段190巷口	13	13	1.0
1138	500110052	YouBike2.0_南京東路五段59巷口	17	17	1.0
762	500107141	YouBike2.0_建國北路高架橋下停車場G區	18	18	1.0
757	500107136	YouBike2.0_建國五常街口	20	20	1.0
1370	500112094	YouBike2.0_吳興街284巷(廣安宮)	15	15	1.0
1234	500111057	YouBike2.0_力行橋(南港)	18	18	1.0
1236	500111059	YouBike2.0_向陽公園	17	17	1.0
1259	500111082	YouBike2.0_南港路三段130巷口	12	12	1.0