query条件查询

import pandas as pd
# 读取减肥数据集,查看一月的数据
weight_loss = pd.read_csv('data/weight_loss.csv')
weight_loss.query('Month == "Jan"')
 NameMonthWeekWeight
0BobJanWeek 1291
1AmyJanWeek 1197
2BobJanWeek 2288
3AmyJanWeek 2189
4BobJanWeek 3283
5AmyJanWeek 3189
6BobJanWeek 4283
7AmyJanWeek 4190
# 定义一个求减肥比例的函数
def find_perc_loss(s):
    return (s - s.iloc[0]) / s.iloc[0]
# 查看Bob在一月的减肥成果
bob_jan = weight_loss.query('Name=="Bob" and Month=="Jan"')
find_perc_loss(bob_jan['Weight'])
'''
0    0.000000
2   -0.010309
4   -0.027491
6   -0.027491
Name: Weight, dtype: float64
'''
# 对Name和Month进行分组,然后使用transform方法,传入函数,对数值进行转换
pcnt_loss = weight_loss.groupby(['Name', 'Month'])['Weight'].transform(find_perc_loss)
pcnt_loss.head(8)
0    0.000000
1    0.000000
2   -0.010309
3   -0.040609
4   -0.027491
5   -0.040609
6   -0.027491
7   -0.035533
Name: Weight, dtype: float64
# transform之后的结果,行数不变,可以赋值给原始DataFrame作为一个新列;
# 为了缩短输出,只选择Bob的前两个月数据
weight_loss['Perc Weight Loss'] = pcnt_loss.round(3)
weight_loss.query('Name=="Bob" and Month in ["Jan", "Feb"]')
 NameMonthWeekWeightPerc Weight Loss
0BobJanWeek 12910.000
2BobJanWeek 2288-0.010
4BobJanWeek 3283-0.027
6BobJanWeek 4283-0.027
8BobFebWeek 12830.000
10BobFebWeek 2275-0.028
12BobFebWeek 3268-0.053
14BobFebWeek 4268-0.053
# 因为最重要的是每个月的第4周,只选择第4周的数据
week4 = weight_loss.query('Week == "Week 4"')
week4
 NameMonthWeekWeightPerc Weight Loss
6BobJanWeek 4283-0.027
7AmyJanWeek 4190-0.036
14BobFebWeek 4268-0.053
15AmyFebWeek 4173-0.089
22BobMarWeek 4261-0.026
23AmyMarWeek 4170-0.017
30BobAprWeek 4250-0.042
31AmyAprWeek 4161-0.053
# 用pivot重构DataFrame,让Amy和Bob的数据并排放置
winner = week4.pivot(index='Month', columns='Name', values='Perc Weight Loss')
winner
NameAmyBob
Month  
Apr-0.053-0.042
Feb-0.089-0.053
Jan-0.036-0.027
Mar-0.017-0.026
# 用where方法选出每月的赢家
winner['Winner'] = np.where(winner['Amy'] < winner['Bob'], 'Amy', 'Bob')
winner.style.highlight_min(axis=1)
NameAmyBobWinner
Month   
Apr-0.053000-0.042000Amy
Feb-0.089000-0.053000Amy
Jan-0.036000-0.027000Amy
Mar-0.017000-0.026000Bob
# 用value_counts()返回最后的比分
winner.Winner.value_counts()
'''
Amy    3
Bob    1
Name: Winner, dtype: int64
'''
# Pandas默认是按字母排序的
week4a = week4.copy()
month_chron = week4a['Month'].unique() 
month_chron
#array(['Jan', 'Feb', 'Mar', 'Apr'], dtype=object)
# 转换为Categorical变量,可以做成按时间排序
week4a['Month'] = pd.Categorical(week4a['Month'], categories=month_chron,ordered=True)
week4a.pivot(index='Month', columns='Name', values='Perc Weight Loss')
NameAmyBob
Month  
Jan-0.036-0.027
Feb-0.089-0.053
Mar-0.017-0.026
Apr-0.053-0.042
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

缘 源 园

你的鼓励将是我创造的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值