Hive-bdp_CodingPark编程公园

本文详细解析了Hive SQL语句的查询顺序,包括SELECT、FROM、WHERE、GROUP BY、ORDER BY和子查询的使用,以及如何查看分区、表套表操作和聚合函数的运用实例。通过实例演示,帮助读者理解Hive查询的逻辑结构和实践技巧。

摘要生成于 C知道 ,由 DeepSeek-R1 满血版支持, 前往体验 >

HIVE

语句的查询顺序

在这里插入图片描述


SELECT [ALL | DISTINCT] select_expr, select_expr, ...
	FROM table_reference
	[WHERE where_condition]
	[GROUP BY col_list]
	[ORDER BY col_list]
	[CLUSTER BY col_list]
		| [DISTRIBUTE BY col_list] [SORT BY col_list]
	[LIMIT number]


查看分区

DESC xxx
select distinct dp from xxx limit 10

表套着表

SELECT avg(t6.开始填写资料_to_首次提交审核) as 开始填写资料_to_首次提交审核平均值,
            avg(t6.首次提交成功_to_入驻审核通过) as 首次提交成功_to_入驻审核通过平均值,
            avg(t6.开始填写资料_to_入驻审核通过) as 开始填写资料_to_入驻审核通过平均值,
            avg(t6.入驻审核通过_to_首个商品上架) as 入驻审核通过_to_首个商品上架平均值,
            avg(t6.首个商品上架_to_首笔订单产生) as 首个商品上架_to_首笔订单产生平均值
            
from (

SELECT
	t1.seller_id AS Seller_id,
	t2.vender_id AS Vender_id,
	t4.sequence3_created_first AS 开始填写材料,
	t4.sequence9_created_first AS 首次提交审核,
	t4.sequence19_created_first AS 业务审核通过,
	t3.first_on AS 首个商品上架,
	t5.first_deal AS 首笔订单产生,
	IF(ROUND((unix_timestamp(t4.sequence9_created_first) - unix_timestamp(t4.sequence3_created_first)) / 86400, 2) >= 90, NULL, ROUND((unix_timestamp(t4.sequence9_created_first) - unix_timestamp(t4.sequence3_created_first)) / 86400, 2)) AS 开始填写资料_to_首次提交审核,
	ROUND((unix_timestamp(t4.sequence9_created_first) - unix_timestamp(t4.sequence3_created_first)) / 86400, 2) AS 左侧的_含异常数据,
	IF(ROUND((unix_timestamp(t4.sequence19_created_first) - unix_timestamp(t4.sequence9_created_first)) / 86400, 2) >= 90, NULL, ROUND((unix_timestamp(t4.sequence19_created_first) - unix_timestamp(t4.sequence9_created_first)) / 86400, 2)) AS 首次提交成功_to_入驻审核通过,
	ROUND((unix_timestamp(t4.sequence19_created_first) - unix_timestamp(t4.sequence9_created_first)) / 86400, 2) AS 左侧的_含异常数据,
	ROUND((unix_timestamp(t3.first_on) - unix_timestamp(t4.sequence19_created_first)) / 86400, 2) AS 入驻审核通过_to_首个商品上架,
	ROUND((unix_timestamp(t5.first_deal) - unix_timestamp(t3.first_on)) / 86400, 2) AS 首个商品上架_to_首笔订单产生,
    IF(ROUND((unix_timestamp(t4.sequence19_created_first) - unix_timestamp(t4.sequence3_created_first)) / 86400, 2) >= 90, NULL, ROUND((unix_timestamp(t4.sequence19_created_first) - unix_timestamp(t4.sequence3_created_first)) / 86400, 2)) AS 开始填写资料_to_入驻审核通过,
	ROUND((unix_timestamp(t4.sequence19_created_first) - unix_timestamp(t4.sequence3_created_first)) / 86400, 2) AS 左侧的_全量数据
FROM
	(
		SELECT
			seller_id
		FROM
			fdm.fdm_popvender_shop_process_log_chain
		WHERE
			dp = 'ACTIVE'
		GROUP BY
			seller_id
	)
	t1
LEFT JOIN
	(
		SELECT
			id AS shop_id,
			vender_id
		FROM
			fdm.fdm_popvender_shop_seller_chain
		WHERE
			dp = 'ACTIVE'
			AND vender_id IS NOT NULL
	)
	t2
ON
	t1.seller_id = t2.shop_id
LEFT JOIN
	(
		SELECT
			vender_id,
			MIN(ON_SHELVES_TIME) AS FIRST_ON
		FROM
			fdm.fdm_vt_producttha_sku_chain
        where ON_SHELVES_TIME !=''
		GROUP BY
			vender_id
	)
	t3
ON
	t2.vender_id = t3.vender_id
LEFT JOIN
	(
		SELECT
			MIN(ord_deal_tm) AS first_deal,
			vender_id
		FROM
			adm.adm_s04_glb_trade_ord_det_sum
		GROUP BY
			vender_id
	)
	t5
ON
	t2.vender_id = t5.vender_id
LEFT JOIN
	(
		SELECT
			seller_id,
			MIN(
				CASE
					WHEN sequence = '1'
					THEN created
				END) AS sequence1_created_first,
			MAX(
				CASE
					WHEN sequence = '1'
					THEN created
				END) AS sequence1_created_last,
			MIN(
				CASE
					WHEN sequence = '2'
					THEN created
				END) AS sequence2_created_first,
			MAX(
				CASE
					WHEN sequence = '2'
					THEN created
				END) AS sequence2_created_last,
			MIN(
				CASE
					WHEN sequence = '3'
					THEN created
				END) AS sequence3_created_first,
			MAX(
				CASE
					WHEN sequence = '3'
					THEN created
				END) AS sequence3_created_last,
			MIN(
				CASE
					WHEN sequence = '4'
					THEN created
				END) AS sequence4_created_first,
			MAX(
				CASE
					WHEN sequence = '4'
					THEN created
				END) AS sequence4_created_last,
			MIN(
				CASE
					WHEN sequence = '5'
					THEN created
				END) AS sequence5_created_first,
			MAX(
				CASE
					WHEN sequence = '5'
					THEN created
				END) AS sequence5_created_last,
			MIN(
				CASE
					WHEN sequence = '6'
					THEN created
				END) AS sequence6_created_first,
			MAX(
				CASE
					WHEN sequence = '6'
					THEN created
				END) AS sequence6_created_last,
			MIN(
				CASE
					WHEN sequence = '7'
					THEN created
				END) AS sequence7_created_first,
			MAX(
				CASE
					WHEN sequence = '7'
					THEN created
				END) AS sequence7_created_last,
			MIN(
				CASE
					WHEN sequence = '8'
					THEN created
				END) AS sequence8_created_first,
			MAX(
				CASE
					WHEN sequence = '8'
					THEN created
				END) AS sequence8_created_last,
			MIN(
				CASE
					WHEN sequence = '9'
					THEN created
				END) AS sequence9_created_first,
			MAX(
				CASE
					WHEN sequence = '9'
					THEN created
				END) AS sequence9_created_last,
			MIN(
				CASE
					WHEN sequence = '10'
					THEN created
				END) AS sequence10_created_first,
			MAX(
				CASE
					WHEN sequence = '10'
					THEN created
				END) AS sequence10_created_last,
			MIN(
				CASE
					WHEN sequence = '11'
					THEN created
				END) AS sequence11_created_first,
			MAX(
				CASE
					WHEN sequence = '11'
					THEN created
				END) AS sequence11_created_last,
			MIN(
				CASE
					WHEN sequence = '12'
					THEN created
				END) AS sequence12_created_first,
			MAX(
				CASE
					WHEN sequence = '12'
					THEN created
				END) AS sequence12_created_last,
			MAX(
				CASE
					WHEN sequence = '13-1-1'
					THEN created
				END) AS sequence1311_created,
			MAX(
				CASE
					WHEN sequence = '13-1-2'
					THEN created
				END) AS sequence1312_created,
			MAX(
				CASE
					WHEN sequence = '13-2-1'
					THEN created
				END) AS sequence1321_created,
			MAX(
				CASE
					WHEN sequence = '13-2-2'
					THEN created
				END) AS sequence1322_created,
			MAX(
				CASE
					WHEN sequence = '13-3-1'
					THEN created
				END) AS sequence1331_created,
			MAX(
				CASE
					WHEN sequence = '13-3-2'
					THEN created
				END) AS sequence1332_created,
			MAX(
				CASE
					WHEN sequence = '13-4-1'
					THEN created
				END) AS sequence1341_created,
			MAX(
				CASE
					WHEN sequence = '13-4-2'
					THEN created
				END) AS sequence1342_created,
			MAX(
				CASE
					WHEN sequence = '13-5-1'
					THEN created
				END) AS sequence1351_created,
			MAX(
				CASE
					WHEN sequence = '13-5-2'
					THEN created
				END) AS sequence1352_created,
			MIN(
				CASE
					WHEN sequence = '14'
					THEN created
				END) AS sequence14_created_first,
			MAX(
				CASE
					WHEN sequence = '14'
					THEN created
				END) AS sequence14_created_last,
			MIN(
				CASE
					WHEN sequence = '15'
					THEN created
				END) AS sequence15_created_first,
			MAX(
				CASE
					WHEN sequence = '15'
					THEN created
				END) AS sequence15_created_last,
			MAX(
				CASE
					WHEN sequence = '16'
					THEN created
				END) AS sequence16_created,
			MAX(
				CASE
					WHEN sequence = '16-1'
					THEN created
				END) AS sequence161_created,
			MAX(
				CASE
					WHEN sequence = '17'
					THEN created
				END) AS sequence17_created,
			MAX(
				CASE
					WHEN sequence = '17-1'
					THEN created
				END) AS sequence171_created,
			MIN(
				CASE
					WHEN sequence = '18'
					THEN created
				END) AS sequence18_created_first,
			MAX(
				CASE
					WHEN sequence = '18'
					THEN created
				END) AS sequence18_created_last,
			MIN(
				CASE
					WHEN sequence = '19'
					THEN created
				END) AS sequence19_created_first,
			MAX(
				CASE
					WHEN sequence = '19'
					THEN created
				END) AS sequence19_created_last
		FROM
			fdm.fdm_popvender_shop_process_log_chain
		WHERE
			dp = 'ACTIVE'
			AND SUBSTR(created, 0, 10) <= sysdate( - 1)
		GROUP BY
			seller_id
	)
	t4
ON
	t1.seller_id = t4.seller_id
WHERE
	sequence3_created_first IS NOT NULL
) t6





group by - 条数的聚合,合并同类项

GROUP BY 语句用于结合聚合函数,根据一个或多个列对结果集进行分组

下面是选自 “Websites” 表的数据:
±—±-------------±--------------------------±------±--------+
| id | name | url | alexa | country |
±—±-------------±--------------------------±------±--------+
| 1 | Google | https://www.google.cm/ | 1 | USA |
| 2 | 淘宝 | https://www.taobao.com/ | 13 | CN |
| 3 | 菜鸟教程 | http://www.runoob.com/ | 4689 | CN |
| 4 | 微博 | http://weibo.com/ | 20 | CN |
| 5 | Facebook | https://www.facebook.com/ | 3 | USA |
| 7 | stackoverflow | http://stackoverflow.com/ | 0 | IND |
±—±--------------±--------------------------±------±--------+

下面是 “access_log” 网站访问记录表的数据:
mysql> SELECT * FROM access_log;
±----±--------±------±-----------+
| aid | site_id | count | date |
±----±--------±------±-----------+
| 1 | 1 | 45 | 2016-05-10 |
| 2 | 3 | 100 | 2016-05-13 |
| 3 | 1 | 230 | 2016-05-14 |
| 4 | 2 | 10 | 2016-05-14 |
| 5 | 5 | 205 | 2016-05-14 |
| 6 | 4 | 13 | 2016-05-15 |
| 7 | 3 | 220 | 2016-05-15 |
| 8 | 5 | 545 | 2016-05-16 |
| 9 | 3 | 201 | 2016-05-17 |
±----±--------±------±-----------+
9 rows in set (0.00 sec)

GROUP BY 简单应用

SELECT site_id, SUM(access_log.count) AS nums
FROM access_log GROUP BY site_id;

在这里插入图片描述

GROUP BY 多表连接

SELECT Websites.name,COUNT(access_log.aid) AS nums FROM access_log
LEFT JOIN Websites
ON access_log.site_id=Websites.id
GROUP BY Websites.name;

在这里插入图片描述


order by - 排序

ORDER BY 关键字用于对结果集按照一个列或者多个列进行排序。
±—±-------------±--------------------------±------±--------+
| id | name | url | alexa | country |
±—±-------------±--------------------------±------±--------+
| 1 | Google | https://www.google.cm/ | 1 | USA |
| 2 | 淘宝 | https://www.taobao.com/ | 13 | CN |
| 3 | 菜鸟教程 | http://www.runoob.com/ | 4689 | CN |
| 4 | 微博 | http://weibo.com/ | 20 | CN |
| 5 | Facebook | https://www.facebook.com/ | 3 | USA |
±—±-------------±--------------------------±------±--------+

SELECT * FROM Websites
ORDER BY alexa;

在这里插入图片描述


sum() - 具体数值加和

SUM() 函数返回数值列的总数

mysql> SELECT * FROM access_log;
±----±--------±------±-----------+
| aid | site_id | count | date |
±----±--------±------±-----------+
| 1 | 1 | 45 | 2016-05-10 |
| 2 | 3 | 100 | 2016-05-13 |
| 3 | 1 | 230 | 2016-05-14 |
| 4 | 2 | 10 | 2016-05-14 |
| 5 | 5 | 205 | 2016-05-14 |
| 6 | 4 | 13 | 2016-05-15 |
| 7 | 3 | 220 | 2016-05-15 |
| 8 | 5 | 545 | 2016-05-16 |
| 9 | 3 | 201 | 2016-05-17 |
±----±--------±------±-----------+
9 rows in set (0.00 sec)

SELECT SUM(count) AS nums FROM access_log;

在这里插入图片描述


where - 当

SELECT * FROM Websites WHERE country='CN';

CONCAT - 拼接

CONCAT(item_sku_id, user_log_acct) AS D

CASE WHEN THEN END - 如果 ,就

select name,id,(case when id=34 then salary*2

                     when id=45 then salary*3

                                else salary

                                end) new_salary
                                
from semp;

📍Hive之查询语法
https://blog.csdn.net/cch19930303/article/details/108609035?spm=1001.2014.3001.5506
📍大数据技术之Hive
https://blog.csdn.net/huxili2020/article/details/119982368

评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

TEAM-AG

编程公园:输出是最好的学习方式

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值