HIVE
Clause order of a query statement
SELECT [ALL | DISTINCT] select_expr, select_expr, ...
FROM table_reference
[WHERE where_condition]
[GROUP BY col_list]
[ORDER BY col_list]
[CLUSTER BY col_list | [DISTRIBUTE BY col_list] [SORT BY col_list]]
[LIMIT number]
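A minimal sketch of this clause order in use (the table and column names below are hypothetical placeholders):
SELECT vender_id, SUM(gmv) AS total_gmv
FROM some_table
WHERE dp = 'ACTIVE'
GROUP BY vender_id
ORDER BY total_gmv DESC
LIMIT 100;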
View partitions
DESC xxx    -- shows the table schema, including its partition column(s)
select distinct dp from xxx limit 10    -- samples the values of the partition column (here named dp)
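Hive can also list a table's partitions directly; with the same placeholder table name:
SHOW PARTITIONS xxx;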
Nested subqueries
The query below computes, for each seller, the durations between onboarding milestones (start filling in the application, first submission for review, onboarding approval, first product on shelf, first order), then averages them in the outer query; intervals of 90 days or more are treated as outliers and replaced with NULL.
SELECT avg(t6.开始填写资料_to_首次提交审核) as 开始填写资料_to_首次提交审核平均值,
avg(t6.首次提交成功_to_入驻审核通过) as 首次提交成功_to_入驻审核通过平均值,
avg(t6.开始填写资料_to_入驻审核通过) as 开始填写资料_to_入驻审核通过平均值,
avg(t6.入驻审核通过_to_首个商品上架) as 入驻审核通过_to_首个商品上架平均值,
avg(t6.首个商品上架_to_首笔订单产生) as 首个商品上架_to_首笔订单产生平均值
from (
SELECT
t1.seller_id AS Seller_id,
t2.vender_id AS Vender_id,
t4.sequence3_created_first AS 开始填写材料,
t4.sequence9_created_first AS 首次提交审核,
t4.sequence19_created_first AS 业务审核通过,
t3.first_on AS 首个商品上架,
t5.first_deal AS 首笔订单产生,
IF(ROUND((unix_timestamp(t4.sequence9_created_first) - unix_timestamp(t4.sequence3_created_first)) / 86400, 2) >= 90, NULL, ROUND((unix_timestamp(t4.sequence9_created_first) - unix_timestamp(t4.sequence3_created_first)) / 86400, 2)) AS 开始填写资料_to_首次提交审核,
ROUND((unix_timestamp(t4.sequence9_created_first) - unix_timestamp(t4.sequence3_created_first)) / 86400, 2) AS 开始填写资料_to_首次提交审核_含异常数据,
IF(ROUND((unix_timestamp(t4.sequence19_created_first) - unix_timestamp(t4.sequence9_created_first)) / 86400, 2) >= 90, NULL, ROUND((unix_timestamp(t4.sequence19_created_first) - unix_timestamp(t4.sequence9_created_first)) / 86400, 2)) AS 首次提交成功_to_入驻审核通过,
ROUND((unix_timestamp(t4.sequence19_created_first) - unix_timestamp(t4.sequence9_created_first)) / 86400, 2) AS 首次提交成功_to_入驻审核通过_含异常数据,
ROUND((unix_timestamp(t3.first_on) - unix_timestamp(t4.sequence19_created_first)) / 86400, 2) AS 入驻审核通过_to_首个商品上架,
ROUND((unix_timestamp(t5.first_deal) - unix_timestamp(t3.first_on)) / 86400, 2) AS 首个商品上架_to_首笔订单产生,
IF(ROUND((unix_timestamp(t4.sequence19_created_first) - unix_timestamp(t4.sequence3_created_first)) / 86400, 2) >= 90, NULL, ROUND((unix_timestamp(t4.sequence19_created_first) - unix_timestamp(t4.sequence3_created_first)) / 86400, 2)) AS 开始填写资料_to_入驻审核通过,
ROUND((unix_timestamp(t4.sequence19_created_first) - unix_timestamp(t4.sequence3_created_first)) / 86400, 2) AS 开始填写资料_to_入驻审核通过_全量数据
FROM
(
SELECT
seller_id
FROM
fdm.fdm_popvender_shop_process_log_chain
WHERE
dp = 'ACTIVE'
GROUP BY
seller_id
)
t1
LEFT JOIN
(
SELECT
id AS shop_id,
vender_id
FROM
fdm.fdm_popvender_shop_seller_chain
WHERE
dp = 'ACTIVE'
AND vender_id IS NOT NULL
)
t2
ON
t1.seller_id = t2.shop_id
LEFT JOIN
(
SELECT
vender_id,
MIN(ON_SHELVES_TIME) AS FIRST_ON
FROM
fdm.fdm_vt_producttha_sku_chain
WHERE ON_SHELVES_TIME != ''
GROUP BY
vender_id
)
t3
ON
t2.vender_id = t3.vender_id
LEFT JOIN
(
SELECT
MIN(ord_deal_tm) AS first_deal,
vender_id
FROM
adm.adm_s04_glb_trade_ord_det_sum
GROUP BY
vender_id
)
t5
ON
t2.vender_id = t5.vender_id
LEFT JOIN
(
SELECT
seller_id,
MIN(
CASE
WHEN sequence = '1'
THEN created
END) AS sequence1_created_first,
MAX(
CASE
WHEN sequence = '1'
THEN created
END) AS sequence1_created_last,
MIN(
CASE
WHEN sequence = '2'
THEN created
END) AS sequence2_created_first,
MAX(
CASE
WHEN sequence = '2'
THEN created
END) AS sequence2_created_last,
MIN(
CASE
WHEN sequence = '3'
THEN created
END) AS sequence3_created_first,
MAX(
CASE
WHEN sequence = '3'
THEN created
END) AS sequence3_created_last,
MIN(
CASE
WHEN sequence = '4'
THEN created
END) AS sequence4_created_first,
MAX(
CASE
WHEN sequence = '4'
THEN created
END) AS sequence4_created_last,
MIN(
CASE
WHEN sequence = '5'
THEN created
END) AS sequence5_created_first,
MAX(
CASE
WHEN sequence = '5'
THEN created
END) AS sequence5_created_last,
MIN(
CASE
WHEN sequence = '6'
THEN created
END) AS sequence6_created_first,
MAX(
CASE
WHEN sequence = '6'
THEN created
END) AS sequence6_created_last,
MIN(
CASE
WHEN sequence = '7'
THEN created
END) AS sequence7_created_first,
MAX(
CASE
WHEN sequence = '7'
THEN created
END) AS sequence7_created_last,
MIN(
CASE
WHEN sequence = '8'
THEN created
END) AS sequence8_created_first,
MAX(
CASE
WHEN sequence = '8'
THEN created
END) AS sequence8_created_last,
MIN(
CASE
WHEN sequence = '9'
THEN created
END) AS sequence9_created_first,
MAX(
CASE
WHEN sequence = '9'
THEN created
END) AS sequence9_created_last,
MIN(
CASE
WHEN sequence = '10'
THEN created
END) AS sequence10_created_first,
MAX(
CASE
WHEN sequence = '10'
THEN created
END) AS sequence10_created_last,
MIN(
CASE
WHEN sequence = '11'
THEN created
END) AS sequence11_created_first,
MAX(
CASE
WHEN sequence = '11'
THEN created
END) AS sequence11_created_last,
MIN(
CASE
WHEN sequence = '12'
THEN created
END) AS sequence12_created_first,
MAX(
CASE
WHEN sequence = '12'
THEN created
END) AS sequence12_created_last,
MAX(
CASE
WHEN sequence = '13-1-1'
THEN created
END) AS sequence1311_created,
MAX(
CASE
WHEN sequence = '13-1-2'
THEN created
END) AS sequence1312_created,
MAX(
CASE
WHEN sequence = '13-2-1'
THEN created
END) AS sequence1321_created,
MAX(
CASE
WHEN sequence = '13-2-2'
THEN created
END) AS sequence1322_created,
MAX(
CASE
WHEN sequence = '13-3-1'
THEN created
END) AS sequence1331_created,
MAX(
CASE
WHEN sequence = '13-3-2'
THEN created
END) AS sequence1332_created,
MAX(
CASE
WHEN sequence = '13-4-1'
THEN created
END) AS sequence1341_created,
MAX(
CASE
WHEN sequence = '13-4-2'
THEN created
END) AS sequence1342_created,
MAX(
CASE
WHEN sequence = '13-5-1'
THEN created
END) AS sequence1351_created,
MAX(
CASE
WHEN sequence = '13-5-2'
THEN created
END) AS sequence1352_created,
MIN(
CASE
WHEN sequence = '14'
THEN created
END) AS sequence14_created_first,
MAX(
CASE
WHEN sequence = '14'
THEN created
END) AS sequence14_created_last,
MIN(
CASE
WHEN sequence = '15'
THEN created
END) AS sequence15_created_first,
MAX(
CASE
WHEN sequence = '15'
THEN created
END) AS sequence15_created_last,
MAX(
CASE
WHEN sequence = '16'
THEN created
END) AS sequence16_created,
MAX(
CASE
WHEN sequence = '16-1'
THEN created
END) AS sequence161_created,
MAX(
CASE
WHEN sequence = '17'
THEN created
END) AS sequence17_created,
MAX(
CASE
WHEN sequence = '17-1'
THEN created
END) AS sequence171_created,
MIN(
CASE
WHEN sequence = '18'
THEN created
END) AS sequence18_created_first,
MAX(
CASE
WHEN sequence = '18'
THEN created
END) AS sequence18_created_last,
MIN(
CASE
WHEN sequence = '19'
THEN created
END) AS sequence19_created_first,
MAX(
CASE
WHEN sequence = '19'
THEN created
END) AS sequence19_created_last
FROM
fdm.fdm_popvender_shop_process_log_chain
WHERE
dp = 'ACTIVE'
AND SUBSTR(created, 0, 10) <= sysdate(-1)
GROUP BY
seller_id
)
t4
ON
t1.seller_id = t4.seller_id
WHERE
sequence3_created_first IS NOT NULL
) t6
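The t4 subquery above uses conditional aggregation (MIN/MAX over a CASE expression) to pivot the per-event process log into one row per seller and one column per process step. A minimal sketch of the same pattern, reduced to a single step:
SELECT seller_id,
       MIN(CASE WHEN sequence = '3' THEN created END) AS sequence3_created_first,
       MAX(CASE WHEN sequence = '3' THEN created END) AS sequence3_created_last
FROM fdm.fdm_popvender_shop_process_log_chain
WHERE dp = 'ACTIVE'
GROUP BY seller_id;
MIN gives the first time a seller reached that step and MAX the most recent; rows whose sequence does not match fall into the implicit ELSE NULL and are ignored by the aggregates.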
GROUP BY - aggregate rows, merging like items
The GROUP BY statement is used together with aggregate functions to group the result set by one or more columns.
Below is data taken from the "Websites" table:
+----+---------------+---------------------------+-------+---------+
| id | name          | url                       | alexa | country |
+----+---------------+---------------------------+-------+---------+
|  1 | Google        | https://www.google.cm/    |     1 | USA     |
|  2 | 淘宝          | https://www.taobao.com/   |    13 | CN      |
|  3 | 菜鸟教程      | http://www.runoob.com/    |  4689 | CN      |
|  4 | 微博          | http://weibo.com/         |    20 | CN      |
|  5 | Facebook      | https://www.facebook.com/ |     3 | USA     |
|  7 | stackoverflow | http://stackoverflow.com/ |     0 | IND     |
+----+---------------+---------------------------+-------+---------+
Below is data from the "access_log" site-visit table:
mysql> SELECT * FROM access_log;
+-----+---------+-------+------------+
| aid | site_id | count | date       |
+-----+---------+-------+------------+
|   1 |       1 |    45 | 2016-05-10 |
|   2 |       3 |   100 | 2016-05-13 |
|   3 |       1 |   230 | 2016-05-14 |
|   4 |       2 |    10 | 2016-05-14 |
|   5 |       5 |   205 | 2016-05-14 |
|   6 |       4 |    13 | 2016-05-15 |
|   7 |       3 |   220 | 2016-05-15 |
|   8 |       5 |   545 | 2016-05-16 |
|   9 |       3 |   201 | 2016-05-17 |
+-----+---------+-------+------------+
9 rows in set (0.00 sec)
GROUP BY: simple usage
SELECT site_id, SUM(access_log.count) AS nums
FROM access_log GROUP BY site_id;
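With the sample data above this returns one row per site (site_id 1 → 275, 2 → 10, 3 → 521, 4 → 13, 5 → 750), but the row order is not guaranteed. To make the output deterministic, combine GROUP BY with ORDER BY (covered below), sorting on the aggregate:
SELECT site_id, SUM(access_log.count) AS nums
FROM access_log GROUP BY site_id
ORDER BY nums DESC;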
GROUP BY with a multi-table join
SELECT Websites.name,COUNT(access_log.aid) AS nums FROM access_log
LEFT JOIN Websites
ON access_log.site_id=Websites.id
GROUP BY Websites.name;
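With the sample data, this counts each website's visit records (Google 2, 淘宝 1, 菜鸟教程 3, 微博 1, Facebook 2). A small variant of the same join that sums the visit counts instead of counting rows:
SELECT Websites.name, SUM(access_log.count) AS total_count FROM access_log
LEFT JOIN Websites
ON access_log.site_id=Websites.id
GROUP BY Websites.name;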
ORDER BY - sorting
The ORDER BY keyword sorts the result set by one or more columns. It sorts in ascending order by default; append DESC for descending order.
+----+-----------+---------------------------+-------+---------+
| id | name      | url                       | alexa | country |
+----+-----------+---------------------------+-------+---------+
|  1 | Google    | https://www.google.cm/    |     1 | USA     |
|  2 | 淘宝      | https://www.taobao.com/   |    13 | CN      |
|  3 | 菜鸟教程  | http://www.runoob.com/    |  4689 | CN      |
|  4 | 微博      | http://weibo.com/         |    20 | CN      |
|  5 | Facebook  | https://www.facebook.com/ |     3 | USA     |
+----+-----------+---------------------------+-------+---------+
SELECT * FROM Websites
ORDER BY alexa;
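A sketch of descending and multi-column sorting on the same table, first by country and then by alexa rank in descending order within each country:
SELECT * FROM Websites
ORDER BY country, alexa DESC;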
SUM() - adding up numeric values
The SUM() function returns the total of a numeric column.
mysql> SELECT * FROM access_log;
+-----+---------+-------+------------+
| aid | site_id | count | date       |
+-----+---------+-------+------------+
|   1 |       1 |    45 | 2016-05-10 |
|   2 |       3 |   100 | 2016-05-13 |
|   3 |       1 |   230 | 2016-05-14 |
|   4 |       2 |    10 | 2016-05-14 |
|   5 |       5 |   205 | 2016-05-14 |
|   6 |       4 |    13 | 2016-05-15 |
|   7 |       3 |   220 | 2016-05-15 |
|   8 |       5 |   545 | 2016-05-16 |
|   9 |       3 |   201 | 2016-05-17 |
+-----+---------+-------+------------+
9 rows in set (0.00 sec)
SELECT SUM(count) AS nums FROM access_log;
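With the sample data this returns 1569. SUM() can be combined with WHERE (covered below) to total only matching rows, for example visits on or after 2016-05-14:
SELECT SUM(count) AS nums FROM access_log WHERE date >= '2016-05-14';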
WHERE - filtering (keep only rows where the condition is true)
SELECT * FROM Websites WHERE country='CN';
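With the Websites sample data this returns the three CN rows (淘宝, 菜鸟教程, 微博). Conditions can be combined; a small sketch on the same table, which keeps only 淘宝 and 微博:
SELECT * FROM Websites WHERE country='CN' AND alexa < 100;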
CONCAT - string concatenation
CONCAT(item_sku_id, user_log_acct) AS D    -- a select-list fragment: concatenates two columns into one output column named D
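A complete example on the Websites sample table, concatenating each site's name and url into a single column:
SELECT CONCAT(name, ': ', url) AS name_url FROM Websites;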
CASE WHEN ... THEN ... END - if ... then ...
select name, id,
       (case when id = 34 then salary * 2
             when id = 45 then salary * 3
             else salary
        end) as new_salary
from semp;
This doubles the salary for id 34, triples it for id 45, and leaves all other rows unchanged.
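The same pattern on the Websites sample table above, labeling each row by its country value:
SELECT name,
       (CASE WHEN country = 'CN' THEN 'domestic' ELSE 'overseas' END) AS region
FROM Websites;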
📍 Hive query syntax
https://blog.csdn.net/cch19930303/article/details/108609035?spm=1001.2014.3001.5506
📍 Big data technology: Hive
https://blog.csdn.net/huxili2020/article/details/119982368