SQL-更进一步

266 阅读2分钟

-- 1.冷门优化知识

GROUP BY和DISTINCT两种去重方式:

  • 数据量达到一定程度,去重字段枚举值也很复杂时,GROUP BY确实在性能上更优秀,同时可以避免数据倾斜。但具体情况具体分析
  • DISTINCT命令会在内存中构建一个hashtable,查找去重的时间复杂度是O(1);GROUP BY在不同版本间变动比较大,有的版本会用构建hashtable的形式去重,有的版本会通过排序的方式, 排序的最优时间复杂度无法到O(1)。

-- 2.语句函数相关使用

  • 日期差
select w1.id as Id 
from Weather w1
join Weather w2
on Date(w1.recordDate) = Date(w2.recordDate) + INTERVAL 1 DAY
where w1.temperature > w2.temperature;
select w1.id 
from Weather as w1,Weather as w2 
where datediff(w1.recordDate,w2.recordDate) = 1 
where w1.temperature > w2.temperature;
  • 小数四舍五入保留位数,平均值
select a1.machine_id,round(avg(a2.timestamp - a1.timestamp),3) as processing_time
 from Activity a1 
 left join Activity a2 
 on a1.machine_id=a2.machine_id 
 and a1.process_id = a2.process_id 
 and a2.activity_type = 'end' 
 where a1.activity_type = 'start'
group by a1.machine_id
  • 连表数量大于5
自内联表然后直接
HAVING COUNT(m.managerId) >= 5

select e1.name from Employee e1 left join
(select e2.managerId,count(1) cnt from Employee e2 where e2.managerId is not null group by e2.managerId) ee
on e1.id = ee.managerId 
where ee.cnt >=5;
  • 平均数 可加条件
SELECT s.user_id,
ROUND(IFNULL(AVG(c.action='confirmed'), 0), 2) AS confirmation_rate
FROM
    Signups AS s
LEFT JOIN
    Confirmations AS c
ON s.user_id = c.user_id
GROUP BY s.user_id
  • 取余
select *
from cinema
where description != 'boring' and mod(id,2) = 1
order by rating desc
  • 灵活的子查询
select contest_id,
round(count(r.user_id)*100/(select count(u2.user_id) from users u2),2) percentage
from register r 
group by contest_id
order by percentage desc,contest_id asc
  • 一种count带条件计算方式
select query_name
,ROUND(avg(rating/position),2) quality 
,ROUND(count(rating < 3 or null)/count(1)*100,2) poor_query_percentage 
from Queries
group by query_name
  • 分组求首条,双字段匹配
select round (
    count(order_date = customer_pref_delivery_date or null) * 100 /count(*),2) as immediate_percentage
from Delivery 
where (customer_id, order_date) in (
 select customer_id, min(order_date)
    from delivery
    group by customer_id )
  • 同上,分组求最小日期,再匹配前一天日期
select round(a.cnt/b.cnt,2) as fraction 
from
(select count(1) cnt
from Activity
where (player_id, DATE_SUB(event_date, INTERVAL 1 DAY)) in 
(select player_id,MIN(event_date)
from Activity group by player_id)
) a
, 
(select count(distinct player_id) cnt from Activity ) b
  • 统计 仅 在某时间区间内存在记录的id,使用max和min以及having
select product_id, product_name
from Product
where product_id in(
    select product_id
    from Sales
    Group by product_id
    Having min(sale_date) >= '2019-01-01' and max(sale_date) <= '2019-03-31'
)
  • 多用having
SELECT
class
FROM
Courses
GROUP BY
class
HAVING COUNT(distinct student) >= 5