在几千条记录里,存在着些相同的记录,如何能用SQL语句,删除掉重复的数据。
单字段去重
- 查找出表中所有的重复数据
select *
from #{table}
where #{col} in
(select #{col} from #{table} group by #{col} having count(1) > 1);
- 删除表中多余的重复数据
-- 表中存在唯一列
delete from #{table}
where #{col} in
(select #{col} from #{table} group by #{col} having count(1) > 1)
and #{onlyCol} not in (select min(#{onlyCol})
from #{table}
group by #{col}
having count(1) > 1);
-- 表中不存在唯一列:oracle
delete from #{table}
where #{col} in
(select #{col} from #{table} group by #{col} having count(1) > 1)
and rowid not in
(select min(rowid) from #{table} group by #{col} having count(1) > 1);
多字段去重
- 查找出表中所有的重复数据
select *
from #{table}
where (#{col1}, #{col2}, #{col3}.. .) in
(select #{col1}, #{col2}, #{col3} .. .
from #{table}
group by #{col1}, #{col2}, #{col3} .. .
having count(1) > 1)
- 删除表中多余的重复数据
-- 表中存在唯一列
delete from #{table}
where (#{col1}, #{col2}, #{col3}.. .) in
(select (#{col1}, #{col2}, #{col3}.. .)
from #{table}
group by #{col}
having count(1) > 1)
and #{onlyCol} not in (select min(#{onlyCol})
from #{table}
group by (#{col1}, #{col2}, #{col3}.. .)
having count(1) > 1);
-- 表中不存在唯一列:oracle
delete from #{table}
where (#{col1}, #{col2}, #{col3}.. .) in
(select (#{col1}, #{col2}, #{col3}.. .)
from #{table}
group by (#{col1}, #{col2}, #{col3}.. .)
having count(1) > 1)
and rowid not in (select min(rowid)
from #{table}
group by (#{col1}, #{col2}, #{col3}.. .)
having count(1) > 1);