重复数据查询

262 阅读1分钟

在几千条记录里,存在着些相同的记录,如何能用SQL语句,删除掉重复的数据。

单字段去重

  1. 查找出表中所有的重复数据
select *
  from #{table}
 where #{col} in
       (select #{col} from #{table} group by #{col} having count(1) > 1);
  1. 删除表中多余的重复数据
-- 表中存在唯一列
delete from #{table}
 where #{col} in
       (select #{col} from #{table} group by #{col} having count(1) > 1)
   and #{onlyCol} not in (select min(#{onlyCol})
                            from #{table}
                           group by #{col}
                          having count(1) > 1);
                          
-- 表中不存在唯一列:oracle
delete from #{table}
 where #{col} in
       (select #{col} from #{table} group by #{col} having count(1) > 1)
   and rowid not in
       (select min(rowid) from #{table} group by #{col} having count(1) > 1);

多字段去重

  1. 查找出表中所有的重复数据
select *
  from #{table}
 where (#{col1}, #{col2}, #{col3}.. .) in
       (select #{col1}, #{col2}, #{col3} .. .
          from #{table}
         group by #{col1}, #{col2}, #{col3} .. .
        having count(1) > 1)
  1. 删除表中多余的重复数据
-- 表中存在唯一列
delete from #{table}
 where (#{col1}, #{col2}, #{col3}.. .) in
       (select (#{col1}, #{col2}, #{col3}.. .)
          from #{table}
         group by #{col}
        having count(1) > 1)
   and #{onlyCol} not in (select min(#{onlyCol})
                            from #{table}
                           group by (#{col1}, #{col2}, #{col3}.. .)
                          having count(1) > 1);
                          
-- 表中不存在唯一列:oracle
delete from #{table}
 where (#{col1}, #{col2}, #{col3}.. .) in
       (select (#{col1}, #{col2}, #{col3}.. .)
          from #{table}
         group by (#{col1}, #{col2}, #{col3}.. .)
        having count(1) > 1)
   and rowid not in (select min(rowid)
                       from #{table}
                      group by (#{col1}, #{col2}, #{col3}.. .)
                     having count(1) > 1);