conn / as sysdba spool c:\temp\duplikate_loeschen_test.txt set lin 300 @ c:\temp\create_bigtab_2M set timing on set autotrace on explain statistics PROMPT Anzahl der Duplikate ermitteln SELECT COUNT(*), zahl FROM (SELECT owner, object_name, subobject_name, object_id, data_object_id, object_type, created, last_ddl_time, timestamp, status, temporary, generated, secondary, COUNT(*) zahl FROM scott.big_tab GROUP BY owner, object_name, subobject_name, object_id, data_object_id, object_type, created, last_ddl_time, timestamp, status, temporary, generated, secondary HAVING COUNT(*) > 1) GROUP BY zahl; ALTER SYSTEM FLUSH buffer_cache; ALTER SYSTEM FLUSH shared_pool; PROMPT Loeschen mit analytischen Funktionen DELETE FROM scott.big_tab WHERE rowid IN (SELECT rid FROM (SELECT rowid rid, ROW_NUMBER() OVER(PARTITION BY owner, object_name, subobject_name, object_id, data_object_id, object_type, created, last_ddl_time, timestamp, status, temporary, generated, secondary ORDER BY rowid) rn FROM scott.big_tab) WHERE rn <> 1); @ c:\temp\create_bigtab_2M ALTER SYSTEM FLUSH buffer_cache; ALTER SYSTEM FLUSH shared_pool; PROMPT Loeschen mit analytischen Funktionen - Partitionierung über eine Spalte DELETE FROM scott.big_tab WHERE rowid IN (SELECT rid FROM (SELECT rowid rid, ROW_NUMBER() OVER (PARTITION BY object_id ORDER BY rowid) rn FROM scott.big_tab) WHERE rn <> 1); @ c:\temp\create_bigtab_2M ALTER SYSTEM FLUSH buffer_cache; ALTER SYSTEM FLUSH shared_pool; PROMPT Loeschen über korrelierte Unterabfrage DELETE FROM scott.big_tab b1 WHERE rowid < (SELECT MAX(rowid) FROM scott.big_tab b2 WHERE b1.owner = b2.owner AND b1.object_name = b2.object_name AND NVL(b1.subobject_name, 'nn') = NVL(b2.subobject_name, 'nn') AND b1.object_id = b2.object_id AND NVL(b1.data_object_id,0) = NVL(b2.data_object_id,0) AND b1.object_type = b2.object_type AND b1.created = b2.created AND NVL(b1.last_ddl_time,sysdate) = NVL(b2.last_ddl_time,sysdate) AND NVL(b1.timestamp, systimestamp) = NVL(b2.timestamp, systimestamp) AND b1.status = b2.status AND b1.temporary = b2.temporary AND b1.generated = b2.generated AND b1.secondary = b2.secondary); @ c:\temp\create_bigtab_2M ALTER SYSTEM FLUSH buffer_cache; ALTER SYSTEM FLUSH shared_pool; PROMPT Loeschen über nicht-korrelierte Unterabfrage DELETE FROM scott.big_tab WHERE rowid NOT IN (SELECT MIN(rowid) FROM scott.big_tab GROUP BY owner, object_name, subobject_name, object_id, data_object_id, object_type, created, last_ddl_time, timestamp, status, temporary, generated, secondary); PROMPT Duplikate eliminieren ueber neue Tabelle conn scott/tiger PROMPT Tabelle ohne Duplikate erstellen CREATE TABLE big_tab2 AS SELECT id, owner, object_name, subobject_name, object_id, data_object_id, object_type, created, last_ddl_time, timestamp, status, temporary, generated, secondary FROM (SELECT b.*, ROW_NUMBER() OVER(PARTITION BY owner, object_name, subobject_name, object_id, data_object_id, object_type, created, last_ddl_time, timestamp, status, temporary, generated, secondary ORDER BY rowid) rn FROM big_tab b) WHERE rn = 1; PROMPT alte Tabelle löschen DROP TABLE big_tab PURGE; PROMPT Primärschlüssel auf neuer Tabelle erstellen ALTER TABLE big_tab2 ADD CONSTRAINT big_tab_pk PRIMARY KEY(id); PROMPT neue Tabelle umbenennen RENAME big_tab2 TO big_tab; spool off DROP TABLE big_tab PURGE; exit