File: delete-duplicates.sql

package info (click to toggle)
python-stetl 2.0%2Bds-3
  • links: PTS, VCS
  • area: main
  • in suites: bullseye
  • size: 90,156 kB
  • sloc: python: 5,103; xml: 707; sql: 430; makefile: 154; sh: 65
file content (59 lines) | stat: -rw-r--r-- 2,212 bytes parent folder | download | duplicates (6)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
-- Auteur: Frank Steggink
-- Doel: script om dubbele records te verwijderen, middels een hulptabel

SET search_path={schema},public;

DROP TABLE IF EXISTS _nlx_temp;

CREATE OR REPLACE FUNCTION _nlx_dedup_data(tablename VARCHAR)
RETURNS VARCHAR AS
$$
DECLARE
    rowcount INTEGER;
BEGIN

    EXECUTE 'CREATE TABLE _nlx_temp AS SELECT ogc_fid, identificatie FROM ' || tablename || ' WHERE identificatie IN (SELECT identificatie FROM (SELECT identificatie, COUNT(*) AS aantal FROM ' || tablename || ' GROUP BY identificatie ORDER BY identificatie) AS x WHERE aantal>1);';

    EXECUTE 'DELETE FROM ' || tablename || ' WHERE ogc_fid IN (SELECT ogc_fid FROM _nlx_temp WHERE ogc_fid NOT IN (SELECT MIN(ogc_fid) FROM _nlx_temp GROUP BY identificatie));';
    GET DIAGNOSTICS rowcount = ROW_COUNT;

    DROP TABLE IF EXISTS _nlx_temp;

    RETURN tablename || ': ' || rowcount;

END;
$$
LANGUAGE plpgsql;

SELECT _nlx_dedup_data('functioneelgebied_punt');
SELECT _nlx_dedup_data('functioneelgebied_vlak');
SELECT _nlx_dedup_data('gebouw_vlak');
SELECT _nlx_dedup_data('geografischgebied_punt');
SELECT _nlx_dedup_data('geografischgebied_vlak');
SELECT _nlx_dedup_data('hoogteofdiepte_punt');
SELECT _nlx_dedup_data('hoogteverschilhz_lijn');
SELECT _nlx_dedup_data('hoogteverschillz_lijn');
SELECT _nlx_dedup_data('inrichtingselement_lijn');
SELECT _nlx_dedup_data('inrichtingselement_punt');
SELECT _nlx_dedup_data('isohoogte_lijn');
SELECT _nlx_dedup_data('kadeofwal_lijn');
SELECT _nlx_dedup_data('overigrelief_lijn');
SELECT _nlx_dedup_data('overigrelief_punt');
SELECT _nlx_dedup_data('registratiefgebied_punt');
SELECT _nlx_dedup_data('registratiefgebied_vlak');
SELECT _nlx_dedup_data('spoorbaandeel_lijn');
SELECT _nlx_dedup_data('spoorbaandeel_punt');
SELECT _nlx_dedup_data('terrein_vlak');
SELECT _nlx_dedup_data('waterdeel_lijn');
SELECT _nlx_dedup_data('waterdeel_punt');
SELECT _nlx_dedup_data('waterdeel_vlak');
SELECT _nlx_dedup_data('wegdeel_hartlijn');
SELECT _nlx_dedup_data('wegdeel_hartpunt');
SELECT _nlx_dedup_data('wegdeel_lijn');
SELECT _nlx_dedup_data('wegdeel_punt');
SELECT _nlx_dedup_data('wegdeel_vlak');

DROP FUNCTION _nlx_dedup_data(tablename VARCHAR);

SET search_path="$user",public;