insert into match_candidates (row1_id,row2_id,strat_name,status) select t1.row_id, t2.row_id, 'name_address_city_zip_county_units_weak_improved_trim_v5', case when t1.shim_id = t2.shim_id then 'good' else 'bad' end from merge_list t1, merge_list t2 where t1.row_id < t2.row_id -- looking for matches from the AHI-Doug source to some -- other source and ( (t1.source = 'AHI-Doug' or t2.source = 'AHI-Doug') and (t1.source != t2.source) ) -- no duplicate matches from known phase things -- this can be weakened to show no previous -- matches from _any_ stratagy and ( not exists ( select * from match_candidates m where t1.row_id = m.row1_id and t2.row_id = m.row2_id ) ) -- strategy layers start here (assume at least one) AND -- similar name (weak) t1.name is not null and t2.name is not null and ( regexp_replace(lower(t1.name),'[^a-z ]+','','g') ~ regexp_replace(lower(t2.name),'[^a-z ]+','','g') or regexp_replace(lower(t2.name),'[^a-z ]+','','g') ~ regexp_replace(lower(t1.name),'[^a-z ]+','','g') ) AND -- similar address (weak) t1.address is not null and t2.address is not null and ( regexp_replace(lower(t1.address),'[^a-z ]+','','g') ~ regexp_replace(lower(t2.address),'[^a-z ]+','','g') or regexp_replace(lower(t2.address),'[^a-z ]+','','g') ~ regexp_replace(lower(t1.address),'[^a-z ]+','','g') ) AND -- similar city (weak) t1.city is not null and t2.city is not null and ( regexp_replace(lower(t1.city),'[^a-z ]+','','g') ~ regexp_replace(lower(t2.city),'[^a-z ]+','','g') or regexp_replace(lower(t2.city),'[^a-z ]+','','g') ~ regexp_replace(lower(t1.city),'[^a-z ]+','','g') )