|
| 1 | +-- Commands are written on one line for psql execution: \copy meta-commands must stay on a single line, while the plain SQL statements can be reformatted for readability (e.g. when running in a SQL client). |
| 2 | + |
| 3 | +-- This script performs the following steps: |
| 4 | +-- 1 - imports new data from csv file. The csv file is expected to be a full export of all partner records, but only new records will be inserted into the permanent table. |
| 5 | +-- 2 - creates a list of email addresses of participants eligible for incentives |
| 6 | +-- 3 - updates participants that have been exported so that they are timestamped as having received an incentive |
| 7 | + |
| 8 | +-- Incentive eligibility is based on: |
| 9 | +-- - having submitted the questionnaire |
| 10 | +-- - not having already received an incentive |
| 11 | +-- - Online submitted_at date is earlier than telephone questionnaire date_conducted. |
| 12 | + |
| 13 | +-- Partner import strategy: |
| 14 | +-- The partner sends a full CSV export every week. Rather than replacing all data each run, we insert only |
| 15 | +-- new records using a UNIQUE index on (nhs_number, conducted_at) and ON CONFLICT DO NOTHING. |
| 16 | +-- Records removed from the partner's source will NOT be deleted. |
| 17 | + |
| 18 | + |
| 19 | +-- Steps to follow: |
| 20 | +-- 1. Log into AVD. |
| 21 | +-- 2. Upload csv file to AVD |
| 22 | +-- 3. Find file in RemoteVirtualDrive and copy to accessible location for psql COPY command. |
| 23 | +-- 4. Search for PATH_TO_FILE (CSV to import) and PATH_TO_EXPORT_FILE (CSV to write) and replace each with the actual file path. |
| 24 | +-- 5. Log in to the DB in AVD. |
| 25 | + |
| 26 | + |
-- ============================================================
-- RUN ONCE: create the permanent partner import table.
-- Only needed on first setup; IF NOT EXISTS makes a repeat run
-- a no-op. The unique constraint on (nhs_number, conducted_at)
-- is the conflict target that the weekly
-- INSERT ... ON CONFLICT DO NOTHING uses to skip rows that
-- have already been loaded.
--
-- The statement is laid out one column per line for
-- readability; psql accepts multi-line SQL statements (only
-- backslash meta-commands such as \copy must stay on one line).
--
-- NOTE(review): both key columns are nullable, and PostgreSQL
-- treats NULLs as distinct in a UNIQUE constraint by default,
-- so this constraint cannot de-duplicate rows whose
-- nhs_number or conducted_at is NULL.
-- ============================================================
CREATE TABLE IF NOT EXISTS inhealth_partner_data (
    nhs_number                               TEXT,
    date_of_birth                            TEXT,
    -- Raw text value as supplied in the partner CSV.
    date_conducted                           TEXT,
    -- Timestamp parsed from date_conducted by the weekly load.
    conducted_at                             TIMESTAMPTZ,
    smoking_status                           TEXT,
    average_cigarettes_per_day_while_smoking TEXT,
    duration_smoked_years                    TEXT,
    years_since_quitting_smoking             TEXT,
    height_measurement_type                  TEXT,
    height_measurement_value_metric_cm       TEXT,
    weight_measurement_type                  TEXT,
    weight_measurement_value_metric_kg       TEXT,
    previous_respiratory_diagnosis           TEXT,
    personal_history_of_previous_cancer      TEXT,
    family_history_of_lung_cancer            TEXT,
    personal_history_of_asthma               TEXT,
    asbestos_exposure_from_job_or_activity   TEXT,
    education                                TEXT,
    ethnicity                                TEXT,
    plco_lung_cancer_risk_score              TEXT,
    llp_lung_cancer_risk_score               TEXT,
    -- Time the row was inserted into this table.
    created_at                               TIMESTAMPTZ NOT NULL DEFAULT now(),
    CONSTRAINT uq_partner_nhs_conducted_at UNIQUE (nhs_number, conducted_at)
);
| 34 | + |
| 35 | + |
-- ============================================================
-- RUN WEEKLY: load new partner records from the CSV export.
--   Step 1: import the full CSV into a temporary staging table.
--   Step 2: copy rows into inhealth_partner_data, skipping any
--           row whose (nhs_number, conducted_at) already exists
--           (ON CONFLICT ... DO NOTHING).
--
-- Rows without an NHS number or without a conducted date are
-- NOT loaded. PostgreSQL treats NULLs as distinct in a UNIQUE
-- constraint, so such rows would never conflict and would be
-- inserted again on every weekly run. They also cannot match a
-- participant (no NHS number) or satisfy the
-- "conducted_at > submitted_at" eligibility test (no date).
-- The number of skipped rows is reported before the insert.
--
-- SQL statements below span several lines; this is fine in
-- psql. Only the \copy meta-command must stay on one line.
-- ============================================================

-- Step 1: create the staging table and load the CSV.
-- Drop any staging table left behind by a failed earlier attempt in this
-- session, so the load can simply be re-run.
DROP TABLE IF EXISTS tmp_incentive_partner_staging;

CREATE TEMP TABLE tmp_incentive_partner_staging (
    nhs_number                               TEXT,
    date_of_birth                            TEXT,
    date_conducted                           TEXT,
    smoking_status                           TEXT,
    average_cigarettes_per_day_while_smoking TEXT,
    duration_smoked_years                    TEXT,
    years_since_quitting_smoking             TEXT,
    height_measurement_type                  TEXT,
    height_measurement_value_metric_cm       TEXT,
    weight_measurement_type                  TEXT,
    weight_measurement_value_metric_kg       TEXT,
    previous_respiratory_diagnosis           TEXT,
    personal_history_of_previous_cancer      TEXT,
    family_history_of_lung_cancer            TEXT,
    personal_history_of_asthma               TEXT,
    asbestos_exposure_from_job_or_activity   TEXT,
    education                                TEXT,
    ethnicity                                TEXT,
    plco_lung_cancer_risk_score              TEXT,
    llp_lung_cancer_risk_score               TEXT
);

-- Copy data from the file into the staging table - update PATH_TO_FILE before running.
-- \copy is a psql meta-command: it reads the file on the client side and must stay on ONE line.

\copy tmp_incentive_partner_staging (nhs_number, date_of_birth, date_conducted, smoking_status, average_cigarettes_per_day_while_smoking, duration_smoked_years, years_since_quitting_smoking, height_measurement_type, height_measurement_value_metric_cm, weight_measurement_type, weight_measurement_value_metric_kg, previous_respiratory_diagnosis, personal_history_of_previous_cancer, family_history_of_lung_cancer, personal_history_of_asthma, asbestos_exposure_from_job_or_activity, education, ethnicity, plco_lung_cancer_risk_score, llp_lung_cancer_risk_score) FROM 'PATH_TO_FILE' WITH (FORMAT csv, HEADER true);

-- Report how many staged rows lack a usable key and will therefore be skipped.
-- Check this number: an unexpectedly large value suggests a malformed export.
SELECT count(*) AS rows_skipped_missing_key
FROM tmp_incentive_partner_staging
WHERE NULLIF(nhs_number, '') IS NULL
   OR NULLIF(date_conducted, '') IS NULL;

-- Step 2: insert staged rows that have a complete key. Rows whose
-- (nhs_number, conducted_at) already exist in the permanent table are skipped.
-- date_conducted is parsed as 'DD/MM/YYYY HH24:MI'; to_timestamp already
-- returns timestamptz, so no extra cast is needed.
-- NOTE(review): the parsed value is interpreted in the session time zone -
-- confirm this matches the partner's export.
INSERT INTO inhealth_partner_data (
    nhs_number,
    date_of_birth,
    date_conducted,
    conducted_at,
    smoking_status,
    average_cigarettes_per_day_while_smoking,
    duration_smoked_years,
    years_since_quitting_smoking,
    height_measurement_type,
    height_measurement_value_metric_cm,
    weight_measurement_type,
    weight_measurement_value_metric_kg,
    previous_respiratory_diagnosis,
    personal_history_of_previous_cancer,
    family_history_of_lung_cancer,
    personal_history_of_asthma,
    asbestos_exposure_from_job_or_activity,
    education,
    ethnicity,
    plco_lung_cancer_risk_score,
    llp_lung_cancer_risk_score
)
SELECT
    s.nhs_number,
    s.date_of_birth,
    s.date_conducted,
    to_timestamp(s.date_conducted, 'DD/MM/YYYY HH24:MI'),
    s.smoking_status,
    s.average_cigarettes_per_day_while_smoking,
    s.duration_smoked_years,
    s.years_since_quitting_smoking,
    s.height_measurement_type,
    s.height_measurement_value_metric_cm,
    s.weight_measurement_type,
    s.weight_measurement_value_metric_kg,
    s.previous_respiratory_diagnosis,
    s.personal_history_of_previous_cancer,
    s.family_history_of_lung_cancer,
    s.personal_history_of_asthma,
    s.asbestos_exposure_from_job_or_activity,
    s.education,
    s.ethnicity,
    s.plco_lung_cancer_risk_score,
    s.llp_lung_cancer_risk_score
FROM tmp_incentive_partner_staging AS s
WHERE NULLIF(s.nhs_number, '') IS NOT NULL
  AND NULLIF(s.date_conducted, '') IS NOT NULL
ON CONFLICT (nhs_number, conducted_at) DO NOTHING;

-- Delete the temporary staging table.
DROP TABLE IF EXISTS tmp_incentive_partner_staging;
| 57 | + |
| 58 | + |
-- ============================================================
-- TRANSACTION START: export participants eligible for an
-- incentive and record them in questions_incentivised.
-- Update PATH_TO_EXPORT_FILE before running.
--
-- A participant (identified by NHS number) is selected when:
--   - a questions_user row with that NHS number has a response
--     set whose submitted_at is earlier than a partner
--     conducted_at for the same NHS number, and
--   - no questions_user row sharing that NHS number already
--     has a questions_incentivised row.
--
-- The transaction is left open on purpose so the operator can
-- inspect the row count and then type COMMIT; or ROLLBACK;.
-- NOTE(review): ROLLBACK undoes the database changes only; the
-- CSV written by \copy stays on disk.
--
-- SQL statements below span several lines; this is fine in
-- psql. Only the \copy meta-command must stay on one line.
-- ============================================================

\r
BEGIN;

-- Build the list of eligible participants, one row per NHS number.
CREATE TEMP TABLE tmp_eligible_incentive_export AS
WITH canonical_users AS (
    -- One user per NHS number: the most recently created account.
    SELECT DISTINCT ON (nhs_number)
        id,
        email,
        given_name,
        family_name,
        nhs_number
    FROM questions_user
    WHERE nhs_number IS NOT NULL
    ORDER BY nhs_number ASC, created_at DESC
)
-- For each canonical user keep the latest qualifying response set
-- (ties on submitted_at are broken by the highest response set id).
SELECT DISTINCT ON (cu.nhs_number)
    cu.id AS user_id,
    qrs.id AS response_set_id,
    cu.email,
    cu.given_name,
    cu.family_name
FROM canonical_users AS cu
INNER JOIN questions_responseset AS qrs
    ON qrs.user_id = cu.id
INNER JOIN inhealth_partner_data AS ipd
    ON ipd.nhs_number = cu.nhs_number
WHERE ipd.conducted_at > qrs.submitted_at::timestamptz
  AND NOT EXISTS (
      -- Any account with the same NHS number that was already incentivised.
      SELECT 1
      FROM questions_incentivised AS qi
      INNER JOIN questions_user AS iu
          ON iu.id = qi.user_id
      WHERE iu.nhs_number = cu.nhs_number
  )
ORDER BY cu.nhs_number ASC, qrs.submitted_at DESC, qrs.id DESC;

-- Write the contact details of the eligible participants to the export file.
\copy (SELECT email, given_name, family_name FROM tmp_eligible_incentive_export ORDER BY family_name) TO 'PATH_TO_EXPORT_FILE' WITH (FORMAT csv, HEADER true);

-- Timestamp every exported participant as having received an incentive.
INSERT INTO questions_incentivised (
    created_at,
    updated_at,
    incentivised_at,
    user_id,
    response_set_id
)
SELECT
    now(),
    now(),
    now(),
    user_id,
    response_set_id
FROM tmp_eligible_incentive_export;

-- Number of participants exported and marked in this run.
SELECT count(*) AS rows_exported_and_marked
FROM tmp_eligible_incentive_export;

-- If happy with the SELECT result, type COMMIT; - if not, type ROLLBACK; to undo the changes.

-- COMMIT;
-- ROLLBACK;

-- TRANSACTION END

-- Delete the temporary export table.
DROP TABLE IF EXISTS tmp_eligible_incentive_export;
0 commit comments