99from datetime import datetime , timezone
1010from pydantic import ValidationError
1111from mesh_download .errors import MeshMessageNotFound
12+ from mesh_download .document_store import DocumentAlreadyExistsError
1213
1314
1415def setup_mocks ():
@@ -19,6 +20,7 @@ def setup_mocks():
1920 # Set up default config attributes
2021 config .mesh_client = Mock ()
2122 config .download_metric = Mock ()
23+ config .duplicate_download_metric = Mock ()
2224 config .s3_client = Mock ()
2325 config .environment = 'development'
2426 config .transactional_data_bucket = 'test-pii-bucket'
@@ -48,9 +50,9 @@ def create_valid_cloud_event():
4850 'traceparent' : '00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01' ,
4951 'dataschema' : 'https://notify.nhs.uk/cloudevents/schemas/digital-letters/2025-10-draft/data/digital-letters-mesh-inbox-message-received-data.schema.json' ,
5052 'data' : {
51- 'meshMessageId' : 'test_message_123 ' ,
52- 'senderId' : 'TEST_SENDER ' ,
53- 'messageReference' : 'ref_001 '
53+ 'meshMessageId' : 'test-message-123 ' ,
54+ 'senderId' : 'TEST-SENDER ' ,
55+ 'messageReference' : 'ref-001 '
5456 }
5557 }
5658
@@ -99,6 +101,7 @@ def test_processor_initialization_calls_mesh_handshake(self):
99101 log = log ,
100102 mesh_client = config .mesh_client ,
101103 download_metric = config .download_metric ,
104+ duplicate_download_metric = config .duplicate_download_metric ,
102105 document_store = document_store ,
103106 event_publisher = event_publisher
104107 )
@@ -115,7 +118,7 @@ def test_process_sqs_message_success(self, mock_datetime):
115118 fixed_time = datetime (2025 , 11 , 19 , 15 , 30 , 45 , tzinfo = timezone .utc )
116119 mock_datetime .now .return_value = fixed_time
117120
118- document_store .store_document .return_value = 'document-reference/SENDER_001_ref_001 '
121+ document_store .store_document .return_value = 'document-reference/SENDER-001/ref-001_test-message-123 '
119122
120123 event_publisher .send_events .return_value = []
121124
@@ -124,6 +127,7 @@ def test_process_sqs_message_success(self, mock_datetime):
124127 log = log ,
125128 mesh_client = config .mesh_client ,
126129 download_metric = config .download_metric ,
130+ duplicate_download_metric = config .duplicate_download_metric ,
127131 document_store = document_store ,
128132 event_publisher = event_publisher
129133 )
@@ -133,15 +137,17 @@ def test_process_sqs_message_success(self, mock_datetime):
133137
134138 sqs_record = create_sqs_record ()
135139
136- processor .process_sqs_message (sqs_record )
140+ outcome = processor .process_sqs_message (sqs_record )
137141
138- config .mesh_client .retrieve_message .assert_called_once_with ('test_message_123' )
142+ assert outcome == 'downloaded'
143+ config .mesh_client .retrieve_message .assert_called_once_with ('test-message-123' )
139144
140145 mesh_message .read .assert_called_once ()
141146
142147 document_store .store_document .assert_called_once_with (
143- sender_id = 'TEST_SENDER' ,
144- message_reference = 'ref_001' ,
148+ sender_id = 'TEST-SENDER' ,
149+ message_reference = 'ref-001' ,
150+ mesh_message_id = 'test-message-123' ,
145151 content = b'Test message content'
146152 )
147153
@@ -172,9 +178,9 @@ def test_process_sqs_message_success(self, mock_datetime):
172178
173179 # Verify CloudEvent data payload
174180 event_data = published_event ['data' ]
175- assert event_data ['senderId' ] == 'TEST_SENDER '
176- assert event_data ['messageReference' ] == 'ref_001 '
177- assert event_data ['messageUri' ] == 's3://test-pii-bucket/document-reference/SENDER_001_ref_001 '
181+ assert event_data ['senderId' ] == 'TEST-SENDER '
182+ assert event_data ['messageReference' ] == 'ref-001 '
183+ assert event_data ['messageUri' ] == 's3://test-pii-bucket/document-reference/SENDER-001/ref-001_test-message-123 '
178184 assert set (event_data .keys ()) == {'senderId' , 'messageReference' , 'messageUri' , 'meshMessageId' }
179185
180186 def test_process_sqs_message_validation_failure (self ):
@@ -188,6 +194,7 @@ def test_process_sqs_message_validation_failure(self):
188194 log = log ,
189195 mesh_client = config .mesh_client ,
190196 download_metric = config .download_metric ,
197+ duplicate_download_metric = config .duplicate_download_metric ,
191198 document_store = document_store ,
192199 event_publisher = event_publisher
193200 )
@@ -212,6 +219,7 @@ def test_process_sqs_message_missing_mesh_message_id(self):
212219 log = log ,
213220 mesh_client = config .mesh_client ,
214221 download_metric = config .download_metric ,
222+ duplicate_download_metric = config .duplicate_download_metric ,
215223 document_store = document_store ,
216224 event_publisher = event_publisher
217225 )
@@ -239,17 +247,18 @@ def test_download_and_store_message_not_found(self):
239247 log = log ,
240248 mesh_client = config .mesh_client ,
241249 download_metric = config .download_metric ,
250+ duplicate_download_metric = config .duplicate_download_metric ,
242251 document_store = document_store ,
243252 event_publisher = event_publisher
244253 )
245254
246255 config .mesh_client .retrieve_message .return_value = None
247256 sqs_record = create_sqs_record ()
248257
249- with pytest .raises (MeshMessageNotFound , match = "MESH message with ID test_message_123 not found" ):
258+ with pytest .raises (MeshMessageNotFound , match = "MESH message with ID test-message-123 not found" ):
250259 processor .process_sqs_message (sqs_record )
251260
252- config .mesh_client .retrieve_message .assert_called_once_with ('test_message_123 ' )
261+ config .mesh_client .retrieve_message .assert_called_once_with ('test-message-123 ' )
253262 document_store .store_document .assert_not_called ()
254263 event_publisher .send_events .assert_not_called ()
255264 config .download_metric .record .assert_not_called ()
@@ -269,6 +278,7 @@ def test_document_store_failure_prevents_ack_and_raises(self):
269278 log = log ,
270279 mesh_client = config .mesh_client ,
271280 download_metric = config .download_metric ,
281+ duplicate_download_metric = config .duplicate_download_metric ,
272282 document_store = document_store ,
273283 event_publisher = event_publisher
274284 )
@@ -304,6 +314,7 @@ def test_bucket_selection_with_mesh_mock_enabled(self, mock_datetime):
304314 log = log ,
305315 mesh_client = config .mesh_client ,
306316 download_metric = config .download_metric ,
317+ duplicate_download_metric = config .duplicate_download_metric ,
307318 document_store = document_store ,
308319 event_publisher = event_publisher
309320 )
@@ -312,8 +323,9 @@ def test_bucket_selection_with_mesh_mock_enabled(self, mock_datetime):
312323 config .mesh_client .retrieve_message .return_value = mesh_message
313324 sqs_record = create_sqs_record ()
314325
315- processor .process_sqs_message (sqs_record )
326+ outcome = processor .process_sqs_message (sqs_record )
316327
328+ assert outcome == 'downloaded'
317329 # Verify event was published with PII bucket in URI
318330 event_publisher .send_events .assert_called_once ()
319331 published_events = event_publisher .send_events .call_args [0 ][0 ]
@@ -342,6 +354,7 @@ def test_bucket_selection_with_mesh_mock_disabled(self, mock_datetime):
342354 log = log ,
343355 mesh_client = config .mesh_client ,
344356 download_metric = config .download_metric ,
357+ duplicate_download_metric = config .duplicate_download_metric ,
345358 document_store = document_store ,
346359 event_publisher = event_publisher
347360 )
@@ -350,10 +363,52 @@ def test_bucket_selection_with_mesh_mock_disabled(self, mock_datetime):
350363 config .mesh_client .retrieve_message .return_value = mesh_message
351364 sqs_record = create_sqs_record ()
352365
353- processor .process_sqs_message (sqs_record )
366+ outcome = processor .process_sqs_message (sqs_record )
354367
368+ assert outcome == 'downloaded'
355369 event_publisher .send_events .assert_called_once ()
356370 published_events = event_publisher .send_events .call_args [0 ][0 ]
357371 assert len (published_events ) == 1
358372 message_uri = published_events [0 ]['data' ]['messageUri' ]
359373 assert message_uri .startswith ('s3://test-pii-bucket/' )
374+
375+ def test_duplicate_delivery_skips_publish_and_acknowledge (self ):
376+ """When the document store raises DocumentAlreadyExistsError, the processor returns 'skipped', logs exactly one warning, records the duplicate-download metric, does not publish events or record the download metric, and still acknowledges the MESH message"""
377+ from mesh_download .processor import MeshDownloadProcessor
378+
379+ config , log , event_publisher , document_store = setup_mocks ()
380+ bound_logger = Mock ()
381+ log .bind .return_value = bound_logger
382+
383+ document_store .store_document .side_effect = DocumentAlreadyExistsError (
384+ "Document already exists for key: document-reference/TEST-SENDER/ref-001_mesh-123"
385+ )
386+
387+ processor = MeshDownloadProcessor (
388+ config = config ,
389+ log = log ,
390+ mesh_client = config .mesh_client ,
391+ download_metric = config .download_metric ,
392+ duplicate_download_metric = config .duplicate_download_metric ,
393+ document_store = document_store ,
394+ event_publisher = event_publisher
395+ )
396+
397+ mesh_message = create_mesh_message ()
398+ config .mesh_client .retrieve_message .return_value = mesh_message
399+ sqs_record = create_sqs_record ()
400+
401+ # A duplicate is not treated as an error: the call must return normally (an exception here fails the test)
402+ outcome = processor .process_sqs_message (sqs_record )
403+
404+ assert outcome == 'skipped'
405+ bound_logger .warning .assert_called_once ()
406+ warning_msg = bound_logger .warning .call_args [0 ][0 ]
407+ assert "already stored" in warning_msg
408+ config .duplicate_download_metric .record .assert_called_once ()
409+
410+ event_publisher .send_events .assert_not_called ()
411+ config .download_metric .record .assert_not_called ()
412+
413+ # Acknowledge should still be called to remove message from MESH inbox
414+ mesh_message .acknowledge .assert_called_once ()
0 commit comments