diff --git a/infrastructure/terraform/components/dl/README.md b/infrastructure/terraform/components/dl/README.md
index f763d5c0a..964704f0f 100644
--- a/infrastructure/terraform/components/dl/README.md
+++ b/infrastructure/terraform/components/dl/README.md
@@ -42,6 +42,7 @@ No requirements.
| Name | Source | Version |
|------|--------|---------|
| [core\_notifier](#module\_core\_notifier) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.29/terraform-lambda.zip | n/a |
+| [file\_scanner](#module\_file\_scanner) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.29/terraform-lambda.zip | n/a |
| [kms](#module\_kms) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.24/terraform-kms.zip | n/a |
| [lambda\_apim\_key\_generation](#module\_lambda\_apim\_key\_generation) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.29/terraform-lambda.zip | n/a |
| [lambda\_lambda\_apim\_refresh\_token](#module\_lambda\_lambda\_apim\_refresh\_token) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.29/terraform-lambda.zip | n/a |
@@ -65,6 +66,7 @@ No requirements.
| [sqs\_pdm\_uploader](#module\_sqs\_pdm\_uploader) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.24/terraform-sqs.zip | n/a |
| [sqs\_print\_analyser](#module\_sqs\_print\_analyser) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.30/terraform-sqs.zip | n/a |
| [sqs\_print\_status\_handler](#module\_sqs\_print\_status\_handler) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.30/terraform-sqs.zip | n/a |
+| [sqs\_scanner](#module\_sqs\_scanner) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.24/terraform-sqs.zip | n/a |
| [sqs\_ttl](#module\_sqs\_ttl) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.24/terraform-sqs.zip | n/a |
| [sqs\_ttl\_handle\_expiry\_errors](#module\_sqs\_ttl\_handle\_expiry\_errors) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.24/terraform-sqs.zip | n/a |
| [ttl\_create](#module\_ttl\_create) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.29/terraform-lambda.zip | n/a |
diff --git a/infrastructure/terraform/components/dl/cloudwatch_event_rule_item_dequeued.tf b/infrastructure/terraform/components/dl/cloudwatch_event_rule_item_dequeued.tf
new file mode 100644
index 000000000..22a31d84f
--- /dev/null
+++ b/infrastructure/terraform/components/dl/cloudwatch_event_rule_item_dequeued.tf
@@ -0,0 +1,19 @@
+resource "aws_cloudwatch_event_rule" "item_dequeued" {
+ name = "${local.csi}-item-dequeued"
+ description = "Queue item dequeued event rule"
+ event_bus_name = aws_cloudwatch_event_bus.main.name
+
+ event_pattern = jsonencode({
+ "detail" : {
+ "type" : [
+ "uk.nhs.notify.digital.letters.queue.item.dequeued.v1"
+ ]
+ }
+ })
+}
+
+resource "aws_cloudwatch_event_target" "item_dequeued_scanner" {
+ rule = aws_cloudwatch_event_rule.item_dequeued.name
+ arn = module.sqs_scanner.sqs_queue_arn
+ event_bus_name = aws_cloudwatch_event_bus.main.name
+}
diff --git a/infrastructure/terraform/components/dl/lambda_event_source_mapping_file_scanner.tf b/infrastructure/terraform/components/dl/lambda_event_source_mapping_file_scanner.tf
new file mode 100644
index 000000000..7e946716d
--- /dev/null
+++ b/infrastructure/terraform/components/dl/lambda_event_source_mapping_file_scanner.tf
@@ -0,0 +1,7 @@
+resource "aws_lambda_event_source_mapping" "file_scanner" {
+ event_source_arn = module.sqs_scanner.sqs_queue_arn
+ function_name = module.file_scanner.function_name
+ batch_size = 10
+ maximum_batching_window_in_seconds = 5
+ function_response_types = ["ReportBatchItemFailures"]
+}
diff --git a/infrastructure/terraform/components/dl/locals.tf b/infrastructure/terraform/components/dl/locals.tf
index 9298b97b9..da187ad9e 100644
--- a/infrastructure/terraform/components/dl/locals.tf
+++ b/infrastructure/terraform/components/dl/locals.tf
@@ -5,6 +5,7 @@ locals {
apim_api_key_ssm_parameter_name = "/${var.component}/${var.environment}/apim/api_key"
apim_private_key_ssm_parameter_name = "/${var.component}/${var.environment}/apim/private_key"
apim_keystore_s3_bucket = "nhs-${var.aws_account_id}-${var.region}-${var.environment}-${var.component}-static-assets"
+ unscanned_files_bucket = local.acct.additional_s3_buckets["digital-letters_unscanned-files"]["id"]
ssm_mesh_prefix = "/${var.component}/${var.environment}/mesh"
mock_mesh_endpoint = "s3://${module.s3bucket_non_pii_data.bucket}/mock-mesh"
root_domain_name = "${var.environment}.${local.acct.route53_zone_names["digital-letters"]}"
diff --git a/infrastructure/terraform/components/dl/module_lambda_file_scanner.tf b/infrastructure/terraform/components/dl/module_lambda_file_scanner.tf
new file mode 100644
index 000000000..877550684
--- /dev/null
+++ b/infrastructure/terraform/components/dl/module_lambda_file_scanner.tf
@@ -0,0 +1,131 @@
+module "file_scanner" {
+ source = "https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.29/terraform-lambda.zip"
+
+ function_name = "file-scanner"
+ description = "A function for extracting PDFs from DocumentReference and storing in UnscannedFiles bucket for virus scanning"
+
+ aws_account_id = var.aws_account_id
+ component = local.component
+ environment = var.environment
+ project = var.project
+ region = var.region
+ group = var.group
+
+ log_retention_in_days = var.log_retention_in_days
+ kms_key_arn = module.kms.key_arn
+
+ iam_policy_document = {
+ body = data.aws_iam_policy_document.file_scanner_lambda.json
+ }
+
+ function_s3_bucket = local.acct.s3_buckets["lambda_function_artefacts"]["id"]
+ function_code_base_path = local.aws_lambda_functions_dir_path
+ function_code_dir = "file-scanner-lambda/dist"
+ function_include_common = true
+ handler_function_name = "handler"
+ runtime = "nodejs22.x"
+ memory = 512
+ timeout = 60
+ log_level = var.log_level
+
+ force_lambda_code_deploy = var.force_lambda_code_deploy
+ enable_lambda_insights = false
+
+ log_destination_arn = local.log_destination_arn
+ log_subscription_role_arn = local.acct.log_subscription_role_arn
+
+ lambda_env_vars = {
+ "DOCUMENT_REFERENCE_BUCKET" = module.s3bucket_pii_data.bucket
+ "UNSCANNED_FILES_BUCKET" = local.unscanned_files_bucket
+ "UNSCANNED_FILES_PATH_PREFIX" = var.environment
+ "EVENT_PUBLISHER_EVENT_BUS_ARN" = aws_cloudwatch_event_bus.main.arn
+ "EVENT_PUBLISHER_DLQ_URL" = module.sqs_event_publisher_errors.sqs_queue_url
+ }
+}
+
+data "aws_iam_policy_document" "file_scanner_lambda" {
+ statement {
+ sid = "ReadDocumentReferenceBucket"
+ effect = "Allow"
+
+ actions = [
+ "s3:GetObject",
+ "s3:ListBucket",
+ ]
+
+ resources = [
+ module.s3bucket_pii_data.arn,
+ "${module.s3bucket_pii_data.arn}/*",
+ ]
+ }
+
+ statement {
+ sid = "WriteUnscannedFilesBucket"
+ effect = "Allow"
+
+ actions = [
+ "s3:PutObject",
+ ]
+
+ resources = [
+ "arn:aws:s3:::${local.unscanned_files_bucket}/*",
+ ]
+ }
+
+ statement {
+ sid = "PutEvents"
+ effect = "Allow"
+
+ actions = [
+ "events:PutEvents",
+ ]
+
+ resources = [
+ aws_cloudwatch_event_bus.main.arn,
+ ]
+ }
+
+ statement {
+ sid = "SQSPermissionsDLQs"
+ effect = "Allow"
+
+ actions = [
+ "sqs:SendMessage",
+ "sqs:SendMessageBatch",
+ ]
+
+ resources = [
+ module.sqs_event_publisher_errors.sqs_queue_arn,
+ ]
+ }
+
+ statement {
+ sid = "SQSPermissionsScannerQueue"
+ effect = "Allow"
+
+ actions = [
+ "sqs:ReceiveMessage",
+ "sqs:DeleteMessage",
+ "sqs:GetQueueAttributes",
+ "sqs:GetQueueUrl",
+ ]
+
+ resources = [
+ module.sqs_scanner.sqs_queue_arn,
+ ]
+ }
+
+ statement {
+ sid = "KMSPermissions"
+ effect = "Allow"
+
+ actions = [
+ "kms:Decrypt",
+ "kms:GenerateDataKey",
+ ]
+
+ resources = [
+ module.kms.key_arn,
+ ]
+ }
+}
diff --git a/infrastructure/terraform/components/dl/module_sqs_scanner.tf b/infrastructure/terraform/components/dl/module_sqs_scanner.tf
new file mode 100644
index 000000000..248fe16aa
--- /dev/null
+++ b/infrastructure/terraform/components/dl/module_sqs_scanner.tf
@@ -0,0 +1,41 @@
+module "sqs_scanner" {
+ source = "https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.24/terraform-sqs.zip"
+
+ aws_account_id = var.aws_account_id
+ component = local.component
+ environment = var.environment
+ project = var.project
+ region = var.region
+ name = "file-scanner"
+ sqs_kms_key_arn = module.kms.key_arn
+ visibility_timeout_seconds = 60
+ delay_seconds = 0
+ create_dlq = true
+ sqs_policy_overload = data.aws_iam_policy_document.sqs_scanner.json
+}
+
+data "aws_iam_policy_document" "sqs_scanner" {
+ statement {
+ sid = "AllowEventBridgeToSendMessage"
+ effect = "Allow"
+
+ principals {
+ type = "Service"
+ identifiers = ["events.amazonaws.com"]
+ }
+
+ actions = [
+ "sqs:SendMessage"
+ ]
+
+ resources = [
+ "arn:aws:sqs:${var.region}:${var.aws_account_id}:${local.csi}-file-scanner-queue"
+ ]
+
+ condition {
+ test = "ArnEquals"
+ variable = "aws:SourceArn"
+ values = [aws_cloudwatch_event_rule.item_dequeued.arn]
+ }
+ }
+}
diff --git a/lambdas/file-scanner-lambda/jest.config.ts b/lambdas/file-scanner-lambda/jest.config.ts
new file mode 100644
index 000000000..c02601aec
--- /dev/null
+++ b/lambdas/file-scanner-lambda/jest.config.ts
@@ -0,0 +1,5 @@
+import { baseJestConfig } from '../../jest.config.base';
+
+const config = baseJestConfig;
+
+export default config;
diff --git a/lambdas/file-scanner-lambda/package.json b/lambdas/file-scanner-lambda/package.json
new file mode 100644
index 000000000..b530f0e98
--- /dev/null
+++ b/lambdas/file-scanner-lambda/package.json
@@ -0,0 +1,26 @@
+{
+ "dependencies": {
+ "@aws-sdk/client-s3": "^3.908.0",
+ "aws-lambda": "^1.0.7",
+ "digital-letters-events": "^0.0.1",
+ "utils": "^0.0.1"
+ },
+ "devDependencies": {
+ "@tsconfig/node22": "^22.0.2",
+ "@types/aws-lambda": "^8.10.155",
+ "@types/jest": "^29.5.14",
+ "jest": "^29.7.0",
+ "jest-mock-extended": "^3.0.7",
+ "typescript": "^5.9.3"
+ },
+ "name": "nhs-notify-digital-letters-file-scanner-lambda",
+ "private": true,
+ "scripts": {
+ "lambda-build": "rm -rf dist && npx esbuild --bundle --minify --sourcemap --target=es2020 --platform=node --loader:.node=file --entry-names=[name] --outdir=dist src/index.ts",
+ "lint": "eslint .",
+ "lint:fix": "eslint . --fix",
+ "test:unit": "jest",
+ "typecheck": "tsc --noEmit"
+ },
+ "version": "0.0.1"
+}
diff --git a/lambdas/file-scanner-lambda/src/__tests__/apis/sqs-handler.test.ts b/lambdas/file-scanner-lambda/src/__tests__/apis/sqs-handler.test.ts
new file mode 100644
index 000000000..6fb343295
--- /dev/null
+++ b/lambdas/file-scanner-lambda/src/__tests__/apis/sqs-handler.test.ts
@@ -0,0 +1,505 @@
+import { createHandler } from 'apis/sqs-handler';
+import type { SQSEvent } from 'aws-lambda';
+import { mockDeep } from 'jest-mock-extended';
+import type { HandlerDependencies } from 'apis/sqs-handler';
+import { FileScanner } from 'app/file-scanner';
+import { EventPublisher, Logger } from 'utils';
+
+const mockFileScanner = mockDeep<FileScanner>();
+const mockEventPublisher = mockDeep<EventPublisher>();
+const mockLogger = mockDeep<Logger>();
+
+const createSQSEvent = (
+ records: {
+ messageId: string;
+ body: string;
+ }[],
+): SQSEvent => ({
+ Records: records.map((record) => ({
+ messageId: record.messageId,
+ receiptHandle: 'test-receipt-handle',
+ body: record.body,
+ attributes: {
+ ApproximateReceiveCount: '1',
+ SentTimestamp: '1',
+ SenderId: 'test',
+ ApproximateFirstReceiveTimestamp: '1',
+ },
+ messageAttributes: {},
+ md5OfBody: 'test',
+ eventSource: 'aws:sqs',
+ eventSourceARN: 'arn:aws:sqs:us-east-1:123456789012:test-queue',
+ awsRegion: 'us-east-1',
+ })),
+});
+
+const createValidItemDequeuedBody = (
+ messageReference: string,
+ senderId: string,
+ messageUri: string,
+) =>
+ JSON.stringify({
+ detail: {
+ specversion: '1.0',
+ id: `event-${messageReference}`,
+ source:
+ '/nhs/england/notify/development/primary/data-plane/digitalletters/queue',
+ subject: `message/${messageReference}`,
+ type: 'uk.nhs.notify.digital.letters.queue.item.dequeued.v1',
+ time: '2026-01-19T12:00:00Z',
+ recordedtime: '2026-01-19T12:00:00.100Z',
+ datacontenttype: 'application/json',
+ dataschema:
+ 'https://notify.nhs.uk/cloudevents/schemas/digital-letters/2025-10-draft/data/digital-letters-queue-item-dequeued-data.schema.json',
+ traceparent: '00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01',
+ severitynumber: 2,
+ data: {
+ messageReference,
+ senderId,
+ messageUri,
+ },
+ },
+ });
+
+describe('SQS Handler', () => {
+  let handler: ReturnType<typeof createHandler>;
+ let dependencies: HandlerDependencies;
+
+ beforeEach(() => {
+ jest.clearAllMocks();
+ dependencies = {
+ eventPublisher: mockEventPublisher,
+ logger: mockLogger,
+ fileScanner: mockFileScanner,
+ };
+ handler = createHandler(dependencies);
+ });
+
+ describe('successful processing', () => {
+ it('should process valid ItemDequeued events successfully', async () => {
+ mockFileScanner.scanFile.mockResolvedValue({ outcome: 'success' });
+
+ const event = createSQSEvent([
+ {
+ messageId: 'msg-001',
+ body: createValidItemDequeuedBody(
+ 'ref-001',
+ 'SENDER_001',
+ 's3://bucket/key',
+ ),
+ },
+ ]);
+
+ const response = await handler(event);
+
+ expect(response.batchItemFailures).toEqual([]);
+ expect(mockFileScanner.scanFile).toHaveBeenCalledWith('s3://bucket/key', {
+ messageReference: 'ref-001',
+ senderId: 'SENDER_001',
+ createdAt: '2026-01-19T12:00:00Z',
+ });
+
+ expect(mockLogger.info).toHaveBeenCalledWith(
+ expect.objectContaining({
+ description: 'Starting file scanner batch',
+ recordCount: 1,
+ }),
+ );
+
+ expect(mockLogger.info).toHaveBeenCalledWith(
+ expect.objectContaining({
+ description: 'Processing ItemDequeued event',
+ messageReference: 'ref-001',
+ senderId: 'SENDER_001',
+ }),
+ );
+
+ expect(mockLogger.info).toHaveBeenCalledWith(
+ expect.objectContaining({
+ description: 'Completed file scanner batch',
+ successCount: 1,
+ failureCount: 0,
+ }),
+ );
+ });
+
+ it('should process multiple valid events in batch', async () => {
+ mockFileScanner.scanFile.mockResolvedValue({ outcome: 'success' });
+
+ const event = createSQSEvent([
+ {
+ messageId: 'msg-001',
+ body: createValidItemDequeuedBody(
+ 'ref-001',
+ 'SENDER_001',
+ 's3://bucket/key1',
+ ),
+ },
+ {
+ messageId: 'msg-002',
+ body: createValidItemDequeuedBody(
+ 'ref-002',
+ 'SENDER_002',
+ 's3://bucket/key2',
+ ),
+ },
+ {
+ messageId: 'msg-003',
+ body: createValidItemDequeuedBody(
+ 'ref-003',
+ 'SENDER_003',
+ 's3://bucket/key3',
+ ),
+ },
+ ]);
+
+ const response = await handler(event);
+
+ expect(response.batchItemFailures).toEqual([]);
+ expect(mockFileScanner.scanFile).toHaveBeenCalledTimes(3);
+ expect(mockLogger.info).toHaveBeenCalledWith(
+ expect.objectContaining({
+ description: 'Completed file scanner batch',
+ successCount: 3,
+ failureCount: 0,
+ }),
+ );
+ });
+ });
+
+ describe('failure handling', () => {
+ it('should return failed message when file scanner fails', async () => {
+ mockFileScanner.scanFile.mockResolvedValue({
+ outcome: 'failed',
+ errorMessage: 'PDF extraction failed',
+ });
+
+ const event = createSQSEvent([
+ {
+ messageId: 'msg-001',
+ body: createValidItemDequeuedBody(
+ 'ref-001',
+ 'SENDER_001',
+ 's3://bucket/key',
+ ),
+ },
+ ]);
+
+ const response = await handler(event);
+
+ expect(response.batchItemFailures).toEqual([
+ { itemIdentifier: 'msg-001' },
+ ]);
+
+ expect(mockLogger.error).toHaveBeenCalledWith(
+ expect.objectContaining({
+ description: 'Error processing record',
+ messageId: 'msg-001',
+ }),
+ );
+
+ expect(mockLogger.info).toHaveBeenCalledWith(
+ expect.objectContaining({
+ description: 'Completed file scanner batch',
+ successCount: 0,
+ failureCount: 1,
+ }),
+ );
+ });
+
+ it('should handle partial batch failures', async () => {
+ mockFileScanner.scanFile
+ .mockResolvedValueOnce({ outcome: 'success' })
+ .mockResolvedValueOnce({
+ outcome: 'failed',
+ errorMessage: 'Error',
+ })
+ .mockResolvedValueOnce({ outcome: 'success' });
+
+ const event = createSQSEvent([
+ {
+ messageId: 'msg-001',
+ body: createValidItemDequeuedBody(
+ 'ref-001',
+ 'SENDER_001',
+ 's3://bucket/key1',
+ ),
+ },
+ {
+ messageId: 'msg-002',
+ body: createValidItemDequeuedBody(
+ 'ref-002',
+ 'SENDER_002',
+ 's3://bucket/key2',
+ ),
+ },
+ {
+ messageId: 'msg-003',
+ body: createValidItemDequeuedBody(
+ 'ref-003',
+ 'SENDER_003',
+ 's3://bucket/key3',
+ ),
+ },
+ ]);
+
+ const response = await handler(event);
+
+ expect(response.batchItemFailures).toEqual([
+ { itemIdentifier: 'msg-002' },
+ ]);
+
+ expect(mockLogger.info).toHaveBeenCalledWith(
+ expect.objectContaining({
+ description: 'Completed file scanner batch',
+ successCount: 2,
+ failureCount: 1,
+ }),
+ );
+ });
+
+ it('should handle fileScanner throwing exception', async () => {
+ mockFileScanner.scanFile.mockRejectedValue(new Error('Unexpected error'));
+
+ const event = createSQSEvent([
+ {
+ messageId: 'msg-001',
+ body: createValidItemDequeuedBody(
+ 'ref-001',
+ 'SENDER_001',
+ 's3://bucket/key',
+ ),
+ },
+ ]);
+
+ const response = await handler(event);
+
+ expect(response.batchItemFailures).toEqual([
+ { itemIdentifier: 'msg-001' },
+ ]);
+ });
+
+ it('should handle fileScanner throwing non-Error object', async () => {
+ mockFileScanner.scanFile.mockRejectedValue('string error');
+
+ const event = createSQSEvent([
+ {
+ messageId: 'msg-002',
+ body: createValidItemDequeuedBody(
+ 'ref-002',
+ 'SENDER_002',
+ 's3://bucket/key2',
+ ),
+ },
+ ]);
+
+ const response = await handler(event);
+
+ expect(response.batchItemFailures).toEqual([
+ { itemIdentifier: 'msg-002' },
+ ]);
+
+ expect(mockLogger.error).toHaveBeenCalledWith(
+ expect.objectContaining({
+ description: 'Error processing record',
+ err: 'string error',
+ messageId: 'msg-002',
+ }),
+ );
+ });
+ });
+
+ describe('validation errors', () => {
+ it('should skip invalid event type', async () => {
+ const invalidEvent = createSQSEvent([
+ {
+ messageId: 'msg-001',
+ body: JSON.stringify({
+ detail: {
+ specversion: '1.0',
+ id: 'event-001',
+ source:
+ '/nhs/england/notify/development/primary/data-plane/digitalletters/queue',
+ type: 'uk.nhs.notify.wrong.event.type.v1',
+ time: '2026-01-19T12:00:00Z',
+ data: {
+ messageReference: 'ref-001',
+ senderId: 'SENDER_001',
+ messageUri: 's3://bucket/key',
+ },
+ },
+ }),
+ },
+ ]);
+
+ const response = await handler(invalidEvent);
+
+ expect(response.batchItemFailures).toEqual([
+ { itemIdentifier: 'msg-001' },
+ ]);
+ expect(mockFileScanner.scanFile).not.toHaveBeenCalled();
+ expect(mockLogger.warn).toHaveBeenCalledWith(
+ expect.objectContaining({
+ description: 'Invalid record will be retried',
+ }),
+ );
+ expect(mockLogger.warn).toHaveBeenCalledWith(
+ expect.objectContaining({
+ description: 'Error parsing queue entry',
+ }),
+ );
+ });
+
+ it('should skip malformed JSON', async () => {
+ const event = createSQSEvent([
+ {
+ messageId: 'msg-001',
+ body: 'invalid json {',
+ },
+ ]);
+
+ const response = await handler(event);
+
+ expect(response.batchItemFailures).toEqual([
+ { itemIdentifier: 'msg-001' },
+ ]);
+ expect(mockFileScanner.scanFile).not.toHaveBeenCalled();
+ expect(mockLogger.warn).toHaveBeenCalledWith(
+ expect.objectContaining({
+ description: 'Error parsing SQS record',
+ }),
+ );
+ expect(mockLogger.warn).toHaveBeenCalledWith(
+ expect.objectContaining({
+ description: 'Invalid record will be retried',
+ messageId: 'msg-001',
+ }),
+ );
+ });
+
+ it('should skip events with missing required fields', async () => {
+ const invalidEvent = createSQSEvent([
+ {
+ messageId: 'msg-001',
+ body: JSON.stringify({
+ detail: {
+ specversion: '1.0',
+ id: 'event-001',
+ source:
+ '/nhs/england/notify/development/primary/data-plane/digitalletters/queue',
+ type: 'uk.nhs.notify.digital.letters.queue.item.dequeued.v1',
+ time: '2026-01-19T12:00:00Z',
+ data: {
+ // Missing messageReference, senderId, messageUri
+ },
+ },
+ }),
+ },
+ ]);
+
+ const response = await handler(invalidEvent);
+
+ expect(response.batchItemFailures).toEqual([
+ { itemIdentifier: 'msg-001' },
+ ]);
+ expect(mockFileScanner.scanFile).not.toHaveBeenCalled();
+ expect(mockLogger.warn).toHaveBeenCalledWith(
+ expect.objectContaining({
+ description: 'Error parsing queue entry',
+ }),
+ );
+ expect(mockLogger.warn).toHaveBeenCalledWith(
+ expect.objectContaining({
+ description: 'Invalid record will be retried',
+ messageId: 'msg-001',
+ }),
+ );
+ });
+
+ it('should process valid events and skip invalid ones in same batch', async () => {
+ mockFileScanner.scanFile.mockResolvedValue({ outcome: 'success' });
+
+ const event = createSQSEvent([
+ {
+ messageId: 'msg-001',
+ body: createValidItemDequeuedBody(
+ 'ref-001',
+ 'SENDER_001',
+ 's3://bucket/key1',
+ ),
+ },
+ {
+ messageId: 'msg-002',
+ body: 'invalid json',
+ },
+ {
+ messageId: 'msg-003',
+ body: createValidItemDequeuedBody(
+ 'ref-003',
+ 'SENDER_003',
+ 's3://bucket/key3',
+ ),
+ },
+ ]);
+
+ const response = await handler(event);
+
+ expect(response.batchItemFailures).toEqual([
+ { itemIdentifier: 'msg-002' },
+ ]);
+ expect(mockFileScanner.scanFile).toHaveBeenCalledTimes(2);
+ expect(mockLogger.info).toHaveBeenCalledWith(
+ expect.objectContaining({
+ description: 'Completed file scanner batch',
+ successCount: 2,
+ failureCount: 1,
+ }),
+ );
+ });
+ });
+
+ describe('empty batch handling', () => {
+ it('should handle empty records array', async () => {
+ const event = createSQSEvent([]);
+
+ const response = await handler(event);
+
+ expect(response.batchItemFailures).toEqual([]);
+ expect(mockFileScanner.scanFile).not.toHaveBeenCalled();
+ expect(mockLogger.info).toHaveBeenCalledWith(
+ expect.objectContaining({
+ description: 'Starting file scanner batch',
+ recordCount: 0,
+ }),
+ );
+ });
+
+ it('should handle batch with all invalid records', async () => {
+ const event = createSQSEvent([
+ {
+ messageId: 'msg-001',
+ body: 'invalid',
+ },
+ {
+ messageId: 'msg-002',
+ body: 'also invalid',
+ },
+ ]);
+
+ const response = await handler(event);
+
+ expect(response.batchItemFailures).toEqual([
+ { itemIdentifier: 'msg-001' },
+ { itemIdentifier: 'msg-002' },
+ ]);
+ expect(mockFileScanner.scanFile).not.toHaveBeenCalled();
+ expect(mockLogger.info).toHaveBeenCalledWith(
+ expect.objectContaining({
+ description: 'Completed file scanner batch',
+ successCount: 0,
+ failureCount: 2,
+ }),
+ );
+ });
+ });
+});
diff --git a/lambdas/file-scanner-lambda/src/__tests__/app/file-scanner.test.ts b/lambdas/file-scanner-lambda/src/__tests__/app/file-scanner.test.ts
new file mode 100644
index 000000000..451d91ba4
--- /dev/null
+++ b/lambdas/file-scanner-lambda/src/__tests__/app/file-scanner.test.ts
@@ -0,0 +1,288 @@
+import { FileScanner } from 'app/file-scanner';
+import { S3Client } from '@aws-sdk/client-s3';
+import { Logger } from 'utils';
+import { mockDeep } from 'jest-mock-extended';
+import * as utilsModule from 'utils';
+
+const mockS3Client = mockDeep<S3Client>();
+const mockLogger = mockDeep<Logger>();
+
+jest.mock('utils', () => ({
+ ...jest.requireActual('utils'),
+ getS3Object: jest.fn(),
+}));
+
+const mockGetS3Object = utilsModule.getS3Object as jest.MockedFunction<
+ typeof utilsModule.getS3Object
+>;
+
+describe('FileScanner', () => {
+ let fileScanner: FileScanner;
+
+ beforeEach(() => {
+ jest.clearAllMocks();
+ fileScanner = new FileScanner({
+ documentReferenceBucket: 'test-doc-ref-bucket',
+ unscannedFilesBucket: 'test-unscanned-bucket',
+ unscannedFilesPathPrefix: 'dev',
+ s3Client: mockS3Client,
+ logger: mockLogger,
+ });
+ });
+
+ describe('scanFile', () => {
+ const validMessageUri =
+ 's3://test-doc-ref-bucket/document-reference/test-ref-001';
+ const validMetadata = {
+ messageReference: 'test-ref-001',
+ senderId: 'SENDER_001',
+ createdAt: '2026-01-19T12:00:00Z',
+ };
+
+ const validDocumentReference = {
+ resourceType: 'DocumentReference',
+ id: 'test-id',
+ content: [
+ {
+ attachment: {
+ contentType: 'application/pdf',
+ data: Buffer.from('test pdf content').toString('base64'),
+ },
+ },
+ ],
+ };
+
+ it('should successfully extract PDF and upload to unscanned bucket', async () => {
+ mockGetS3Object.mockResolvedValue(JSON.stringify(validDocumentReference));
+ (mockS3Client.send as any).mockResolvedValue({});
+
+ const result = await fileScanner.scanFile(validMessageUri, validMetadata);
+
+ expect(result.outcome).toBe('success');
+ expect(result.errorMessage).toBeUndefined();
+
+ // Verify DocumentReference was retrieved
+ expect(mockGetS3Object).toHaveBeenCalledWith({
+ Bucket: 'test-doc-ref-bucket',
+ Key: 'document-reference/test-ref-001',
+ });
+
+ // Verify PDF was uploaded
+ expect(mockS3Client.send).toHaveBeenCalledWith(
+ expect.objectContaining({
+ input: expect.objectContaining({
+ Bucket: 'test-unscanned-bucket',
+ Key: 'dev/test-ref-001.pdf',
+ ContentType: 'application/pdf',
+ Metadata: {
+ messageReference: 'test-ref-001',
+ senderId: 'SENDER_001',
+ createdAt: '2026-01-19T12:00:00Z',
+ },
+ }),
+ }),
+ );
+
+ // Verify logging
+ expect(mockLogger.info).toHaveBeenCalledWith(
+ expect.objectContaining({
+ description: 'Starting file scan',
+ messageUri: validMessageUri,
+ }),
+ );
+
+ expect(mockLogger.info).toHaveBeenCalledWith(
+ expect.objectContaining({
+ description: 'Successfully processed file for scanning',
+ messageReference: 'test-ref-001',
+ }),
+ );
+ });
+
+ it('should handle invalid S3 URI format', async () => {
+ const invalidUri = 'not-an-s3-uri';
+
+ const result = await fileScanner.scanFile(invalidUri, validMetadata);
+
+ expect(result.outcome).toBe('failed');
+ expect(result.errorMessage).toContain('Invalid S3 URI format');
+
+ expect(mockLogger.error).toHaveBeenCalledWith(
+ expect.objectContaining({
+ description: 'Error processing file for scanning',
+ messageReference: 'test-ref-001',
+ }),
+ );
+ });
+
+ it('should handle S3 URI without key', async () => {
+ const invalidUri = 's3://bucket-only/';
+
+ const result = await fileScanner.scanFile(invalidUri, validMetadata);
+
+ expect(result.outcome).toBe('failed');
+ expect(result.errorMessage).toContain('Invalid S3 URI format');
+ });
+
+ it('should handle missing DocumentReference content', async () => {
+ const invalidDocRef = {
+ resourceType: 'DocumentReference',
+ id: 'test-id',
+ content: [],
+ };
+
+ mockGetS3Object.mockResolvedValue(JSON.stringify(invalidDocRef));
+
+ const result = await fileScanner.scanFile(validMessageUri, validMetadata);
+
+ expect(result.outcome).toBe('failed');
+ expect(result.errorMessage).toContain('DocumentReference has no content');
+ });
+
+ it('should handle undefined content array', async () => {
+ const invalidDocRef = {
+ resourceType: 'DocumentReference',
+ id: 'test-id',
+ };
+
+ mockGetS3Object.mockResolvedValue(JSON.stringify(invalidDocRef));
+
+ const result = await fileScanner.scanFile(validMessageUri, validMetadata);
+
+ expect(result.outcome).toBe('failed');
+ expect(result.errorMessage).toContain('DocumentReference has no content');
+ });
+
+ it('should handle missing attachment in DocumentReference', async () => {
+ const invalidDocRef = {
+ resourceType: 'DocumentReference',
+ id: 'test-id',
+ content: [{}],
+ };
+
+ mockGetS3Object.mockResolvedValue(JSON.stringify(invalidDocRef));
+
+ const result = await fileScanner.scanFile(validMessageUri, validMetadata);
+
+ expect(result.outcome).toBe('failed');
+ expect(result.errorMessage).toContain(
+ 'DocumentReference content has no attachment data',
+ );
+ });
+
+ it('should handle missing attachment data', async () => {
+ const invalidDocRef = {
+ resourceType: 'DocumentReference',
+ id: 'test-id',
+ content: [
+ {
+ attachment: {
+ contentType: 'application/pdf',
+ },
+ },
+ ],
+ };
+
+ mockGetS3Object.mockResolvedValue(JSON.stringify(invalidDocRef));
+
+ const result = await fileScanner.scanFile(validMessageUri, validMetadata);
+
+ expect(result.outcome).toBe('failed');
+ expect(result.errorMessage).toContain(
+ 'DocumentReference content has no attachment data',
+ );
+ });
+
+ it('should handle S3 getObject error', async () => {
+ mockGetS3Object.mockRejectedValue(new Error('S3 access denied'));
+
+ const result = await fileScanner.scanFile(validMessageUri, validMetadata);
+
+ expect(result.outcome).toBe('failed');
+ expect(result.errorMessage).toContain('S3 access denied');
+
+ expect(mockLogger.error).toHaveBeenCalledWith(
+ expect.objectContaining({
+ description: 'Error processing file for scanning',
+ err: expect.objectContaining({
+ message: 'S3 access denied',
+ }),
+ }),
+ );
+ });
+
+ it('should handle S3 putObject error', async () => {
+ mockGetS3Object.mockResolvedValue(JSON.stringify(validDocumentReference));
+ (mockS3Client.send as any).mockRejectedValue(
+ new Error('S3 upload failed'),
+ );
+
+ const result = await fileScanner.scanFile(validMessageUri, validMetadata);
+
+ expect(result.outcome).toBe('failed');
+ expect(result.errorMessage).toContain('S3 upload failed');
+ });
+
+ it('should handle invalid JSON in DocumentReference', async () => {
+ mockGetS3Object.mockResolvedValue('invalid json {');
+
+ const result = await fileScanner.scanFile(validMessageUri, validMetadata);
+
+ expect(result.outcome).toBe('failed');
+ expect(result.errorMessage).toBeDefined();
+ });
+
+ it('should handle non-Error exceptions', async () => {
+ mockGetS3Object.mockRejectedValue('string error');
+
+ const result = await fileScanner.scanFile(validMessageUri, validMetadata);
+
+ expect(result.outcome).toBe('failed');
+ expect(result.errorMessage).toBe('string error');
+ });
+
+ it('should correctly build unscanned file key with environment prefix', async () => {
+ mockGetS3Object.mockResolvedValue(JSON.stringify(validDocumentReference));
+ (mockS3Client.send as any).mockResolvedValue({});
+
+ await fileScanner.scanFile(validMessageUri, validMetadata);
+
+ expect(mockS3Client.send).toHaveBeenCalledWith(
+ expect.objectContaining({
+ input: expect.objectContaining({
+ Key: 'dev/test-ref-001.pdf',
+ }),
+ }),
+ );
+ });
+
+ it('should decode base64 PDF correctly', async () => {
+ const pdfContent = 'This is a test PDF content';
+ const docRef = {
+ resourceType: 'DocumentReference',
+ id: 'test-id',
+ content: [
+ {
+ attachment: {
+ contentType: 'application/pdf',
+ data: Buffer.from(pdfContent).toString('base64'),
+ },
+ },
+ ],
+ };
+
+ mockGetS3Object.mockResolvedValue(JSON.stringify(docRef));
+ (mockS3Client.send as any).mockResolvedValue({});
+
+ await fileScanner.scanFile(validMessageUri, validMetadata);
+
+ expect(mockS3Client.send).toHaveBeenCalledWith(
+ expect.objectContaining({
+ input: expect.objectContaining({
+ Body: Buffer.from(pdfContent),
+ }),
+ }),
+ );
+ });
+ });
+});
diff --git a/lambdas/file-scanner-lambda/src/__tests__/container.test.ts b/lambdas/file-scanner-lambda/src/__tests__/container.test.ts
new file mode 100644
index 000000000..f4275bab7
--- /dev/null
+++ b/lambdas/file-scanner-lambda/src/__tests__/container.test.ts
@@ -0,0 +1,56 @@
+import { createContainer } from 'container';
+import * as configModule from 'infra/config';
+
+jest.mock('infra/config');
+
+const mockLoadConfig = configModule.loadConfig as jest.MockedFunction<
+ typeof configModule.loadConfig
+>;
+
+describe('createContainer', () => {
+ beforeEach(() => {
+ jest.clearAllMocks();
+ });
+
+ it('should create container with all dependencies', () => {
+ mockLoadConfig.mockReturnValue({
+ documentReferenceBucket: 'test-doc-ref-bucket',
+ unscannedFilesBucket: 'test-unscanned-bucket',
+ unscannedFilesPathPrefix: 'dev',
+ eventPublisherEventBusArn:
+ 'arn:aws:events:us-east-1:123456789012:event-bus/test',
+ eventPublisherDlqUrl: 'https://sqs.us-east-1.amazonaws.com/dlq',
+ });
+
+ const container = createContainer();
+
+ expect(container).toHaveProperty('eventPublisher');
+ expect(container).toHaveProperty('logger');
+ expect(container).toHaveProperty('fileScanner');
+ expect(mockLoadConfig).toHaveBeenCalledTimes(1);
+ });
+
+ it('should call loadConfig to get configuration', () => {
+ const mockConfig = {
+ documentReferenceBucket: 'test-bucket',
+ unscannedFilesBucket: 'test-unscanned',
+ unscannedFilesPathPrefix: 'dev',
+ eventPublisherEventBusArn: 'arn:test',
+ eventPublisherDlqUrl: 'url:test',
+ };
+
+ mockLoadConfig.mockReturnValue(mockConfig);
+
+ createContainer();
+
+ expect(mockLoadConfig).toHaveBeenCalled();
+ });
+
+ it('should propagate config errors', () => {
+ mockLoadConfig.mockImplementation(() => {
+ throw new Error('Missing required config');
+ });
+
+ expect(() => createContainer()).toThrow('Missing required config');
+ });
+});
diff --git a/lambdas/file-scanner-lambda/src/__tests__/index.test.ts b/lambdas/file-scanner-lambda/src/__tests__/index.test.ts
new file mode 100644
index 000000000..00232a5e9
--- /dev/null
+++ b/lambdas/file-scanner-lambda/src/__tests__/index.test.ts
@@ -0,0 +1,26 @@
+// Set environment variables before any imports
+process.env.DOCUMENT_REFERENCE_BUCKET = 'test-doc-ref-bucket';
+process.env.UNSCANNED_FILES_BUCKET = 'test-unscanned-bucket';
+process.env.UNSCANNED_FILES_PATH_PREFIX = 'test-prefix';
+process.env.EVENT_PUBLISHER_EVENT_BUS_ARN =
+ 'arn:aws:events:us-east-1:123456789012:event-bus/test-bus';
+process.env.EVENT_PUBLISHER_DLQ_URL =
+ 'https://sqs.us-east-1.amazonaws.com/123456789012/test-dlq';
+
+// eslint-disable-next-line import-x/first
+import { handler } from '..';
+
+describe('Lambda Handler', () => {
+ afterAll(() => {
+ delete process.env.DOCUMENT_REFERENCE_BUCKET;
+ delete process.env.UNSCANNED_FILES_BUCKET;
+ delete process.env.UNSCANNED_FILES_PATH_PREFIX;
+ delete process.env.EVENT_PUBLISHER_EVENT_BUS_ARN;
+ delete process.env.EVENT_PUBLISHER_DLQ_URL;
+ });
+
+ it('should export handler function', () => {
+ expect(handler).toBeDefined();
+ expect(typeof handler).toBe('function');
+ });
+});
diff --git a/lambdas/file-scanner-lambda/src/__tests__/infra/config.test.ts b/lambdas/file-scanner-lambda/src/__tests__/infra/config.test.ts
new file mode 100644
index 000000000..825baa305
--- /dev/null
+++ b/lambdas/file-scanner-lambda/src/__tests__/infra/config.test.ts
@@ -0,0 +1,95 @@
+import { loadConfig } from 'infra/config';
+
+describe('loadConfig', () => {
+ const originalEnv = process.env;
+
+ beforeEach(() => {
+ jest.resetModules();
+ process.env = { ...originalEnv };
+ });
+
+ afterAll(() => {
+ process.env = originalEnv;
+ });
+
+ it('should load valid configuration', () => {
+ process.env.DOCUMENT_REFERENCE_BUCKET = 'test-doc-ref-bucket';
+ process.env.UNSCANNED_FILES_BUCKET = 'test-unscanned-bucket';
+ process.env.UNSCANNED_FILES_PATH_PREFIX = 'dev';
+ process.env.EVENT_PUBLISHER_EVENT_BUS_ARN =
+ 'arn:aws:events:us-east-1:123456789012:event-bus/test-bus';
+ process.env.EVENT_PUBLISHER_DLQ_URL =
+ 'https://sqs.us-east-1.amazonaws.com/123456789012/test-dlq';
+
+ const config = loadConfig();
+
+ expect(config).toEqual({
+ documentReferenceBucket: 'test-doc-ref-bucket',
+ unscannedFilesBucket: 'test-unscanned-bucket',
+ unscannedFilesPathPrefix: 'dev',
+ eventPublisherEventBusArn:
+ 'arn:aws:events:us-east-1:123456789012:event-bus/test-bus',
+ eventPublisherDlqUrl:
+ 'https://sqs.us-east-1.amazonaws.com/123456789012/test-dlq',
+ });
+ });
+
+ it('should throw error when DOCUMENT_REFERENCE_BUCKET is missing', () => {
+ process.env.UNSCANNED_FILES_BUCKET = 'test-unscanned-bucket';
+ process.env.UNSCANNED_FILES_PATH_PREFIX = 'dev';
+ process.env.EVENT_PUBLISHER_EVENT_BUS_ARN = 'arn:aws:events:test';
+ process.env.EVENT_PUBLISHER_DLQ_URL = 'https://sqs.test.com/dlq';
+
+ expect(() => loadConfig()).toThrow('DOCUMENT_REFERENCE_BUCKET is not set');
+ });
+
+ it('should throw error when UNSCANNED_FILES_BUCKET is missing', () => {
+ process.env.DOCUMENT_REFERENCE_BUCKET = 'test-doc-ref-bucket';
+ process.env.UNSCANNED_FILES_PATH_PREFIX = 'dev';
+ process.env.EVENT_PUBLISHER_EVENT_BUS_ARN = 'arn:aws:events:test';
+ process.env.EVENT_PUBLISHER_DLQ_URL = 'https://sqs.test.com/dlq';
+
+ expect(() => loadConfig()).toThrow('UNSCANNED_FILES_BUCKET is not set');
+ });
+
+ it('should throw error when UNSCANNED_FILES_PATH_PREFIX is missing', () => {
+ process.env.DOCUMENT_REFERENCE_BUCKET = 'test-doc-ref-bucket';
+ process.env.UNSCANNED_FILES_BUCKET = 'test-unscanned-bucket';
+ process.env.EVENT_PUBLISHER_EVENT_BUS_ARN = 'arn:aws:events:test';
+ process.env.EVENT_PUBLISHER_DLQ_URL = 'https://sqs.test.com/dlq';
+
+ expect(() => loadConfig()).toThrow(
+ 'UNSCANNED_FILES_PATH_PREFIX is not set',
+ );
+ });
+
+ it('should throw error when EVENT_PUBLISHER_EVENT_BUS_ARN is missing', () => {
+ process.env.DOCUMENT_REFERENCE_BUCKET = 'test-doc-ref-bucket';
+ process.env.UNSCANNED_FILES_BUCKET = 'test-unscanned-bucket';
+ process.env.UNSCANNED_FILES_PATH_PREFIX = 'dev';
+ process.env.EVENT_PUBLISHER_DLQ_URL = 'https://sqs.test.com/dlq';
+
+ expect(() => loadConfig()).toThrow(
+ 'EVENT_PUBLISHER_EVENT_BUS_ARN is not set',
+ );
+ });
+
+ it('should throw error when EVENT_PUBLISHER_DLQ_URL is missing', () => {
+ process.env.DOCUMENT_REFERENCE_BUCKET = 'test-doc-ref-bucket';
+ process.env.UNSCANNED_FILES_BUCKET = 'test-unscanned-bucket';
+ process.env.UNSCANNED_FILES_PATH_PREFIX = 'dev';
+ process.env.EVENT_PUBLISHER_EVENT_BUS_ARN = 'arn:aws:events:test';
+
+ expect(() => loadConfig()).toThrow('EVENT_PUBLISHER_DLQ_URL is not set');
+ });
+
+ it('should handle empty string values as missing', () => {
+ process.env.DOCUMENT_REFERENCE_BUCKET = '';
+ process.env.UNSCANNED_FILES_BUCKET = 'test-unscanned-bucket';
+ process.env.UNSCANNED_FILES_PATH_PREFIX = 'dev';
+ process.env.EVENT_PUBLISHER_EVENT_BUS_ARN = 'arn:aws:events:test';
+ process.env.EVENT_PUBLISHER_DLQ_URL = 'https://sqs.test.com/dlq';
+
+ expect(() => loadConfig()).toThrow('DOCUMENT_REFERENCE_BUCKET is not set');
+ });
+});
diff --git a/lambdas/file-scanner-lambda/src/apis/sqs-handler.ts b/lambdas/file-scanner-lambda/src/apis/sqs-handler.ts
new file mode 100644
index 000000000..d8b7b7062
--- /dev/null
+++ b/lambdas/file-scanner-lambda/src/apis/sqs-handler.ts
@@ -0,0 +1,127 @@
+import { FileScanner } from 'app/file-scanner';
+import type {
+ SQSBatchItemFailure,
+ SQSBatchResponse,
+ SQSEvent,
+} from 'aws-lambda';
+import { ItemDequeued } from 'digital-letters-events';
+import itemDequeuedValidator from 'digital-letters-events/ItemDequeued.js';
+import { EventPublisher, Logger } from 'utils';
+
+export interface HandlerDependencies {
+ eventPublisher: EventPublisher;
+ logger: Logger;
+ fileScanner: FileScanner;
+}
+
+type ValidatedRecord = {
+ messageId: string;
+ event: ItemDequeued;
+};
+
+function validateRecord(
+ { body, messageId }: { body: string; messageId: string },
+ logger: Logger,
+): ValidatedRecord | null {
+ try {
+ const sqsEventBody = JSON.parse(body);
+ const sqsEventDetail = sqsEventBody.detail;
+
+ const isEventValid = itemDequeuedValidator(sqsEventDetail);
+ if (!isEventValid) {
+ logger.warn({
+ err: itemDequeuedValidator.errors,
+ description: 'Error parsing queue entry',
+ });
+
+ return null;
+ }
+
+ return { messageId, event: sqsEventDetail };
+ } catch (error) {
+ logger.warn({
+ err: error,
+ description: 'Error parsing SQS record',
+ });
+ return null;
+ }
+}
+
+async function processRecord(
+ validatedRecord: ValidatedRecord,
+ { fileScanner, logger }: HandlerDependencies,
+): Promise<void> {
+ const { event } = validatedRecord;
+
+ logger.info({
+ description: 'Processing ItemDequeued event',
+ eventId: event.id,
+ messageReference: event.data.messageReference,
+ senderId: event.data.senderId,
+ });
+
+ const result = await fileScanner.scanFile(event.data.messageUri, {
+ messageReference: event.data.messageReference,
+ senderId: event.data.senderId,
+ createdAt: event.time,
+ });
+
+ if (result.outcome === 'failed') {
+ throw new Error(
+ `Failed to process file for scanning: ${result.errorMessage}`,
+ );
+ }
+}
+
+export function createHandler(dependencies: HandlerDependencies) {
+  return async function handler(event: SQSEvent): Promise<SQSBatchResponse> {
+ const { logger } = dependencies;
+
+ logger.info({
+ description: 'Starting file scanner batch',
+ recordCount: event.Records.length,
+ });
+
+ const itemFailures: SQSBatchItemFailure[] = [];
+
+ for (const record of event.Records) {
+ const validatedRecord = validateRecord(record, logger);
+
+ if (validatedRecord) {
+ try {
+ await processRecord(validatedRecord, dependencies);
+ } catch (error) {
+ logger.error({
+ description: 'Error processing record',
+ err:
+ error instanceof Error
+ ? {
+ message: error.message,
+ name: error.name,
+ stack: error.stack,
+ }
+ : error,
+ messageId: validatedRecord.messageId,
+ });
+
+ itemFailures.push({ itemIdentifier: validatedRecord.messageId });
+ }
+ } else {
+ // Validation failed - return to queue for retry/DLQ
+ logger.warn({
+ description: 'Invalid record will be retried',
+ messageId: record.messageId,
+ });
+ itemFailures.push({ itemIdentifier: record.messageId });
+ }
+ }
+
+ logger.info({
+ description: 'Completed file scanner batch',
+ successCount: event.Records.length - itemFailures.length,
+ failureCount: itemFailures.length,
+ });
+
+ return { batchItemFailures: itemFailures };
+ };
+}
diff --git a/lambdas/file-scanner-lambda/src/app/file-scanner.ts b/lambdas/file-scanner-lambda/src/app/file-scanner.ts
new file mode 100644
index 000000000..3eabde7a5
--- /dev/null
+++ b/lambdas/file-scanner-lambda/src/app/file-scanner.ts
@@ -0,0 +1,172 @@
+import { PutObjectCommand, S3Client } from '@aws-sdk/client-s3';
+import { Logger, getS3Object } from 'utils';
+
+export interface DocumentReference {
+ resourceType: string;
+ id: string;
+ content?: {
+ attachment?: {
+ contentType?: string;
+ data?: string;
+ };
+ }[];
+}
+
+export interface FileScannerDependencies {
+ documentReferenceBucket: string;
+ unscannedFilesBucket: string;
+ unscannedFilesPathPrefix: string;
+ s3Client: S3Client;
+ logger: Logger;
+}
+
+export interface ScanFileMetadata {
+ messageReference: string;
+ senderId: string;
+ createdAt: string;
+}
+
+export type ScanFileResult = {
+ outcome: 'success' | 'failed';
+ errorMessage?: string;
+};
+
+export class FileScanner {
+ private readonly documentReferenceBucket: string;
+
+ private readonly unscannedFilesBucket: string;
+
+ private readonly unscannedFilesPathPrefix: string;
+
+ private readonly s3Client: S3Client;
+
+ private readonly logger: Logger;
+
+ constructor({
+ documentReferenceBucket,
+ logger,
+ s3Client,
+ unscannedFilesBucket,
+ unscannedFilesPathPrefix,
+ }: FileScannerDependencies) {
+ this.documentReferenceBucket = documentReferenceBucket;
+ this.unscannedFilesBucket = unscannedFilesBucket;
+ this.unscannedFilesPathPrefix = unscannedFilesPathPrefix;
+ this.s3Client = s3Client;
+ this.logger = logger;
+ }
+
+ async scanFile(
+ messageUri: string,
+ metadata: ScanFileMetadata,
+  ): Promise<ScanFileResult> {
+ try {
+ this.logger.info({
+ description: 'Starting file scan',
+ messageUri,
+ messageReference: metadata.messageReference,
+ senderId: metadata.senderId,
+ });
+
+ const documentReferenceKey = FileScanner.extractS3Key(messageUri);
+
+ const documentReferenceJson = await getS3Object({
+ Bucket: this.documentReferenceBucket,
+ Key: documentReferenceKey,
+ });
+
+ const documentReference: DocumentReference = JSON.parse(
+ documentReferenceJson,
+ );
+
+ const pdfBase64 =
+ FileScanner.extractPdfFromDocumentReference(documentReference);
+ const pdfBuffer = Buffer.from(pdfBase64, 'base64');
+
+ const unscannedFileKey = this.buildUnscannedFileKey(
+ metadata.messageReference,
+ );
+
+ await this.uploadToUnscannedBucket(unscannedFileKey, pdfBuffer, metadata);
+
+ this.logger.info({
+ description: 'Successfully processed file for scanning',
+ messageReference: metadata.messageReference,
+ senderId: metadata.senderId,
+ unscannedFileKey,
+ });
+
+ return { outcome: 'success' };
+ } catch (error) {
+ const errorMessage =
+ error instanceof Error ? error.message : String(error);
+
+ this.logger.error({
+ description: 'Error processing file for scanning',
+ err:
+ error instanceof Error
+ ? { message: error.message, name: error.name, stack: error.stack }
+ : error,
+ messageReference: metadata.messageReference,
+ senderId: metadata.senderId,
+ });
+
+ return { outcome: 'failed', errorMessage };
+ }
+ }
+
+ private static extractS3Key(messageUri: string): string {
+ const regex = /^s3:\/\/[^/]+\/(.+)$/;
+ const match = regex.exec(messageUri);
+ if (!match) {
+ throw new Error(`Invalid S3 URI format: ${messageUri}`);
+ }
+ return match[1];
+ }
+
+ private static extractPdfFromDocumentReference(
+ documentReference: DocumentReference,
+ ): string {
+ const { content } = documentReference;
+ if (!content || content.length === 0) {
+ throw new Error('DocumentReference has no content');
+ }
+
+ const { attachment } = content[0];
+ if (!attachment || !attachment.data) {
+ throw new Error('DocumentReference content has no attachment data');
+ }
+
+ return attachment.data;
+ }
+
+ private buildUnscannedFileKey(messageReference: string): string {
+ return `${this.unscannedFilesPathPrefix}/${messageReference}.pdf`;
+ }
+
+ private async uploadToUnscannedBucket(
+ key: string,
+ pdfBuffer: Buffer,
+ metadata: ScanFileMetadata,
+  ): Promise<void> {
+ const params = {
+ Bucket: this.unscannedFilesBucket,
+ Key: key,
+ Body: pdfBuffer,
+ ContentType: 'application/pdf',
+ Metadata: {
+ messageReference: metadata.messageReference,
+ senderId: metadata.senderId,
+ createdAt: metadata.createdAt,
+ },
+ };
+
+ await this.s3Client.send(new PutObjectCommand(params));
+
+ this.logger.info({
+ description: 'PDF uploaded to unscanned files bucket',
+ bucket: this.unscannedFilesBucket,
+ key,
+ });
+ }
+}
diff --git a/lambdas/file-scanner-lambda/src/container.ts b/lambdas/file-scanner-lambda/src/container.ts
new file mode 100644
index 000000000..106871151
--- /dev/null
+++ b/lambdas/file-scanner-lambda/src/container.ts
@@ -0,0 +1,40 @@
+import { HandlerDependencies } from 'apis/sqs-handler';
+import { FileScanner } from 'app/file-scanner';
+import { loadConfig } from 'infra/config';
+import {
+ EventPublisher,
+ eventBridgeClient,
+ logger,
+ s3Client,
+ sqsClient,
+} from 'utils';
+
+export const createContainer = (): HandlerDependencies => {
+ const {
+ documentReferenceBucket,
+ eventPublisherDlqUrl,
+ eventPublisherEventBusArn,
+ unscannedFilesBucket,
+ unscannedFilesPathPrefix,
+ } = loadConfig();
+
+ const eventPublisher = new EventPublisher({
+ eventBusArn: eventPublisherEventBusArn,
+ dlqUrl: eventPublisherDlqUrl,
+ logger,
+ sqsClient,
+ eventBridgeClient,
+ });
+
+ const fileScanner = new FileScanner({
+ documentReferenceBucket,
+ unscannedFilesBucket,
+ unscannedFilesPathPrefix,
+ s3Client,
+ logger,
+ });
+
+ return { eventPublisher, logger, fileScanner };
+};
+
+export default createContainer;
diff --git a/lambdas/file-scanner-lambda/src/index.ts b/lambdas/file-scanner-lambda/src/index.ts
new file mode 100644
index 000000000..f25a80861
--- /dev/null
+++ b/lambdas/file-scanner-lambda/src/index.ts
@@ -0,0 +1,6 @@
+import { createHandler } from 'apis/sqs-handler';
+import { createContainer } from 'container';
+
+export const handler = createHandler(createContainer());
+
+export default handler;
diff --git a/lambdas/file-scanner-lambda/src/infra/config.ts b/lambdas/file-scanner-lambda/src/infra/config.ts
new file mode 100644
index 000000000..802931add
--- /dev/null
+++ b/lambdas/file-scanner-lambda/src/infra/config.ts
@@ -0,0 +1,52 @@
+import { logger } from 'utils';
+
+export interface Config {
+ documentReferenceBucket: string;
+ unscannedFilesBucket: string;
+ unscannedFilesPathPrefix: string;
+ eventPublisherEventBusArn: string;
+ eventPublisherDlqUrl: string;
+}
+
+export function loadConfig(): Config {
+ const documentReferenceBucket = process.env.DOCUMENT_REFERENCE_BUCKET;
+ const unscannedFilesBucket = process.env.UNSCANNED_FILES_BUCKET;
+ const unscannedFilesPathPrefix = process.env.UNSCANNED_FILES_PATH_PREFIX;
+ const eventPublisherEventBusArn = process.env.EVENT_PUBLISHER_EVENT_BUS_ARN;
+ const eventPublisherDlqUrl = process.env.EVENT_PUBLISHER_DLQ_URL;
+
+ if (!documentReferenceBucket) {
+ throw new Error('DOCUMENT_REFERENCE_BUCKET is not set');
+ }
+
+ if (!unscannedFilesBucket) {
+ throw new Error('UNSCANNED_FILES_BUCKET is not set');
+ }
+
+ if (!unscannedFilesPathPrefix) {
+ throw new Error('UNSCANNED_FILES_PATH_PREFIX is not set');
+ }
+
+ if (!eventPublisherEventBusArn) {
+ throw new Error('EVENT_PUBLISHER_EVENT_BUS_ARN is not set');
+ }
+
+ if (!eventPublisherDlqUrl) {
+ throw new Error('EVENT_PUBLISHER_DLQ_URL is not set');
+ }
+
+ logger.info({
+ description: 'Configuration loaded',
+ documentReferenceBucket,
+ unscannedFilesBucket,
+ unscannedFilesPathPrefix,
+ });
+
+ return {
+ documentReferenceBucket,
+ unscannedFilesBucket,
+ unscannedFilesPathPrefix,
+ eventPublisherEventBusArn,
+ eventPublisherDlqUrl,
+ };
+}
diff --git a/lambdas/file-scanner-lambda/tsconfig.json b/lambdas/file-scanner-lambda/tsconfig.json
new file mode 100644
index 000000000..c2c957f53
--- /dev/null
+++ b/lambdas/file-scanner-lambda/tsconfig.json
@@ -0,0 +1,13 @@
+{
+ "compilerOptions": {
+ "baseUrl": "src"
+ },
+ "exclude": [
+ "node_modules"
+ ],
+ "extends": "@tsconfig/node22/tsconfig.json",
+ "include": [
+ "src/**/*",
+ "jest.config.ts"
+ ]
+}
diff --git a/package-lock.json b/package-lock.json
index af9f46e08..514e05686 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -8,6 +8,7 @@
"name": "nhs-notify-digital-letters",
"version": "0.0.1",
"workspaces": [
+ "lambdas/file-scanner-lambda",
"lambdas/key-generation",
"lambdas/refresh-apim-access-token",
"lambdas/pdm-mock-lambda",
@@ -157,6 +158,77 @@
"typescript": "^3.0.0 || ^4.0.0 || ^5.0.0"
}
},
+ "lambdas/file-scanner-lambda": {
+ "name": "nhs-notify-digital-letters-file-scanner-lambda",
+ "version": "0.0.1",
+ "dependencies": {
+ "@aws-sdk/client-s3": "^3.908.0",
+ "aws-lambda": "^1.0.7",
+ "digital-letters-events": "^0.0.1",
+ "utils": "^0.0.1"
+ },
+ "devDependencies": {
+ "@tsconfig/node22": "^22.0.2",
+ "@types/aws-lambda": "^8.10.155",
+ "@types/jest": "^29.5.14",
+ "jest": "^29.7.0",
+ "jest-mock-extended": "^3.0.7",
+ "typescript": "^5.9.3"
+ }
+ },
+ "lambdas/file-scanner-lambda/node_modules/@types/jest": {
+ "version": "29.5.14",
+ "resolved": "https://registry.npmjs.org/@types/jest/-/jest-29.5.14.tgz",
+ "integrity": "sha512-ZN+4sdnLUbo8EVvVc2ao0GFW6oVrQRPn4K2lglySj7APvSrgzxHiNNK99us4WDMi57xxA2yggblIAMNhXOotLQ==",
+ "dev": true,
+ "license": "MIT",
+ "dependencies": {
+ "expect": "^29.0.0",
+ "pretty-format": "^29.0.0"
+ }
+ },
+ "lambdas/file-scanner-lambda/node_modules/jest": {
+ "version": "29.7.0",
+ "resolved": "https://registry.npmjs.org/jest/-/jest-29.7.0.tgz",
+ "integrity": "sha512-NIy3oAFp9shda19hy4HK0HRTWKtPJmGdnvywu01nOqNC2vZg+Z+fvJDxpMQA88eb2I9EcafcdjYgsDthnYTvGw==",
+ "dev": true,
+ "license": "MIT",
+ "peer": true,
+ "dependencies": {
+ "@jest/core": "^29.7.0",
+ "@jest/types": "^29.6.3",
+ "import-local": "^3.0.2",
+ "jest-cli": "^29.7.0"
+ },
+ "bin": {
+ "jest": "bin/jest.js"
+ },
+ "engines": {
+ "node": "^14.15.0 || ^16.10.0 || >=18.0.0"
+ },
+ "peerDependencies": {
+ "node-notifier": "^8.0.1 || ^9.0.0 || ^10.0.0"
+ },
+ "peerDependenciesMeta": {
+ "node-notifier": {
+ "optional": true
+ }
+ }
+ },
+ "lambdas/file-scanner-lambda/node_modules/jest-mock-extended": {
+ "version": "3.0.7",
+ "resolved": "https://registry.npmjs.org/jest-mock-extended/-/jest-mock-extended-3.0.7.tgz",
+ "integrity": "sha512-7lsKdLFcW9B9l5NzZ66S/yTQ9k8rFtnwYdCNuRU/81fqDWicNDVhitTSPnrGmNeNm0xyw0JHexEOShrIKRCIRQ==",
+ "dev": true,
+ "license": "MIT",
+ "dependencies": {
+ "ts-essentials": "^10.0.0"
+ },
+ "peerDependencies": {
+ "jest": "^24.0.0 || ^25.0.0 || ^26.0.0 || ^27.0.0 || ^28.0.0 || ^29.0.0",
+ "typescript": "^3.0.0 || ^4.0.0 || ^5.0.0"
+ }
+ },
"lambdas/key-generation": {
"version": "0.0.1",
"dependencies": {
@@ -12556,6 +12628,10 @@
"resolved": "lambdas/core-notifier-lambda",
"link": true
},
+ "node_modules/nhs-notify-digital-letters-file-scanner-lambda": {
+ "resolved": "lambdas/file-scanner-lambda",
+ "link": true
+ },
"node_modules/nhs-notify-digital-letters-integration-tests": {
"resolved": "tests/playwright",
"link": true
diff --git a/package.json b/package.json
index 84f4f8a14..7a1ee60da 100644
--- a/package.json
+++ b/package.json
@@ -54,6 +54,7 @@
},
"version": "0.0.1",
"workspaces": [
+ "lambdas/file-scanner-lambda",
"lambdas/key-generation",
"lambdas/refresh-apim-access-token",
"lambdas/pdm-mock-lambda",
diff --git a/scripts/docker/examples/python/.tool-versions.example b/scripts/docker/examples/python/.tool-versions.example
deleted file mode 100644
index 920931162..000000000
--- a/scripts/docker/examples/python/.tool-versions.example
+++ /dev/null
@@ -1,2 +0,0 @@
-# python, SEE: https://hub.docker.com/_/python/tags
-# docker/python 3.11.4-alpine3.18@sha256:0135ae6442d1269379860b361760ad2cf6ab7c403d21935a8015b48d5bf78a86
diff --git a/scripts/docker/examples/python/Dockerfile b/scripts/docker/examples/python/Dockerfile
deleted file mode 100644
index d0780aa41..000000000
--- a/scripts/docker/examples/python/Dockerfile
+++ /dev/null
@@ -1,33 +0,0 @@
-# `*:latest` will be replaced with a corresponding version stored in the '.tool-versions' file
-# hadolint ignore=DL3007
-FROM python:latest as base
-
-# === Builder ==================================================================
-
-FROM base AS builder
-COPY ./assets/hello_world/requirements.txt /requirements.txt
-WORKDIR /packages
-RUN set -eux; \
- \
- # Install dependencies
- pip install \
- --requirement /requirements.txt \
- --prefix=/packages \
- --no-warn-script-location \
- --no-cache-dir
-
-# === Runtime ==================================================================
-
-FROM base
-ENV \
- LANG="C.UTF-8" \
- LC_ALL="C.UTF-8" \
- PYTHONDONTWRITEBYTECODE="1" \
- PYTHONUNBUFFERED="1" \
- TZ="UTC"
-COPY --from=builder /packages /usr/local
-COPY ./assets/hello_world /hello_world
-WORKDIR /hello_world
-USER nobody
-CMD [ "python", "app.py" ]
-EXPOSE 8000
diff --git a/scripts/docker/examples/python/Dockerfile.effective b/scripts/docker/examples/python/Dockerfile.effective
deleted file mode 100644
index 3f1ea6b07..000000000
--- a/scripts/docker/examples/python/Dockerfile.effective
+++ /dev/null
@@ -1,54 +0,0 @@
-# `*:latest` will be replaced with a corresponding version stored in the '.tool-versions' file
-FROM python:3.11.4-alpine3.18@sha256:0135ae6442d1269379860b361760ad2cf6ab7c403d21935a8015b48d5bf78a86 as base
-
-# === Builder ==================================================================
-
-FROM base AS builder
-COPY ./assets/hello_world/requirements.txt /requirements.txt
-WORKDIR /packages
-RUN set -eux; \
- \
- # Install dependencies
- pip install \
- --requirement /requirements.txt \
- --prefix=/packages \
- --no-warn-script-location \
- --no-cache-dir
-
-# === Runtime ==================================================================
-
-FROM base
-ENV \
- LANG="C.UTF-8" \
- LC_ALL="C.UTF-8" \
- PYTHONDONTWRITEBYTECODE="1" \
- PYTHONUNBUFFERED="1" \
- TZ="UTC"
-COPY --from=builder /packages /usr/local
-COPY ./assets/hello_world /hello_world
-WORKDIR /hello_world
-USER nobody
-CMD [ "python", "app.py" ]
-EXPOSE 8000
-
-# === Metadata =================================================================
-
-ARG IMAGE
-ARG TITLE
-ARG DESCRIPTION
-ARG LICENCE
-ARG GIT_URL
-ARG GIT_BRANCH
-ARG GIT_COMMIT_HASH
-ARG BUILD_DATE
-ARG BUILD_VERSION
-LABEL \
- org.opencontainers.image.base.name=$IMAGE \
- org.opencontainers.image.title="$TITLE" \
- org.opencontainers.image.description="$DESCRIPTION" \
- org.opencontainers.image.licenses="$LICENCE" \
- org.opencontainers.image.url=$GIT_URL \
- org.opencontainers.image.ref.name=$GIT_BRANCH \
- org.opencontainers.image.revision=$GIT_COMMIT_HASH \
- org.opencontainers.image.created=$BUILD_DATE \
- org.opencontainers.image.version=$BUILD_VERSION
diff --git a/scripts/docker/examples/python/VERSION b/scripts/docker/examples/python/VERSION
deleted file mode 100644
index 8acdd82b7..000000000
--- a/scripts/docker/examples/python/VERSION
+++ /dev/null
@@ -1 +0,0 @@
-0.0.1
diff --git a/scripts/docker/examples/python/assets/hello_world/app.py b/scripts/docker/examples/python/assets/hello_world/app.py
deleted file mode 100644
index 4844e89cb..000000000
--- a/scripts/docker/examples/python/assets/hello_world/app.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from flask import Flask
-from flask_wtf.csrf import CSRFProtect
-
-app = Flask(__name__)
-csrf = CSRFProtect()
-csrf.init_app(app)
-
-@app.route("/")
-def index():
- return "Hello World!"
-
-app.run(host='0.0.0.0', port=8000)
diff --git a/scripts/docker/examples/python/assets/hello_world/requirements.txt b/scripts/docker/examples/python/assets/hello_world/requirements.txt
deleted file mode 100644
index fd7fb3118..000000000
--- a/scripts/docker/examples/python/assets/hello_world/requirements.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-blinker==1.6.2
-click==8.1.7
-Flask-WTF==1.2.0
-Flask==2.3.3
-itsdangerous==2.1.2
-Jinja2==3.1.6
-MarkupSafe==2.1.3
-pip==25.3
-setuptools==78.1.1
-Werkzeug==3.1.4
-wheel==0.46.2
-WTForms==3.0.1
diff --git a/scripts/docker/examples/python/tests/goss.yaml b/scripts/docker/examples/python/tests/goss.yaml
deleted file mode 100644
index 589db37bf..000000000
--- a/scripts/docker/examples/python/tests/goss.yaml
+++ /dev/null
@@ -1,8 +0,0 @@
-package:
- python:
- installed: true
-
-command:
- pip list | grep -i flask:
- exit-status: 0
- timeout: 60000
diff --git a/tests/playwright/constants/backend-constants.ts b/tests/playwright/constants/backend-constants.ts
index d9d789130..fae4a1df9 100644
--- a/tests/playwright/constants/backend-constants.ts
+++ b/tests/playwright/constants/backend-constants.ts
@@ -19,6 +19,7 @@ export const TTL_DLQ_NAME = `${CSI}-ttl-dlq`;
export const PDM_UPLOADER_DLQ_NAME = `${CSI}-pdm-uploader-dlq`;
export const PDM_POLL_DLQ_NAME = `${CSI}-pdm-poll-dlq`;
export const CORE_NOTIFIER_DLQ_NAME = `${CSI}-core-notifier-dlq`;
+export const FILE_SCANNER_DLQ_NAME = `${CSI}-scanner-dlq`;
export const PRINT_STATUS_HANDLER_DLQ_NAME = `${CSI}-print-status-handler-dlq`;
export const HANDLE_TTL_DLQ_NAME = `${CSI}-ttl-handle-expiry-errors-queue`;
export const PRINT_ANALYSER_DLQ_NAME = `${CSI}-print-analyser-dlq`;
@@ -42,5 +43,6 @@ export const FILE_SAFE_S3_BUCKET_NAME = `nhs-${process.env.AWS_ACCOUNT_ID}-${REG
export const PDM_UPLOADER_LAMBDA_LOG_GROUP_NAME = `/aws/lambda/${CSI}-pdm-uploader`;
export const PDM_POLL_LAMBDA_LOG_GROUP_NAME = `/aws/lambda/${CSI}-pdm-poll`;
export const CORE_NOTIFIER_LAMBDA_LOG_GROUP_NAME = `/aws/lambda/${CSI}-core-notifier`;
+export const FILE_SCANNER_LAMBDA_LOG_GROUP_NAME = `/aws/lambda/${CSI}-file-scanner`;
export const PRINT_STATUS_HANDLER_LAMBDA_LOG_GROUP_NAME = `/aws/lambda/${CSI}-print-status-handler`;
export const PRINT_ANALYSER_LAMBDA_LOG_GROUP_NAME = `/aws/lambda/${CSI}-print-analyser`;
diff --git a/tests/playwright/digital-letters-component-tests/file-scanner.component.spec.ts b/tests/playwright/digital-letters-component-tests/file-scanner.component.spec.ts
new file mode 100644
index 000000000..8469b9a50
--- /dev/null
+++ b/tests/playwright/digital-letters-component-tests/file-scanner.component.spec.ts
@@ -0,0 +1,148 @@
+import { expect, test } from '@playwright/test';
+import { ENV, REGION } from 'constants/backend-constants';
+import itemDequeuedValidator from 'digital-letters-events/ItemDequeued.js';
+import eventPublisher from 'helpers/event-bus-helpers';
+import expectToPassEventually from 'helpers/expectations';
+import { v4 as uuidv4 } from 'uuid';
+import {
+ getS3ObjectBufferFromUri,
+ getS3ObjectMetadata,
+ putDataS3,
+} from 'utils';
+
+const DOCUMENT_REFERENCE_BUCKET = `nhs-${process.env.AWS_ACCOUNT_ID}-${REGION}-${ENV}-dl-pii-data`;
+const UNSCANNED_FILES_BUCKET = `nhs-${process.env.AWS_ACCOUNT_ID}-${REGION}-main-acct-digi-unscanned-files`;
+
+test.describe('File Scanner', () => {
+ test.beforeAll(async () => {
+ test.setTimeout(250_000);
+ });
+
+ test('should extract PDF from DocumentReference and store in unscanned bucket with metadata', async () => {
+ const messageReference = uuidv4();
+ const senderId = 'TEST_SENDER_001';
+ const documentReferenceKey = messageReference;
+
+ const pdfContent = Buffer.from('Sample PDF content for test');
+ const documentReference = {
+ resourceType: 'DocumentReference',
+ id: messageReference,
+ content: [
+ {
+ attachment: {
+ contentType: 'application/pdf',
+ data: pdfContent.toString('base64'),
+ },
+ },
+ ],
+ };
+
+ await putDataS3(documentReference, {
+ Bucket: DOCUMENT_REFERENCE_BUCKET,
+ Key: documentReferenceKey,
+ });
+
+ const eventId = uuidv4();
+ const messageUri = `s3://${DOCUMENT_REFERENCE_BUCKET}/${documentReferenceKey}`;
+ const eventTime = new Date().toISOString();
+
+ await eventPublisher.sendEvents(
+ [
+ {
+ id: eventId,
+ specversion: '1.0',
+ source: `/nhs/england/notify/development/dev-1/data-plane/digitalletters/queue`,
+ subject: `message/${messageReference}`,
+ type: 'uk.nhs.notify.digital.letters.queue.item.dequeued.v1',
+ time: eventTime,
+ recordedtime: eventTime,
+ severitynumber: 2,
+ traceparent:
+ '00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01',
+ datacontenttype: 'application/json',
+ dataschema:
+ 'https://notify.nhs.uk/cloudevents/schemas/digital-letters/2025-10-draft/data/digital-letters-queue-item-dequeued-data.schema.json',
+ severitytext: 'INFO',
+ data: {
+ messageReference,
+ senderId,
+ messageUri,
+ },
+ },
+ ],
+ itemDequeuedValidator,
+ );
+
+ await expectToPassEventually(async () => {
+ const expectedKey = `${ENV}/${messageReference}.pdf`;
+ const expectedUri = `s3://${UNSCANNED_FILES_BUCKET}/${expectedKey}`;
+
+ const storedPdf = await getS3ObjectBufferFromUri(expectedUri);
+ expect(storedPdf).toBeDefined();
+ expect(storedPdf.toString()).toEqual(pdfContent.toString());
+
+ const metadata = await getS3ObjectMetadata({
+ Bucket: UNSCANNED_FILES_BUCKET,
+ Key: expectedKey,
+ });
+ expect(metadata).toBeDefined();
+ expect(metadata?.messagereference).toEqual(messageReference);
+ expect(metadata?.senderid).toEqual(senderId);
+ expect(metadata?.createdat).toBeDefined();
+ }, 120);
+ });
+
+ test('should handle validation errors by sending messages to DLQ', async () => {
+ const messageReference = uuidv4();
+ const senderId = 'TEST_SENDER_002';
+ const documentReferenceKey = `document-reference/${messageReference}`;
+
+ const documentReference = {
+ resourceType: 'DocumentReference',
+ id: messageReference,
+ content: [],
+ };
+
+ await putDataS3(documentReference, {
+ Bucket: DOCUMENT_REFERENCE_BUCKET,
+ Key: documentReferenceKey,
+ });
+
+ const eventId = uuidv4();
+ const messageUri = `s3://${DOCUMENT_REFERENCE_BUCKET}/${documentReferenceKey}`;
+ const eventTime = new Date().toISOString();
+
+ await eventPublisher.sendEvents(
+ [
+ {
+ id: eventId,
+ specversion: '1.0',
+ source: `/nhs/england/notify/development/dev-1/data-plane/digitalletters/queue`,
+ subject: `message/${messageReference}`,
+ type: 'uk.nhs.notify.digital.letters.queue.item.dequeued.v1',
+ time: eventTime,
+ recordedtime: eventTime,
+ severitynumber: 2,
+ traceparent:
+ '00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01',
+ datacontenttype: 'application/json',
+ dataschema:
+ 'https://notify.nhs.uk/cloudevents/schemas/digital-letters/2025-10-draft/data/digital-letters-queue-item-dequeued-data.schema.json',
+ severitytext: 'INFO',
+ data: {
+ messageReference,
+ senderId,
+ messageUri,
+ },
+ },
+ ],
+ itemDequeuedValidator,
+ );
+
+ await expectToPassEventually(async () => {
+ const expectedKey = `${ENV}/${messageReference}.pdf`;
+ const expectedUri = `s3://${UNSCANNED_FILES_BUCKET}/${expectedKey}`;
+ await expect(getS3ObjectBufferFromUri(expectedUri)).rejects.toThrow();
+ }, 120);
+ });
+});
diff --git a/tests/playwright/digital-letters-component-tests/pdm-uploader.component.spec.ts b/tests/playwright/digital-letters-component-tests/pdm-uploader.component.spec.ts
index 5a2d99c49..557599464 100644
--- a/tests/playwright/digital-letters-component-tests/pdm-uploader.component.spec.ts
+++ b/tests/playwright/digital-letters-component-tests/pdm-uploader.component.spec.ts
@@ -9,9 +9,9 @@ import messageDownloadedValidator from 'digital-letters-events/MESHInboxMessageD
import { getLogsFromCloudwatch } from 'helpers/cloudwatch-helpers';
import eventPublisher from 'helpers/event-bus-helpers';
import expectToPassEventually from 'helpers/expectations';
-import { uploadToS3 } from 'helpers/s3-helpers';
import { expectMessageContainingString, purgeQueue } from 'helpers/sqs-helpers';
import { v4 as uuidv4 } from 'uuid';
+import { putDataS3 } from 'utils';
const pdmRequest = {
resourceType: 'DocumentReference',
@@ -64,7 +64,10 @@ test.describe('Digital Letters - Upload to PDM', () => {
const messageReference = uuidv4();
const senderId = 'test-sender-1';
- uploadToS3(JSON.stringify(pdmRequest), LETTERS_S3_BUCKET_NAME, resourceKey);
+ await putDataS3(pdmRequest, {
+ Bucket: LETTERS_S3_BUCKET_NAME,
+ Key: resourceKey,
+ });
await eventPublisher.sendEvents(
[
@@ -120,11 +123,10 @@ test.describe('Digital Letters - Upload to PDM', () => {
unexpectedField: 'I should not be here',
};
- uploadToS3(
- JSON.stringify(invalidPdmRequest),
- LETTERS_S3_BUCKET_NAME,
- resourceKey,
- );
+ await putDataS3(invalidPdmRequest, {
+ Bucket: LETTERS_S3_BUCKET_NAME,
+ Key: resourceKey,
+ });
await eventPublisher.sendEvents(
[
diff --git a/tests/playwright/helpers/s3-helpers.ts b/tests/playwright/helpers/s3-helpers.ts
deleted file mode 100644
index 2b6ec2e66..000000000
--- a/tests/playwright/helpers/s3-helpers.ts
+++ /dev/null
@@ -1,39 +0,0 @@
-import {
- ListBucketsCommand,
- PutObjectCommand,
- S3Client,
-} from '@aws-sdk/client-s3';
-
-const s3 = new S3Client({
- region: process.env.AWS_REGION || 'eu-west-2',
-});
-
-async function listBuckets(substring: string): Promise<string[]> {
- const resp = await s3.send(new ListBucketsCommand({}));
- const buckets = resp.Buckets ?? [];
- if (!substring) {
- return buckets.map((b) => b.Name!).filter(Boolean);
- }
- const needle = substring.toLowerCase();
- return buckets
- .map((b) => b.Name)
- .filter(
- (name): name is string => !!name && name.toLowerCase().includes(needle),
- );
-}
-
-async function uploadToS3(
- content: string,
- bucket: string,
- key: string,
-): Promise<void> {
- await s3.send(
- new PutObjectCommand({
- Bucket: bucket,
- Key: key,
- Body: content,
- }),
- );
-}
-
-export { listBuckets, uploadToS3 };
diff --git a/utils/utils/src/__tests__/s3-utils/get-object-s3.test.ts b/utils/utils/src/__tests__/s3-utils/get-object-s3.test.ts
index d9cce7e03..aa6e70143 100644
--- a/utils/utils/src/__tests__/s3-utils/get-object-s3.test.ts
+++ b/utils/utils/src/__tests__/s3-utils/get-object-s3.test.ts
@@ -3,6 +3,7 @@ import {
getS3Object,
getS3ObjectBufferFromUri,
getS3ObjectFromUri,
+ getS3ObjectMetadata,
s3Client,
} from '../../s3-utils';
@@ -259,3 +260,85 @@ describe('getS3ObjectBufferFromUri', () => {
);
});
});
+
+describe('getS3ObjectMetadata', () => {
+ afterEach(jest.resetAllMocks);
+
+ it('Should retrieve metadata for object', async () => {
+ const metadata = {
+ messagereference: 'test-ref-001',
+ senderid: 'SENDER_001',
+ createdat: '2026-01-19T12:00:00Z',
+ };
+
+ s3Client.send = jest.fn().mockReturnValueOnce({ Metadata: metadata });
+
+ const s3Location = {
+ Bucket: 'bucket-name',
+ Key: 'test-file.pdf',
+ };
+
+ const result = await getS3ObjectMetadata(s3Location);
+
+ expect(s3Client.send).toHaveBeenCalledWith(
+ expect.objectContaining({
+ input: s3Location,
+ }),
+ );
+ expect(result).toEqual(metadata);
+ });
+
+ it('Should retrieve metadata with version ID', async () => {
+ const metadata = {
+ customkey: 'customvalue',
+ };
+
+ s3Client.send = jest.fn().mockReturnValueOnce({ Metadata: metadata });
+
+ const s3Location = {
+ Bucket: 'bucket-name',
+ Key: 'versioned-file.json',
+ VersionId: 'version-123',
+ };
+
+ const result = await getS3ObjectMetadata(s3Location);
+
+ expect(s3Client.send).toHaveBeenCalledWith(
+ expect.objectContaining({
+ input: s3Location,
+ }),
+ );
+ expect(result).toEqual(metadata);
+ });
+
+ it('Should throw an error if object not found', async () => {
+ s3Client.send = jest.fn().mockImplementationOnce(() => {
+ throw new Error('NoSuchKey');
+ });
+
+ await expect(
+ getS3ObjectMetadata({
+ Bucket: 'bucket-name',
+ Key: 'nonexistent.pdf',
+ }),
+ ).rejects.toThrow(
+ "Could not retrieve metadata from bucket 's3://bucket-name/nonexistent.pdf' from S3: NoSuchKey",
+ );
+ });
+
+ it('Should handle error objects', async () => {
+ const error = new Error('Access Denied');
+ s3Client.send = jest.fn().mockImplementationOnce(() => {
+ throw error;
+ });
+
+ await expect(
+ getS3ObjectMetadata({
+ Bucket: 'bucket-name',
+ Key: 'forbidden.pdf',
+ }),
+ ).rejects.toThrow(
+ "Could not retrieve metadata from bucket 's3://bucket-name/forbidden.pdf' from S3: Access Denied",
+ );
+ });
+});
diff --git a/utils/utils/src/s3-utils/get-object-s3.ts b/utils/utils/src/s3-utils/get-object-s3.ts
index 9430b4729..8d541ed3f 100644
--- a/utils/utils/src/s3-utils/get-object-s3.ts
+++ b/utils/utils/src/s3-utils/get-object-s3.ts
@@ -1,5 +1,9 @@
import { type Readable } from 'node:stream';
-import { GetObjectCommand, GetObjectCommandOutput } from '@aws-sdk/client-s3';
+import {
+ GetObjectCommand,
+ GetObjectCommandOutput,
+ HeadObjectCommand,
+} from '@aws-sdk/client-s3';
import { s3Client } from './s3-client';
export function isReadable(
@@ -122,3 +126,24 @@ export async function getS3ObjectBufferFromUri(uri: string): Promise<Buffer> {
);
}
}
+
+export async function getS3ObjectMetadata(
+ location: S3Location,
+): Promise<Record<string, string> | undefined> {
+ const { Bucket, Key, VersionId } = location;
+ try {
+ const response = await s3Client.send(
+ new HeadObjectCommand({
+ Bucket,
+ Key,
+ VersionId,
+ }),
+ );
+ return response.Metadata;
+ } catch (error) {
+ const msg = error instanceof Error ? error.message : String(error);
+ throw new Error(
+ `Could not retrieve metadata from bucket 's3://${Bucket}/${Key}' from S3: ${msg}`,
+ );
+ }
+}