diff --git a/infrastructure/terraform/components/dl/README.md b/infrastructure/terraform/components/dl/README.md index f763d5c0a..964704f0f 100644 --- a/infrastructure/terraform/components/dl/README.md +++ b/infrastructure/terraform/components/dl/README.md @@ -42,6 +42,7 @@ No requirements. | Name | Source | Version | |------|--------|---------| | [core\_notifier](#module\_core\_notifier) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.29/terraform-lambda.zip | n/a | +| [file\_scanner](#module\_file\_scanner) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.29/terraform-lambda.zip | n/a | | [kms](#module\_kms) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.24/terraform-kms.zip | n/a | | [lambda\_apim\_key\_generation](#module\_lambda\_apim\_key\_generation) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.29/terraform-lambda.zip | n/a | | [lambda\_lambda\_apim\_refresh\_token](#module\_lambda\_lambda\_apim\_refresh\_token) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.29/terraform-lambda.zip | n/a | @@ -65,6 +66,7 @@ No requirements. | [sqs\_pdm\_uploader](#module\_sqs\_pdm\_uploader) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.24/terraform-sqs.zip | n/a | | [sqs\_print\_analyser](#module\_sqs\_print\_analyser) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.30/terraform-sqs.zip | n/a | | [sqs\_print\_status\_handler](#module\_sqs\_print\_status\_handler) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.30/terraform-sqs.zip | n/a | +| [sqs\_scanner](#module\_sqs\_scanner) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.24/terraform-sqs.zip | n/a | | [sqs\_ttl](#module\_sqs\_ttl) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.24/terraform-sqs.zip | n/a | | [sqs\_ttl\_handle\_expiry\_errors](#module\_sqs\_ttl\_handle\_expiry\_errors) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.24/terraform-sqs.zip | n/a | | [ttl\_create](#module\_ttl\_create) | https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.29/terraform-lambda.zip | n/a | diff --git a/infrastructure/terraform/components/dl/cloudwatch_event_rule_item_dequeued.tf b/infrastructure/terraform/components/dl/cloudwatch_event_rule_item_dequeued.tf new file mode 100644 index 000000000..22a31d84f --- /dev/null +++ b/infrastructure/terraform/components/dl/cloudwatch_event_rule_item_dequeued.tf @@ -0,0 +1,19 @@ +resource "aws_cloudwatch_event_rule" "item_dequeued" { + name = "${local.csi}-item-dequeued" + description = "Queue item dequeued event rule" + event_bus_name = aws_cloudwatch_event_bus.main.name + + event_pattern = jsonencode({ + "detail" : { + "type" : [ + "uk.nhs.notify.digital.letters.queue.item.dequeued.v1" + ] + } + }) +} + +resource "aws_cloudwatch_event_target" "item_dequeued_scanner" { + rule = aws_cloudwatch_event_rule.item_dequeued.name + arn = module.sqs_scanner.sqs_queue_arn + event_bus_name = aws_cloudwatch_event_bus.main.name +} diff --git a/infrastructure/terraform/components/dl/lambda_event_source_mapping_file_scanner.tf b/infrastructure/terraform/components/dl/lambda_event_source_mapping_file_scanner.tf new file mode 100644 index 000000000..7e946716d --- /dev/null +++ b/infrastructure/terraform/components/dl/lambda_event_source_mapping_file_scanner.tf @@ -0,0 +1,7 @@ +resource "aws_lambda_event_source_mapping" "file_scanner" { + event_source_arn = module.sqs_scanner.sqs_queue_arn + function_name = module.file_scanner.function_name + batch_size = 10 + maximum_batching_window_in_seconds = 5 + function_response_types = ["ReportBatchItemFailures"] +} diff --git a/infrastructure/terraform/components/dl/locals.tf b/infrastructure/terraform/components/dl/locals.tf index 9298b97b9..da187ad9e 100644 --- a/infrastructure/terraform/components/dl/locals.tf +++ b/infrastructure/terraform/components/dl/locals.tf @@ -5,6 +5,7 @@ locals { apim_api_key_ssm_parameter_name = "/${var.component}/${var.environment}/apim/api_key" apim_private_key_ssm_parameter_name = "/${var.component}/${var.environment}/apim/private_key" apim_keystore_s3_bucket = "nhs-${var.aws_account_id}-${var.region}-${var.environment}-${var.component}-static-assets" + unscanned_files_bucket = local.acct.additional_s3_buckets["digital-letters_unscanned-files"]["id"] ssm_mesh_prefix = "/${var.component}/${var.environment}/mesh" mock_mesh_endpoint = "s3://${module.s3bucket_non_pii_data.bucket}/mock-mesh" root_domain_name = "${var.environment}.${local.acct.route53_zone_names["digital-letters"]}" diff --git a/infrastructure/terraform/components/dl/module_lambda_file_scanner.tf b/infrastructure/terraform/components/dl/module_lambda_file_scanner.tf new file mode 100644 index 000000000..877550684 --- /dev/null +++ b/infrastructure/terraform/components/dl/module_lambda_file_scanner.tf @@ -0,0 +1,131 @@ +module "file_scanner" { + source = "https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.29/terraform-lambda.zip" + + function_name = "file-scanner" + description = "A function for extracting PDFs from DocumentReference and storing in UnscannedFiles bucket for virus scanning" + + aws_account_id = var.aws_account_id + component = local.component + environment = var.environment + project = var.project + region = var.region + group = var.group + + log_retention_in_days = var.log_retention_in_days + kms_key_arn = module.kms.key_arn + + iam_policy_document = { + body = data.aws_iam_policy_document.file_scanner_lambda.json + } + + function_s3_bucket = local.acct.s3_buckets["lambda_function_artefacts"]["id"] + function_code_base_path = local.aws_lambda_functions_dir_path + function_code_dir = "file-scanner-lambda/dist" + function_include_common = true + handler_function_name = "handler" + runtime = "nodejs22.x" + memory = 512 + timeout = 60 + log_level = var.log_level + + force_lambda_code_deploy = var.force_lambda_code_deploy + enable_lambda_insights = false + + log_destination_arn = local.log_destination_arn + log_subscription_role_arn = local.acct.log_subscription_role_arn + + lambda_env_vars = { + "DOCUMENT_REFERENCE_BUCKET" = module.s3bucket_pii_data.bucket + "UNSCANNED_FILES_BUCKET" = local.unscanned_files_bucket + "UNSCANNED_FILES_PATH_PREFIX" = var.environment + "EVENT_PUBLISHER_EVENT_BUS_ARN" = aws_cloudwatch_event_bus.main.arn + "EVENT_PUBLISHER_DLQ_URL" = module.sqs_event_publisher_errors.sqs_queue_url + } +} + +data "aws_iam_policy_document" "file_scanner_lambda" { + statement { + sid = "ReadDocumentReferenceBucket" + effect = "Allow" + + actions = [ + "s3:GetObject", + "s3:ListBucket", + ] + + resources = [ + module.s3bucket_pii_data.arn, + "${module.s3bucket_pii_data.arn}/*", + ] + } + + statement { + sid = "WriteUnscannedFilesBucket" + effect = "Allow" + + actions = [ + "s3:PutObject", + ] + + resources = [ + "arn:aws:s3:::${local.unscanned_files_bucket}/*", + ] + } + + statement { + sid = "PutEvents" + effect = "Allow" + + actions = [ + "events:PutEvents", + ] + + resources = [ + aws_cloudwatch_event_bus.main.arn, + ] + } + + statement { + sid = "SQSPermissionsDLQs" + effect = "Allow" + + actions = [ + "sqs:SendMessage", + "sqs:SendMessageBatch", + ] + + resources = [ + module.sqs_event_publisher_errors.sqs_queue_arn, + ] + } + + statement { + sid = "SQSPermissionsScannerQueue" + effect = "Allow" + + actions = [ + "sqs:ReceiveMessage", + "sqs:DeleteMessage", + "sqs:GetQueueAttributes", + "sqs:GetQueueUrl", + ] + + resources = [ + module.sqs_scanner.sqs_queue_arn, + ] + } + + statement { + sid = "KMSPermissions" + effect = "Allow" + + actions = [ + "kms:Decrypt", + "kms:GenerateDataKey", + ] + + resources = [ + module.kms.key_arn, + ] + } +} diff --git a/infrastructure/terraform/components/dl/module_sqs_scanner.tf b/infrastructure/terraform/components/dl/module_sqs_scanner.tf new file mode 100644 index 000000000..248fe16aa --- /dev/null +++ b/infrastructure/terraform/components/dl/module_sqs_scanner.tf @@ -0,0 +1,41 @@ +module "sqs_scanner" { + source = "https://github.com/NHSDigital/nhs-notify-shared-modules/releases/download/v2.0.24/terraform-sqs.zip" + + aws_account_id = var.aws_account_id + component = local.component + environment = var.environment + project = var.project + region = var.region + name = "file-scanner" + sqs_kms_key_arn = module.kms.key_arn + visibility_timeout_seconds = 60 + delay_seconds = 0 + create_dlq = true + sqs_policy_overload = data.aws_iam_policy_document.sqs_scanner.json +} + +data "aws_iam_policy_document" "sqs_scanner" { + statement { + sid = "AllowEventBridgeToSendMessage" + effect = "Allow" + + principals { + type = "Service" + identifiers = ["events.amazonaws.com"] + } + + actions = [ + "sqs:SendMessage" + ] + + resources = [ + "arn:aws:sqs:${var.region}:${var.aws_account_id}:${local.csi}-file-scanner-queue" + ] + + condition { + test = "ArnEquals" + variable = "aws:SourceArn" + values = [aws_cloudwatch_event_rule.item_dequeued.arn] + } + } +} diff --git a/lambdas/file-scanner-lambda/jest.config.ts b/lambdas/file-scanner-lambda/jest.config.ts new file mode 100644 index 000000000..c02601aec --- /dev/null +++ b/lambdas/file-scanner-lambda/jest.config.ts @@ -0,0 +1,5 @@ +import { baseJestConfig } from '../../jest.config.base'; + +const config = baseJestConfig; + +export default config; diff --git a/lambdas/file-scanner-lambda/package.json b/lambdas/file-scanner-lambda/package.json new file mode 100644 index 000000000..b530f0e98 --- /dev/null +++ b/lambdas/file-scanner-lambda/package.json @@ -0,0 +1,26 @@ +{ + "dependencies": { + "@aws-sdk/client-s3": "^3.908.0", + "aws-lambda": "^1.0.7", + "digital-letters-events": "^0.0.1", + "utils": "^0.0.1" + }, + "devDependencies": { + "@tsconfig/node22": "^22.0.2", + "@types/aws-lambda": "^8.10.155", + "@types/jest": "^29.5.14", + "jest": "^29.7.0", + "jest-mock-extended": "^3.0.7", + "typescript": "^5.9.3" + }, + "name": "nhs-notify-digital-letters-file-scanner-lambda", + "private": true, + "scripts": { + "lambda-build": "rm -rf dist && npx esbuild --bundle --minify --sourcemap --target=es2020 --platform=node --loader:.node=file --entry-names=[name] --outdir=dist src/index.ts", + "lint": "eslint .", + "lint:fix": "eslint . --fix", + "test:unit": "jest", + "typecheck": "tsc --noEmit" + }, + "version": "0.0.1" +} diff --git a/lambdas/file-scanner-lambda/src/__tests__/apis/sqs-handler.test.ts b/lambdas/file-scanner-lambda/src/__tests__/apis/sqs-handler.test.ts new file mode 100644 index 000000000..6fb343295 --- /dev/null +++ b/lambdas/file-scanner-lambda/src/__tests__/apis/sqs-handler.test.ts @@ -0,0 +1,505 @@ +import { createHandler } from 'apis/sqs-handler'; +import type { SQSEvent } from 'aws-lambda'; +import { mockDeep } from 'jest-mock-extended'; +import type { HandlerDependencies } from 'apis/sqs-handler'; +import { FileScanner } from 'app/file-scanner'; +import { EventPublisher, Logger } from 'utils'; + +const mockFileScanner = mockDeep(); +const mockEventPublisher = mockDeep(); +const mockLogger = mockDeep(); + +const createSQSEvent = ( + records: { + messageId: string; + body: string; + }[], +): SQSEvent => ({ + Records: records.map((record) => ({ + messageId: record.messageId, + receiptHandle: 'test-receipt-handle', + body: record.body, + attributes: { + ApproximateReceiveCount: '1', + SentTimestamp: '1', + SenderId: 'test', + ApproximateFirstReceiveTimestamp: '1', + }, + messageAttributes: {}, + md5OfBody: 'test', + eventSource: 'aws:sqs', + eventSourceARN: 'arn:aws:sqs:us-east-1:123456789012:test-queue', + awsRegion: 'us-east-1', + })), +}); + +const createValidItemDequeuedBody = ( + messageReference: string, + senderId: string, + messageUri: string, +) => + JSON.stringify({ + detail: { + specversion: '1.0', + id: `event-${messageReference}`, + source: + '/nhs/england/notify/development/primary/data-plane/digitalletters/queue', + subject: `message/${messageReference}`, + type: 'uk.nhs.notify.digital.letters.queue.item.dequeued.v1', + time: '2026-01-19T12:00:00Z', + recordedtime: '2026-01-19T12:00:00.100Z', + datacontenttype: 'application/json', + dataschema: + 'https://notify.nhs.uk/cloudevents/schemas/digital-letters/2025-10-draft/data/digital-letters-queue-item-dequeued-data.schema.json', + traceparent: '00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01', + severitynumber: 2, + data: { + messageReference, + senderId, + messageUri, + }, + }, + }); + +describe('SQS Handler', () => { + let handler: ReturnType; + let dependencies: HandlerDependencies; + + beforeEach(() => { + jest.clearAllMocks(); + dependencies = { + eventPublisher: mockEventPublisher, + logger: mockLogger, + fileScanner: mockFileScanner, + }; + handler = createHandler(dependencies); + }); + + describe('successful processing', () => { + it('should process valid ItemDequeued events successfully', async () => { + mockFileScanner.scanFile.mockResolvedValue({ outcome: 'success' }); + + const event = createSQSEvent([ + { + messageId: 'msg-001', + body: createValidItemDequeuedBody( + 'ref-001', + 'SENDER_001', + 's3://bucket/key', + ), + }, + ]); + + const response = await handler(event); + + expect(response.batchItemFailures).toEqual([]); + expect(mockFileScanner.scanFile).toHaveBeenCalledWith('s3://bucket/key', { + messageReference: 'ref-001', + senderId: 'SENDER_001', + createdAt: '2026-01-19T12:00:00Z', + }); + + expect(mockLogger.info).toHaveBeenCalledWith( + expect.objectContaining({ + description: 'Starting file scanner batch', + recordCount: 1, + }), + ); + + expect(mockLogger.info).toHaveBeenCalledWith( + expect.objectContaining({ + description: 'Processing ItemDequeued event', + messageReference: 'ref-001', + senderId: 'SENDER_001', + }), + ); + + expect(mockLogger.info).toHaveBeenCalledWith( + expect.objectContaining({ + description: 'Completed file scanner batch', + successCount: 1, + failureCount: 0, + }), + ); + }); + + it('should process multiple valid events in batch', async () => { + mockFileScanner.scanFile.mockResolvedValue({ outcome: 'success' }); + + const event = createSQSEvent([ + { + messageId: 'msg-001', + body: createValidItemDequeuedBody( + 'ref-001', + 'SENDER_001', + 's3://bucket/key1', + ), + }, + { + messageId: 'msg-002', + body: createValidItemDequeuedBody( + 'ref-002', + 'SENDER_002', + 's3://bucket/key2', + ), + }, + { + messageId: 'msg-003', + body: createValidItemDequeuedBody( + 'ref-003', + 'SENDER_003', + 's3://bucket/key3', + ), + }, + ]); + + const response = await handler(event); + + expect(response.batchItemFailures).toEqual([]); + expect(mockFileScanner.scanFile).toHaveBeenCalledTimes(3); + expect(mockLogger.info).toHaveBeenCalledWith( + expect.objectContaining({ + description: 'Completed file scanner batch', + successCount: 3, + failureCount: 0, + }), + ); + }); + }); + + describe('failure handling', () => { + it('should return failed message when file scanner fails', async () => { + mockFileScanner.scanFile.mockResolvedValue({ + outcome: 'failed', + errorMessage: 'PDF extraction failed', + }); + + const event = createSQSEvent([ + { + messageId: 'msg-001', + body: createValidItemDequeuedBody( + 'ref-001', + 'SENDER_001', + 's3://bucket/key', + ), + }, + ]); + + const response = await handler(event); + + expect(response.batchItemFailures).toEqual([ + { itemIdentifier: 'msg-001' }, + ]); + + expect(mockLogger.error).toHaveBeenCalledWith( + expect.objectContaining({ + description: 'Error processing record', + messageId: 'msg-001', + }), + ); + + expect(mockLogger.info).toHaveBeenCalledWith( + expect.objectContaining({ + description: 'Completed file scanner batch', + successCount: 0, + failureCount: 1, + }), + ); + }); + + it('should handle partial batch failures', async () => { + mockFileScanner.scanFile + .mockResolvedValueOnce({ outcome: 'success' }) + .mockResolvedValueOnce({ + outcome: 'failed', + errorMessage: 'Error', + }) + .mockResolvedValueOnce({ outcome: 'success' }); + + const event = createSQSEvent([ + { + messageId: 'msg-001', + body: createValidItemDequeuedBody( + 'ref-001', + 'SENDER_001', + 's3://bucket/key1', + ), + }, + { + messageId: 'msg-002', + body: createValidItemDequeuedBody( + 'ref-002', + 'SENDER_002', + 's3://bucket/key2', + ), + }, + { + messageId: 'msg-003', + body: createValidItemDequeuedBody( + 'ref-003', + 'SENDER_003', + 's3://bucket/key3', + ), + }, + ]); + + const response = await handler(event); + + expect(response.batchItemFailures).toEqual([ + { itemIdentifier: 'msg-002' }, + ]); + + expect(mockLogger.info).toHaveBeenCalledWith( + expect.objectContaining({ + description: 'Completed file scanner batch', + successCount: 2, + failureCount: 1, + }), + ); + }); + + it('should handle fileScanner throwing exception', async () => { + mockFileScanner.scanFile.mockRejectedValue(new Error('Unexpected error')); + + const event = createSQSEvent([ + { + messageId: 'msg-001', + body: createValidItemDequeuedBody( + 'ref-001', + 'SENDER_001', + 's3://bucket/key', + ), + }, + ]); + + const response = await handler(event); + + expect(response.batchItemFailures).toEqual([ + { itemIdentifier: 'msg-001' }, + ]); + }); + + it('should handle fileScanner throwing non-Error object', async () => { + mockFileScanner.scanFile.mockRejectedValue('string error'); + + const event = createSQSEvent([ + { + messageId: 'msg-002', + body: createValidItemDequeuedBody( + 'ref-002', + 'SENDER_002', + 's3://bucket/key2', + ), + }, + ]); + + const response = await handler(event); + + expect(response.batchItemFailures).toEqual([ + { itemIdentifier: 'msg-002' }, + ]); + + expect(mockLogger.error).toHaveBeenCalledWith( + expect.objectContaining({ + description: 'Error processing record', + err: 'string error', + messageId: 'msg-002', + }), + ); + }); + }); + + describe('validation errors', () => { + it('should skip invalid event type', async () => { + const invalidEvent = createSQSEvent([ + { + messageId: 'msg-001', + body: JSON.stringify({ + detail: { + specversion: '1.0', + id: 'event-001', + source: + '/nhs/england/notify/development/primary/data-plane/digitalletters/queue', + type: 'uk.nhs.notify.wrong.event.type.v1', + time: '2026-01-19T12:00:00Z', + data: { + messageReference: 'ref-001', + senderId: 'SENDER_001', + messageUri: 's3://bucket/key', + }, + }, + }), + }, + ]); + + const response = await handler(invalidEvent); + + expect(response.batchItemFailures).toEqual([ + { itemIdentifier: 'msg-001' }, + ]); + expect(mockFileScanner.scanFile).not.toHaveBeenCalled(); + expect(mockLogger.warn).toHaveBeenCalledWith( + expect.objectContaining({ + description: 'Invalid record will be retried', + }), + ); + expect(mockLogger.warn).toHaveBeenCalledWith( + expect.objectContaining({ + description: 'Error parsing queue entry', + }), + ); + }); + + it('should skip malformed JSON', async () => { + const event = createSQSEvent([ + { + messageId: 'msg-001', + body: 'invalid json {', + }, + ]); + + const response = await handler(event); + + expect(response.batchItemFailures).toEqual([ + { itemIdentifier: 'msg-001' }, + ]); + expect(mockFileScanner.scanFile).not.toHaveBeenCalled(); + expect(mockLogger.warn).toHaveBeenCalledWith( + expect.objectContaining({ + description: 'Error parsing SQS record', + }), + ); + expect(mockLogger.warn).toHaveBeenCalledWith( + expect.objectContaining({ + description: 'Invalid record will be retried', + messageId: 'msg-001', + }), + ); + }); + + it('should skip events with missing required fields', async () => { + const invalidEvent = createSQSEvent([ + { + messageId: 'msg-001', + body: JSON.stringify({ + detail: { + specversion: '1.0', + id: 'event-001', + source: + '/nhs/england/notify/development/primary/data-plane/digitalletters/queue', + type: 'uk.nhs.notify.digital.letters.queue.item.dequeued.v1', + time: '2026-01-19T12:00:00Z', + data: { + // Missing messageReference, senderId, messageUri + }, + }, + }), + }, + ]); + + const response = await handler(invalidEvent); + + expect(response.batchItemFailures).toEqual([ + { itemIdentifier: 'msg-001' }, + ]); + expect(mockFileScanner.scanFile).not.toHaveBeenCalled(); + expect(mockLogger.warn).toHaveBeenCalledWith( + expect.objectContaining({ + description: 'Error parsing queue entry', + }), + ); + expect(mockLogger.warn).toHaveBeenCalledWith( + expect.objectContaining({ + description: 'Invalid record will be retried', + messageId: 'msg-001', + }), + ); + }); + + it('should process valid events and skip invalid ones in same batch', async () => { + mockFileScanner.scanFile.mockResolvedValue({ outcome: 'success' }); + + const event = createSQSEvent([ + { + messageId: 'msg-001', + body: createValidItemDequeuedBody( + 'ref-001', + 'SENDER_001', + 's3://bucket/key1', + ), + }, + { + messageId: 'msg-002', + body: 'invalid json', + }, + { + messageId: 'msg-003', + body: createValidItemDequeuedBody( + 'ref-003', + 'SENDER_003', + 's3://bucket/key3', + ), + }, + ]); + + const response = await handler(event); + + expect(response.batchItemFailures).toEqual([ + { itemIdentifier: 'msg-002' }, + ]); + expect(mockFileScanner.scanFile).toHaveBeenCalledTimes(2); + expect(mockLogger.info).toHaveBeenCalledWith( + expect.objectContaining({ + description: 'Completed file scanner batch', + successCount: 2, + failureCount: 1, + }), + ); + }); + }); + + describe('empty batch handling', () => { + it('should handle empty records array', async () => { + const event = createSQSEvent([]); + + const response = await handler(event); + + expect(response.batchItemFailures).toEqual([]); + expect(mockFileScanner.scanFile).not.toHaveBeenCalled(); + expect(mockLogger.info).toHaveBeenCalledWith( + expect.objectContaining({ + description: 'Starting file scanner batch', + recordCount: 0, + }), + ); + }); + + it('should handle batch with all invalid records', async () => { + const event = createSQSEvent([ + { + messageId: 'msg-001', + body: 'invalid', + }, + { + messageId: 'msg-002', + body: 'also invalid', + }, + ]); + + const response = await handler(event); + + expect(response.batchItemFailures).toEqual([ + { itemIdentifier: 'msg-001' }, + { itemIdentifier: 'msg-002' }, + ]); + expect(mockFileScanner.scanFile).not.toHaveBeenCalled(); + expect(mockLogger.info).toHaveBeenCalledWith( + expect.objectContaining({ + description: 'Completed file scanner batch', + successCount: 0, + failureCount: 2, + }), + ); + }); + }); +}); diff --git a/lambdas/file-scanner-lambda/src/__tests__/app/file-scanner.test.ts b/lambdas/file-scanner-lambda/src/__tests__/app/file-scanner.test.ts new file mode 100644 index 000000000..451d91ba4 --- /dev/null +++ b/lambdas/file-scanner-lambda/src/__tests__/app/file-scanner.test.ts @@ -0,0 +1,288 @@ +import { FileScanner } from 'app/file-scanner'; +import { S3Client } from '@aws-sdk/client-s3'; +import { Logger } from 'utils'; +import { mockDeep } from 'jest-mock-extended'; +import * as utilsModule from 'utils'; + +const mockS3Client = mockDeep(); +const mockLogger = mockDeep(); + +jest.mock('utils', () => ({ + ...jest.requireActual('utils'), + getS3Object: jest.fn(), +})); + +const mockGetS3Object = utilsModule.getS3Object as jest.MockedFunction< + typeof utilsModule.getS3Object +>; + +describe('FileScanner', () => { + let fileScanner: FileScanner; + + beforeEach(() => { + jest.clearAllMocks(); + fileScanner = new FileScanner({ + documentReferenceBucket: 'test-doc-ref-bucket', + unscannedFilesBucket: 'test-unscanned-bucket', + unscannedFilesPathPrefix: 'dev', + s3Client: mockS3Client, + logger: mockLogger, + }); + }); + + describe('scanFile', () => { + const validMessageUri = + 's3://test-doc-ref-bucket/document-reference/test-ref-001'; + const validMetadata = { + messageReference: 'test-ref-001', + senderId: 'SENDER_001', + createdAt: '2026-01-19T12:00:00Z', + }; + + const validDocumentReference = { + resourceType: 'DocumentReference', + id: 'test-id', + content: [ + { + attachment: { + contentType: 'application/pdf', + data: Buffer.from('test pdf content').toString('base64'), + }, + }, + ], + }; + + it('should successfully extract PDF and upload to unscanned bucket', async () => { + mockGetS3Object.mockResolvedValue(JSON.stringify(validDocumentReference)); + (mockS3Client.send as any).mockResolvedValue({}); + + const result = await fileScanner.scanFile(validMessageUri, validMetadata); + + expect(result.outcome).toBe('success'); + expect(result.errorMessage).toBeUndefined(); + + // Verify DocumentReference was retrieved + expect(mockGetS3Object).toHaveBeenCalledWith({ + Bucket: 'test-doc-ref-bucket', + Key: 'document-reference/test-ref-001', + }); + + // Verify PDF was uploaded + expect(mockS3Client.send).toHaveBeenCalledWith( + expect.objectContaining({ + input: expect.objectContaining({ + Bucket: 'test-unscanned-bucket', + Key: 'dev/test-ref-001.pdf', + ContentType: 'application/pdf', + Metadata: { + messageReference: 'test-ref-001', + senderId: 'SENDER_001', + createdAt: '2026-01-19T12:00:00Z', + }, + }), + }), + ); + + // Verify logging + expect(mockLogger.info).toHaveBeenCalledWith( + expect.objectContaining({ + description: 'Starting file scan', + messageUri: validMessageUri, + }), + ); + + expect(mockLogger.info).toHaveBeenCalledWith( + expect.objectContaining({ + description: 'Successfully processed file for scanning', + messageReference: 'test-ref-001', + }), + ); + }); + + it('should handle invalid S3 URI format', async () => { + const invalidUri = 'not-an-s3-uri'; + + const result = await fileScanner.scanFile(invalidUri, validMetadata); + + expect(result.outcome).toBe('failed'); + expect(result.errorMessage).toContain('Invalid S3 URI format'); + + expect(mockLogger.error).toHaveBeenCalledWith( + expect.objectContaining({ + description: 'Error processing file for scanning', + messageReference: 'test-ref-001', + }), + ); + }); + + it('should handle S3 URI without key', async () => { + const invalidUri = 's3://bucket-only/'; + + const result = await fileScanner.scanFile(invalidUri, validMetadata); + + expect(result.outcome).toBe('failed'); + expect(result.errorMessage).toContain('Invalid S3 URI format'); + }); + + it('should handle missing DocumentReference content', async () => { + const invalidDocRef = { + resourceType: 'DocumentReference', + id: 'test-id', + content: [], + }; + + mockGetS3Object.mockResolvedValue(JSON.stringify(invalidDocRef)); + + const result = await fileScanner.scanFile(validMessageUri, validMetadata); + + expect(result.outcome).toBe('failed'); + expect(result.errorMessage).toContain('DocumentReference has no content'); + }); + + it('should handle undefined content array', async () => { + const invalidDocRef = { + resourceType: 'DocumentReference', + id: 'test-id', + }; + + mockGetS3Object.mockResolvedValue(JSON.stringify(invalidDocRef)); + + const result = await fileScanner.scanFile(validMessageUri, validMetadata); + + expect(result.outcome).toBe('failed'); + expect(result.errorMessage).toContain('DocumentReference has no content'); + }); + + it('should handle missing attachment in DocumentReference', async () => { + const invalidDocRef = { + resourceType: 'DocumentReference', + id: 'test-id', + content: [{}], + }; + + mockGetS3Object.mockResolvedValue(JSON.stringify(invalidDocRef)); + + const result = await fileScanner.scanFile(validMessageUri, validMetadata); + + expect(result.outcome).toBe('failed'); + expect(result.errorMessage).toContain( + 'DocumentReference content has no attachment data', + ); + }); + + it('should handle missing attachment data', async () => { + const invalidDocRef = { + resourceType: 'DocumentReference', + id: 'test-id', + content: [ + { + attachment: { + contentType: 'application/pdf', + }, + }, + ], + }; + + mockGetS3Object.mockResolvedValue(JSON.stringify(invalidDocRef)); + + const result = await fileScanner.scanFile(validMessageUri, validMetadata); + + expect(result.outcome).toBe('failed'); + expect(result.errorMessage).toContain( + 'DocumentReference content has no attachment data', + ); + }); + + it('should handle S3 getObject error', async () => { + mockGetS3Object.mockRejectedValue(new Error('S3 access denied')); + + const result = await fileScanner.scanFile(validMessageUri, validMetadata); + + expect(result.outcome).toBe('failed'); + expect(result.errorMessage).toContain('S3 access denied'); + + expect(mockLogger.error).toHaveBeenCalledWith( + expect.objectContaining({ + description: 'Error processing file for scanning', + err: expect.objectContaining({ + message: 'S3 access denied', + }), + }), + ); + }); + + it('should handle S3 putObject error', async () => { + mockGetS3Object.mockResolvedValue(JSON.stringify(validDocumentReference)); + (mockS3Client.send as any).mockRejectedValue( + new Error('S3 upload failed'), + ); + + const result = await fileScanner.scanFile(validMessageUri, validMetadata); + + expect(result.outcome).toBe('failed'); + expect(result.errorMessage).toContain('S3 upload failed'); + }); + + it('should handle invalid JSON in DocumentReference', async () => { + mockGetS3Object.mockResolvedValue('invalid json {'); + + const result = await fileScanner.scanFile(validMessageUri, validMetadata); + + expect(result.outcome).toBe('failed'); + expect(result.errorMessage).toBeDefined(); + }); + + it('should handle non-Error exceptions', async () => { + mockGetS3Object.mockRejectedValue('string error'); + + const result = await fileScanner.scanFile(validMessageUri, validMetadata); + + expect(result.outcome).toBe('failed'); + expect(result.errorMessage).toBe('string error'); + }); + + it('should correctly build unscanned file key with environment prefix', async () => { + mockGetS3Object.mockResolvedValue(JSON.stringify(validDocumentReference)); + (mockS3Client.send as any).mockResolvedValue({}); + + await fileScanner.scanFile(validMessageUri, validMetadata); + + expect(mockS3Client.send).toHaveBeenCalledWith( + expect.objectContaining({ + input: expect.objectContaining({ + Key: 'dev/test-ref-001.pdf', + }), + }), + ); + }); + + it('should decode base64 PDF correctly', async () => { + const pdfContent = 'This is a test PDF content'; + const docRef = { + resourceType: 'DocumentReference', + id: 'test-id', + content: [ + { + attachment: { + contentType: 'application/pdf', + data: Buffer.from(pdfContent).toString('base64'), + }, + }, + ], + }; + + mockGetS3Object.mockResolvedValue(JSON.stringify(docRef)); + (mockS3Client.send as any).mockResolvedValue({}); + + await fileScanner.scanFile(validMessageUri, validMetadata); + + expect(mockS3Client.send).toHaveBeenCalledWith( + expect.objectContaining({ + input: expect.objectContaining({ + Body: Buffer.from(pdfContent), + }), + }), + ); + }); + }); +}); diff --git a/lambdas/file-scanner-lambda/src/__tests__/container.test.ts b/lambdas/file-scanner-lambda/src/__tests__/container.test.ts new file mode 100644 index 000000000..f4275bab7 --- /dev/null +++ b/lambdas/file-scanner-lambda/src/__tests__/container.test.ts @@ -0,0 +1,56 @@ +import { createContainer } from 'container'; +import * as configModule from 'infra/config'; + +jest.mock('infra/config'); + +const mockLoadConfig = configModule.loadConfig as jest.MockedFunction< + typeof configModule.loadConfig +>; + +describe('createContainer', () => { + beforeEach(() => { + jest.clearAllMocks(); + }); + + it('should create container with all dependencies', () => { + mockLoadConfig.mockReturnValue({ + documentReferenceBucket: 'test-doc-ref-bucket', + unscannedFilesBucket: 'test-unscanned-bucket', + unscannedFilesPathPrefix: 'dev', + eventPublisherEventBusArn: + 'arn:aws:events:us-east-1:123456789012:event-bus/test', + eventPublisherDlqUrl: 'https://sqs.us-east-1.amazonaws.com/dlq', + }); + + const container = createContainer(); + + expect(container).toHaveProperty('eventPublisher'); + expect(container).toHaveProperty('logger'); + expect(container).toHaveProperty('fileScanner'); + expect(mockLoadConfig).toHaveBeenCalledTimes(1); + }); + + it('should call loadConfig to get configuration', () => { + const mockConfig = { + documentReferenceBucket: 'test-bucket', + unscannedFilesBucket: 'test-unscanned', + unscannedFilesPathPrefix: 'dev', + eventPublisherEventBusArn: 'arn:test', + eventPublisherDlqUrl: 'url:test', + }; + + mockLoadConfig.mockReturnValue(mockConfig); + + createContainer(); + + expect(mockLoadConfig).toHaveBeenCalled(); + }); + + it('should propagate config errors', () => { + mockLoadConfig.mockImplementation(() => { + throw new Error('Missing required config'); + }); + + expect(() => createContainer()).toThrow('Missing required config'); + }); +}); diff --git a/lambdas/file-scanner-lambda/src/__tests__/index.test.ts b/lambdas/file-scanner-lambda/src/__tests__/index.test.ts new file mode 100644 index 000000000..00232a5e9 --- /dev/null +++ b/lambdas/file-scanner-lambda/src/__tests__/index.test.ts @@ -0,0 +1,26 @@ +// Set environment variables before any imports +process.env.DOCUMENT_REFERENCE_BUCKET = 'test-doc-ref-bucket'; +process.env.UNSCANNED_FILES_BUCKET = 'test-unscanned-bucket'; +process.env.UNSCANNED_FILES_PATH_PREFIX = 'test-prefix'; +process.env.EVENT_PUBLISHER_EVENT_BUS_ARN = + 'arn:aws:events:us-east-1:123456789012:event-bus/test-bus'; +process.env.EVENT_PUBLISHER_DLQ_URL = + 'https://sqs.us-east-1.amazonaws.com/123456789012/test-dlq'; + +// eslint-disable-next-line import-x/first +import { handler } from '..'; + +describe('Lambda Handler', () => { + afterAll(() => { + delete process.env.DOCUMENT_REFERENCE_BUCKET; + delete process.env.UNSCANNED_FILES_BUCKET; + delete process.env.UNSCANNED_FILES_PATH_PREFIX; + delete process.env.EVENT_PUBLISHER_EVENT_BUS_ARN; + delete process.env.EVENT_PUBLISHER_DLQ_URL; + }); + + it('should export handler function', () => { + expect(handler).toBeDefined(); + expect(typeof handler).toBe('function'); + }); +}); diff --git a/lambdas/file-scanner-lambda/src/__tests__/infra/config.test.ts b/lambdas/file-scanner-lambda/src/__tests__/infra/config.test.ts new file mode 100644 index 000000000..825baa305 --- /dev/null +++ b/lambdas/file-scanner-lambda/src/__tests__/infra/config.test.ts @@ -0,0 +1,95 @@ +import { loadConfig } from 'infra/config'; + +describe('loadConfig', () => { + const originalEnv = process.env; + + beforeEach(() => { + jest.resetModules(); + process.env = { ...originalEnv }; + }); + + afterAll(() => { + process.env = originalEnv; + }); + + it('should load valid configuration', () => { + process.env.DOCUMENT_REFERENCE_BUCKET = 'test-doc-ref-bucket'; + process.env.UNSCANNED_FILES_BUCKET = 'test-unscanned-bucket'; + process.env.UNSCANNED_FILES_PATH_PREFIX = 'dev'; + process.env.EVENT_PUBLISHER_EVENT_BUS_ARN = + 'arn:aws:events:us-east-1:123456789012:event-bus/test-bus'; + process.env.EVENT_PUBLISHER_DLQ_URL = + 'https://sqs.us-east-1.amazonaws.com/123456789012/test-dlq'; + + const config = loadConfig(); + + expect(config).toEqual({ + documentReferenceBucket: 'test-doc-ref-bucket', + unscannedFilesBucket: 'test-unscanned-bucket', + unscannedFilesPathPrefix: 'dev', + eventPublisherEventBusArn: + 'arn:aws:events:us-east-1:123456789012:event-bus/test-bus', + eventPublisherDlqUrl: + 'https://sqs.us-east-1.amazonaws.com/123456789012/test-dlq', + }); + }); + + it('should throw error when DOCUMENT_REFERENCE_BUCKET is missing', () => { + process.env.UNSCANNED_FILES_BUCKET = 'test-unscanned-bucket'; + process.env.UNSCANNED_FILES_PATH_PREFIX = 'dev'; + process.env.EVENT_PUBLISHER_EVENT_BUS_ARN = 'arn:aws:events:test'; + process.env.EVENT_PUBLISHER_DLQ_URL = 'https://sqs.test.com/dlq'; + + expect(() => loadConfig()).toThrow('DOCUMENT_REFERENCE_BUCKET is not set'); + }); + + it('should throw error when UNSCANNED_FILES_BUCKET is missing', () => { + process.env.DOCUMENT_REFERENCE_BUCKET = 'test-doc-ref-bucket'; + process.env.UNSCANNED_FILES_PATH_PREFIX = 'dev'; + process.env.EVENT_PUBLISHER_EVENT_BUS_ARN = 'arn:aws:events:test'; + process.env.EVENT_PUBLISHER_DLQ_URL = 'https://sqs.test.com/dlq'; + + expect(() => loadConfig()).toThrow('UNSCANNED_FILES_BUCKET is not set'); + }); + + it('should throw error when UNSCANNED_FILES_PATH_PREFIX is missing', () => { + process.env.DOCUMENT_REFERENCE_BUCKET = 'test-doc-ref-bucket'; + process.env.UNSCANNED_FILES_BUCKET = 'test-unscanned-bucket'; + process.env.EVENT_PUBLISHER_EVENT_BUS_ARN = 'arn:aws:events:test'; + process.env.EVENT_PUBLISHER_DLQ_URL = 'https://sqs.test.com/dlq'; + + expect(() => loadConfig()).toThrow( + 'UNSCANNED_FILES_PATH_PREFIX is not set', + ); + }); + + it('should throw error when EVENT_PUBLISHER_EVENT_BUS_ARN is missing', () => { + process.env.DOCUMENT_REFERENCE_BUCKET = 'test-doc-ref-bucket'; + process.env.UNSCANNED_FILES_BUCKET = 'test-unscanned-bucket'; + process.env.UNSCANNED_FILES_PATH_PREFIX = 'dev'; + process.env.EVENT_PUBLISHER_DLQ_URL = 'https://sqs.test.com/dlq'; + + expect(() => loadConfig()).toThrow( + 'EVENT_PUBLISHER_EVENT_BUS_ARN is not set', + ); + }); + + it('should throw error when EVENT_PUBLISHER_DLQ_URL is missing', () => { + process.env.DOCUMENT_REFERENCE_BUCKET = 'test-doc-ref-bucket'; + process.env.UNSCANNED_FILES_BUCKET = 'test-unscanned-bucket'; + process.env.UNSCANNED_FILES_PATH_PREFIX = 'dev'; + process.env.EVENT_PUBLISHER_EVENT_BUS_ARN = 'arn:aws:events:test'; + + expect(() => loadConfig()).toThrow('EVENT_PUBLISHER_DLQ_URL is not set'); + }); + + it('should handle empty string values as missing', () => { + process.env.DOCUMENT_REFERENCE_BUCKET = ''; + process.env.UNSCANNED_FILES_BUCKET = 'test-unscanned-bucket'; + process.env.UNSCANNED_FILES_PATH_PREFIX = 'dev'; + process.env.EVENT_PUBLISHER_EVENT_BUS_ARN = 'arn:aws:events:test'; + process.env.EVENT_PUBLISHER_DLQ_URL = 'https://sqs.test.com/dlq'; + + expect(() => loadConfig()).toThrow('DOCUMENT_REFERENCE_BUCKET is not set'); + }); +}); diff --git a/lambdas/file-scanner-lambda/src/apis/sqs-handler.ts b/lambdas/file-scanner-lambda/src/apis/sqs-handler.ts new file mode 100644 index 000000000..d8b7b7062 --- /dev/null +++ b/lambdas/file-scanner-lambda/src/apis/sqs-handler.ts @@ -0,0 +1,127 @@ +import { FileScanner } from 'app/file-scanner'; +import type { + SQSBatchItemFailure, + SQSBatchResponse, + SQSEvent, +} from 'aws-lambda'; +import { ItemDequeued } from 'digital-letters-events'; +import itemDequeuedValidator from 'digital-letters-events/ItemDequeued.js'; +import { EventPublisher, Logger } from 'utils'; + +export interface HandlerDependencies { + eventPublisher: EventPublisher; + logger: Logger; + fileScanner: FileScanner; +} + +type ValidatedRecord = { + messageId: string; + event: ItemDequeued; +}; + +function validateRecord( + { body, messageId }: { body: string; messageId: string }, + logger: Logger, +): ValidatedRecord | null { + try { + const sqsEventBody = JSON.parse(body); + const sqsEventDetail = sqsEventBody.detail; + + const isEventValid = itemDequeuedValidator(sqsEventDetail); + if (!isEventValid) { + logger.warn({ + err: itemDequeuedValidator.errors, + description: 'Error parsing queue entry', + }); + + return null; + } + + return { messageId, event: sqsEventDetail }; + } catch (error) { + logger.warn({ + err: error, + description: 'Error parsing SQS record', + }); + return null; + } +} + +async function processRecord( + validatedRecord: ValidatedRecord, + { fileScanner, logger }: HandlerDependencies, +): Promise { + const { event } = validatedRecord; + + logger.info({ + description: 'Processing ItemDequeued event', + eventId: event.id, + messageReference: event.data.messageReference, + senderId: event.data.senderId, + }); + + const result = await fileScanner.scanFile(event.data.messageUri, { + messageReference: event.data.messageReference, + senderId: event.data.senderId, + createdAt: event.time, + }); + + if (result.outcome === 'failed') { + throw new Error( + `Failed to process file for scanning: ${result.errorMessage}`, + ); + } +} + +export function createHandler(dependencies: HandlerDependencies) { + return async function handler(event: SQSEvent): Promise { + const { logger } = dependencies; + + logger.info({ + description: 'Starting file scanner batch', + recordCount: event.Records.length, + }); + + const itemFailures: SQSBatchItemFailure[] = []; + + for (const record of event.Records) { + const validatedRecord = validateRecord(record, logger); + + if (validatedRecord) { + try { + await processRecord(validatedRecord, dependencies); + } catch (error) { + logger.error({ + description: 'Error processing record', + err: + error instanceof Error + ? { + message: error.message, + name: error.name, + stack: error.stack, + } + : error, + messageId: validatedRecord.messageId, + }); + + itemFailures.push({ itemIdentifier: validatedRecord.messageId }); + } + } else { + // Validation failed - return to queue for retry/DLQ + logger.warn({ + description: 'Invalid record will be retried', + messageId: record.messageId, + }); + itemFailures.push({ itemIdentifier: record.messageId }); + } + } + + logger.info({ + description: 'Completed file scanner batch', + successCount: event.Records.length - itemFailures.length, + failureCount: itemFailures.length, + }); + + return { batchItemFailures: itemFailures }; + }; +} diff --git a/lambdas/file-scanner-lambda/src/app/file-scanner.ts b/lambdas/file-scanner-lambda/src/app/file-scanner.ts new file mode 100644 index 000000000..3eabde7a5 --- /dev/null +++ b/lambdas/file-scanner-lambda/src/app/file-scanner.ts @@ -0,0 +1,172 @@ +import { PutObjectCommand, S3Client } from '@aws-sdk/client-s3'; +import { Logger, getS3Object } from 'utils'; + +export interface DocumentReference { + resourceType: string; + id: string; + content?: { + attachment?: { + contentType?: string; + data?: string; + }; + }[]; +} + +export interface FileScannerDependencies { + documentReferenceBucket: string; + unscannedFilesBucket: string; + unscannedFilesPathPrefix: string; + s3Client: S3Client; + logger: Logger; +} + +export interface ScanFileMetadata { + messageReference: string; + senderId: string; + createdAt: string; +} + +export type ScanFileResult = { + outcome: 'success' | 'failed'; + errorMessage?: string; +}; + +export class FileScanner { + private readonly documentReferenceBucket: string; + + private readonly unscannedFilesBucket: string; + + private readonly unscannedFilesPathPrefix: string; + + private readonly s3Client: S3Client; + + private readonly logger: Logger; + + constructor({ + documentReferenceBucket, + logger, + s3Client, + unscannedFilesBucket, + unscannedFilesPathPrefix, + }: FileScannerDependencies) { + this.documentReferenceBucket = documentReferenceBucket; + this.unscannedFilesBucket = unscannedFilesBucket; + this.unscannedFilesPathPrefix = unscannedFilesPathPrefix; + this.s3Client = s3Client; + this.logger = logger; + } + + async scanFile( + messageUri: string, + metadata: ScanFileMetadata, + ): Promise { + try { + this.logger.info({ + description: 'Starting file scan', + messageUri, + messageReference: metadata.messageReference, + senderId: metadata.senderId, + }); + + const documentReferenceKey = FileScanner.extractS3Key(messageUri); + + const documentReferenceJson = await getS3Object({ + Bucket: this.documentReferenceBucket, + Key: documentReferenceKey, + }); + + const documentReference: DocumentReference = JSON.parse( + documentReferenceJson, + ); + + const pdfBase64 = + FileScanner.extractPdfFromDocumentReference(documentReference); + const pdfBuffer = Buffer.from(pdfBase64, 'base64'); + + const unscannedFileKey = this.buildUnscannedFileKey( + metadata.messageReference, + ); + + await this.uploadToUnscannedBucket(unscannedFileKey, pdfBuffer, metadata); + + this.logger.info({ + description: 'Successfully processed file for scanning', + messageReference: metadata.messageReference, + senderId: metadata.senderId, + unscannedFileKey, + }); + + return { outcome: 'success' }; + } catch (error) { + const errorMessage = + error instanceof Error ? error.message : String(error); + + this.logger.error({ + description: 'Error processing file for scanning', + err: + error instanceof Error + ? { message: error.message, name: error.name, stack: error.stack } + : error, + messageReference: metadata.messageReference, + senderId: metadata.senderId, + }); + + return { outcome: 'failed', errorMessage }; + } + } + + private static extractS3Key(messageUri: string): string { + const regex = /^s3:\/\/[^/]+\/(.+)$/; + const match = regex.exec(messageUri); + if (!match) { + throw new Error(`Invalid S3 URI format: ${messageUri}`); + } + return match[1]; + } + + private static extractPdfFromDocumentReference( + documentReference: DocumentReference, + ): string { + const { content } = documentReference; + if (!content || content.length === 0) { + throw new Error('DocumentReference has no content'); + } + + const { attachment } = content[0]; + if (!attachment || !attachment.data) { + throw new Error('DocumentReference content has no attachment data'); + } + + return attachment.data; + } + + private buildUnscannedFileKey(messageReference: string): string { + return `${this.unscannedFilesPathPrefix}/${messageReference}.pdf`; + } + + private async uploadToUnscannedBucket( + key: string, + pdfBuffer: Buffer, + metadata: ScanFileMetadata, + ): Promise { + const params = { + Bucket: this.unscannedFilesBucket, + Key: key, + Body: pdfBuffer, + ContentType: 'application/pdf', + Metadata: { + messageReference: metadata.messageReference, + senderId: metadata.senderId, + createdAt: metadata.createdAt, + }, + }; + + await this.s3Client.send(new PutObjectCommand(params)); + + this.logger.info({ + description: 'PDF uploaded to unscanned files bucket', + bucket: this.unscannedFilesBucket, + key, + }); + } +} diff --git a/lambdas/file-scanner-lambda/src/container.ts b/lambdas/file-scanner-lambda/src/container.ts new file mode 100644 index 000000000..106871151 --- /dev/null +++ b/lambdas/file-scanner-lambda/src/container.ts @@ -0,0 +1,40 @@ +import { HandlerDependencies } from 'apis/sqs-handler'; +import { FileScanner } from 'app/file-scanner'; +import { loadConfig } from 'infra/config'; +import { + EventPublisher, + eventBridgeClient, + logger, + s3Client, + sqsClient, +} from 'utils'; + +export const createContainer = (): HandlerDependencies => { + const { + documentReferenceBucket, + eventPublisherDlqUrl, + eventPublisherEventBusArn, + unscannedFilesBucket, + unscannedFilesPathPrefix, + } = loadConfig(); + + const eventPublisher = new EventPublisher({ + eventBusArn: eventPublisherEventBusArn, + dlqUrl: eventPublisherDlqUrl, + logger, + sqsClient, + eventBridgeClient, + }); + + const fileScanner = new FileScanner({ + documentReferenceBucket, + unscannedFilesBucket, + unscannedFilesPathPrefix, + s3Client, + logger, + }); + + return { eventPublisher, logger, fileScanner }; +}; + +export default createContainer; diff --git a/lambdas/file-scanner-lambda/src/index.ts b/lambdas/file-scanner-lambda/src/index.ts new file mode 100644 index 000000000..f25a80861 --- /dev/null +++ b/lambdas/file-scanner-lambda/src/index.ts @@ -0,0 +1,6 @@ +import { createHandler } from 'apis/sqs-handler'; +import { createContainer } from 'container'; + +export const handler = createHandler(createContainer()); + +export default handler; diff --git a/lambdas/file-scanner-lambda/src/infra/config.ts b/lambdas/file-scanner-lambda/src/infra/config.ts new file mode 100644 index 000000000..802931add --- /dev/null +++ b/lambdas/file-scanner-lambda/src/infra/config.ts @@ -0,0 +1,52 @@ +import { logger } from 'utils'; + +export interface Config { + documentReferenceBucket: string; + unscannedFilesBucket: string; + unscannedFilesPathPrefix: string; + eventPublisherEventBusArn: string; + eventPublisherDlqUrl: string; +} + +export function loadConfig(): Config { + const documentReferenceBucket = process.env.DOCUMENT_REFERENCE_BUCKET; + const unscannedFilesBucket = process.env.UNSCANNED_FILES_BUCKET; + const unscannedFilesPathPrefix = process.env.UNSCANNED_FILES_PATH_PREFIX; + const eventPublisherEventBusArn = process.env.EVENT_PUBLISHER_EVENT_BUS_ARN; + const eventPublisherDlqUrl = process.env.EVENT_PUBLISHER_DLQ_URL; + + if (!documentReferenceBucket) { + throw new Error('DOCUMENT_REFERENCE_BUCKET is not set'); + } + + if (!unscannedFilesBucket) { + throw new Error('UNSCANNED_FILES_BUCKET is not set'); + } + + if (!unscannedFilesPathPrefix) { + throw new Error('UNSCANNED_FILES_PATH_PREFIX is not set'); + } + + if (!eventPublisherEventBusArn) { + throw new Error('EVENT_PUBLISHER_EVENT_BUS_ARN is not set'); + } + + if (!eventPublisherDlqUrl) { + throw new Error('EVENT_PUBLISHER_DLQ_URL is not set'); + } + + logger.info({ + description: 'Configuration loaded', + documentReferenceBucket, + unscannedFilesBucket, + unscannedFilesPathPrefix, + }); + + return { + documentReferenceBucket, + unscannedFilesBucket, + unscannedFilesPathPrefix, + eventPublisherEventBusArn, + eventPublisherDlqUrl, + }; +} diff --git a/lambdas/file-scanner-lambda/tsconfig.json b/lambdas/file-scanner-lambda/tsconfig.json new file mode 100644 index 000000000..c2c957f53 --- /dev/null +++ b/lambdas/file-scanner-lambda/tsconfig.json @@ -0,0 +1,13 @@ +{ + "compilerOptions": { + "baseUrl": "src" + }, + "exclude": [ + "node_modules" + ], + "extends": "@tsconfig/node22/tsconfig.json", + "include": [ + "src/**/*", + "jest.config.ts" + ] +} diff --git a/package-lock.json b/package-lock.json index af9f46e08..514e05686 100644 --- a/package-lock.json +++ b/package-lock.json @@ -8,6 +8,7 @@ "name": "nhs-notify-digital-letters", "version": "0.0.1", "workspaces": [ + "lambdas/file-scanner-lambda", "lambdas/key-generation", "lambdas/refresh-apim-access-token", "lambdas/pdm-mock-lambda", @@ -157,6 +158,77 @@ "typescript": "^3.0.0 || ^4.0.0 || ^5.0.0" } }, + "lambdas/file-scanner-lambda": { + "name": "nhs-notify-digital-letters-file-scanner-lambda", + "version": "0.0.1", + "dependencies": { + "@aws-sdk/client-s3": "^3.908.0", + "aws-lambda": "^1.0.7", + "digital-letters-events": "^0.0.1", + "utils": "^0.0.1" + }, + "devDependencies": { + "@tsconfig/node22": "^22.0.2", + "@types/aws-lambda": "^8.10.155", + "@types/jest": "^29.5.14", + "jest": "^29.7.0", + "jest-mock-extended": "^3.0.7", + "typescript": "^5.9.3" + } + }, + "lambdas/file-scanner-lambda/node_modules/@types/jest": { + "version": "29.5.14", + "resolved": "https://registry.npmjs.org/@types/jest/-/jest-29.5.14.tgz", + "integrity": "sha512-ZN+4sdnLUbo8EVvVc2ao0GFW6oVrQRPn4K2lglySj7APvSrgzxHiNNK99us4WDMi57xxA2yggblIAMNhXOotLQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "expect": "^29.0.0", + "pretty-format": "^29.0.0" + } + }, + "lambdas/file-scanner-lambda/node_modules/jest": { + "version": "29.7.0", + "resolved": "https://registry.npmjs.org/jest/-/jest-29.7.0.tgz", + "integrity": "sha512-NIy3oAFp9shda19hy4HK0HRTWKtPJmGdnvywu01nOqNC2vZg+Z+fvJDxpMQA88eb2I9EcafcdjYgsDthnYTvGw==", + "dev": true, + "license": "MIT", + "peer": true, + "dependencies": { + "@jest/core": "^29.7.0", + "@jest/types": "^29.6.3", + "import-local": "^3.0.2", + "jest-cli": "^29.7.0" + }, + "bin": { + "jest": "bin/jest.js" + }, + "engines": { + "node": "^14.15.0 || ^16.10.0 || >=18.0.0" + }, + "peerDependencies": { + "node-notifier": "^8.0.1 || ^9.0.0 || ^10.0.0" + }, + "peerDependenciesMeta": { + "node-notifier": { + "optional": true + } + } + }, + "lambdas/file-scanner-lambda/node_modules/jest-mock-extended": { + "version": "3.0.7", + "resolved": "https://registry.npmjs.org/jest-mock-extended/-/jest-mock-extended-3.0.7.tgz", + "integrity": "sha512-7lsKdLFcW9B9l5NzZ66S/yTQ9k8rFtnwYdCNuRU/81fqDWicNDVhitTSPnrGmNeNm0xyw0JHexEOShrIKRCIRQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "ts-essentials": "^10.0.0" + }, + "peerDependencies": { + "jest": "^24.0.0 || ^25.0.0 || ^26.0.0 || ^27.0.0 || ^28.0.0 || ^29.0.0", + "typescript": "^3.0.0 || ^4.0.0 || ^5.0.0" + } + }, "lambdas/key-generation": { "version": "0.0.1", "dependencies": { @@ -12556,6 +12628,10 @@ "resolved": "lambdas/core-notifier-lambda", "link": true }, + "node_modules/nhs-notify-digital-letters-file-scanner-lambda": { + "resolved": "lambdas/file-scanner-lambda", + "link": true + }, "node_modules/nhs-notify-digital-letters-integration-tests": { "resolved": "tests/playwright", "link": true diff --git a/package.json b/package.json index 84f4f8a14..7a1ee60da 100644 --- a/package.json +++ b/package.json @@ -54,6 +54,7 @@ }, "version": "0.0.1", "workspaces": [ + "lambdas/file-scanner-lambda", "lambdas/key-generation", "lambdas/refresh-apim-access-token", "lambdas/pdm-mock-lambda", diff --git a/scripts/docker/examples/python/.tool-versions.example b/scripts/docker/examples/python/.tool-versions.example deleted file mode 100644 index 920931162..000000000 --- a/scripts/docker/examples/python/.tool-versions.example +++ /dev/null @@ -1,2 +0,0 @@ -# python, SEE: https://hub.docker.com/_/python/tags -# docker/python 3.11.4-alpine3.18@sha256:0135ae6442d1269379860b361760ad2cf6ab7c403d21935a8015b48d5bf78a86 diff --git a/scripts/docker/examples/python/Dockerfile b/scripts/docker/examples/python/Dockerfile deleted file mode 100644 index d0780aa41..000000000 --- a/scripts/docker/examples/python/Dockerfile +++ /dev/null @@ -1,33 +0,0 @@ -# `*:latest` will be replaced with a corresponding version stored in the '.tool-versions' file -# hadolint ignore=DL3007 -FROM python:latest as base - -# === Builder ================================================================== - -FROM base AS builder -COPY ./assets/hello_world/requirements.txt /requirements.txt -WORKDIR /packages -RUN set -eux; \ - \ - # Install dependencies - pip install \ - --requirement /requirements.txt \ - --prefix=/packages \ - --no-warn-script-location \ - --no-cache-dir - -# === Runtime ================================================================== - -FROM base -ENV \ - LANG="C.UTF-8" \ - LC_ALL="C.UTF-8" \ - PYTHONDONTWRITEBYTECODE="1" \ - PYTHONUNBUFFERED="1" \ - TZ="UTC" -COPY --from=builder /packages /usr/local -COPY ./assets/hello_world /hello_world -WORKDIR /hello_world -USER nobody -CMD [ "python", "app.py" ] -EXPOSE 8000 diff --git a/scripts/docker/examples/python/Dockerfile.effective b/scripts/docker/examples/python/Dockerfile.effective deleted file mode 100644 index 3f1ea6b07..000000000 --- a/scripts/docker/examples/python/Dockerfile.effective +++ /dev/null @@ -1,54 +0,0 @@ -# `*:latest` will be replaced with a corresponding version stored in the '.tool-versions' file -FROM python:3.11.4-alpine3.18@sha256:0135ae6442d1269379860b361760ad2cf6ab7c403d21935a8015b48d5bf78a86 as base - -# === Builder ================================================================== - -FROM base AS builder -COPY ./assets/hello_world/requirements.txt /requirements.txt -WORKDIR /packages -RUN set -eux; \ - \ - # Install dependencies - pip install \ - --requirement /requirements.txt \ - --prefix=/packages \ - --no-warn-script-location \ - --no-cache-dir - -# === Runtime ================================================================== - -FROM base -ENV \ - LANG="C.UTF-8" \ - LC_ALL="C.UTF-8" \ - PYTHONDONTWRITEBYTECODE="1" \ - PYTHONUNBUFFERED="1" \ - TZ="UTC" -COPY --from=builder /packages /usr/local -COPY ./assets/hello_world /hello_world -WORKDIR /hello_world -USER nobody -CMD [ "python", "app.py" ] -EXPOSE 8000 - -# === Metadata ================================================================= - -ARG IMAGE -ARG TITLE -ARG DESCRIPTION -ARG LICENCE -ARG GIT_URL -ARG GIT_BRANCH -ARG GIT_COMMIT_HASH -ARG BUILD_DATE -ARG BUILD_VERSION -LABEL \ - org.opencontainers.image.base.name=$IMAGE \ - org.opencontainers.image.title="$TITLE" \ - org.opencontainers.image.description="$DESCRIPTION" \ - org.opencontainers.image.licenses="$LICENCE" \ - org.opencontainers.image.url=$GIT_URL \ - org.opencontainers.image.ref.name=$GIT_BRANCH \ - org.opencontainers.image.revision=$GIT_COMMIT_HASH \ - org.opencontainers.image.created=$BUILD_DATE \ - org.opencontainers.image.version=$BUILD_VERSION diff --git a/scripts/docker/examples/python/VERSION b/scripts/docker/examples/python/VERSION deleted file mode 100644 index 8acdd82b7..000000000 --- a/scripts/docker/examples/python/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.0.1 diff --git a/scripts/docker/examples/python/assets/hello_world/app.py b/scripts/docker/examples/python/assets/hello_world/app.py deleted file mode 100644 index 4844e89cb..000000000 --- a/scripts/docker/examples/python/assets/hello_world/app.py +++ /dev/null @@ -1,12 +0,0 @@ -from flask import Flask -from flask_wtf.csrf import CSRFProtect - -app = Flask(__name__) -csrf = CSRFProtect() -csrf.init_app(app) - -@app.route("/") -def index(): - return "Hello World!" - -app.run(host='0.0.0.0', port=8000) diff --git a/scripts/docker/examples/python/assets/hello_world/requirements.txt b/scripts/docker/examples/python/assets/hello_world/requirements.txt deleted file mode 100644 index fd7fb3118..000000000 --- a/scripts/docker/examples/python/assets/hello_world/requirements.txt +++ /dev/null @@ -1,12 +0,0 @@ -blinker==1.6.2 -click==8.1.7 -Flask-WTF==1.2.0 -Flask==2.3.3 -itsdangerous==2.1.2 -Jinja2==3.1.6 -MarkupSafe==2.1.3 -pip==25.3 -setuptools==78.1.1 -Werkzeug==3.1.4 -wheel==0.46.2 -WTForms==3.0.1 diff --git a/scripts/docker/examples/python/tests/goss.yaml b/scripts/docker/examples/python/tests/goss.yaml deleted file mode 100644 index 589db37bf..000000000 --- a/scripts/docker/examples/python/tests/goss.yaml +++ /dev/null @@ -1,8 +0,0 @@ -package: - python: - installed: true - -command: - pip list | grep -i flask: - exit-status: 0 - timeout: 60000 diff --git a/tests/playwright/constants/backend-constants.ts b/tests/playwright/constants/backend-constants.ts index d9d789130..fae4a1df9 100644 --- a/tests/playwright/constants/backend-constants.ts +++ b/tests/playwright/constants/backend-constants.ts @@ -19,6 +19,7 @@ export const TTL_DLQ_NAME = `${CSI}-ttl-dlq`; export const PDM_UPLOADER_DLQ_NAME = `${CSI}-pdm-uploader-dlq`; export const PDM_POLL_DLQ_NAME = `${CSI}-pdm-poll-dlq`; export const CORE_NOTIFIER_DLQ_NAME = `${CSI}-core-notifier-dlq`; +export const FILE_SCANNER_DLQ_NAME = `${CSI}-scanner-dlq`; export const PRINT_STATUS_HANDLER_DLQ_NAME = `${CSI}-print-status-handler-dlq`; export const HANDLE_TTL_DLQ_NAME = `${CSI}-ttl-handle-expiry-errors-queue`; export const PRINT_ANALYSER_DLQ_NAME = `${CSI}-print-analyser-dlq`; @@ -42,5 +43,6 @@ export const FILE_SAFE_S3_BUCKET_NAME = `nhs-${process.env.AWS_ACCOUNT_ID}-${REG export const PDM_UPLOADER_LAMBDA_LOG_GROUP_NAME = `/aws/lambda/${CSI}-pdm-uploader`; export const PDM_POLL_LAMBDA_LOG_GROUP_NAME = `/aws/lambda/${CSI}-pdm-poll`; export const CORE_NOTIFIER_LAMBDA_LOG_GROUP_NAME = `/aws/lambda/${CSI}-core-notifier`; +export const FILE_SCANNER_LAMBDA_LOG_GROUP_NAME = `/aws/lambda/${CSI}-file-scanner`; export const PRINT_STATUS_HANDLER_LAMBDA_LOG_GROUP_NAME = `/aws/lambda/${CSI}-print-status-handler`; export const PRINT_ANALYSER_LAMBDA_LOG_GROUP_NAME = `/aws/lambda/${CSI}-print-analyser`; diff --git a/tests/playwright/digital-letters-component-tests/file-scanner.component.spec.ts b/tests/playwright/digital-letters-component-tests/file-scanner.component.spec.ts new file mode 100644 index 000000000..8469b9a50 --- /dev/null +++ b/tests/playwright/digital-letters-component-tests/file-scanner.component.spec.ts @@ -0,0 +1,148 @@ +import { expect, test } from '@playwright/test'; +import { ENV, REGION } from 'constants/backend-constants'; +import itemDequeuedValidator from 'digital-letters-events/ItemDequeued.js'; +import eventPublisher from 'helpers/event-bus-helpers'; +import expectToPassEventually from 'helpers/expectations'; +import { v4 as uuidv4 } from 'uuid'; +import { + getS3ObjectBufferFromUri, + getS3ObjectMetadata, + putDataS3, +} from 'utils'; + +const DOCUMENT_REFERENCE_BUCKET = `nhs-${process.env.AWS_ACCOUNT_ID}-${REGION}-${ENV}-dl-pii-data`; +const UNSCANNED_FILES_BUCKET = `nhs-${process.env.AWS_ACCOUNT_ID}-${REGION}-main-acct-digi-unscanned-files`; + +test.describe('File Scanner', () => { + test.beforeAll(async () => { + test.setTimeout(250_000); + }); + + test('should extract PDF from DocumentReference and store in unscanned bucket with metadata', async () => { + const messageReference = uuidv4(); + const senderId = 'TEST_SENDER_001'; + const documentReferenceKey = messageReference; + + const pdfContent = Buffer.from('Sample PDF content for test'); + const documentReference = { + resourceType: 'DocumentReference', + id: messageReference, + content: [ + { + attachment: { + contentType: 'application/pdf', + data: pdfContent.toString('base64'), + }, + }, + ], + }; + + await putDataS3(documentReference, { + Bucket: DOCUMENT_REFERENCE_BUCKET, + Key: documentReferenceKey, + }); + + const eventId = uuidv4(); + const messageUri = `s3://${DOCUMENT_REFERENCE_BUCKET}/${documentReferenceKey}`; + const eventTime = new Date().toISOString(); + + await eventPublisher.sendEvents( + [ + { + id: eventId, + specversion: '1.0', + source: `/nhs/england/notify/development/dev-1/data-plane/digitalletters/queue`, + subject: `message/${messageReference}`, + type: 'uk.nhs.notify.digital.letters.queue.item.dequeued.v1', + time: eventTime, + recordedtime: eventTime, + severitynumber: 2, + traceparent: + '00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01', + datacontenttype: 'application/json', + dataschema: + 'https://notify.nhs.uk/cloudevents/schemas/digital-letters/2025-10-draft/data/digital-letters-queue-item-dequeued-data.schema.json', + severitytext: 'INFO', + data: { + messageReference, + senderId, + messageUri, + }, + }, + ], + itemDequeuedValidator, + ); + + await expectToPassEventually(async () => { + const expectedKey = `${ENV}/${messageReference}.pdf`; + const expectedUri = `s3://${UNSCANNED_FILES_BUCKET}/${expectedKey}`; + + const storedPdf = await getS3ObjectBufferFromUri(expectedUri); + expect(storedPdf).toBeDefined(); + expect(storedPdf.toString()).toEqual(pdfContent.toString()); + + const metadata = await getS3ObjectMetadata({ + Bucket: UNSCANNED_FILES_BUCKET, + Key: expectedKey, + }); + expect(metadata).toBeDefined(); + expect(metadata?.messagereference).toEqual(messageReference); + expect(metadata?.senderid).toEqual(senderId); + expect(metadata?.createdat).toBeDefined(); + }, 120); + }); + + test('should handle validation errors by sending messages to DLQ', async () => { + const messageReference = uuidv4(); + const senderId = 'TEST_SENDER_002'; + const documentReferenceKey = `document-reference/${messageReference}`; + + const documentReference = { + resourceType: 'DocumentReference', + id: messageReference, + content: [], + }; + + await putDataS3(documentReference, { + Bucket: DOCUMENT_REFERENCE_BUCKET, + Key: documentReferenceKey, + }); + + const eventId = uuidv4(); + const messageUri = `s3://${DOCUMENT_REFERENCE_BUCKET}/${documentReferenceKey}`; + const eventTime = new Date().toISOString(); + + await eventPublisher.sendEvents( + [ + { + id: eventId, + specversion: '1.0', + source: `/nhs/england/notify/development/dev-1/data-plane/digitalletters/queue`, + subject: `message/${messageReference}`, + type: 'uk.nhs.notify.digital.letters.queue.item.dequeued.v1', + time: eventTime, + recordedtime: eventTime, + severitynumber: 2, + traceparent: + '00-0af7651916cd43dd8448eb211c80319c-b7ad6b7169203331-01', + datacontenttype: 'application/json', + dataschema: + 'https://notify.nhs.uk/cloudevents/schemas/digital-letters/2025-10-draft/data/digital-letters-queue-item-dequeued-data.schema.json', + severitytext: 'INFO', + data: { + messageReference, + senderId, + messageUri, + }, + }, + ], + itemDequeuedValidator, + ); + + await expectToPassEventually(async () => { + const expectedKey = `${ENV}/${messageReference}.pdf`; + const expectedUri = `s3://${UNSCANNED_FILES_BUCKET}/${expectedKey}`; + await expect(getS3ObjectBufferFromUri(expectedUri)).rejects.toThrow(); + }, 120); + }); +}); diff --git a/tests/playwright/digital-letters-component-tests/pdm-uploader.component.spec.ts b/tests/playwright/digital-letters-component-tests/pdm-uploader.component.spec.ts index 5a2d99c49..557599464 100644 --- a/tests/playwright/digital-letters-component-tests/pdm-uploader.component.spec.ts +++ b/tests/playwright/digital-letters-component-tests/pdm-uploader.component.spec.ts @@ -9,9 +9,9 @@ import messageDownloadedValidator from 'digital-letters-events/MESHInboxMessageD import { getLogsFromCloudwatch } from 'helpers/cloudwatch-helpers'; import eventPublisher from 'helpers/event-bus-helpers'; import expectToPassEventually from 'helpers/expectations'; -import { uploadToS3 } from 'helpers/s3-helpers'; import { expectMessageContainingString, purgeQueue } from 'helpers/sqs-helpers'; import { v4 as uuidv4 } from 'uuid'; +import { putDataS3 } from 'utils'; const pdmRequest = { resourceType: 'DocumentReference', @@ -64,7 +64,10 @@ test.describe('Digital Letters - Upload to PDM', () => { const messageReference = uuidv4(); const senderId = 'test-sender-1'; - uploadToS3(JSON.stringify(pdmRequest), LETTERS_S3_BUCKET_NAME, resourceKey); + await putDataS3(pdmRequest, { + Bucket: LETTERS_S3_BUCKET_NAME, + Key: resourceKey, + }); await eventPublisher.sendEvents( [ @@ -120,11 +123,10 @@ test.describe('Digital Letters - Upload to PDM', () => { unexpectedField: 'I should not be here', }; - uploadToS3( - JSON.stringify(invalidPdmRequest), - LETTERS_S3_BUCKET_NAME, - resourceKey, - ); + await putDataS3(invalidPdmRequest, { + Bucket: LETTERS_S3_BUCKET_NAME, + Key: resourceKey, + }); await eventPublisher.sendEvents( [ diff --git a/tests/playwright/helpers/s3-helpers.ts b/tests/playwright/helpers/s3-helpers.ts deleted file mode 100644 index 2b6ec2e66..000000000 --- a/tests/playwright/helpers/s3-helpers.ts +++ /dev/null @@ -1,39 +0,0 @@ -import { - ListBucketsCommand, - PutObjectCommand, - S3Client, -} from '@aws-sdk/client-s3'; - -const s3 = new S3Client({ - region: process.env.AWS_REGION || 'eu-west-2', -}); - -async function listBuckets(substring: string): Promise { - const resp = await s3.send(new ListBucketsCommand({})); - const buckets = resp.Buckets ?? []; - if (!substring) { - return buckets.map((b) => b.Name!).filter(Boolean); - } - const needle = substring.toLowerCase(); - return buckets - .map((b) => b.Name) - .filter( - (name): name is string => !!name && name.toLowerCase().includes(needle), - ); -} - -async function uploadToS3( - content: string, - bucket: string, - key: string, -): Promise { - await s3.send( - new PutObjectCommand({ - Bucket: bucket, - Key: key, - Body: content, - }), - ); -} - -export { listBuckets, uploadToS3 }; diff --git a/utils/utils/src/__tests__/s3-utils/get-object-s3.test.ts b/utils/utils/src/__tests__/s3-utils/get-object-s3.test.ts index d9cce7e03..aa6e70143 100644 --- a/utils/utils/src/__tests__/s3-utils/get-object-s3.test.ts +++ b/utils/utils/src/__tests__/s3-utils/get-object-s3.test.ts @@ -3,6 +3,7 @@ import { getS3Object, getS3ObjectBufferFromUri, getS3ObjectFromUri, + getS3ObjectMetadata, s3Client, } from '../../s3-utils'; @@ -259,3 +260,85 @@ describe('getS3ObjectBufferFromUri', () => { ); }); }); + +describe('getS3ObjectMetadata', () => { + afterEach(jest.resetAllMocks); + + it('Should retrieve metadata for object', async () => { + const metadata = { + messagereference: 'test-ref-001', + senderid: 'SENDER_001', + createdat: '2026-01-19T12:00:00Z', + }; + + s3Client.send = jest.fn().mockReturnValueOnce({ Metadata: metadata }); + + const s3Location = { + Bucket: 'bucket-name', + Key: 'test-file.pdf', + }; + + const result = await getS3ObjectMetadata(s3Location); + + expect(s3Client.send).toHaveBeenCalledWith( + expect.objectContaining({ + input: s3Location, + }), + ); + expect(result).toEqual(metadata); + }); + + it('Should retrieve metadata with version ID', async () => { + const metadata = { + customkey: 'customvalue', + }; + + s3Client.send = jest.fn().mockReturnValueOnce({ Metadata: metadata }); + + const s3Location = { + Bucket: 'bucket-name', + Key: 'versioned-file.json', + VersionId: 'version-123', + }; + + const result = await getS3ObjectMetadata(s3Location); + + expect(s3Client.send).toHaveBeenCalledWith( + expect.objectContaining({ + input: s3Location, + }), + ); + expect(result).toEqual(metadata); + }); + + it('Should throw an error if object not found', async () => { + s3Client.send = jest.fn().mockImplementationOnce(() => { + throw new Error('NoSuchKey'); + }); + + await expect( + getS3ObjectMetadata({ + Bucket: 'bucket-name', + Key: 'nonexistent.pdf', + }), + ).rejects.toThrow( + "Could not retrieve metadata from bucket 's3://bucket-name/nonexistent.pdf' from S3: NoSuchKey", + ); + }); + + it('Should handle error objects', async () => { + const error = new Error('Access Denied'); + s3Client.send = jest.fn().mockImplementationOnce(() => { + throw error; + }); + + await expect( + getS3ObjectMetadata({ + Bucket: 'bucket-name', + Key: 'forbidden.pdf', + }), + ).rejects.toThrow( + "Could not retrieve metadata from bucket 's3://bucket-name/forbidden.pdf' from S3: Access Denied", + ); + }); +}); diff --git a/utils/utils/src/s3-utils/get-object-s3.ts b/utils/utils/src/s3-utils/get-object-s3.ts index 9430b4729..8d541ed3f 100644 --- a/utils/utils/src/s3-utils/get-object-s3.ts +++ b/utils/utils/src/s3-utils/get-object-s3.ts @@ -1,5 +1,9 @@ import { type Readable } from 'node:stream'; -import { GetObjectCommand, GetObjectCommandOutput } from '@aws-sdk/client-s3'; +import { + GetObjectCommand, + GetObjectCommandOutput, + HeadObjectCommand, +} from '@aws-sdk/client-s3'; import { s3Client } from './s3-client'; export function isReadable( @@ -122,3 +126,24 @@ export async function getS3ObjectBufferFromUri(uri: string): Promise { ); } } + +export async function getS3ObjectMetadata( + location: S3Location, +): Promise | undefined> { + const { Bucket, Key, VersionId } = location; + try { + const response = await s3Client.send( + new HeadObjectCommand({ + Bucket, + Key, + VersionId, + }), + ); + return response.Metadata; + } catch (error) { + const msg = error instanceof Error ? error.message : String(error); + throw new Error( + `Could not retrieve metadata from bucket 's3://${Bucket}/${Key}' from S3: ${msg}`, + ); + } +}