Skip to content

Commit 38dde99

Browse files
feat: [google-cloud-documentai] A new message FoundationModelTuningOptions is added (#12319)
- [ ] Regenerate this pull request now.

BEGIN_COMMIT_OVERRIDE
feat: A new message FoundationModelTuningOptions is added
feat: A new field foundation_model_tuning_options is added to message TrainProcessorVersionRequest
feat: A new field `labels` is added to messages `ProcessRequest` and `BatchProcessRequest`
feat: A new field `display_name` is added to message `DocumentSchema`
fix: deprecate `Dataset.document_warehouse_config`
docs: updated comments
END_COMMIT_OVERRIDE

PiperOrigin-RevId: 607358355
Source-Link: googleapis/googleapis@1da5299
Source-Link: googleapis/googleapis-gen@a931f91
Copy-Tag: eyJwIjoicGFja2FnZXMvZ29vZ2xlLWNsb3VkLWRvY3VtZW50YWkvLk93bEJvdC55YW1sIiwiaCI6ImE5MzFmOTFjMDVhZDZjNDk1OGM1NDZhNTdkYjQyOThkYzYyMjc1N2YifQ==

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
1 parent 1988e0d commit 38dde99

9 files changed

Lines changed: 121 additions & 34 deletions

File tree

packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_processor_service/async_client.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1963,7 +1963,9 @@ async def sample_create_processor():
19631963
processor (:class:`google.cloud.documentai_v1beta3.types.Processor`):
19641964
Required. The processor to be created, requires
19651965
[Processor.type][google.cloud.documentai.v1beta3.Processor.type]
1966-
and [Processor.display_name]][] to be set. Also, the
1966+
and
1967+
[Processor.display_name][google.cloud.documentai.v1beta3.Processor.display_name]
1968+
to be set. Also, the
19671969
[Processor.kms_key_name][google.cloud.documentai.v1beta3.Processor.kms_key_name]
19681970
field must be set if the processor is under CMEK.
19691971

packages/google-cloud-documentai/google/cloud/documentai_v1beta3/services/document_processor_service/client.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2443,7 +2443,9 @@ def sample_create_processor():
24432443
processor (google.cloud.documentai_v1beta3.types.Processor):
24442444
Required. The processor to be created, requires
24452445
[Processor.type][google.cloud.documentai.v1beta3.Processor.type]
2446-
and [Processor.display_name]][] to be set. Also, the
2446+
and
2447+
[Processor.display_name][google.cloud.documentai.v1beta3.Processor.display_name]
2448+
to be set. Also, the
24472449
[Processor.kms_key_name][google.cloud.documentai.v1beta3.Processor.kms_key_name]
24482450
field must be set if the processor is under CMEK.
24492451

packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/dataset.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,8 +54,8 @@ class Dataset(proto.Message):
5454
5555
This field is a member of `oneof`_ ``storage_source``.
5656
document_warehouse_config (google.cloud.documentai_v1beta3.types.Dataset.DocumentWarehouseConfig):
57-
Optional. Document AI Warehouse-based dataset
58-
configuration.
57+
Optional. Deprecated. Warehouse-based dataset
58+
configuration is not supported.
5959
6060
This field is a member of `oneof`_ ``storage_source``.
6161
unmanaged_dataset_config (google.cloud.documentai_v1beta3.types.Dataset.UnmanagedDatasetConfig):

packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/document.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1491,8 +1491,10 @@ class PageRef(proto.Message):
14911491
[PageRef.bounding_poly][google.cloud.documentai.v1beta3.Document.PageAnchor.PageRef.bounding_poly]
14921492
instead.
14931493
bounding_poly (google.cloud.documentai_v1beta3.types.BoundingPoly):
1494-
Optional. Identifies the bounding polygon of
1495-
a layout element on the page.
1494+
Optional. Identifies the bounding polygon of a layout
1495+
element on the page. If ``layout_type`` is set, the bounding
1496+
polygon must be exactly the same as the layout element it's
1497+
referring to.
14961498
confidence (float):
14971499
Optional. Confidence of detected page element, if
14981500
applicable. Range ``[0, 1]``.

packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/document_processor_service.py

Lines changed: 74 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,9 @@ class ProcessOptions(proto.Message):
116116
117117
This field is a member of `oneof`_ ``page_range``.
118118
ocr_config (google.cloud.documentai_v1beta3.types.OcrConfig):
119-
Only applicable to ``OCR_PROCESSOR``. Returns error if set
120-
on other processor types.
119+
Only applicable to ``OCR_PROCESSOR`` and
120+
``FORM_PARSER_PROCESSOR``. Returns error if set on other
121+
processor types.
121122
schema_override (google.cloud.documentai_v1beta3.types.DocumentSchema):
122123
Optional. Override the schema of the
123124
[ProcessorVersion][google.cloud.documentai.v1beta3.ProcessorVersion].
@@ -223,6 +224,15 @@ class ProcessRequest(proto.Message):
223224
``pages.{page_field_name}``.
224225
process_options (google.cloud.documentai_v1beta3.types.ProcessOptions):
225226
Inference-time options for the process API
227+
labels (MutableMapping[str, str]):
228+
Optional. The labels with user-defined
229+
metadata for the request.
230+
Label keys and values can be no longer than 63
231+
characters (Unicode codepoints) and can only
232+
contain lowercase letters, numeric characters,
233+
underscores, and dashes. International
234+
characters are allowed. Label values are
235+
optional. Label keys must start with a letter.
226236
"""
227237

228238
inline_document: gcd_document.Document = proto.Field(
@@ -266,6 +276,11 @@ class ProcessRequest(proto.Message):
266276
number=7,
267277
message="ProcessOptions",
268278
)
279+
labels: MutableMapping[str, str] = proto.MapField(
280+
proto.STRING,
281+
proto.STRING,
282+
number=10,
283+
)
269284

270285

271286
class HumanReviewStatus(proto.Message):
@@ -398,6 +413,15 @@ class BatchProcessRequest(proto.Message):
398413
Default to ``false``.
399414
process_options (google.cloud.documentai_v1beta3.types.ProcessOptions):
400415
Inference-time options for the process API
416+
labels (MutableMapping[str, str]):
417+
Optional. The labels with user-defined
418+
metadata for the request.
419+
Label keys and values can be no longer than 63
420+
characters (Unicode codepoints) and can only
421+
contain lowercase letters, numeric characters,
422+
underscores, and dashes. International
423+
characters are allowed. Label values are
424+
optional. Label keys must start with a letter.
401425
"""
402426

403427
class BatchInputConfig(proto.Message):
@@ -476,6 +500,11 @@ class BatchOutputConfig(proto.Message):
476500
number=7,
477501
message="ProcessOptions",
478502
)
503+
labels: MutableMapping[str, str] = proto.MapField(
504+
proto.STRING,
505+
proto.STRING,
506+
number=9,
507+
)
479508

480509

481510
class BatchProcessResponse(proto.Message):
@@ -1022,7 +1051,9 @@ class CreateProcessorRequest(proto.Message):
10221051
processor (google.cloud.documentai_v1beta3.types.Processor):
10231052
Required. The processor to be created, requires
10241053
[Processor.type][google.cloud.documentai.v1beta3.Processor.type]
1025-
and [Processor.display_name]][] to be set. Also, the
1054+
and
1055+
[Processor.display_name][google.cloud.documentai.v1beta3.Processor.display_name]
1056+
to be set. Also, the
10261057
[Processor.kms_key_name][google.cloud.documentai.v1beta3.Processor.kms_key_name]
10271058
field must be set if the processor is under CMEK.
10281059
"""
@@ -1217,6 +1248,10 @@ class TrainProcessorVersionRequest(proto.Message):
12171248
[TrainProcessorVersion][google.cloud.documentai.v1beta3.DocumentProcessorService.TrainProcessorVersion]
12181249
method.
12191250
1251+
This message has `oneof`_ fields (mutually exclusive fields).
1252+
For each oneof, at most one member field can be set at the same time.
1253+
Setting any member of the oneof automatically clears all other
1254+
members.
12201255
12211256
.. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields
12221257
@@ -1225,6 +1260,11 @@ class TrainProcessorVersionRequest(proto.Message):
12251260
Options to control Custom Document Extraction
12261261
(CDE) Processor.
12271262
1263+
This field is a member of `oneof`_ ``processor_flags``.
1264+
foundation_model_tuning_options (google.cloud.documentai_v1beta3.types.TrainProcessorVersionRequest.FoundationModelTuningOptions):
1265+
Options to control foundation model tuning of
1266+
a processor.
1267+
12281268
This field is a member of `oneof`_ ``processor_flags``.
12291269
parent (str):
12301270
Required. The parent (project, location and processor) to
@@ -1301,12 +1341,43 @@ class TrainingMethod(proto.Enum):
13011341
enum="TrainProcessorVersionRequest.CustomDocumentExtractionOptions.TrainingMethod",
13021342
)
13031343

1344+
class FoundationModelTuningOptions(proto.Message):
1345+
r"""Options to control foundation model tuning of the processor.
1346+
1347+
Attributes:
1348+
train_steps (int):
1349+
Optional. The number of steps to run for
1350+
model tuning. Valid values are between 1 and
1351+
400. If not provided, recommended steps will be
1352+
used.
1353+
learning_rate_multiplier (float):
1354+
Optional. The multiplier to apply to the
1355+
recommended learning rate. Valid values are
1356+
between 0.1 and 10. If not provided, recommended
1357+
learning rate will be used.
1358+
"""
1359+
1360+
train_steps: int = proto.Field(
1361+
proto.INT32,
1362+
number=2,
1363+
)
1364+
learning_rate_multiplier: float = proto.Field(
1365+
proto.FLOAT,
1366+
number=3,
1367+
)
1368+
13041369
custom_document_extraction_options: CustomDocumentExtractionOptions = proto.Field(
13051370
proto.MESSAGE,
13061371
number=5,
13071372
oneof="processor_flags",
13081373
message=CustomDocumentExtractionOptions,
13091374
)
1375+
foundation_model_tuning_options: FoundationModelTuningOptions = proto.Field(
1376+
proto.MESSAGE,
1377+
number=12,
1378+
oneof="processor_flags",
1379+
message=FoundationModelTuningOptions,
1380+
)
13101381
parent: str = proto.Field(
13111382
proto.STRING,
13121383
number=1,

packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/document_schema.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -220,6 +220,8 @@ class Property(proto.Message):
220220
name (str):
221221
The name of the property. Follows the same
222222
guidelines as the EntityType name.
223+
display_name (str):
224+
User defined name for the property.
223225
value_type (str):
224226
A reference to the value type of the property. This type is
225227
subject to the same conventions as the ``Entity.base_types``
@@ -235,14 +237,14 @@ class Property(proto.Message):
235237

236238
class OccurrenceType(proto.Enum):
237239
r"""Types of occurrences of the entity type in the document. This
238-
represents the number of instances of instances of an entity, not
239-
number of mentions of an entity. For example, a bank statement may
240-
only have one ``account_number``, but this account number may be
241-
mentioned in several places on the document. In this case the
242-
'account_number' would be considered a ``REQUIRED_ONCE`` entity
243-
type. If, on the other hand, we expect a bank statement to contain
244-
the status of multiple different accounts for the customers, the
245-
occurrence type will be set to ``REQUIRED_MULTIPLE``.
240+
represents the number of instances, not mentions, of an entity. For
241+
example, a bank statement might only have one ``account_number``,
242+
but this account number can be mentioned in several places on the
243+
document. In this case, the ``account_number`` is considered a
244+
``REQUIRED_ONCE`` entity type. If, on the other hand, we expect a
245+
bank statement to contain the status of multiple different accounts
246+
for the customers, the occurrence type is set to
247+
``REQUIRED_MULTIPLE``.
246248
247249
Values:
248250
OCCURRENCE_TYPE_UNSPECIFIED (0):
@@ -272,6 +274,10 @@ class OccurrenceType(proto.Enum):
272274
proto.STRING,
273275
number=1,
274276
)
277+
display_name: str = proto.Field(
278+
proto.STRING,
279+
number=6,
280+
)
275281
value_type: str = proto.Field(
276282
proto.STRING,
277283
number=2,

packages/google-cloud-documentai/google/cloud/documentai_v1beta3/types/document_service.py

Lines changed: 17 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -71,17 +71,17 @@ class DatasetSplitType(proto.Enum):
7171

7272

7373
class DocumentLabelingState(proto.Enum):
74-
r"""Describes the labelling status of a document.
74+
r"""Describes the labeling status of a document.
7575
7676
Values:
7777
DOCUMENT_LABELING_STATE_UNSPECIFIED (0):
7878
Default value if the enum is not set.
7979
DOCUMENT_LABELED (1):
80-
Document has been labelled.
80+
Document has been labeled.
8181
DOCUMENT_UNLABELED (2):
82-
Document has not been labelled.
82+
Document has not been labeled.
8383
DOCUMENT_AUTO_LABELED (3):
84-
Document has been auto-labelled.
84+
Document has been auto-labeled.
8585
"""
8686
DOCUMENT_LABELING_STATE_UNSPECIFIED = 0
8787
DOCUMENT_LABELED = 1
@@ -417,9 +417,9 @@ class ListDocumentsRequest(proto.Message):
417417
- String match is case sensitive (for filter
418418
``DisplayName`` & ``EntityType``).
419419
return_total_size (bool):
420-
Optional. Controls if the ListDocuments request requires a
421-
total size of matched documents. See
422-
ListDocumentsResponse.total_size.
420+
Optional. Controls if the request requires a total size of
421+
matched documents. See
422+
[ListDocumentsResponse.total_size][google.cloud.documentai.v1beta3.ListDocumentsResponse.total_size].
423423
424424
Enabling this flag may adversely impact performance.
425425
@@ -428,11 +428,13 @@ class ListDocumentsRequest(proto.Message):
428428
Optional. Number of results to skip beginning from the
429429
``page_token`` if provided.
430430
https://google.aip.dev/158#skipping-results. It must be a
431-
non-negative integer. Negative values wil be rejected. Note
431+
non-negative integer. Negative values will be rejected. Note
432432
that this is not the number of pages to skip. If this value
433433
causes the cursor to move past the end of results,
434-
``ListDocumentsResponse.document_metadata`` and
435-
``ListDocumentsResponse.next_page_token`` will be empty.
434+
[ListDocumentsResponse.document_metadata][google.cloud.documentai.v1beta3.ListDocumentsResponse.document_metadata]
435+
and
436+
[ListDocumentsResponse.next_page_token][google.cloud.documentai.v1beta3.ListDocumentsResponse.next_page_token]
437+
will be empty.
436438
"""
437439

438440
dataset: str = proto.Field(
@@ -469,9 +471,10 @@ class ListDocumentsResponse(proto.Message):
469471
Document metadata corresponding to the listed
470472
documents.
471473
next_page_token (str):
472-
A token, which can be sent as ``page_token`` to retrieve the
473-
next page. If this field is omitted, there are no subsequent
474-
pages.
474+
A token, which can be sent as
475+
[ListDocumentsRequest.page_token][google.cloud.documentai.v1beta3.ListDocumentsRequest.page_token]
476+
to retrieve the next page. If this field is omitted, there
477+
are no subsequent pages.
475478
total_size (int):
476479
Total count of documents queried.
477480
"""
@@ -671,7 +674,7 @@ class DocumentMetadata(proto.Message):
671674
Type of the dataset split to which the
672675
document belongs.
673676
labeling_state (google.cloud.documentai_v1beta3.types.DocumentLabelingState):
674-
Labelling state of the document.
677+
Labeling state of the document.
675678
display_name (str):
676679
The display name of the document.
677680
"""

packages/google-cloud-documentai/scripts/fixup_documentai_v1beta3_keywords.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ class documentaiCallTransformer(cst.CSTTransformer):
4040
CTRL_PARAMS: Tuple[str] = ('retry', 'timeout', 'metadata')
4141
METHOD_TO_PARAMS: Dict[str, Tuple[str]] = {
4242
'batch_delete_documents': ('dataset', 'dataset_documents', ),
43-
'batch_process_documents': ('name', 'input_configs', 'output_config', 'input_documents', 'document_output_config', 'skip_human_review', 'process_options', ),
43+
'batch_process_documents': ('name', 'input_configs', 'output_config', 'input_documents', 'document_output_config', 'skip_human_review', 'process_options', 'labels', ),
4444
'create_processor': ('parent', 'processor', ),
4545
'delete_processor': ('name', ),
4646
'delete_processor_version': ('name', ),
@@ -62,10 +62,10 @@ class documentaiCallTransformer(cst.CSTTransformer):
6262
'list_processors': ('parent', 'page_size', 'page_token', ),
6363
'list_processor_types': ('parent', 'page_size', 'page_token', ),
6464
'list_processor_versions': ('parent', 'page_size', 'page_token', ),
65-
'process_document': ('name', 'inline_document', 'raw_document', 'gcs_document', 'document', 'skip_human_review', 'field_mask', 'process_options', ),
65+
'process_document': ('name', 'inline_document', 'raw_document', 'gcs_document', 'document', 'skip_human_review', 'field_mask', 'process_options', 'labels', ),
6666
'review_document': ('human_review_config', 'inline_document', 'document', 'enable_schema_validation', 'priority', 'document_schema', ),
6767
'set_default_processor_version': ('processor', 'default_processor_version', ),
68-
'train_processor_version': ('parent', 'processor_version', 'custom_document_extraction_options', 'document_schema', 'input_data', 'base_processor_version', ),
68+
'train_processor_version': ('parent', 'processor_version', 'custom_document_extraction_options', 'foundation_model_tuning_options', 'document_schema', 'input_data', 'base_processor_version', ),
6969
'undeploy_processor_version': ('name', ),
7070
'update_dataset': ('dataset', 'update_mask', ),
7171
'update_dataset_schema': ('dataset_schema', 'update_mask', ),

packages/google-cloud-documentai/tests/unit/gapic/documentai_v1beta3/test_document_service.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4897,6 +4897,7 @@ def test_update_dataset_schema_rest(request_type):
48974897
"properties": [
48984898
{
48994899
"name": "name_value",
4900+
"display_name": "display_name_value",
49004901
"value_type": "value_type_value",
49014902
"occurrence_type": 1,
49024903
"property_metadata": {

0 commit comments

Comments
 (0)