Closed holtskinner closed 6 months ago
Error message that appeared in the tests before the document.py
commit was applied:
=================================== FAILURES ===================================
_______ test_quickstart_sample_batch_process_metadata_matching_prefixes ________
capsys = <_pytest.capture.CaptureFixture object at 0x7f3543707c50>
def test_quickstart_sample_batch_process_metadata_matching_prefixes(
capsys: pytest.CaptureFixture,
) -> None:
batch_process_metadata = documentai.BatchProcessMetadata(
state=documentai.BatchProcessMetadata.State.SUCCEEDED,
individual_process_statuses=[
documentai.BatchProcessMetadata.IndividualProcessStatus(
input_gcs_source="gs://test-directory/documentai/input.pdf",
output_gcs_destination="gs://documentai_toolbox_samples/output/matching-prefixes/1",
),
documentai.BatchProcessMetadata.IndividualProcessStatus(
input_gcs_source="gs://test-directory/documentai/input.pdf",
output_gcs_destination="gs://documentai_toolbox_samples/output/matching-prefixes/11",
),
],
)
wrapped_document = quickstart_sample.quickstart_sample(
> batch_process_metadata=batch_process_metadata
)
test_quickstart_sample.py:116:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
quickstart_sample.py:80: in quickstart_sample
metadata=batch_process_metadata
../../google/cloud/documentai_toolbox/wrappers/document.py:581: in from_batch_process_metadata
for process in list(metadata.individual_process_statuses)
../../google/cloud/documentai_toolbox/wrappers/document.py:581: in <listcomp>
for process in list(metadata.individual_process_statuses)
../../google/cloud/documentai_toolbox/wrappers/document.py:507: in from_gcs
shards = _get_shards(gcs_bucket_name=gcs_bucket_name, gcs_prefix=gcs_prefix)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
gcs_bucket_name = 'documentai_toolbox_samples'
gcs_prefix = 'output/matching-prefixes/1'
def _get_shards(gcs_bucket_name: str, gcs_prefix: str) -> List[documentai.Document]:
r"""Returns a list of `documentai.Document` shards from a Cloud Storage folder.
Args:
gcs_bucket_name (str):
Required. The name of the gcs bucket.
Format: `gs://{bucket_name}/{optional_folder}/{target_folder}/` where gcs_bucket_name=`bucket`.
gcs_prefix (str):
Required. The prefix of the json files in the target_folder.
Format: `gs://{bucket_name}/{optional_folder}/{target_folder}/` where gcs_prefix=`{optional_folder}/{target_folder}`.
Returns:
List[google.cloud.documentai.Document]:
A list of documentai.Documents.
"""
file_check = re.match(constants.FILE_CHECK_REGEX, gcs_prefix)
if file_check is not None:
raise ValueError("gcs_prefix cannot contain file types")
byte_array = gcs_utilities.get_bytes(gcs_bucket_name, gcs_prefix)
shards = [
documentai.Document.from_json(byte, ignore_unknown_fields=True)
for byte in byte_array
]
if not shards:
raise ValueError("Incomplete Document - No JSON files found.")
total_shards = len(shards)
if total_shards > 1:
shards.sort(key=lambda x: int(x.shard_info.shard_index))
for shard in shards:
if int(shard.shard_info.shard_count) != total_shards:
raise ValueError(
> f"Invalid Document - shardInfo.shardCount ({shard.shard_info.shard_count}) does not match number of shards ({total_shards})."
)
E ValueError: Invalid Document - shardInfo.shardCount (1) does not match number of shards (6).
../../google/cloud/documentai_toolbox/wrappers/document.py:134: ValueError
-------- generated xml file: /workspace/samples/snippets/sponge_log.xml --------
Fixes #271 🦕