Model I am using: LayoutLMv2 (microsoft/layoutlmv2-base-uncased).

I am using the example code:
from transformers import AutoProcessor, LayoutLMv2Model, set_seed
from PIL import Image
import torch
from datasets import load_dataset
set_seed(0)
processor = AutoProcessor.from_pretrained("microsoft/layoutlmv2-base-uncased")
model = LayoutLMv2Model.from_pretrained("microsoft/layoutlmv2-base-uncased")
dataset = load_dataset("hf-internal-testing/fixtures_docvqa")
image_path = dataset["test"][0]["file"]
image = Image.open(image_path).convert("RGB")
encoding = processor(image, return_tensors="pt")
outputs = model(**encoding)
last_hidden_states = outputs.last_hidden_state
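For reference, this is how I inspect everything the processor returns. My understanding (not confirmed) is that the processor runs OCR on the image to produce the words and boxes, so the text length depends on the document; the exact key names listed in the comment are my assumption:

# Print every tensor the processor produced; for LayoutLMv2 I expect (assumption)
# input_ids, token_type_ids, attention_mask, bbox, and image.
for name, tensor in encoding.items():
    print(name, tuple(tensor.shape))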
input_ids.shape: torch.Size([1, 293])
last_hidden_states.shape: torch.Size([1, 342, 768])
Why is the sequence length of last_hidden_states (342) different from the number of input_ids (293)?
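My current guess (unconfirmed, based on reading the config) is that LayoutLMv2 appends a grid of visual tokens from the image backbone after the text tokens, so the hidden states cover 293 text positions plus 7 * 7 = 49 visual positions. A minimal sketch of that arithmetic, assuming config.image_feature_pool_shape is the relevant setting:

# Hypothesis: sequence length = text tokens + pooled visual tokens.
# image_feature_pool_shape defaults to [7, 7, 256], i.e. a 7x7 grid = 49 tokens.
text_len = encoding["input_ids"].shape[1]                  # 293
pool_h, pool_w, _ = model.config.image_feature_pool_shape
visual_len = pool_h * pool_w                               # 49
print(text_len + visual_len)                               # 342 == last_hidden_states.shape[1]

Is that the right explanation, and is this behavior documented anywhere?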