explosion / spacy-llm

🦙 Integrating LLMs into structured NLP pipelines
https://spacy.io/usage/large-language-models
MIT License

'<' not supported between instances of 'str' and 'int' #411

Open · BaptisteLoquette opened 6 months ago

BaptisteLoquette commented 6 months ago

Hello, when I run nlp = assemble("config.cfg") I get the traceback below. I'm using the Dolly config.cfg example from here: https://spacy.io/usage/large-language-models#usage. Thanks in advance!
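For reference, my config.cfg follows the documented Dolly example; roughly the following (the exact task, labels and model version strings may not match the published page exactly):

[nlp]
lang = "en"
pipeline = ["llm"]

[components]

[components.llm]
factory = "llm"

[components.llm.task]
@llm_tasks = "spacy.TextCat.v2"
labels = ["COMPLIMENT", "INSULT"]

[components.llm.model]
@llm_models = "spacy.Dolly.v1"
name = "dolly-v2-3b"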


File /usr/local/lib/python3.9/dist-packages/spacy_llm/util.py:48, in assemble(config_path, overrides)
     46 config_path = Path(config_path)
     47 config = load_config(config_path, overrides=overrides, interpolate=False)
---> 48 return assemble_from_config(config)

File /usr/local/lib/python3.9/dist-packages/spacy_llm/util.py:28, in assemble_from_config(config)
     22 def assemble_from_config(config: Config) -> Language:
     23     """Assemble a spaCy pipeline from a confection Config object.
     24 
     25     config (Config): Config to load spaCy pipeline from.
     26     RETURNS (Language): An initialized spaCy pipeline.
     27     """
---> 28     nlp = load_model_from_config(config, auto_fill=True)
     29     config = config.interpolate()
     30     sourced = get_sourced_components(config)

File /usr/local/lib/python3.9/dist-packages/spacy/util.py:587, in load_model_from_config(config, meta, vocab, disable, enable, exclude, auto_fill, validate)
    584 # This will automatically handle all codes registered via the languages
    585 # registry, including custom subclasses provided via entry points
    586 lang_cls = get_lang_class(nlp_config["lang"])
--> 587 nlp = lang_cls.from_config(
    588     config,
    589     vocab=vocab,
    590     disable=disable,
    591     enable=enable,
    592     exclude=exclude,
    593     auto_fill=auto_fill,
    594     validate=validate,
    595     meta=meta,
    596 )
    597 return nlp

File /usr/local/lib/python3.9/dist-packages/spacy/language.py:1864, in Language.from_config(cls, config, vocab, disable, enable, exclude, meta, auto_fill, validate)
   1861     factory = pipe_cfg.pop("factory")
   1862     # The pipe name (key in the config) here is the unique name
   1863     # of the component, not necessarily the factory
-> 1864     nlp.add_pipe(
   1865         factory,
   1866         name=pipe_name,
   1867         config=pipe_cfg,
   1868         validate=validate,
   1869         raw_config=raw_config,
   1870     )
   1871 else:
   1872     assert "source" in pipe_cfg

File /usr/local/lib/python3.9/dist-packages/spacy/language.py:821, in Language.add_pipe(self, factory_name, name, before, after, first, last, source, config, raw_config, validate)
    817     pipe_component, factory_name = self.create_pipe_from_source(
    818         factory_name, source, name=name
    819     )
    820 else:
--> 821     pipe_component = self.create_pipe(
    822         factory_name,
    823         name=name,
    824         config=config,
    825         raw_config=raw_config,
    826         validate=validate,
    827     )
    828 pipe_index = self._get_pipe_index(before, after, first, last)
    829 self._pipe_meta[name] = self.get_factory_meta(factory_name)

File /usr/local/lib/python3.9/dist-packages/spacy/language.py:709, in Language.create_pipe(self, factory_name, name, config, raw_config, validate)
    706 cfg = {factory_name: config}
    707 # We're calling the internal _fill here to avoid constructing the
    708 # registered functions twice
--> 709 resolved = registry.resolve(cfg, validate=validate)
    710 filled = registry.fill({"cfg": cfg[factory_name]}, validate=validate)["cfg"]
    711 filled = Config(filled)

File /usr/local/lib/python3.9/dist-packages/confection/__init__.py:759, in registry.resolve(cls, config, schema, overrides, validate)
    750 @classmethod
    751 def resolve(
    752     cls,
   (...)
    757     validate: bool = True,
    758 ) -> Dict[str, Any]:
--> 759     resolved, _ = cls._make(
    760         config, schema=schema, overrides=overrides, validate=validate, resolve=True
    761     )
    762     return resolved

File /usr/local/lib/python3.9/dist-packages/confection/__init__.py:808, in registry._make(cls, config, schema, overrides, resolve, validate)
    806 if not is_interpolated:
    807     config = Config(orig_config).interpolate()
--> 808 filled, _, resolved = cls._fill(
    809     config, schema, validate=validate, overrides=overrides, resolve=resolve
    810 )
    811 filled = Config(filled, section_order=section_order)
    812 # Check that overrides didn't include invalid properties not in config

File /usr/local/lib/python3.9/dist-packages/confection/__init__.py:863, in registry._fill(cls, config, schema, validate, resolve, parent, overrides)
    861     schema.__fields__[key] = copy_model_field(field, Any)
    862 promise_schema = cls.make_promise_schema(value, resolve=resolve)
--> 863 filled[key], validation[v_key], final[key] = cls._fill(
    864     value,
    865     promise_schema,
    866     validate=validate,
    867     resolve=resolve,
    868     parent=key_parent,
    869     overrides=overrides,
    870 )
    871 reg_name, func_name = cls.get_constructor(final[key])
    872 args, kwargs = cls.parse_args(final[key])

File /usr/local/lib/python3.9/dist-packages/confection/__init__.py:880, in registry._fill(cls, config, schema, validate, resolve, parent, overrides)
    877     getter = cls.get(reg_name, func_name)
    878     # We don't want to try/except this and raise our own error
    879     # here, because we want the traceback if the function fails.
--> 880     getter_result = getter(*args, **kwargs)
    881 else:
    882     # We're not resolving and calling the function, so replace
    883     # the getter_result with a Promise class
    884     getter_result = Promise(
    885         registry=reg_name, name=func_name, args=args, kwargs=kwargs
    886     )

File /usr/local/lib/python3.9/dist-packages/spacy_llm/models/hf/dolly.py:63, in dolly_hf(name, config_init, config_run)
     50 @registry.llm_models("spacy.Dolly.v1")
     51 def dolly_hf(
     52     name: Dolly.MODEL_NAMES,
     53     config_init: Optional[Dict[str, Any]] = SimpleFrozenDict(),
     54     config_run: Optional[Dict[str, Any]] = SimpleFrozenDict(),
     55 ) -> Callable[[Iterable[str]], Iterable[str]]:
     56     """Generates Dolly instance that can execute a set of prompts and return the raw responses.
     57     name (Literal): Name of the Dolly model. Has to be one of Dolly.get_model_names().
     58     config_init (Optional[Dict[str, Any]]): HF config for initializing the model.
   (...)
     61         the raw responses.
     62     """
---> 63     return Dolly(name=name, config_init=config_init, config_run=config_run)

File /usr/local/lib/python3.9/dist-packages/spacy_llm/models/hf/base.py:39, in HuggingFace.__init__(self, name, config_init, config_run)
     37 HuggingFace.check_installation()
     38 self._check_model()
---> 39 self._model = self.init_model()

File /usr/local/lib/python3.9/dist-packages/spacy_llm/models/hf/dolly.py:17, in Dolly.init_model(self)
     13 def init_model(self) -> Any:
     14     """Sets up HF model and needed utilities.
     15     RETURNS (Any): HF model.
     16     """
---> 17     return transformers.pipeline(
     18         model=self._name, return_full_text=False, **self._config_init
     19     )

File /usr/local/lib/python3.9/dist-packages/transformers/pipelines/__init__.py:767, in pipeline(task, model, config, tokenizer, feature_extractor, framework, revision, use_fast, use_auth_token, device_map, torch_dtype, trust_remote_code, model_kwargs, pipeline_class, **kwargs)
    764 if feature_extractor is not None:
    765     kwargs["feature_extractor"] = feature_extractor
--> 767 return pipeline_class(model=model, framework=framework, task=task, **kwargs)

File ~/.cache/huggingface/modules/transformers_modules/databricks/dolly-v2-3b/f6c9be08f16fe4d3a719bee0a4a7c7415b5c65df/instruct_pipeline.py:74, in InstructionTextGenerationPipeline.__init__(self, do_sample, max_new_tokens, top_p, top_k, *args, **kwargs)
     61 def __init__(
     62     self, *args, do_sample: bool = True, max_new_tokens: int = 256, top_p: float = 0.92, top_k: int = 0, **kwargs
     63 ):
     64     """Initialize the pipeline
     65 
     66     Args:
   (...)
     72             Defaults to 0.
     73     """
---> 74     super().__init__(*args, do_sample=do_sample, max_new_tokens=max_new_tokens, top_p=top_p, top_k=top_k,
     75                      **kwargs)

File /usr/local/lib/python3.9/dist-packages/transformers/pipelines/base.py:768, in Pipeline.__init__(self, model, tokenizer, feature_extractor, modelcard, framework, task, args_parser, device, binary_output, **kwargs)
    766     self.device = device
    767 else:
--> 768     self.device = device if framework == "tf" else torch.device("cpu" if device < 0 else f"cuda:{device}")
    769 self.binary_output = binary_output
    771 # Special handling

TypeError: '<' not supported between instances of 'str' and 'int'
svlandeg commented 6 months ago

Hi!

Can you provide a self-contained code snippet that shows the error? I can't reproduce this.

For me, the original code as documented on the link you shared works:

from spacy_llm.util import assemble
nlp = assemble(config_file_path)
doc = nlp("You look gorgeous!")
print(doc.cats)

And the following works, too (attempting to replicate your code):

from pathlib import Path
from spacy.util import load_config
from spacy.lang.en import English

config_path = Path(config_file_path)
config = load_config(config_path, interpolate=False)
nlp = English.from_config(config)
doc = nlp("You look gorgeous!")
print(doc.cats)

Which leads me to believe that you're passing something invalid in your overrides. More specifically, it looks like you're passing the device as a string instead of an integer.
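For example (illustrative override key, adjust to your actual config), passing the device as a string ends up in the device < 0 comparison inside transformers' Pipeline.__init__ and raises exactly this TypeError, while an integer index works:

from spacy_llm.util import assemble

# A string device reproduces the TypeError on this transformers version:
# nlp = assemble("config.cfg", overrides={"components.llm.model.config_init.device": "cuda:0"})

# An integer GPU index (or -1 for CPU) is what this transformers version expects:
nlp = assemble("config.cfg", overrides={"components.llm.model.config_init.device": 0})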

tombedor commented 2 months ago

Hi, I am getting this same issue. My goal is to get a basic setup running on a rented Paperspace machine; the gist of my notebook is here:

https://gist.github.com/tombedor/1e988e238960a3c412304475c9bb96e8

tombedor commented 2 months ago

Using an image with Python 3.11 fixed the problem.