materialsproject / api

New API client for the Materials Project
https://materialsproject.github.io/api/
Other
113 stars 41 forks source link

mpr.tasks.get_data_by_id shows errors #624

Open simonnier opened 2 years ago

simonnier commented 2 years ago

As of today, calling mpr.tasks.get_data_by_id raises an error and returns no result. Please take a look at it. Thank you.

---------------------------------------------------------------------------
ValidationError                           Traceback (most recent call last)
Input In [14], in <cell line: 1>()
----> 1 mpr.tasks.get_data_by_id(mp_id)

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\mp_api\core\client.py:772, in BaseRester.get_data_by_id(self, document_id, fields)
    769 results = []  # type: List
    771 try:
--> 772     results = self._query_resource_data(
    773         criteria=criteria,
    774         fields=fields,
    775         suburl=document_id,  # type: ignore
    776     )
    777 except MPRestError:
    779     if self.primary_key == "material_id":
    780         # see if the material_id has changed, perhaps a task_id was supplied
    781         # this should likely be re-thought

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\mp_api\core\client.py:730, in BaseRester._query_resource_data(self, criteria, fields, suburl, use_document_model)
    709 def _query_resource_data(
    710     self,
    711     criteria: Optional[Dict] = None,
   (...)
    714     use_document_model: Optional[bool] = None,
    715 ) -> Union[List[T], List[Dict]]:
    716     """
    717     Query the endpoint for a list of documents without associated meta information. Only
    718     returns a single page of results.
   (...)
    727         A list of documents
    728     """
--> 730     return self._query_resource(  # type: ignore
    731         criteria=criteria,
    732         fields=fields,
    733         suburl=suburl,
    734         use_document_model=use_document_model,
    735         chunk_size=1000,
    736         num_chunks=1,
    737     ).get("data")

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\mp_api\core\client.py:288, in BaseRester._query_resource(self, criteria, fields, suburl, use_document_model, parallel_param, num_chunks, chunk_size)
    285         if not url.endswith("/"):
    286             url += "/"
--> 288     data = self._submit_requests(
    289         url=url,
    290         criteria=criteria,
    291         use_document_model=use_document_model,
    292         parallel_param=parallel_param,
    293         num_chunks=num_chunks,
    294         chunk_size=chunk_size,
    295     )
    297     return data
    299 except RequestException as ex:

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\mp_api\core\client.py:387, in BaseRester._submit_requests(self, url, criteria, use_document_model, parallel_param, num_chunks, chunk_size)
    383 remaining_docs_avail = {}
    385 initial_params_list = [{"url": url, "verify": True, "params": copy(crit)} for crit in new_criteria]
--> 387 initial_data_tuples = self._multi_thread(use_document_model, initial_params_list)
    389 for data, subtotal, crit_ind in initial_data_tuples:
    391     subtotals.append(subtotal)

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\mp_api\core\client.py:592, in BaseRester._multi_thread(self, use_document_model, params_list, progress_bar)
    588 finished, futures = wait(futures, return_when=FIRST_COMPLETED)
    590 for future in finished:
--> 592     data, subtotal = future.result()
    594     if progress_bar is not None:
    595         progress_bar.update(len(data["data"]))

File ~\AppData\Local\Programs\Python\Python310\lib\concurrent\futures\_base.py:439, in Future.result(self, timeout)
    437     raise CancelledError()
    438 elif self._state == FINISHED:
--> 439     return self.__get_result()
    441 self._condition.wait(timeout)
    443 if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:

File ~\AppData\Local\Programs\Python\Python310\lib\concurrent\futures\_base.py:391, in Future.__get_result(self)
    389 if self._exception:
    390     try:
--> 391         raise self._exception
    392     finally:
    393         # Break a reference cycle with the exception in self._exception
    394         self = None

File ~\AppData\Local\Programs\Python\Python310\lib\concurrent\futures\thread.py:58, in _WorkItem.run(self)
     55     return
     57 try:
---> 58     result = self.fn(*self.args, **self.kwargs)
     59 except BaseException as exc:
     60     self.future.set_exception(exc)

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\mp_api\core\client.py:641, in BaseRester._submit_request_and_process(self, url, verify, params, use_document_model)
    638 # other sub-urls may use different document models
    639 # the client does not handle this in a particularly smart way currently
    640 if self.document_model and use_document_model:
--> 641     raw_doc_list = [self.document_model.parse_obj(d) for d in data["data"]]  # type: ignore
    643     # Temporarily removed until user-testing completed
    644     # data["data"] = self._generate_returned_model(raw_doc_list)
    646     data["data"] = raw_doc_list

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\mp_api\core\client.py:641, in <listcomp>(.0)
    638 # other sub-urls may use different document models
    639 # the client does not handle this in a particularly smart way currently
    640 if self.document_model and use_document_model:
--> 641     raw_doc_list = [self.document_model.parse_obj(d) for d in data["data"]]  # type: ignore
    643     # Temporarily removed until user-testing completed
    644     # data["data"] = self._generate_returned_model(raw_doc_list)
    646     data["data"] = raw_doc_list

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pydantic\main.py:578, in BaseModel.parse_obj(cls, obj)
    576         exc = TypeError(f'{cls.__name__} expected dict not {obj.__class__.__name__}')
    577         raise ValidationError([ErrorWrapper(exc, loc=ROOT_KEY)], cls) from e
--> 578 return cls(**obj)

File ~\AppData\Local\Programs\Python\Python310\lib\site-packages\pydantic\main.py:406, in BaseModel.__init__(__pydantic_self__, **data)
    404 values, fields_set, validation_error = validate_model(__pydantic_self__.__class__, data)
    405 if validation_error:
--> 406     raise validation_error
    407 try:
    408     object_setattr(__pydantic_self__, '__dict__', values)

ValidationError: 2 validation errors for TaskDoc
custodian -> 0 -> job
  value is not a valid dict (type=type_error.dict)
custodian -> 1 -> job
  value is not a valid dict (type=type_error.dict)
pabigail commented 2 years ago

I am also having issues with mpr.tasks.get_data_by_id, where it seems to be calling a deprecated module (pymatgen.io.vaspio_set):

ModuleNotFoundError                       Traceback (most recent call last)
Input In [3], in <cell line: 1>()
      1 with MPRester(MAPI_KEY) as mpr:
----> 2     task = mpr.tasks.get_data_by_id(TASK_ID)

File ~/opt/anaconda3/envs/branch_spline/lib/python3.9/site-packages/mp_api/core/client.py:772, in BaseRester.get_data_by_id(self, document_id, fields)
    769 results = []  # type: List
    771 try:
--> 772     results = self._query_resource_data(
    773         criteria=criteria,
    774         fields=fields,
    775         suburl=document_id,  # type: ignore
    776     )
    777 except MPRestError:
    779     if self.primary_key == "material_id":
    780         # see if the material_id has changed, perhaps a task_id was supplied
    781         # this should likely be re-thought

File ~/opt/anaconda3/envs/branch_spline/lib/python3.9/site-packages/mp_api/core/client.py:730, in BaseRester._query_resource_data(self, criteria, fields, suburl, use_document_model)
    709 def _query_resource_data(
    710     self,
    711     criteria: Optional[Dict] = None,
   (...)
    714     use_document_model: Optional[bool] = None,
    715 ) -> Union[List[T], List[Dict]]:
    716     """
    717     Query the endpoint for a list of documents without associated meta information. Only
    718     returns a single page of results.
   (...)
    727         A list of documents
    728     """
--> 730     return self._query_resource(  # type: ignore
    731         criteria=criteria,
    732         fields=fields,
    733         suburl=suburl,
    734         use_document_model=use_document_model,
    735         chunk_size=1000,
    736         num_chunks=1,
    737     ).get("data")

File ~/opt/anaconda3/envs/branch_spline/lib/python3.9/site-packages/mp_api/core/client.py:288, in BaseRester._query_resource(self, criteria, fields, suburl, use_document_model, parallel_param, num_chunks, chunk_size)
    285         if not url.endswith("/"):
    286             url += "/"
--> 288     data = self._submit_requests(
    289         url=url,
    290         criteria=criteria,
    291         use_document_model=use_document_model,
    292         parallel_param=parallel_param,
    293         num_chunks=num_chunks,
    294         chunk_size=chunk_size,
    295     )
    297     return data
    299 except RequestException as ex:

File ~/opt/anaconda3/envs/branch_spline/lib/python3.9/site-packages/mp_api/core/client.py:387, in BaseRester._submit_requests(self, url, criteria, use_document_model, parallel_param, num_chunks, chunk_size)
    383 remaining_docs_avail = {}
    385 initial_params_list = [{"url": url, "verify": True, "params": copy(crit)} for crit in new_criteria]
--> 387 initial_data_tuples = self._multi_thread(use_document_model, initial_params_list)
    389 for data, subtotal, crit_ind in initial_data_tuples:
    391     subtotals.append(subtotal)

File ~/opt/anaconda3/envs/branch_spline/lib/python3.9/site-packages/mp_api/core/client.py:592, in BaseRester._multi_thread(self, use_document_model, params_list, progress_bar)
    588 finished, futures = wait(futures, return_when=FIRST_COMPLETED)
    590 for future in finished:
--> 592     data, subtotal = future.result()
    594     if progress_bar is not None:
    595         progress_bar.update(len(data["data"]))

File ~/opt/anaconda3/envs/branch_spline/lib/python3.9/concurrent/futures/_base.py:439, in Future.result(self, timeout)
    437     raise CancelledError()
    438 elif self._state == FINISHED:
--> 439     return self.__get_result()
    441 self._condition.wait(timeout)
    443 if self._state in [CANCELLED, CANCELLED_AND_NOTIFIED]:

File ~/opt/anaconda3/envs/branch_spline/lib/python3.9/concurrent/futures/_base.py:391, in Future.__get_result(self)
    389 if self._exception:
    390     try:
--> 391         raise self._exception
    392     finally:
    393         # Break a reference cycle with the exception in self._exception
    394         self = None

File ~/opt/anaconda3/envs/branch_spline/lib/python3.9/concurrent/futures/thread.py:58, in _WorkItem.run(self)
     55     return
     57 try:
---> 58     result = self.fn(*self.args, **self.kwargs)
     59 except BaseException as exc:
     60     self.future.set_exception(exc)

File ~/opt/anaconda3/envs/branch_spline/lib/python3.9/site-packages/mp_api/core/client.py:634, in BaseRester._submit_request_and_process(self, url, verify, params, use_document_model)
    631 if response.status_code == 200:
    633     if self.monty_decode:
--> 634         data = json.loads(response.text, cls=MontyDecoder)
    635     else:
    636         data = json.loads(response.text)

File ~/opt/anaconda3/envs/branch_spline/lib/python3.9/json/__init__.py:359, in loads(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
    357 if parse_constant is not None:
    358     kw['parse_constant'] = parse_constant
--> 359 return cls(**kw).decode(s)

File ~/opt/anaconda3/envs/branch_spline/lib/python3.9/site-packages/monty/json.py:454, in MontyDecoder.decode(self, s)
    452 else:
    453     d = json.loads(s)
--> 454 return self.process_decoded(d)

File ~/opt/anaconda3/envs/branch_spline/lib/python3.9/site-packages/monty/json.py:433, in MontyDecoder.process_decoded(self, d)
    430         elif (bson is not None) and modname == "bson.objectid" and classname == "ObjectId":
    431             return bson.objectid.ObjectId(d["oid"])
--> 433     return {self.process_decoded(k): self.process_decoded(v) for k, v in d.items()}
    435 if isinstance(d, list):
    436     return [self.process_decoded(x) for x in d]

File ~/opt/anaconda3/envs/branch_spline/lib/python3.9/site-packages/monty/json.py:433, in <dictcomp>(.0)
    430         elif (bson is not None) and modname == "bson.objectid" and classname == "ObjectId":
    431             return bson.objectid.ObjectId(d["oid"])
--> 433     return {self.process_decoded(k): self.process_decoded(v) for k, v in d.items()}
    435 if isinstance(d, list):
    436     return [self.process_decoded(x) for x in d]

File ~/opt/anaconda3/envs/branch_spline/lib/python3.9/site-packages/monty/json.py:436, in MontyDecoder.process_decoded(self, d)
    433     return {self.process_decoded(k): self.process_decoded(v) for k, v in d.items()}
    435 if isinstance(d, list):
--> 436     return [self.process_decoded(x) for x in d]
    438 return d

File ~/opt/anaconda3/envs/branch_spline/lib/python3.9/site-packages/monty/json.py:436, in <listcomp>(.0)
    433     return {self.process_decoded(k): self.process_decoded(v) for k, v in d.items()}
    435 if isinstance(d, list):
--> 436     return [self.process_decoded(x) for x in d]
    438 return d

File ~/opt/anaconda3/envs/branch_spline/lib/python3.9/site-packages/monty/json.py:433, in MontyDecoder.process_decoded(self, d)
    430         elif (bson is not None) and modname == "bson.objectid" and classname == "ObjectId":
    431             return bson.objectid.ObjectId(d["oid"])
--> 433     return {self.process_decoded(k): self.process_decoded(v) for k, v in d.items()}
    435 if isinstance(d, list):
    436     return [self.process_decoded(x) for x in d]

File ~/opt/anaconda3/envs/branch_spline/lib/python3.9/site-packages/monty/json.py:433, in <dictcomp>(.0)
    430         elif (bson is not None) and modname == "bson.objectid" and classname == "ObjectId":
    431             return bson.objectid.ObjectId(d["oid"])
--> 433     return {self.process_decoded(k): self.process_decoded(v) for k, v in d.items()}
    435 if isinstance(d, list):
    436     return [self.process_decoded(x) for x in d]

File ~/opt/anaconda3/envs/branch_spline/lib/python3.9/site-packages/monty/json.py:436, in MontyDecoder.process_decoded(self, d)
    433     return {self.process_decoded(k): self.process_decoded(v) for k, v in d.items()}
    435 if isinstance(d, list):
--> 436     return [self.process_decoded(x) for x in d]
    438 return d

File ~/opt/anaconda3/envs/branch_spline/lib/python3.9/site-packages/monty/json.py:436, in <listcomp>(.0)
    433     return {self.process_decoded(k): self.process_decoded(v) for k, v in d.items()}
    435 if isinstance(d, list):
--> 436     return [self.process_decoded(x) for x in d]
    438 return d

File ~/opt/anaconda3/envs/branch_spline/lib/python3.9/site-packages/monty/json.py:433, in MontyDecoder.process_decoded(self, d)
    430         elif (bson is not None) and modname == "bson.objectid" and classname == "ObjectId":
    431             return bson.objectid.ObjectId(d["oid"])
--> 433     return {self.process_decoded(k): self.process_decoded(v) for k, v in d.items()}
    435 if isinstance(d, list):
    436     return [self.process_decoded(x) for x in d]

File ~/opt/anaconda3/envs/branch_spline/lib/python3.9/site-packages/monty/json.py:433, in <dictcomp>(.0)
    430         elif (bson is not None) and modname == "bson.objectid" and classname == "ObjectId":
    431             return bson.objectid.ObjectId(d["oid"])
--> 433     return {self.process_decoded(k): self.process_decoded(v) for k, v in d.items()}
    435 if isinstance(d, list):
    436     return [self.process_decoded(x) for x in d]

File ~/opt/anaconda3/envs/branch_spline/lib/python3.9/site-packages/monty/json.py:413, in MontyDecoder.process_decoded(self, d)
    411 data = {k: v for k, v in d.items() if not k.startswith("@")}
    412 if hasattr(cls_, "from_dict"):
--> 413     return cls_.from_dict(data)
    414 if pydantic is not None and issubclass(cls_, pydantic.BaseModel):
    415     return cls_(**data)

File ~/opt/anaconda3/envs/branch_spline/lib/python3.9/site-packages/monty/json.py:179, in MSONable.from_dict(cls, d)
    173 @classmethod
    174 def from_dict(cls, d):
    175     """
    176     :param d: Dict representation.
    177     :return: MSONable class.
    178     """
--> 179     decoded = {k: MontyDecoder().process_decoded(v) for k, v in d.items() if not k.startswith("@")}
    180     return cls(**decoded)

File ~/opt/anaconda3/envs/branch_spline/lib/python3.9/site-packages/monty/json.py:179, in <dictcomp>(.0)
    173 @classmethod
    174 def from_dict(cls, d):
    175     """
    176     :param d: Dict representation.
    177     :return: MSONable class.
    178     """
--> 179     decoded = {k: MontyDecoder().process_decoded(v) for k, v in d.items() if not k.startswith("@")}
    180     return cls(**decoded)

File ~/opt/anaconda3/envs/branch_spline/lib/python3.9/site-packages/monty/json.py:408, in MontyDecoder.process_decoded(self, d)
    405 if modname == "uuid" and classname == "UUID":
    406     return UUID(d["string"])
--> 408 mod = __import__(modname, globals(), locals(), [classname], 0)
    409 if hasattr(mod, classname):
    410     cls_ = getattr(mod, classname)

ModuleNotFoundError: No module named 'pymatgen.io.vaspio_set'
munrojm commented 2 years ago

Thank you, this is something I will work on fixing. For now, if you pass only the fields you actually need from the task document via the fields argument (e.g. mpr.tasks.get_data_by_id(task_id, fields=[...])), only those fields should be projected out and you shouldn't run into these errors.