aphp / edsnlp

Modular, fast NLP framework, compatible with PyTorch and spaCy, offering tailored support for French clinical notes.
https://aphp.github.io/edsnlp/
BSD 3-Clause "New" or "Revised" License

Drop codecov #292

Closed by percevalw 3 months ago

percevalw commented 3 months ago

Checklist

github-actions[bot] commented 3 months ago

Coverage Report

Name    Stmts    Miss    Cover
edsnlp/connectors/omop.py

Missing coverage at line 69

         if not isinstance(row.ents, list):
-             continue
Missing coverage at line 87
             else:
-                 doc.spans[span.label_].append(span)
Missing coverage at line 127
     if df.note_id.isna().any():
-         df["note_id"] = range(len(df))
Missing coverage at line 171
         if i > 0:
-             df.term_modifiers += ";"
         df.term_modifiers += ext + "=" + df[ext].astype(str)

Stmts: 84, Miss: 4, Cover: 95%
edsnlp/core/lazy_collection.py

Missing coverage at line 433

         for name, pipe, *_ in self.torch_components():
-             pipe.to(device)
         return self

Stmts: 146, Miss: 1, Cover: 99%
edsnlp/core/pipeline.py

Missing coverage at line 533

             if name in exclude:
-                 continue
             if name not in components:

Stmts: 358, Miss: 1, Cover: 99%
edsnlp/core/registries.py

Missing coverage at line 73

             if obj.error is not None:
-                 raise obj.error

Stmts: 157, Miss: 1, Cover: 99%
edsnlp/core/torch_component.py

Missing coverage at line 42

     elif not isinstance(batch, dict):
-         return id(batch)
     if "__batch_hash__" in batch:
Missing coverage at line 82
         if cache_key in cache:
-             return cache[cache_key]
         res = fn(self, doc)
Missing coverage at line 397
             if hasattr(self, "compiled"):
-                 res = self.compiled(batch)
             else:
Missing coverage at line 473
         """
-         return self.batch_process([doc])[0]

Stmts: 200, Miss: 4, Cover: 98%
edsnlp/data/base.py

Missing coverage at lines 174-180

     """
-     data = LazyCollection.ensure_lazy(data)
-     if converter:
-         converter, kwargs = get_doc2dict_converter(converter, kwargs)
-         data = data.map(converter, kwargs=kwargs)
- 
-     return data

Stmts: 39, Miss: 5, Cover: 87%
edsnlp/data/converters.py

Missing coverage at line 654

     if isinstance(converter, type) or kwargs_to_init:
-         return converter(**kwargs), {}
     return converter, validate_kwargs(converter, kwargs)

Stmts: 191, Miss: 1, Cover: 99%
edsnlp/data/json.py

Missing coverage at line 96

                     if not is_jsonl:
-                         obj[FILENAME] = filename
                     results.append(obj)
Missing coverage at line 98
                     results.append(obj)
-             except Exception:
                 raise Exception(f"Cannot parse {filename}")

Stmts: 107, Miss: 2, Cover: 98%
edsnlp/data/polars.py

Missing coverage at line 26

         if hasattr(data, "collect"):
-             data = data.collect()
         assert isinstance(data, pl.DataFrame)

Stmts: 44, Miss: 1, Cover: 98%
edsnlp/data/standoff.py

Missing coverage at line 43

     def __init__(self, ann_file, line):
-         super().__init__(f"File {ann_file}, unrecognized Brat line {line}")
Missing coverage at line 83
     if not len(ann_paths):
-         return {
             "text": text,
Missing coverage at line 197
                         )
-                 except Exception:
                     raise Exception(

Stmts: 172, Miss: 3, Cover: 98%
edsnlp/extensions.py

Missing coverage at line 13

     elif isinstance(dt, pendulum.DateTime):
-         pass
     elif isinstance(dt, str):

Stmts: 24, Miss: 1, Cover: 96%
edsnlp/language.py

Missing coverage at line 103

             if last != begin:
-                 logger.warning(
                     "Missed some characters during"

Stmts: 51, Miss: 1, Cover: 98%
edsnlp/matchers/simstring.py

Missing coverage at line 280

     if custom:
-         attr = attr[1:].lower()
Missing coverage at line 295
             if custom:
-                 token_text = getattr(token._, attr)
             else:

Stmts: 146, Miss: 2, Cover: 99%
edsnlp/optimization.py

Missing coverage at line 32

     def param_groups(self, value):
-         self.optim.param_groups = value
Missing coverage at line 36
     def state(self):
-         return self.optim.state
Missing coverage at line 40
     def state(self, value):
-         self.optim.state = value
Missing coverage at line 89
     def __init__(self, groups):
-         self.param_groups = groups

Stmts: 77, Miss: 4, Cover: 95%
edsnlp/patch_spacy.py

Missing coverage at lines 67-69

             # if module is reloaded.
-             existing_func = registry.factories.get(internal_name)
-             if not util.is_same_func(factory_func, existing_func):
                 raise ValueError(

Stmts: 31, Miss: 2, Cover: 94%
edsnlp/pipes/core/contextual_matcher/contextual_matcher.py

Missing coverage at line 94

             )
-             label = label_name
         if label is None:
Missing coverage at line 343
                 if assigned is None:
-                     continue
                 if replace_entity:

Stmts: 143, Miss: 2, Cover: 99%
edsnlp/pipes/core/contextual_matcher/models.py

Missing coverage at lines 19-23

     if isinstance(v, list):
-         assert (
-             len(v) == 2
-         ), "`window` should be a tuple/list of two integer, or a single integer"
-         v = tuple(v)
     if isinstance(v, int):

Stmts: 115, Miss: 2, Cover: 98%
edsnlp/pipes/core/endlines/endlines.py

Missing coverage at lines 151-155

         if end_lines_model is None:
-             path = build_path(__file__, "base_model.pkl")
- 
-             with open(path, "rb") as inp:
-                 self.model = pickle.load(inp)
         elif isinstance(end_lines_model, str):
Missing coverage at lines 158-160
                 self.model = pickle.load(inp)
-         elif isinstance(end_lines_model, EndLinesModel):
-             self.model = end_lines_model
         else:
Missing coverage at line 191
         ):
-             return "ENUMERATION"
Missing coverage at line 278
         if np.isnan(sigma):
-             sigma = 1

Stmts: 87, Miss: 7, Cover: 92%
edsnlp/pipes/core/normalizer/__init__.py

Missing coverage at line 7

 def excluded_or_space_getter(t):
-     return t.is_space or t.tag_ == "EXCLUDED"

Stmts: 5, Miss: 1, Cover: 80%
edsnlp/pipes/misc/consultation_dates/consultation_dates.py

Missing coverage at line 131

         else:
-             self.date_matcher = None
Missing coverage at line 134
         if not consultation_mention:
-             consultation_mention = []
         elif consultation_mention is True:

Stmts: 48, Miss: 2, Cover: 96%
edsnlp/pipes/misc/dates/dates.py

Missing coverage at line 222

         if isinstance(absolute, str):
-             absolute = [absolute]
         if isinstance(relative, str):
Missing coverage at line 224
         if isinstance(relative, str):
-             relative = [relative]
         if isinstance(duration, str):
Missing coverage at line 226
         if isinstance(duration, str):
-             relative = [duration]
         if isinstance(false_positive, str):
Missing coverage at lines 327-336
             if self.merge_mode == "align":
-                 alignments = align_spans(matches, spans, sort_by_overlap=True)
-                 matches = []
-                 for span, aligned in zip(spans, alignments):
-                     if len(aligned):
-                         old = aligned[0]
-                         span.label_ = old.label_
-                         span._.set(self.date_label, old._.get(self.date_label))
-                         span._.set(self.duration_label, old._.get(self.duration_label))
-                         matches.append(span)
Missing coverage at line 417
             elif d1 in seen or v1.bound is None or v2.bound is None:
-                 continue
Missing coverage at lines 428-430
                 if v1.mode == Mode.DURATION:
-                     m1 = Bound.FROM if v2.bound == Bound.UNTIL else Bound.UNTIL
-                     m2 = v2.mode or Bound.FROM
                 elif v2.mode == Mode.DURATION:

Stmts: 147, Miss: 15, Cover: 90%
edsnlp/pipes/misc/dates/models.py

Missing coverage at line 123

                     else:
-                         d["month"] = note_datetime.month
                 if self.day is None:
Missing coverage at lines 127-133
             else:
-                 if self.year is None:
-                     return None
-                 if self.month is None:
-                     d["month"] = default_month
-                 if self.day is None:
-                     d["day"] = default_day
Missing coverage at lines 136-138
                 return pendulum.datetime(**d, tz=tz)
-             except ValueError:
-                 return None
Missing coverage at lines 146-153
     ) -> Optional[pendulum.Duration]:
-         if note_datetime and not isinstance(note_datetime, NaTType):
-             note_datetime = pendulum.instance(note_datetime)
-             dt = self.to_datetime(note_datetime=note_datetime, **kwargs)
-             delta = dt.diff(note_datetime)
-             return delta.as_interval()
-         else:
-             return None
Missing coverage at line 168
         if self.second:
-             norm += f"{self.second:02}s"
Missing coverage at line 181
     def __str__(self):
-         return self.norm()
Missing coverage at line 277
             if td.in_seconds() > 0:
-                 norm = f"+{norm}"
Missing coverage at line 307
     def __str__(self):
-         return self.norm()

Stmts: 166, Miss: 19, Cover: 89%
edsnlp/pipes/misc/measurements/measurements.py

Missing coverage at lines 146-148

     def __getitem__(self, item: int):
-         assert isinstance(item, int)
-         return [self][item]
Missing coverage at lines 159-162
     def __eq__(self, other: Any):
-         if isinstance(other, SimpleMeasurement):
-             return self.convert_to(other.unit) == other.value
-         return False
Missing coverage at line 165
         if other.unit == self.unit:
-             return self.__class__(self.value + other.value, self.unit, self.registry)
         return self.__class__(
Missing coverage at line 194
     def verify(cls, ent):
-         return True
Missing coverage at line 233
     def __lt__(self, other: Union[SimpleMeasurement, "RangeMeasurement"]):
-         return max(self.convert_to(other.unit)) < min((part.value for part in other))
Missing coverage at line 244
             return self.convert_to(other.unit) == other.value
-         return False
Missing coverage at line 258
     def verify(cls, ent):
-         return True
Missing coverage at line 498
         if isinstance(measurements, str):
-             measurements = [measurements]
         if isinstance(measurements, (list, tuple)):
Missing coverage at line 711
         if snippet.end != last and doclike.doc[last: snippet.end].text.strip() == "":
-             pseudo.append("w")
         pseudo = "".join(pseudo)
Missing coverage at lines 875-877
                         unit_norm = self.unit_followers[unit_before.label_]
-                 except (KeyError, AttributeError, IndexError):
-                     pass
Missing coverage at line 920
             ):
-                 ent = doc[unit_text.start: number.end]
             else:
Missing coverage at lines 927-929
                 dims = self.unit_registry.parse_unit(unit_norm)[0]
-             except KeyError:
-                 continue
Missing coverage at lines 1034-1036
                     last._.set(last.label_, new_value)
-                 except (AttributeError, TypeError):
-                     merged.append(ent)
             else:

Stmts: 388, Miss: 19, Cover: 95%
edsnlp/pipes/misc/sections/sections.py

Missing coverage at line 126

         if sections is None:
-             sections = patterns.sections
         sections = dict(sections)

Stmts: 45, Miss: 1, Cover: 98%
edsnlp/pipes/misc/tables/tables.py

Missing coverage at line 129

         else:
-             self.tables_pattern = tables_pattern
Missing coverage at line 134
         else:
-             self.sep = sep_pattern

Stmts: 28, Miss: 2, Cover: 93%
edsnlp/pipes/ner/adicap/models.py

Missing coverage at line 15

     def norm(self) -> str:
-         return self.code
Missing coverage at line 18
     def __str__(self):
-         return self.norm()

Stmts: 14, Miss: 2, Cover: 86%
edsnlp/pipes/ner/disorders/cerebrovascular_accident/cerebrovascular_accident.py

Missing coverage at lines 110-112

             if span._.source == "ischemia":
-                 if "brain" not in span._.assigned.keys():
-                     continue

Stmts: 17, Miss: 2, Cover: 88%
edsnlp/pipes/ner/disorders/ckd/ckd.py

Missing coverage at lines 119-122

             dfg_value = float(dfg_span.text.replace(",", ".").strip())
-         except ValueError:
-             logger.trace(f"DFG value couldn't be extracted from {dfg_span.text}")
-             return False

Stmts: 29, Miss: 3, Cover: 90%
edsnlp/pipes/ner/disorders/connective_tissue_disease/connective_tissue_disease.py

Missing coverage at line 102

                 # Huge change of FP / Title section
-                 continue

Stmts: 14, Miss: 1, Cover: 93%
edsnlp/pipes/ner/disorders/diabetes/diabetes.py

Missing coverage at line 133

                 # Mostly FP
-                 continue
Missing coverage at line 136
             elif self.has_far_complications(span):
-                 span._.status = 2
Missing coverage at line 148
         if next(iter(self.complication_matcher(context)), None) is not None:
-             return True
         return False

Stmts: 31, Miss: 3, Cover: 90%
edsnlp/pipes/ner/disorders/peripheral_vascular_disease/peripheral_vascular_disease.py

Missing coverage at line 106

                 if "peripheral" not in span._.assigned.keys():
-                     continue

Stmts: 15, Miss: 1, Cover: 93%
edsnlp/pipes/ner/disorders/solid_tumor/solid_tumor.py

Missing coverage at lines 114-117

         if use_tnm:
-             from edsnlp.pipes.ner.tnm import TNM
- 
-             self.tnm = TNM(nlp, pattern=None, attr="TEXT")
Missing coverage at lines 119-129
     def process_tnm(self, doc):
-         spans = self.tnm.process(doc)
-         spans = self.tnm.parse(spans)
- 
-         for span in spans:
-             span.label_ = "solid_tumor"
-             span._.source = "tnm"
-             metastasis = span._.value.dict().get("metastasis", "0")
-             if metastasis == "1":
-                 span._.status = 2
-             yield span
Missing coverage at line 149
         if self.use_tnm:
-             yield from self.process_tnm(doc)

Stmts: 35, Miss: 12, Cover: 66%
edsnlp/pipes/ner/scores/base_score.py

Missing coverage at line 154

             if value is None:
-                 continue
             normalized_value = self.score_normalization(value)

Stmts: 47, Miss: 1, Cover: 98%
edsnlp/pipes/ner/scores/charlson/patterns.py

Missing coverage at lines 21-23

             return int(extracted_score)
-     except ValueError:
-         return None

Stmts: 13, Miss: 2, Cover: 85%
edsnlp/pipes/ner/scores/elston_ellis/patterns.py

Missing coverage at line 26

         if x <= 5:
-             return 1
Missing coverage at lines 32-36
         else:
-             return 3
- 
-     except ValueError:
-         return None

Stmts: 21, Miss: 4, Cover: 81%
edsnlp/pipes/ner/scores/sofa/sofa.py

Missing coverage at line 32

             if not assigned:
-                 continue
             if assigned.get("method_max") is not None:
Missing coverage at line 40
             else:
-                 method = "Non précisée"

Stmts: 25, Miss: 2, Cover: 92%
edsnlp/pipes/ner/tnm/model.py

Missing coverage at line 139

     def __str__(self):
-         return self.norm()
Missing coverage at line 163
             )
-             exclude_unset = skip_defaults

Stmts: 104, Miss: 2, Cover: 98%
edsnlp/pipes/ner/tnm/tnm.py

Missing coverage at lines 156-158

                 value = TNM.parse_obj(groupdict)
-             except ValidationError:
-                 value = TNM.parse_obj({})

Stmts: 44, Miss: 2, Cover: 95%
edsnlp/pipes/qualifiers/base.py

Missing coverage at line 17

     if normalizer and not normalizer.lowercase:
-         logger.warning(
             "You have chosen the NORM attribute, but disabled lowercasing "

Stmts: 44, Miss: 1, Cover: 98%
edsnlp/pipes/qualifiers/family/family.py

Missing coverage at line 21

     else:
-         return None

Stmts: 59, Miss: 1, Cover: 98%
edsnlp/pipes/qualifiers/history/history.py

Missing coverage at lines 20-26

 def history_getter(token: Union[Token, Span]) -> Optional[str]:
-     if token._.history is True:
-         return "ATCD"
-     elif token._.history is False:
-         return "CURRENT"
-     else:
-         return None
Missing coverage at lines 302-308
                 note_datetime = note_datetime.set(tz="Europe/Paris")
-             except ValueError:
-                 logger.debug(
-                     "note_datetime must be a datetime objects. "
-                     "Skipping history qualification from note_datetime."
-                 )
-                 note_datetime = None
Missing coverage at lines 314-320
                 birth_datetime = birth_datetime.set(tz="Europe/Paris")
-             except ValueError:
-                 logger.debug(
-                     "birth_datetime must be a datetime objects. "
-                     "Skipping history qualification from birth date."
-                 )
-                 birth_datetime = None
Missing coverage at lines 386-389
                         )
-                     except ValueError as e:
-                         absolute_date = None
-                         logger.warning(
                             "In doc {}, the following date {} raises this error: {}. "

Stmts: 155, Miss: 14, Cover: 91%
edsnlp/pipes/qualifiers/hypothesis/hypothesis.py

Missing coverage at line 21

     else:
-         return None

Stmts: 74, Miss: 1, Cover: 99%
edsnlp/pipes/qualifiers/negation/negation.py

Missing coverage at line 22

     else:
-         return None

Stmts: 77, Miss: 1, Cover: 99%
edsnlp/pipes/qualifiers/reported_speech/reported_speech.py

Missing coverage at lines 18-22

         return "REPORTED"
-     elif token._.rspeech is False:
-         return "DIRECT"
-     else:
-         return None

Stmts: 73, Miss: 3, Cover: 96%
edsnlp/pipes/trainable/embeddings/span_pooler/span_pooler.py

Missing coverage at line 99

         for qlf in self.qualifiers or ():
-             if not Span.has_extension(qlf):
                 Span.set_extension(qlf, default=None)

Stmts: 65, Miss: 1, Cover: 98%
edsnlp/pipes/trainable/embeddings/transformer/transformer.py

Missing coverage at lines 152-169

         if new_tokens:
-             self.tokenizer.add_tokens(sorted(set(t[1] for t in new_tokens)))
-             original_normalizer = self.tokenizer.backend_tokenizer.normalizer
-             self.tokenizer.backend_tokenizer.normalizer = (
-                 tokenizers.normalizers.Sequence(
-                     [
-                         *(
-                             tokenizers.normalizers.Replace(
-                                 tokenizers.Regex(pattern), replacement
-                             )
-                             for pattern, replacement in new_tokens
-                         ),
-                         original_normalizer,
-                     ]
-                 )
-             )
-             # and add a new entry to the model's embeddings
-             self.transformer.resize_token_embeddings(
                 max(self.tokenizer.vocab.values()) + 1

Stmts: 140, Miss: 4, Cover: 97%
edsnlp/pipes/trainable/layers/crf.py

Missing coverage at line 21

     # out: 2 * N * O
-     return (log_A.unsqueeze(-1) + log_B.unsqueeze(-3)).logsumexp(-2)
Missing coverage at line 29
     # out: 2 * N * O
-     return (log_A.unsqueeze(-1) + log_B.unsqueeze(-3)).max(-2)
Missing coverage at line 97
         if learnable_transitions:
-             self.transitions = torch.nn.Parameter(
                 torch.zeros_like(forbidden_transitions, dtype=torch.float)
Missing coverage at line 107
         if learnable_transitions and with_start_end_transitions:
-             self.start_transitions = torch.nn.Parameter(
                 torch.zeros(num_tags, dtype=torch.float)
Missing coverage at line 116
         if learnable_transitions and with_start_end_transitions:
-             self.end_transitions = torch.nn.Parameter(
                 torch.zeros(num_tags, dtype=torch.float)

Stmts: 136, Miss: 5, Cover: 96%
edsnlp/pipes/trainable/ner_crf/ner_crf.py

Missing coverage at line 238

         if self.labels is not None and not self.infer_span_setter:
-             return
Missing coverage at lines 246-248
             if callable(self.target_span_getter):
-                 for ent in self.target_span_getter(doc):
-                     inferred_labels.add(ent.label_)
             else:

Stmts: 152, Miss: 3, Cover: 98%
edsnlp/pipes/trainable/span_linker/span_linker.py

Missing coverage at lines 375-377

             if self.reference_mode == "synonym":
-                 embeds = embeds.to(new_lin.weight)
-                 new_lin.weight.data = embeds
             else:

Stmts: 165, Miss: 2, Cover: 99%
edsnlp/processing/deprecated_pipe.py

Missing coverage at lines 162-164

         KoalasDataFrame = sys.modules["databricks.koalas.frame"].DataFrame
-     except (AttributeError, KeyError):
-         KoalasDataFrame = None
     is_koalas = KoalasDataFrame and isinstance(df, KoalasDataFrame)  # type: ignore
Missing coverage at lines 207-209
         def converter(doc):
-             res = results_extractor(doc)
-             return (
                 [{"note_id": doc._.note_id, **row} for row in res]

Stmts: 57, Miss: 4, Cover: 93%
edsnlp/processing/multiprocessing.py

Missing coverage at lines 231-235

 if os.environ.get("TORCH_SHARING_STRATEGY"):
-     try:
-         torch.multiprocessing.set_sharing_strategy(os.environ["TORCH_SHARING_STRATEGY"])
-     except NameError:
-         pass
Missing coverage at line 253
         def save_align_devices_hook(pickler: Any, obj: Any):
-             pickler.save_reduce(load_align_devices_hook, (obj.__dict__,), obj=obj)
Missing coverage at lines 256-263
         def load_align_devices_hook(state):
-             state["execution_device"] = MAP_LOCATION
-             new_obj = AlignDevicesHook.__new__(AlignDevicesHook)
-             new_obj.__dict__.update(state)
-             return new_obj
- 
-     except ImportError:
-         AlignDevicesHook = None
Missing coverage at line 455

-             new_batch_iterator = None
Missing coverage at line 934
         if isinstance(outputs, BaseException):
-             raise outputs
Missing coverage at line 1002
                 if v is not None:
-                     os.environ[k] = v

Stmts: 417, Miss: 14, Cover: 97%
edsnlp/processing/simple.py

Missing coverage at lines 37-39

         no_grad = sys.modules["torch"].no_grad
-     except (KeyError, AttributeError):
-         no_grad = nullcontext
     reader = lc.reader

Stmts: 59, Miss: 2, Cover: 97%
edsnlp/processing/spark.py

Missing coverage at line 50

         getActiveSession = SparkSession.getActiveSession
-     except AttributeError:

Stmts: 43, Miss: 1, Cover: 98%
edsnlp/train.py

Missing coverage at line 195

         else:
-             sample_len = lambda idx, noise=True: 1  # noqa: E731
Missing coverage at lines 262-268
             if total + num_tokens > self.grad_accumulation_max_tokens:
-                 print(
-                     f"Mini batch size was becoming too large: {total} > "
-                     f"{self.grad_accumulation_max_tokens} so it was split"
-                 )
-                 total = 0
-                 mini_batches.append([])
             total += num_tokens
Missing coverage at line 354
             if 0 <= self.limit <= count:
-                 break
             if not (len(doc) and (filter_fn is None or filter_fn(doc))):
Missing coverage at line 356
             if not (len(doc) and (filter_fn is None or filter_fn(doc))):
-                 continue
             count += 1
Missing coverage at lines 390-392
             for ent in doc.ents:
-                 for token in ent:
-                     token.is_sent_start = False
             for sent in doc.sents if doc.has_annotation("SENT_START") else (doc[:],):

Stmts: 249, Miss: 8, Cover: 97%
edsnlp/utils/bindings.py

Missing coverage at line 22

         return "." + path
-     return path

Stmts: 66, Miss: 1, Cover: 98%
edsnlp/utils/filter.py

Missing coverage at line 206

     if isinstance(label, int):
-         return [span for span in spans if span.label == label]
     else:

Stmts: 74, Miss: 1, Cover: 99%
edsnlp/utils/lazy_module.py

Missing coverage at line 46

             ):
-                 continue
             for import_node in node.body:

Stmts: 31, Miss: 1, Cover: 97%
edsnlp/utils/numbers.py

Missing coverage at line 34

     else:
-         string = s
     string = string.lower().strip()
Missing coverage at lines 38-41
         return int(string)
-     except ValueError:
-         parsed = DIGITS_MAPPINGS.get(string, None)
-         return parsed

Stmts: 16, Miss: 4, Cover: 75%
edsnlp/utils/package.py

Missing coverage at line 47

         if isinstance(obj, ModuleType):
-             module_name = obj.__name__
         else:
Missing coverage at line 59
         return package, version
-     except (ImportError, AttributeError):
         raise Exception(f"Cound not find package of type {obj}")
Missing coverage at lines 78-82
 def save_module(pickler, obj, *args, **kwargs):
-     package_name = get_package(obj)
-     if package_name is not None:
-         pickler.packages.add(package_name)
-     return dill_save_module(pickler, obj, *args, **kwargs)
Missing coverage at line 400
             # self.pipeline = edsnlp.load(self.pipeline)
-             shutil.copytree(
                 self.pipeline,

Stmts: 190, Miss: 7, Cover: 96%
edsnlp/utils/resources.py

Missing coverage at line 33

     if not verbs:
-         return conjugated_verbs

Stmts: 24, Miss: 1, Cover: 96%
edsnlp/utils/span_getters.py

Missing coverage at lines 52-55

         else:
-             for span in candidates:
-                 if span.label_ in span_filter:
-                     yield span
Missing coverage at lines 59-61
     if span_getter is None:
-         yield doc[:], None
-         return
     if callable(span_getter):
Missing coverage at lines 62-64
     if callable(span_getter):
-         yield from span_getter(doc)
-         return
     for key, span_filter in span_getter.items():
Missing coverage at line 66
         if key == "*":
-             candidates = (
                 (span, group) for group in doc.spans.values() for span in group
Missing coverage at lines 75-78
         else:
-             for span, group in candidates:
-                 if span.label_ in span_filter:
-                     yield span, group
Missing coverage at line 82
     if callable(span_setter):
-         span_setter(doc, matches)
     else:
Missing coverage at line 124
             elif isinstance(v, str):
-                 new_value[k] = [v]
             elif isinstance(v, list) and all(isinstance(i, str) for i in v):
Missing coverage at line 162
             elif isinstance(v, str):
-                 new_value[k] = [v]
             elif isinstance(v, list) and all(isinstance(i, str) for i in v):

Stmts: 158, Miss: 14, Cover: 91%
edsnlp/viz/quick_examples.py

Missing coverage at lines 88-91

                 if end > istart:
-                     interval = (start, iend)
-                     del intervals[idx]
-                     break

Stmts: 80, Miss: 3, Cover: 96%
TOTAL: Stmts: 9020, Miss: 237, Cover: 97%

253 files skipped due to complete coverage.