Improve contextual matcher

Files	Patch %	Lines
edsnlp/pipes/core/contextual_matcher/contextual_matcher.py	87.27%	7 Missing :warning:
edsnlp/pipes/core/contextual_matcher/models.py	89.18%	4 Missing :warning:

Coverage Report

Name	Stmts	Miss	∆ Miss	Cover
edsnlp/pipes/core/contextual_matcher/contextual_matcher.py Was already missing at line 111 ) - label = label_name if label is None: New missing coverage at lines 188-197 ! for include in p.include: - include.matcher = RegexMatcher( - attr=include.regex_attr or p.regex_attr or self.attr, - flags=include.regex_flags or p.regex_flags or self.regex_flags, - ignore_excluded=ignore_excluded, - ignore_space_tokens=ignore_space_tokens, - alignment_mode="expand", - ) - - include.matcher.build_patterns(regex={"include": include.regex}) New missing coverage at lines 262-268 ! for include in self.patterns[source].include: - snippet = include.window(span) - - if next(include.matcher(snippet, as_spans=True), None) is None: - to_keep = False - logger.trace(f"Entity {span} was filtered out") - break New missing coverage at line 329 ! else: - assigned_list = [ (matched_span, matched_span, assign.name, assign) New missing coverage at lines 341-343 ! if assign.required and not assigned_list: - logger.trace(f"Entity {span} was filtered out") - return New missing coverage at line 348 ! if assigned is None: - continue group_span, full_match_span, value_key, assign = assigned	150	12	18	92.00%
edsnlp/utils/span_getters.py Was already missing at lines 53-56 else: - for span in candidates: - if span.label_ in span_filter: - yield span Was already missing at lines 60-62 if span_getter is None: - yield doc[:], None - return if callable(span_getter): Was already missing at lines 63-65 if callable(span_getter): - yield from span_getter(doc) - return for key, span_filter in span_getter.items(): Was already missing at line 67 if key == "": - candidates = ( (span, group) for group in doc.spans.values() for span in group Was already missing at lines 76-79 else: - for span, group in candidates: - if span.label_ in span_filter: - yield span, group Was already missing at line 83 if callable(span_setter): - span_setter(doc, matches) else: Was already missing at line 125 elif isinstance(v, str): - new_value[k] = [v] elif isinstance(v, list) and all(isinstance(i, str) for i in v): Was already missing at line 163 elif isinstance(v, str): - new_value[k] = [v] elif isinstance(v, list) and all(isinstance(i, str) for i in v): New missing coverage at line 328 ! def __rand__(self, other: "Context"): - return self & other if other is not None else self New missing coverage at line 332 ! # fmt: off - return UnionContext([ (self.contexts if isinstance(self, UnionContext) else (self,)), New missing coverage at line 339 ! def __ror__(self, other: "Context"): - return self & other if other is not None else self New missing coverage at line 343 ! def parse(cls, query): - return eval( query, New missing coverage at line 357 ! if isinstance(obj, str): - return cls.parse(obj) if isinstance(obj, tuple): New missing coverage at lines 373-377 ! def __getitem__(cls, item) -> Span: - assert isinstance(item, slice) - before = item.start - after = item.stop - return cls(before, after) New missing coverage at line 437 ! ): - self.contexts = contexts New missing coverage at lines 440-444 ! 
def __call__(self, span): - results = [context(span) for context in self.contexts] - min_word = min([span.start for span in results]) - max_word = max([span.end for span in results]) - return span.doc[min_word:max_word]	220	28	14	87.27%
edsnlp/package.py New missing coverage at line 55 ! if isinstance(obj, ModuleType): - module_name = obj.__name__ else: New missing coverage at line 67 ! return package, version - except (ImportError, AttributeError): raise Exception(f"Cound not find package of type {obj}") New missing coverage at lines 86-90 ! def save_module(pickler, obj, args, kwargs): - package_name = get_package(obj) - if package_name is not None: - pickler.packages.add(package_name) - return dill_save_module(pickler, obj, args, **kwargs) New missing coverage at line 473 ! if isinstance(pipeline, Path): - pipeline = edsnlp.load(pipeline) dependencies = get_deep_dependencies(pipeline)	202	7	7	96.53%
TOTAL	9380	245	33	97.39%

Files without new missing coverage

Name	Stmts	Miss	∆ Miss	Cover
edsnlp/viz/quick_examples.py Was already missing at lines 88-91 if end > istart: - interval = (start, iend) - del intervals[idx] - break	80	3	0	96.25%
edsnlp/utils/resources.py Was already missing at line 33 if not verbs: - return conjugated_verbs	24	1	0	95.83%
edsnlp/utils/numbers.py Was already missing at line 34 else: - string = s string = string.lower().strip() Was already missing at lines 38-41 return int(string) - except ValueError: - parsed = DIGITS_MAPPINGS.get(string, None) - return parsed	16	4	0	75.00%
edsnlp/utils/lazy_module.py Was already missing at line 46 ): - continue for import_node in node.body:	31	1	0	96.77%
edsnlp/utils/filter.py Was already missing at line 206 if isinstance(label, int): - return [span for span in spans if span.label == label] else:	74	1	0	98.65%
edsnlp/utils/bindings.py Was already missing at line 22 return "." + path - return path	66	1	0	98.48%
edsnlp/train.py Was already missing at line 190 else: - sample_len = lambda idx, noise=True: 1 # noqa: E731 Was already missing at lines 257-263 if total + num_tokens > self.grad_accumulation_max_tokens: - print( ... - mini_batches.append([]) total += num_tokens Was already missing at line 349 if 0 <= self.limit <= count: - break if not (len(doc) and (filter_fn is None or filter_fn(doc))): Was already missing at line 351 if not (len(doc) and (filter_fn is None or filter_fn(doc))): - continue count += 1 Was already missing at lines 385-387 for ent in doc.ents: - for token in ent: - token.is_sent_start = False for sent in doc.sents if doc.has_annotation("SENT_START") else (doc[:],):	257	8	0	96.89%
edsnlp/processing/spark.py Was already missing at line 51 getActiveSession = SparkSession.getActiveSession - except AttributeError:	43	1	0	97.67%
edsnlp/processing/simple.py Was already missing at lines 28-30 no_grad = sys.modules["torch"].no_grad - except (KeyError, AttributeError): - no_grad = nullcontext reader = lc.reader	52	2	0	96.15%
edsnlp/processing/multiprocessing.py Was already missing at lines 227-231 if os.environ.get("TORCH_SHARING_STRATEGY"): - try: - torch.multiprocessing.set_sharing_strategy(os.environ["TORCH_SHARING_STRATEGY"]) - except NameError: - pass Was already missing at line 249 def save_align_devices_hook(pickler: Any, obj: Any): - pickler.save_reduce(load_align_devices_hook, (obj.__dict__,), obj=obj) Was already missing at lines 252-259 def load_align_devices_hook(state): - state["execution_device"] = MAP_LOCATION ... - AlignDevicesHook = None Was already missing at line 452 - new_batch_iterator = None Was already missing at lines 573-575 else: - batch = gpu_pipe.prepare_batch(docs, device=device) - inputs = None active_batches[batch_id] = (docs, task_id, inputs) Was already missing at line 939 if isinstance(outputs, BaseException): - raise outputs Was already missing at line 1007 if v is not None: - os.environ[k] = v	417	16	0	96.16%
edsnlp/pipes/trainable/span_linker/span_linker.py Was already missing at lines 401-403 if self.reference_mode == "synonym": - embeds = embeds.to(new_lin.weight) - new_lin.weight.data = embeds else:	172	2	0	98.84%
edsnlp/pipes/trainable/ner_crf/ner_crf.py Was already missing at line 250 if self.labels is not None and not self.infer_span_setter: - return Was already missing at lines 258-260 if callable(self.target_span_getter): - for span in get_spans(doc, self.target_span_getter): - inferred_labels.add(span.label_) else:	157	3	0	98.09%
edsnlp/pipes/trainable/layers/crf.py Was already missing at line 21 # out: 2 * N * O - return (log_A.unsqueeze(-1) + log_B.unsqueeze(-3)).logsumexp(-2) Was already missing at line 29 # out: 2 * N * O - return (log_A.unsqueeze(-1) + log_B.unsqueeze(-3)).max(-2) Was already missing at line 97 if learnable_transitions: - self.transitions = torch.nn.Parameter( torch.zeros_like(forbidden_transitions, dtype=torch.float) Was already missing at line 107 if learnable_transitions and with_start_end_transitions: - self.start_transitions = torch.nn.Parameter( torch.zeros(num_tags, dtype=torch.float) Was already missing at line 116 if learnable_transitions and with_start_end_transitions: - self.end_transitions = torch.nn.Parameter( torch.zeros(num_tags, dtype=torch.float)	136	5	0	96.32%
edsnlp/pipes/trainable/embeddings/span_pooler/span_pooler.py Was already missing at line 200 if len(batch["begins"]) == 0: - return { "embeddings": torch.empty(0, self.output_size, device=device),	67	1	0	98.51%
edsnlp/pipes/qualifiers/reported_speech/reported_speech.py Was already missing at lines 18-22 return "REPORTED" - elif token._.rspeech is False: - return "DIRECT" - else: - return None	74	3	0	95.95%
edsnlp/pipes/qualifiers/negation/negation.py Was already missing at line 22 else: - return None	78	1	0	98.72%
edsnlp/pipes/qualifiers/hypothesis/hypothesis.py Was already missing at line 21 else: - return None	75	1	0	98.67%
edsnlp/pipes/qualifiers/history/history.py Was already missing at lines 20-26 def history_getter(token: Union[Token, Span]) -> Optional[str]: - if token._.history is True: - return "ATCD" - elif token._.history is False: - return "CURRENT" - else: - return None Was already missing at lines 312-318 ) - except ValueError: ... - note_datetime = None Was already missing at lines 327-333 ) - except ValueError: ... - birth_datetime = None Was already missing at lines 399-402 ) - except ValueError as e: - absolute_date = None - logger.warning( "In doc {}, the following date {} raises this error: {}. "	155	14	0	90.97%
edsnlp/pipes/qualifiers/family/family.py Was already missing at line 21 else: - return None	60	1	0	98.33%
edsnlp/pipes/qualifiers/base.py Was already missing at line 21 if normalizer and not normalizer.lowercase: - logger.warning( "You have chosen the NORM attribute, but disabled lowercasing "	44	1	0	97.73%
edsnlp/pipes/ner/tnm/tnm.py Was already missing at lines 156-158 value = TNM.parse_obj(groupdict) - except ValidationError: - value = TNM.parse_obj({})	44	2	0	95.45%
edsnlp/pipes/ner/tnm/model.py Was already missing at line 139 def __str__(self): - return self.norm() Was already missing at line 163 ) - exclude_unset = skip_defaults	104	2	0	98.08%
edsnlp/pipes/ner/scores/sofa/sofa.py Was already missing at line 32 if not assigned: - continue if assigned.get("method_max") is not None: Was already missing at line 40 else: - method = "Non précisée"	25	2	0	92.00%
edsnlp/pipes/ner/scores/elston_ellis/patterns.py Was already missing at line 26 if x <= 5: - return 1 Was already missing at lines 32-36 else: - return 3 - - except ValueError: - return None	21	4	0	80.95%
edsnlp/pipes/ner/scores/charlson/patterns.py Was already missing at lines 21-23 return int(extracted_score) - except ValueError: - return None	13	2	0	84.62%
edsnlp/pipes/ner/scores/base_score.py Was already missing at line 154 if value is None: - continue normalized_value = self.score_normalization(value)	47	1	0	97.87%
edsnlp/pipes/ner/disorders/solid_tumor/solid_tumor.py Was already missing at lines 114-117 if use_tnm: - from edsnlp.pipes.ner.tnm import TNM - - self.tnm = TNM(nlp, pattern=None, attr="TEXT") Was already missing at lines 119-129 def process_tnm(self, doc): - spans = self.tnm.process(doc) ... - yield span Was already missing at line 149 if self.use_tnm: - yield from self.process_tnm(doc)	35	12	0	65.71%
edsnlp/pipes/ner/disorders/peripheral_vascular_disease/peripheral_vascular_disease.py Was already missing at line 106 if "peripheral" not in span._.assigned.keys(): - continue	15	1	0	93.33%
edsnlp/pipes/ner/disorders/diabetes/diabetes.py Was already missing at line 133 # Mostly FP - continue Was already missing at line 136 elif self.has_far_complications(span): - span._.status = 2 Was already missing at line 148 if next(iter(self.complication_matcher(context)), None) is not None: - return True return False	31	3	0	90.32%
edsnlp/pipes/ner/disorders/connective_tissue_disease/connective_tissue_disease.py Was already missing at line 102 # Huge change of FP / Title section - continue	14	1	0	92.86%
edsnlp/pipes/ner/disorders/ckd/ckd.py Was already missing at lines 119-122 dfg_value = float(dfg_span.text.replace(",", ".").strip()) - except ValueError: - logger.trace(f"DFG value couldn't be extracted from {dfg_span.text}") - return False	29	3	0	89.66%
edsnlp/pipes/ner/disorders/cerebrovascular_accident/cerebrovascular_accident.py Was already missing at lines 110-112 if span._.source == "ischemia": - if "brain" not in span._.assigned.keys(): - continue	17	2	0	88.24%
edsnlp/pipes/ner/adicap/models.py Was already missing at line 15 def norm(self) -> str: - return self.code Was already missing at line 18 def __str__(self): - return self.norm()	14	2	0	85.71%
edsnlp/pipes/misc/tables/tables.py Was already missing at line 129 else: - self.tables_pattern = tables_pattern Was already missing at line 134 else: - self.sep = sep_pattern	28	2	0	92.86%
edsnlp/pipes/misc/sections/sections.py Was already missing at line 126 if sections is None: - sections = patterns.sections sections = dict(sections)	45	1	0	97.78%
edsnlp/pipes/misc/measurements/measurements.py Was already missing at lines 146-148 def __getitem__(self, item: int): - assert isinstance(item, int) - return [self][item] Was already missing at lines 159-162 def __eq__(self, other: Any): - if isinstance(other, SimpleMeasurement): - return self.convert_to(other.unit) == other.value - return False Was already missing at line 165 if other.unit == self.unit: - return self.__class__(self.value + other.value, self.unit, self.registry) return self.__class__( Was already missing at line 194 def verify(cls, ent): - return True Was already missing at line 233 def __lt__(self, other: Union[SimpleMeasurement, "RangeMeasurement"]): - return max(self.convert_to(other.unit)) < min((part.value for part in other)) Was already missing at line 244 return self.convert_to(other.unit) == other.value - return False Was already missing at line 258 def verify(cls, ent): - return True Was already missing at line 498 if isinstance(measurements, str): - measurements = [measurements] if isinstance(measurements, (list, tuple)): Was already missing at line 711 if snippet.end != last and doclike.doc[last: snippet.end].text.strip() == "": - pseudo.append("w") pseudo = "".join(pseudo) Was already missing at lines 875-877 unit_norm = self.unit_followers[unit_before.label_] - except (KeyError, AttributeError, IndexError): - pass Was already missing at line 920 ): - ent = doc[unit_text.start: number.end] else: Was already missing at lines 927-929 dims = self.unit_registry.parse_unit(unit_norm)[0] - except KeyError: - continue Was already missing at lines 1034-1036 last._.set(last.label_, new_value) - except (AttributeError, TypeError): - merged.append(ent) else:	388	19	0	95.10%
edsnlp/pipes/misc/dates/models.py Was already missing at line 152 else: - d["month"] = note_datetime.month if self.day is None: Was already missing at lines 156-162 else: - if self.year is None: ... - d["day"] = default_day Was already missing at lines 170-172 return dt - except ValueError: - return None Was already missing at line 188 else: - return None Was already missing at line 204 if self.second: - norm += f"{self.second:02}s"	196	11	0	94.39%
edsnlp/pipes/misc/dates/dates.py Was already missing at line 243 if isinstance(absolute, str): - absolute = [absolute] if isinstance(relative, str): Was already missing at line 245 if isinstance(relative, str): - relative = [relative] if isinstance(duration, str): Was already missing at line 247 if isinstance(duration, str): - relative = [duration] if isinstance(false_positive, str): Was already missing at lines 348-357 if self.merge_mode == "align": - alignments = align_spans(matches, spans, sort_by_overlap=True) ... - matches.append(span) Was already missing at line 439 elif d1 in seen or v1.bound is None or v2.bound is None: - continue Was already missing at lines 450-452 if v1.mode == Mode.DURATION: - m1 = Bound.FROM if v2.bound == Bound.UNTIL else Bound.UNTIL - m2 = v2.mode or Bound.FROM elif v2.mode == Mode.DURATION:	148	15	0	89.86%
edsnlp/pipes/misc/consultation_dates/consultation_dates.py Was already missing at line 131 else: - self.date_matcher = None Was already missing at line 134 if not consultation_mention: - consultation_mention = [] elif consultation_mention is True:	48	2	0	95.83%
edsnlp/pipes/core/normalizer/__init__.py Was already missing at line 7 def excluded_or_space_getter(t): - return t.is_space or t.tag_ == "EXCLUDED"	5	1	0	80.00%
edsnlp/pipes/core/endlines/endlines.py Was already missing at lines 151-155 if end_lines_model is None: - path = build_path(__file__, "base_model.pkl") - - with open(path, "rb") as inp: - self.model = pickle.load(inp) elif isinstance(end_lines_model, str): Was already missing at lines 158-160 self.model = pickle.load(inp) - elif isinstance(end_lines_model, EndLinesModel): - self.model = end_lines_model else: Was already missing at line 191 ): - return "ENUMERATION" Was already missing at line 278 if np.isnan(sigma): - sigma = 1	87	7	0	91.95%
edsnlp/patch_spacy.py Was already missing at lines 67-69 # if module is reloaded. - existing_func = registry.factories.get(internal_name) - if not util.is_same_func(factory_func, existing_func): raise ValueError(	31	2	0	93.55%
edsnlp/optimization.py Was already missing at line 32 def param_groups(self, value): - self.optim.param_groups = value Was already missing at line 36 def state(self): - return self.optim.state Was already missing at line 40 def state(self, value): - self.optim.state = value Was already missing at line 89 def __init__(self, groups): - self.param_groups = groups	77	4	0	94.81%
edsnlp/matchers/simstring.py Was already missing at line 280 if custom: - attr = attr[1:].lower() Was already missing at line 295 if custom: - token_text = getattr(token._, attr) else:	146	2	0	98.63%
edsnlp/language.py Was already missing at line 103 if last != begin: - logger.warning( "Missed some characters during"	51	1	0	98.04%
edsnlp/data/standoff.py Was already missing at line 43 def __init__(self, ann_file, line): - super().__init__(f"File {ann_file}, unrecognized Brat line {line}") Was already missing at line 83 if not len(ann_paths): - return { "text": text, Was already missing at line 197 ) - except Exception: raise Exception(	172	3	0	98.26%
edsnlp/data/polars.py Was already missing at line 26 if hasattr(data, "collect"): - data = data.collect() assert isinstance(data, pl.DataFrame)	44	1	0	97.73%
edsnlp/data/json.py Was already missing at line 94 if not is_jsonl: - obj[FILENAME] = filename results.append(obj) Was already missing at line 96 results.append(obj) - except Exception: raise Exception(f"Cannot parse {filename}")	107	2	0	98.13%
edsnlp/data/converters.py Was already missing at line 659 if isinstance(converter, type) or kwargs_to_init: - return converter(**kwargs), {} return converter, validate_kwargs(converter, kwargs)	192	1	0	99.48%
edsnlp/data/base.py Was already missing at lines 174-180 """ - data = LazyCollection.ensure_lazy(data) - if converter: - converter, kwargs = get_doc2dict_converter(converter, kwargs) - data = data.map(converter, kwargs=kwargs) - - return data	39	5	0	87.18%
edsnlp/core/torch_component.py Was already missing at line 390 if hasattr(self, "compiled"): - res = self.compiled(batch) else: Was already missing at line 436 """ - return self.preprocess(doc)	179	2	0	98.88%
edsnlp/core/registries.py Was already missing at line 76 if obj.error is not None: - raise obj.error	165	1	0	99.39%
edsnlp/core/pipeline.py Was already missing at line 550 if name in exclude: - continue if name not in components:	404	1	0	99.75%
edsnlp/core/lazy_collection.py Was already missing at line 51 def __call__(self, args, kwargs): - return self.forward(args, *kwargs) Was already missing at line 436 for name, pipe, _ in self.torch_components(): - pipe.to(device) return self	149	2	0	98.66%
edsnlp/connectors/omop.py Was already missing at line 69 if not isinstance(row.ents, list): - continue Was already missing at line 87 else: - doc.spans[span.label_].append(span) Was already missing at line 127 if df.note_id.isna().any(): - df["note_id"] = range(len(df)) Was already missing at line 171 if i > 0: - df.term_modifiers += ";" df.term_modifiers += ext + "=" + df[ext].astype(str)	84	4	0	95.24%
edsnlp/processing/deprecated_pipe.py Was already missing at lines 207-209 def converter(doc): - res = results_extractor(doc) - return ( [{"note_id": doc._.note_id, **row} for row in res]	57	2	-2	96.49%

258 files skipped due to complete coverage.

Coverage failure: total of 97.39% is less than 97.58% ❌

aphp / edsnlp

Improve contextual matcher #289

Added

Checklist

Codecov Report

Coverage Report

Quality Gate passed