Open percevalw opened 4 months ago
Attention: Patch coverage is 89.81481%, with 11 lines in your changes missing coverage. Please review.
Project coverage is 96.97%. Comparing base (ad16e9b) to head (53bcc0e). Report is 23 commits behind head on master.
:exclamation: Current head 53bcc0e differs from pull request most recent head 465ba39. Please upload reports for the commit 465ba39 to get more accurate results.
Files | Patch % | Lines |
---|---|---|
...ipes/core/contextual_matcher/contextual_matcher.py | 87.27% | 7 Missing :warning: |
edsnlp/pipes/core/contextual_matcher/models.py | 89.18% | 4 Missing :warning: |
:umbrella: View full report in Codecov by Sentry.
:loudspeaker: Have feedback on the report? Share it here.
Name | Stmts | Miss | ∆ Miss | Cover |
---|---|---|---|---|
edsnlp/pipes/core/contextual_matcher/contextual_matcher.pyWas already missing at line 111 )
- label = label_name
if label is None: New missing coverage at lines 188-197 ! for include in p.include:
- include.matcher = RegexMatcher(
- attr=include.regex_attr or p.regex_attr or self.attr,
- flags=include.regex_flags or p.regex_flags or self.regex_flags,
- ignore_excluded=ignore_excluded,
- ignore_space_tokens=ignore_space_tokens,
- alignment_mode="expand",
- )
-
- include.matcher.build_patterns(regex={"include": include.regex})
New missing coverage at lines 262-268 ! for include in self.patterns[source].include:
- snippet = include.window(span)
-
- if next(include.matcher(snippet, as_spans=True), None) is None:
- to_keep = False
- logger.trace(f"Entity {span} was filtered out")
- break
New missing coverage at line 329 ! else:
- assigned_list = [
(matched_span, matched_span, assign.name, assign) New missing coverage at lines 341-343 ! if assign.required and not assigned_list:
- logger.trace(f"Entity {span} was filtered out")
- return
New missing coverage at line 348 ! if assigned is None:
- continue
group_span, full_match_span, value_key, assign = assigned | 150 | 12 | 18 | 92.00% |
edsnlp/utils/span_getters.pyWas already missing at lines 53-56 else:
- for span in candidates:
- if span.label_ in span_filter:
- yield span
Was already missing at lines 60-62 if span_getter is None:
- yield doc[:], None
- return
if callable(span_getter): Was already missing at lines 63-65 if callable(span_getter):
- yield from span_getter(doc)
- return
for key, span_filter in span_getter.items(): Was already missing at line 67 if key == "*":
- candidates = (
(span, group) for group in doc.spans.values() for span in group Was already missing at lines 76-79 else:
- for span, group in candidates:
- if span.label_ in span_filter:
- yield span, group
Was already missing at line 83 if callable(span_setter):
- span_setter(doc, matches)
else: Was already missing at line 125 elif isinstance(v, str):
- new_value[k] = [v]
elif isinstance(v, list) and all(isinstance(i, str) for i in v): Was already missing at line 163 elif isinstance(v, str):
- new_value[k] = [v]
elif isinstance(v, list) and all(isinstance(i, str) for i in v): New missing coverage at line 328 ! def __rand__(self, other: "Context"):
- return self & other if other is not None else self
New missing coverage at line 332 ! # fmt: off
- return UnionContext([
*(self.contexts if isinstance(self, UnionContext) else (self,)), New missing coverage at line 339 ! def __ror__(self, other: "Context"):
- return self & other if other is not None else self
New missing coverage at line 343 ! def parse(cls, query):
- return eval(
query, New missing coverage at line 357 ! if isinstance(obj, str):
- return cls.parse(obj)
if isinstance(obj, tuple): New missing coverage at lines 373-377 ! def __getitem__(cls, item) -> Span:
- assert isinstance(item, slice)
- before = item.start
- after = item.stop
- return cls(before, after)
New missing coverage at line 437 ! ):
- self.contexts = contexts
New missing coverage at lines 440-444 ! def __call__(self, span):
- results = [context(span) for context in self.contexts]
- min_word = min([span.start for span in results])
- max_word = max([span.end for span in results])
- return span.doc[min_word:max_word]
| 220 | 28 | 14 | 87.27% |
edsnlp/package.pyNew missing coverage at line 55 ! if isinstance(obj, ModuleType):
- module_name = obj.__name__
else: New missing coverage at line 67 ! return package, version
- except (ImportError, AttributeError):
raise Exception(f"Cound not find package of type {obj}") New missing coverage at lines 86-90 ! def save_module(pickler, obj, *args, **kwargs):
- package_name = get_package(obj)
- if package_name is not None:
- pickler.packages.add(package_name)
- return dill_save_module(pickler, obj, *args, **kwargs)
New missing coverage at line 473 ! if isinstance(pipeline, Path):
- pipeline = edsnlp.load(pipeline)
dependencies = get_deep_dependencies(pipeline) | 202 | 7 | 7 | 96.53% |
TOTAL | 9380 | 245 | 33 | 97.39% |
Name | Stmts | Miss | ∆ Miss | Cover |
---|---|---|---|---|
edsnlp/viz/quick_examples.pyWas already missing at lines 88-91 if end > istart:
- interval = (start, iend)
- del intervals[idx]
- break
| 80 | 3 | 0 | 96.25% |
edsnlp/utils/resources.pyWas already missing at line 33 if not verbs:
- return conjugated_verbs
| 24 | 1 | 0 | 95.83% |
edsnlp/utils/numbers.pyWas already missing at line 34 else:
- string = s
string = string.lower().strip() Was already missing at lines 38-41 return int(string)
- except ValueError:
- parsed = DIGITS_MAPPINGS.get(string, None)
- return parsed | 16 | 4 | 0 | 75.00% |
edsnlp/utils/lazy_module.pyWas already missing at line 46 ):
- continue
for import_node in node.body: | 31 | 1 | 0 | 96.77% |
edsnlp/utils/filter.pyWas already missing at line 206 if isinstance(label, int):
- return [span for span in spans if span.label == label]
else: | 74 | 1 | 0 | 98.65% |
edsnlp/utils/bindings.pyWas already missing at line 22 return "." + path
- return path
| 66 | 1 | 0 | 98.48% |
edsnlp/train.pyWas already missing at line 190 else:
- sample_len = lambda idx, noise=True: 1 # noqa: E731
Was already missing at lines 257-263 if total + num_tokens > self.grad_accumulation_max_tokens:
- print(
...
- mini_batches.append([])
total += num_tokens Was already missing at line 349 if 0 <= self.limit <= count:
- break
if not (len(doc) and (filter_fn is None or filter_fn(doc))): Was already missing at line 351 if not (len(doc) and (filter_fn is None or filter_fn(doc))):
- continue
count += 1 Was already missing at lines 385-387 for ent in doc.ents:
- for token in ent:
- token.is_sent_start = False
for sent in doc.sents if doc.has_annotation("SENT_START") else (doc[:],): | 257 | 8 | 0 | 96.89% |
edsnlp/processing/spark.pyWas already missing at line 51 getActiveSession = SparkSession.getActiveSession
- except AttributeError:
| 43 | 1 | 0 | 97.67% |
edsnlp/processing/simple.pyWas already missing at lines 28-30 no_grad = sys.modules["torch"].no_grad
- except (KeyError, AttributeError):
- no_grad = nullcontext
reader = lc.reader | 52 | 2 | 0 | 96.15% |
edsnlp/processing/multiprocessing.pyWas already missing at lines 227-231 if os.environ.get("TORCH_SHARING_STRATEGY"):
- try:
- torch.multiprocessing.set_sharing_strategy(os.environ["TORCH_SHARING_STRATEGY"])
- except NameError:
- pass
Was already missing at line 249 def save_align_devices_hook(pickler: Any, obj: Any):
- pickler.save_reduce(load_align_devices_hook, (obj.__dict__,), obj=obj)
Was already missing at lines 252-259 def load_align_devices_hook(state):
- state["execution_device"] = MAP_LOCATION
...
- AlignDevicesHook = None
Was already missing at line 452
- new_batch_iterator = None
Was already missing at lines 573-575 else:
- batch = gpu_pipe.prepare_batch(docs, device=device)
- inputs = None
active_batches[batch_id] = (docs, task_id, inputs) Was already missing at line 939 if isinstance(outputs, BaseException):
- raise outputs
Was already missing at line 1007 if v is not None:
- os.environ[k] = v
| 417 | 16 | 0 | 96.16% |
edsnlp/pipes/trainable/span_linker/span_linker.pyWas already missing at lines 401-403 if self.reference_mode == "synonym":
- embeds = embeds.to(new_lin.weight)
- new_lin.weight.data = embeds
else: | 172 | 2 | 0 | 98.84% |
edsnlp/pipes/trainable/ner_crf/ner_crf.pyWas already missing at line 250 if self.labels is not None and not self.infer_span_setter:
- return
Was already missing at lines 258-260 if callable(self.target_span_getter):
- for span in get_spans(doc, self.target_span_getter):
- inferred_labels.add(span.label_)
else: | 157 | 3 | 0 | 98.09% |
edsnlp/pipes/trainable/layers/crf.pyWas already missing at line 21 # out: 2 * N * O
- return (log_A.unsqueeze(-1) + log_B.unsqueeze(-3)).logsumexp(-2)
Was already missing at line 29 # out: 2 * N * O
- return (log_A.unsqueeze(-1) + log_B.unsqueeze(-3)).max(-2)
Was already missing at line 97 if learnable_transitions:
- self.transitions = torch.nn.Parameter(
torch.zeros_like(forbidden_transitions, dtype=torch.float) Was already missing at line 107 if learnable_transitions and with_start_end_transitions:
- self.start_transitions = torch.nn.Parameter(
torch.zeros(num_tags, dtype=torch.float) Was already missing at line 116 if learnable_transitions and with_start_end_transitions:
- self.end_transitions = torch.nn.Parameter(
torch.zeros(num_tags, dtype=torch.float) | 136 | 5 | 0 | 96.32% |
edsnlp/pipes/trainable/embeddings/span_pooler/span_pooler.pyWas already missing at line 200 if len(batch["begins"]) == 0:
- return {
"embeddings": torch.empty(0, self.output_size, device=device), | 67 | 1 | 0 | 98.51% |
edsnlp/pipes/qualifiers/reported_speech/reported_speech.pyWas already missing at lines 18-22 return "REPORTED"
- elif token._.rspeech is False:
- return "DIRECT"
- else:
- return None
| 74 | 3 | 0 | 95.95% |
78 | 1 | 0 | 98.72% | |
75 | 1 | 0 | 98.67% | |
edsnlp/pipes/qualifiers/history/history.pyWas already missing at lines 20-26 def history_getter(token: Union[Token, Span]) -> Optional[str]:
- if token._.history is True:
- return "ATCD"
- elif token._.history is False:
- return "CURRENT"
- else:
- return None
Was already missing at lines 312-318 )
- except ValueError:
...
- note_datetime = None
Was already missing at lines 327-333 )
- except ValueError:
...
- birth_datetime = None
Was already missing at lines 399-402 )
- except ValueError as e:
- absolute_date = None
- logger.warning(
"In doc {}, the following date {} raises this error: {}. " | 155 | 14 | 0 | 90.97% |
60 | 1 | 0 | 98.33% | |
edsnlp/pipes/qualifiers/base.pyWas already missing at line 21 if normalizer and not normalizer.lowercase:
- logger.warning(
"You have chosen the NORM attribute, but disabled lowercasing " | 44 | 1 | 0 | 97.73% |
edsnlp/pipes/ner/tnm/tnm.pyWas already missing at lines 156-158 value = TNM.parse_obj(groupdict)
- except ValidationError:
- value = TNM.parse_obj({})
| 44 | 2 | 0 | 95.45% |
edsnlp/pipes/ner/tnm/model.pyWas already missing at line 139 def __str__(self):
- return self.norm()
Was already missing at line 163 )
- exclude_unset = skip_defaults
| 104 | 2 | 0 | 98.08% |
edsnlp/pipes/ner/scores/sofa/sofa.pyWas already missing at line 32 if not assigned:
- continue
if assigned.get("method_max") is not None: Was already missing at line 40 else:
- method = "Non précisée"
| 25 | 2 | 0 | 92.00% |
edsnlp/pipes/ner/scores/elston_ellis/patterns.pyWas already missing at line 26 if x <= 5:
- return 1
Was already missing at lines 32-36 else:
- return 3
-
- except ValueError:
- return None | 21 | 4 | 0 | 80.95% |
edsnlp/pipes/ner/scores/charlson/patterns.pyWas already missing at lines 21-23 return int(extracted_score)
- except ValueError:
- return None | 13 | 2 | 0 | 84.62% |
edsnlp/pipes/ner/scores/base_score.pyWas already missing at line 154 if value is None:
- continue
normalized_value = self.score_normalization(value) | 47 | 1 | 0 | 97.87% |
edsnlp/pipes/ner/disorders/solid_tumor/solid_tumor.pyWas already missing at lines 114-117 if use_tnm:
- from edsnlp.pipes.ner.tnm import TNM
-
- self.tnm = TNM(nlp, pattern=None, attr="TEXT")
Was already missing at lines 119-129 def process_tnm(self, doc):
- spans = self.tnm.process(doc)
...
- yield span
Was already missing at line 149 if self.use_tnm:
- yield from self.process_tnm(doc) | 35 | 12 | 0 | 65.71% |
edsnlp/pipes/ner/disorders/peripheral_vascular_disease/peripheral_vascular_disease.pyWas already missing at line 106 if "peripheral" not in span._.assigned.keys():
- continue
| 15 | 1 | 0 | 93.33% |
edsnlp/pipes/ner/disorders/diabetes/diabetes.pyWas already missing at line 133 # Mostly FP
- continue
Was already missing at line 136 elif self.has_far_complications(span):
- span._.status = 2
Was already missing at line 148 if next(iter(self.complication_matcher(context)), None) is not None:
- return True
return False | 31 | 3 | 0 | 90.32% |
edsnlp/pipes/ner/disorders/connective_tissue_disease/connective_tissue_disease.pyWas already missing at line 102 # Huge change of FP / Title section
- continue
| 14 | 1 | 0 | 92.86% |
edsnlp/pipes/ner/disorders/ckd/ckd.pyWas already missing at lines 119-122 dfg_value = float(dfg_span.text.replace(",", ".").strip())
- except ValueError:
- logger.trace(f"DFG value couldn't be extracted from {dfg_span.text}")
- return False
| 29 | 3 | 0 | 89.66% |
edsnlp/pipes/ner/disorders/cerebrovascular_accident/cerebrovascular_accident.pyWas already missing at lines 110-112 if span._.source == "ischemia":
- if "brain" not in span._.assigned.keys():
- continue
| 17 | 2 | 0 | 88.24% |
edsnlp/pipes/ner/adicap/models.pyWas already missing at line 15 def norm(self) -> str:
- return self.code
Was already missing at line 18 def __str__(self):
- return self.norm() | 14 | 2 | 0 | 85.71% |
edsnlp/pipes/misc/tables/tables.pyWas already missing at line 129 else:
- self.tables_pattern = tables_pattern
Was already missing at line 134 else:
- self.sep = sep_pattern
| 28 | 2 | 0 | 92.86% |
edsnlp/pipes/misc/sections/sections.pyWas already missing at line 126 if sections is None:
- sections = patterns.sections
sections = dict(sections) | 45 | 1 | 0 | 97.78% |
edsnlp/pipes/misc/measurements/measurements.pyWas already missing at lines 146-148 def __getitem__(self, item: int):
- assert isinstance(item, int)
- return [self][item]
Was already missing at lines 159-162 def __eq__(self, other: Any):
- if isinstance(other, SimpleMeasurement):
- return self.convert_to(other.unit) == other.value
- return False
Was already missing at line 165 if other.unit == self.unit:
- return self.__class__(self.value + other.value, self.unit, self.registry)
return self.__class__( Was already missing at line 194 def verify(cls, ent):
- return True
Was already missing at line 233 def __lt__(self, other: Union[SimpleMeasurement, "RangeMeasurement"]):
- return max(self.convert_to(other.unit)) < min((part.value for part in other))
Was already missing at line 244 return self.convert_to(other.unit) == other.value
- return False
Was already missing at line 258 def verify(cls, ent):
- return True
Was already missing at line 498 if isinstance(measurements, str):
- measurements = [measurements]
if isinstance(measurements, (list, tuple)): Was already missing at line 711 if snippet.end != last and doclike.doc[last: snippet.end].text.strip() == "":
- pseudo.append("w")
pseudo = "".join(pseudo) Was already missing at lines 875-877 unit_norm = self.unit_followers[unit_before.label_]
- except (KeyError, AttributeError, IndexError):
- pass
Was already missing at line 920 ):
- ent = doc[unit_text.start: number.end]
else: Was already missing at lines 927-929 dims = self.unit_registry.parse_unit(unit_norm)[0]
- except KeyError:
- continue
Was already missing at lines 1034-1036 last._.set(last.label_, new_value)
- except (AttributeError, TypeError):
- merged.append(ent)
else: | 388 | 19 | 0 | 95.10% |
edsnlp/pipes/misc/dates/models.pyWas already missing at line 152 else:
- d["month"] = note_datetime.month
if self.day is None: Was already missing at lines 156-162 else:
- if self.year is None:
...
- d["day"] = default_day
Was already missing at lines 170-172 return dt
- except ValueError:
- return None
Was already missing at line 188 else:
- return None
Was already missing at line 204 if self.second:
- norm += f"{self.second:02}s"
| 196 | 11 | 0 | 94.39% |
edsnlp/pipes/misc/dates/dates.pyWas already missing at line 243 if isinstance(absolute, str):
- absolute = [absolute]
if isinstance(relative, str): Was already missing at line 245 if isinstance(relative, str):
- relative = [relative]
if isinstance(duration, str): Was already missing at line 247 if isinstance(duration, str):
- relative = [duration]
if isinstance(false_positive, str): Was already missing at lines 348-357 if self.merge_mode == "align":
- alignments = align_spans(matches, spans, sort_by_overlap=True)
...
- matches.append(span)
Was already missing at line 439 elif d1 in seen or v1.bound is None or v2.bound is None:
- continue
Was already missing at lines 450-452 if v1.mode == Mode.DURATION:
- m1 = Bound.FROM if v2.bound == Bound.UNTIL else Bound.UNTIL
- m2 = v2.mode or Bound.FROM
elif v2.mode == Mode.DURATION: | 148 | 15 | 0 | 89.86% |
edsnlp/pipes/misc/consultation_dates/consultation_dates.pyWas already missing at line 131 else:
- self.date_matcher = None
Was already missing at line 134 if not consultation_mention:
- consultation_mention = []
elif consultation_mention is True: | 48 | 2 | 0 | 95.83% |
edsnlp/pipes/core/normalizer/__init__.pyWas already missing at line 7 def excluded_or_space_getter(t):
- return t.is_space or t.tag_ == "EXCLUDED"
| 5 | 1 | 0 | 80.00% |
edsnlp/pipes/core/endlines/endlines.pyWas already missing at lines 151-155 if end_lines_model is None:
- path = build_path(__file__, "base_model.pkl")
-
- with open(path, "rb") as inp:
- self.model = pickle.load(inp)
elif isinstance(end_lines_model, str): Was already missing at lines 158-160 self.model = pickle.load(inp)
- elif isinstance(end_lines_model, EndLinesModel):
- self.model = end_lines_model
else: Was already missing at line 191 ):
- return "ENUMERATION"
Was already missing at line 278 if np.isnan(sigma):
- sigma = 1
| 87 | 7 | 0 | 91.95% |
edsnlp/patch_spacy.pyWas already missing at lines 67-69 # if module is reloaded.
- existing_func = registry.factories.get(internal_name)
- if not util.is_same_func(factory_func, existing_func):
raise ValueError( | 31 | 2 | 0 | 93.55% |
edsnlp/optimization.pyWas already missing at line 32 def param_groups(self, value):
- self.optim.param_groups = value
Was already missing at line 36 def state(self):
- return self.optim.state
Was already missing at line 40 def state(self, value):
- self.optim.state = value
Was already missing at line 89 def __init__(self, groups):
- self.param_groups = groups
| 77 | 4 | 0 | 94.81% |
edsnlp/matchers/simstring.pyWas already missing at line 280 if custom:
- attr = attr[1:].lower()
Was already missing at line 295 if custom:
- token_text = getattr(token._, attr)
else: | 146 | 2 | 0 | 98.63% |
edsnlp/language.pyWas already missing at line 103 if last != begin:
- logger.warning(
"Missed some characters during" | 51 | 1 | 0 | 98.04% |
edsnlp/data/standoff.pyWas already missing at line 43 def __init__(self, ann_file, line):
- super().__init__(f"File {ann_file}, unrecognized Brat line {line}")
Was already missing at line 83 if not len(ann_paths):
- return {
"text": text, Was already missing at line 197 )
- except Exception:
raise Exception( | 172 | 3 | 0 | 98.26% |
edsnlp/data/polars.pyWas already missing at line 26 if hasattr(data, "collect"):
- data = data.collect()
assert isinstance(data, pl.DataFrame) | 44 | 1 | 0 | 97.73% |
edsnlp/data/json.pyWas already missing at line 94 if not is_jsonl:
- obj[FILENAME] = filename
results.append(obj) Was already missing at line 96 results.append(obj)
- except Exception:
raise Exception(f"Cannot parse {filename}") | 107 | 2 | 0 | 98.13% |
edsnlp/data/converters.pyWas already missing at line 659 if isinstance(converter, type) or kwargs_to_init:
- return converter(**kwargs), {}
return converter, validate_kwargs(converter, kwargs) | 192 | 1 | 0 | 99.48% |
edsnlp/data/base.pyWas already missing at lines 174-180 """
- data = LazyCollection.ensure_lazy(data)
- if converter:
- converter, kwargs = get_doc2dict_converter(converter, kwargs)
- data = data.map(converter, kwargs=kwargs)
-
- return data | 39 | 5 | 0 | 87.18% |
edsnlp/core/torch_component.pyWas already missing at line 390 if hasattr(self, "compiled"):
- res = self.compiled(batch)
else: Was already missing at line 436 """
- return self.preprocess(doc)
| 179 | 2 | 0 | 98.88% |
165 | 1 | 0 | 99.39% | |
edsnlp/core/pipeline.pyWas already missing at line 550 if name in exclude:
- continue
if name not in components: | 404 | 1 | 0 | 99.75% |
edsnlp/core/lazy_collection.pyWas already missing at line 51 def __call__(self, *args, **kwargs):
- return self.forward(*args, **kwargs)
Was already missing at line 436 for name, pipe, *_ in self.torch_components():
- pipe.to(device)
return self | 149 | 2 | 0 | 98.66% |
edsnlp/connectors/omop.pyWas already missing at line 69 if not isinstance(row.ents, list):
- continue
Was already missing at line 87 else:
- doc.spans[span.label_].append(span)
Was already missing at line 127 if df.note_id.isna().any():
- df["note_id"] = range(len(df))
Was already missing at line 171 if i > 0:
- df.term_modifiers += ";"
df.term_modifiers += ext + "=" + df[ext].astype(str) | 84 | 4 | 0 | 95.24% |
edsnlp/processing/deprecated_pipe.pyWas already missing at lines 207-209 def converter(doc):
- res = results_extractor(doc)
- return (
[{"note_id": doc._.note_id, **row} for row in res] | 57 | 2 | -2 | 96.49% |
258 files skipped due to complete coverage.
Coverage failure: total of 97.39% is less than 97.58% ❌
Issues
6 New issues
0 Accepted issues
Measures
0 Security Hotspots
0.0% Coverage on New Code
0.0% Duplication on New Code
Added
- `eds.negation`, `eds.hypothesis`, `eds.family`, `eds.history` and `eds.reported_speech` under an `eds.negation.default_patterns` attribute
- `context_getter` SpanGetter argument to the `eds.matcher` class to only retrieve entities inside the spans returned by the getter
- `filter_expr` parameter to scorers to filter the documents to score
- `required` field to `eds.contextual_matcher` assign patterns to only match if the required field has been found, and an `include` parameter (similar to `exclude`) to search for required patterns without assigning them to the entity

Checklist