Closed: percevalw closed this pull request 3 months ago
Issues
- 0 New issues
- 0 Accepted issues

Measures
- 0 Security Hotspots
- 0.0% Coverage on New Code
- 0.0% Duplication on New Code
| Name | Stmts | Miss | ∆ Miss | Cover |
|---|---|---|---|---|
| TOTAL | 9313 | 209 | 0 | 97.76% |
| Name | Stmts | Miss | ∆ Miss | Cover |
|---|---|---|---|---|
| edsnlp/utils/span_getters.py (was already missing at lines 52-55, 59-61, 62-64, 66, 75-78, 82, 124, 162) | 153 | 14 | 0 | 90.85% |
| edsnlp/utils/resources.py (was already missing at line 33) | 24 | 1 | 0 | 95.83% |
| edsnlp/utils/numbers.py (was already missing at lines 34, 38-41) | 16 | 4 | 0 | 75.00% |
| edsnlp/utils/lazy_module.py (was already missing at line 46) | 31 | 1 | 0 | 96.77% |
| edsnlp/utils/filter.py (was already missing at line 206) | 74 | 1 | 0 | 98.65% |
| edsnlp/utils/bindings.py (was already missing at line 22) | 66 | 1 | 0 | 98.48% |
| edsnlp/train.py (was already missing at lines 190, 257-263, 349, 351, 385-387) | 257 | 8 | 0 | 96.89% |
| edsnlp/processing/spark.py (was already missing at line 51) | 43 | 1 | 0 | 97.67% |
| edsnlp/processing/multiprocessing.py (was already missing at lines 227-231, 249, 252-259, 452, 570-572, 949, 1017) | 420 | 16 | 0 | 96.19% |
| edsnlp/processing/deprecated_pipe.py (was already missing at lines 207-209) | 57 | 2 | 0 | 96.49% |
| edsnlp/pipes/trainable/span_linker/span_linker.py (was already missing at lines 401-403) | 172 | 2 | 0 | 98.84% |
| edsnlp/pipes/trainable/ner_crf/ner_crf.py (was already missing at lines 250, 258-260) | 157 | 3 | 0 | 98.09% |
| edsnlp/pipes/trainable/layers/crf.py (was already missing at lines 21, 29, 97, 107, 116) | 137 | 5 | 0 | 96.35% |
| edsnlp/pipes/trainable/embeddings/transformer/transformer.py (was already missing at line 165) | 157 | 1 | 0 | 99.36% |
| edsnlp/pipes/qualifiers/reported_speech/reported_speech.py (was already missing at lines 18-22) | 73 | 3 | 0 | 95.89% |
| edsnlp/pipes/qualifiers/negation/negation.py (was already missing at line 22) | 77 | 1 | 0 | 98.70% |
| edsnlp/pipes/qualifiers/hypothesis/hypothesis.py (was already missing at line 21) | 74 | 1 | 0 | 98.65% |
| edsnlp/pipes/qualifiers/history/history.py (was already missing at lines 20-26, 310-316, 325-331, 397-400) | 154 | 14 | 0 | 90.91% |
| edsnlp/pipes/qualifiers/family/family.py (was already missing at line 21) | 59 | 1 | 0 | 98.31% |
| edsnlp/pipes/ner/tnm/tnm.py (was already missing at lines 156-158) | 44 | 2 | 0 | 95.45% |
| edsnlp/pipes/ner/tnm/model.py (was already missing at lines 139, 163) | 104 | 2 | 0 | 98.08% |
| edsnlp/pipes/ner/scores/sofa/sofa.py (was already missing at lines 32, 40) | 25 | 2 | 0 | 92.00% |
| edsnlp/pipes/ner/scores/elston_ellis/patterns.py (was already missing at lines 26, 32-36) | 21 | 4 | 0 | 80.95% |
| edsnlp/pipes/ner/scores/charlson/patterns.py (was already missing at lines 21-23) | 13 | 2 | 0 | 84.62% |
| edsnlp/pipes/ner/scores/base_score.py (was already missing at line 154) | 47 | 1 | 0 | 97.87% |
| edsnlp/pipes/ner/disorders/solid_tumor/solid_tumor.py (was already missing at lines 114-117, 119-129, 149) | 35 | 12 | 0 | 65.71% |
| edsnlp/pipes/ner/disorders/peripheral_vascular_disease/peripheral_vascular_disease.py (was already missing at line 106) | 15 | 1 | 0 | 93.33% |
| edsnlp/pipes/ner/disorders/diabetes/diabetes.py (was already missing at lines 133, 136, 148) | 31 | 3 | 0 | 90.32% |
| edsnlp/pipes/ner/disorders/connective_tissue_disease/connective_tissue_disease.py (was already missing at line 102) | 14 | 1 | 0 | 92.86% |
| edsnlp/pipes/ner/disorders/ckd/ckd.py (was already missing at lines 119-122) | 29 | 3 | 0 | 89.66% |
| edsnlp/pipes/ner/disorders/cerebrovascular_accident/cerebrovascular_accident.py (was already missing at lines 110-112) | 17 | 2 | 0 | 88.24% |
| edsnlp/pipes/ner/adicap/models.py (was already missing at lines 15, 18) | 14 | 2 | 0 | 85.71% |
| edsnlp/pipes/misc/sections/sections.py (was already missing at line 126) | 45 | 1 | 0 | 97.78% |
| edsnlp/pipes/misc/measurements/measurements.py (was already missing at lines 147-149, 160-163, 166, 193, 198, 237, 248, 262, 857, 1031, 1062-1064, 1107, 1114-1116, 1227-1229) | 429 | 20 | 0 | 95.34% |
| edsnlp/pipes/misc/dates/models.py (was already missing at lines 152, 156-162, 170-172, 188, 204) | 196 | 11 | 0 | 94.39% |
| edsnlp/pipes/misc/dates/dates.py (was already missing at lines 248, 250, 252, 356-365, 450, 461-463) | 152 | 15 | 0 | 90.13% |
| edsnlp/pipes/misc/consultation_dates/consultation_dates.py (was already missing at lines 131, 134) | 48 | 2 | 0 | 95.83% |
| edsnlp/pipes/core/normalizer/__init__.py (was already missing at line 7) | 5 | 1 | 0 | 80.00% |
| edsnlp/pipes/core/endlines/endlines.py (was already missing at lines 151-155, 158-160, 191, 278) | 87 | 7 | 0 | 91.95% |
| edsnlp/pipes/core/contextual_matcher/models.py (was already missing at lines 19-23) | 115 | 2 | 0 | 98.26% |
| edsnlp/pipes/core/contextual_matcher/contextual_matcher.py (was already missing at lines 94, 343) | 143 | 2 | 0 | 98.60% |
| edsnlp/patch_spacy.py (was already missing at lines 67-69) | 31 | 2 | 0 | 93.55% |
| edsnlp/optimization.py (was already missing at lines 32, 36, 40, 89) | 77 | 4 | 0 | 94.81% |
| edsnlp/matchers/simstring.py (was already missing at lines 280, 295) | 146 | 2 | 0 | 98.63% |
| edsnlp/language.py (was already missing at line 103) | 51 | 1 | 0 | 98.04% |
| edsnlp/data/standoff.py (was already missing at lines 43, 83, 197) | 172 | 3 | 0 | 98.26% |
| edsnlp/data/polars.py (was already missing at line 26) | 44 | 1 | 0 | 97.73% |
| edsnlp/data/json.py (was already missing at lines 94, 96) | 107 | 2 | 0 | 98.13% |
| edsnlp/data/converters.py (was already missing at line 659) | 192 | 1 | 0 | 99.48% |
| edsnlp/data/base.py (was already missing at lines 174-180) | 39 | 5 | 0 | 87.18% |
| edsnlp/core/torch_component.py (was already missing at lines 390, 436) | 179 | 2 | 0 | 98.88% |
| edsnlp/core/registries.py (was already missing at line 78) | 164 | 1 | 0 | 99.39% |
| edsnlp/core/pipeline.py (was already missing at line 552) | 410 | 1 | 0 | 99.76% |
| edsnlp/core/lazy_collection.py (was already missing at lines 51, 448) | 151 | 2 | 0 | 98.68% |
| edsnlp/connectors/omop.py (was already missing at lines 69, 87, 127, 171) | 84 | 4 | 0 | 95.24% |
264 files skipped due to complete coverage.
Coverage success: total of 97.76% is above 97.75% 🎉
Description
Fixes #315. The error was caused by a pollution span sitting between two numbers. We obviously don't want to match numbers that are not contiguous spans of text, even under the assumption that the text extraction was faulty, so I disabled `ignore_pollution` for the number regex matcher.
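For context, here is a minimal, self-contained sketch of the failure mode using plain `re` rather than the actual edsnlp matcher (the snippet text and the pollution string are illustrative placeholders): when the number regex only sees the text with pollution already stripped out, two digits that were separated by a pollution span become contiguous and are matched as a single number; matching on the raw text keeps them apart, which is why pollution is now left visible to this matcher.

```python
import re

# Illustrative placeholder: a pollution span (e.g. a scanned-footer artifact)
# sits between two unrelated numbers in the extracted text.
pollution = "NBNbWbWbNbWbNB"
raw = f"poids 2{pollution}3 kg"

# Matching after pollution removal (roughly what a matcher that skips
# pollution sees): the two digits become contiguous and merge into "23".
cleaned = raw.replace(pollution, "")
print(re.findall(r"\d+", cleaned))  # ['23']

# Matching on the raw text (pollution left visible to the number regex):
# the two numbers stay separate, as intended.
print(re.findall(r"\d+", raw))  # ['2', '3']
```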
Checklist