Skip to content

Commit

Permalink
fix(glossary): skip variants inclusion for machinery and checks
Browse files Browse the repository at this point in the history
Only matching source strings should be included; the variants are useful
for display only.

Fixes #11883
  • Loading branch information
nijel committed Jun 18, 2024
1 parent 4a0416f commit 67b2509
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 20 deletions.
2 changes: 1 addition & 1 deletion weblate/checks/glossary.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def check_single(self, source, target, unit):
mismatched = set()
matched = set()
boundary = r"\b" if unit.translation.language.uses_whitespace() else ""
for term in get_glossary_terms(unit):
for term in get_glossary_terms(unit, include_variants=False):
term_source = term.source
flags = term.all_flags
expected = term_source if "read-only" in flags else term.target
Expand Down
2 changes: 1 addition & 1 deletion weblate/checks/same.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def should_ignore(self, source, unit) -> bool:
# Extract untranslatable terms
terms = [
re.escape(term.source)
for term in get_glossary_terms(unit)
for term in get_glossary_terms(unit, include_variants=False)
if "read-only" in term.all_flags
]
if terms:
Expand Down
36 changes: 20 additions & 16 deletions weblate/glossary/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,9 @@ def get_glossary_units(project, source_language, target_language):
)


def get_glossary_terms(unit: Unit, *, full: bool = False) -> list[Unit]:
def get_glossary_terms(
unit: Unit, *, full: bool = False, include_variants: bool = True
) -> list[Unit]:
"""Return list of term pairs for an unit."""
from weblate.trans.models.component import Component

Expand Down Expand Up @@ -146,21 +148,23 @@ def get_glossary_terms(unit: Unit, *, full: bool = False) -> list[Unit]:
# Add variants manually. This could be done by adding filtering on
# variant__unit__source in the above query, but this slows down the query
# considerably and variants are rarely used.
existing = {match.pk for match in units}
variants = set()
extra = []
for match in units:
if not match.variant or match.variant.pk in variants:
continue
variants.add(match.variant.pk)
for child in match.variant.unit_set.filter(
translation__language=language
).select_related("source_unit"):
if child.pk not in existing:
existing.add(child.pk)
extra.append(child)

units.extend(extra)
if include_variants:
existing = {match.pk for match in units}
variants = set()
extra = []

for match in units:
if not match.variant or match.variant.pk in variants:
continue
variants.add(match.variant.pk)
for child in match.variant.unit_set.filter(
translation__language=language
).select_related("source_unit"):
if child.pk not in existing:
existing.add(child.pk)
extra.append(child)

units.extend(extra)

# Order results, this is Python reimplementation of:
units.sort(key=lambda x: x.glossary_sort_key)
Expand Down
2 changes: 1 addition & 1 deletion weblate/machinery/microsoft.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ def format_replacement(
def get_highlights(self, text, unit):
result = list(super().get_highlights(text, unit))

for term in get_glossary_terms(unit):
for term in get_glossary_terms(unit, include_variants=False):
for start, end in term.glossary_positions:
glossary_highlight = (start, end, text[start:end], term)
handled = False
Expand Down
4 changes: 3 additions & 1 deletion weblate/machinery/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,9 @@ def get_prompt(
glossary = ""
if any(units):
glossary = render_glossary_units_tsv(
chain.from_iterable(get_glossary_terms(unit) for unit in units)
chain.from_iterable(
get_glossary_terms(unit, include_variants=False) for unit in units
)
)
if glossary:
glossary = GLOSSARY_PROMPT.format(glossary)
Expand Down

0 comments on commit 67b2509

Please sign in to comment.