feat: exercise_aliases table + lookup_exercise() alias-aware wrapper

New SQLite table `exercise_aliases (alias, canonical, source)` seeded
with ~40 common gym shorthand entries (OHP, RDL, "Bench", "Squat",
plural/singular drifts, slang). Lookups go through this table first,
then fall through to the strict exercise_db matcher — so the strict
matcher's "false negative for ambiguous single tokens" property is
preserved while still resolving everyday vocabulary.

Schema decision: every seed row is tagged `source='seed'` and re-seeded
on every init_db (deleted-then-reinserted), so editing the seed dict
in code is the one source of truth. User-inserted rows are tagged
`source='user'` and never touched by re-seeding. Migration path covers
existing DBs where the `source` column didn't exist (those rows tagged
'seed' on first migration, then refreshed from the current seed).

New helper db.lookup_exercise(name) wraps the alias resolution + the
exercise_db.lookup() call.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Danny 2026-06-01 10:49:24 +03:00
parent ebd0016a62
commit 214596e26f
4 changed files with 233 additions and 26 deletions

128
db.py
View file

@ -86,8 +86,29 @@ def init_db():
data TEXT NOT NULL DEFAULT '{}',
updated_at TEXT NOT NULL DEFAULT (datetime('now'))
);
-- Maps user-typed exercise slang (lowercased) canonical name
-- in the Free-Exercise-DB. Lookups in exercise_db go through
-- this table first so "OHP", "RDL", "Bench" etc. resolve to
-- the right entry instead of returning None.
-- `source` distinguishes 'seed' (managed by db.py, refreshed
-- on every init) from 'user' (preserved across re-seeding).
CREATE TABLE IF NOT EXISTS exercise_aliases (
alias TEXT PRIMARY KEY,
canonical TEXT NOT NULL,
source TEXT NOT NULL DEFAULT 'user'
);
""")
# Migration: existing exercise_aliases rows (from before the `source`
# column existed) were all seed-managed. Tag them so re-seeding
# refreshes them instead of treating them as protected user edits.
alias_cols = {r[1] for r in conn.execute("PRAGMA table_info(exercise_aliases)").fetchall()}
if "source" not in alias_cols:
conn.execute("ALTER TABLE exercise_aliases ADD COLUMN source TEXT NOT NULL DEFAULT 'seed'")
_seed_exercise_aliases(conn)
# Migrations
cols = {r[1] for r in conn.execute("PRAGMA table_info(workouts)").fetchall()}
if "raw_text" not in cols:
@ -100,6 +121,113 @@ def init_db():
conn.execute("ALTER TABLE exercises ADD COLUMN sets_detail TEXT")
# Common gym shorthand → canonical name in the Free-Exercise-DB.
# Keys are lowercase (lookups lowercase and strip the typed name first).
#
# This dict is the single source of truth for seed-managed rows: on every
# init_db the rows tagged source='seed' are deleted and rewritten from it,
# so editing or removing an entry here takes effect on the next init.
#
# Runtime additions/overrides go through INSERT OR REPLACE INTO
# exercise_aliases and should set source='user' explicitly: rows tagged
# 'user' are left alone by re-seeding and win over a seed entry with the
# same alias. (Being explicit matters because the column default is 'user'
# on freshly created tables but 'seed' on tables that gained the column
# through the ALTER TABLE migration.)
_EXERCISE_ALIAS_SEED: dict[str, str] = {
    # Acronyms
    "ohp": "Standing Military Press",
    "rdl": "Romanian Deadlift",
    "bb row": "Bent Over Barbell Row",
    "db press": "Dumbbell Bench Press",
    "db bench": "Dumbbell Bench Press",
    # Single-word generics — pick the most "default" canonical variant.
    "bench": "Barbell Bench Press - Medium Grip",
    "squat": "Barbell Squat",
    "deadlift": "Barbell Deadlift",
    "press": "Standing Military Press",
    "row": "Bent Over Barbell Row",
    "curl": "Barbell Curl",
    "shrug": "Barbell Shrug",
    "pulldown": "Wide-Grip Lat Pulldown",
    "dip": "Parallel Bar Dip",
    # Plural / singular drift
    "bench presses": "Barbell Bench Press - Medium Grip",
    "chinups": "Chin-Up",
    "chin ups": "Chin-Up",
    "pullup": "Pullups",
    "pushup": "Pushups",
    "push-up": "Pushups",
    "push up": "Pushups",
    "sit-ups": "Sit-Up",
    "situp": "Sit-Up",
    "tricep pushdown": "Triceps Pushdown",
    "tricep pushdowns": "Triceps Pushdown",
    "leg curls": "Lying Leg Curls",
    "leg extensions": "Leg Extensions",
    "lateral raises": "Side Lateral Raise",
    "lat raise": "Side Lateral Raise",
    "lat raises": "Side Lateral Raise",
    "front raises": "Front Dumbbell Raise",
    "face pulls": "Face Pull",
    "box jumps": "Front Box Jump",
    # Slang / brand variants
    "pendlay row": "Bent Over Barbell Row",
    "conventional deadlift": "Barbell Deadlift",
    "bulgarian split squat": "Bodyweight Squat",  # closest single-leg in DB; tweak later
    "good morning": "Good Morning",
    "good mornings": "Good Morning",
    "hip thrust": "Barbell Hip Thrust",
    "hip thrusts": "Barbell Hip Thrust",
    "calf raises": "Standing Calf Raises",
    "calf raise": "Standing Calf Raises",
    "skullcrushers": "EZ-Bar Skullcrusher",
    "skull crushers": "EZ-Bar Skullcrusher",
    "skull crusher": "EZ-Bar Skullcrusher",
    "skullcrusher": "EZ-Bar Skullcrusher",
}
def _seed_exercise_aliases(conn) -> None:
    """Refresh the seed-managed alias rows from `_EXERCISE_ALIAS_SEED`.

    Deletes every row tagged `source = 'seed'` and re-inserts the current
    contents of the seed dict, so the dict in code stays the single source
    of truth for seed rows. Rows with any other `source` value (notably
    'user') are never modified or removed, and a seed entry whose alias is
    already claimed by such a row is skipped: the existing row wins.

    Args:
        conn: an open SQLite connection that already has the
            `exercise_aliases (alias, canonical, source)` table. No commit
            is issued here; that is left to the caller.
    """
    conn.execute("DELETE FROM exercise_aliases WHERE source = 'seed'")
    # After the DELETE every remaining row is non-seed by construction.
    # Collect all of their aliases, not only those tagged 'user': `alias` is
    # the PRIMARY KEY, so inserting a seed entry over a surviving row with
    # some other source value would raise IntegrityError and abort init.
    claimed_aliases = {
        r[0] for r in conn.execute("SELECT alias FROM exercise_aliases")
    }
    rows = [
        (alias, canonical, "seed")
        for alias, canonical in _EXERCISE_ALIAS_SEED.items()
        if alias not in claimed_aliases
    ]
    conn.executemany(
        "INSERT INTO exercise_aliases (alias, canonical, source) VALUES (?, ?, ?)",
        rows,
    )
def resolve_exercise_alias(name: str) -> str | None:
    """Look up the canonical exercise name stored for an alias.

    The input is stripped of surrounding whitespace and lowercased before
    it is matched against the `alias` column of `exercise_aliases`.

    Returns:
        The `canonical` value of the matching row, or None when `name` is
        falsy or no row matches.
    """
    if not name:
        return None

    alias_key = name.strip().lower()
    with get_db() as conn:
        cursor = conn.execute(
            "SELECT canonical FROM exercise_aliases WHERE alias = ?",
            (alias_key,),
        )
        match = cursor.fetchone()

    if not match:
        return None
    return match["canonical"]
def lookup_exercise(name: str) -> dict | None:
    """Look up an exercise, translating user shorthand through the alias table.

    The typed name is first passed to `resolve_exercise_alias`; if an alias
    row exists its canonical name is used, otherwise the typed name is used
    unchanged. The chosen name is then handed to `exercise_db.lookup`.

    Returns:
        Whatever `exercise_db.lookup` returns for the chosen name (its slim
        entry dict, or None when nothing matches), or None when `name` is
        falsy.
    """
    # Imported at call time, as in the original implementation.
    import exercise_db

    if not name:
        return None

    alias_target = resolve_exercise_alias(name)
    search_name = alias_target if alias_target else name
    return exercise_db.lookup(search_name)
def _save_exercises(conn, workout_id: int, superset_groups: list[list[dict]]):
"""Insert superset groups and exercises for a workout."""
for group_pos, group in enumerate(superset_groups):

View file

@ -82,7 +82,19 @@ def _slim(entry: dict) -> dict:
def lookup(name: str) -> Optional[dict]:
"""Return the slim entry for the best name match, or None."""
"""Return the slim entry for the best name match, or None.
Tiers (priority order):
1. exact (case-insensitive)
2. compressed exact collapses hyphens/spaces ("Pull-ups" "Pullups")
3. word-boundary substring (only for multi-token inputs)
4. token overlap requiring 100% coverage of the user's tokens
(only for multi-token inputs)
Single-token inputs (e.g. "Bench", "Squat", "RDL", "OHP") that don't hit
tier 1 or 2 return None there's no robust way to disambiguate without
an alias table.
"""
if not name:
return None
needle = name.strip()
@ -93,48 +105,50 @@ def lookup(name: str) -> Optional[dict]:
if not compressed:
return None
# 1. Exact (case-insensitive)
# 1. Exact (case-insensitive).
hit = _BY_LOWER_NAME.get(lower)
if hit:
return _slim(hit)
# 2. Compressed exact — catches "Pull-ups" → "Pullups", etc.
# 2. Compressed exact — "Pull-ups" → "Pullups".
hit = _BY_COMPRESSED.get(compressed)
if hit:
return _slim(hit)
# 3. Compressed substring (either direction).
substring_candidates: list[dict] = [
e for e, c in _COMPRESSED if compressed in c or c in compressed
]
if substring_candidates:
# Single-token generics ("Bench", "Squat", "Deadlift") match too many
# specific DB entries. Refuse rather than confidently mislead the
# user — the planned alias table will handle these properly.
needle_toks = _tokens(needle)
if len(needle_toks) == 1 and len(substring_candidates) > 2:
# Below here, partial matches only — and only for multi-token inputs.
# A single token is too easily ambiguous (and short acronyms accidentally
# hit character-level substrings of unrelated names).
if len(needle_toks) < 2:
return None
# 3. Word-boundary substring (lowercase). "bench press" in "bench press
# with chains" — yes. "rdl" in "hurdle" — no (no word break).
substring_candidates: list[dict] = []
for entry in ALL:
n = entry.get("name", "")
if not n:
continue
nl = n.lower()
if lower in nl or nl in lower:
substring_candidates.append(entry)
if substring_candidates:
# Prefer the shortest DB name (most specific to the typed input).
substring_candidates.sort(key=lambda e: len(e["name"]))
return _slim(substring_candidates[0])
# 4. Token overlap (Jaccard-ish). Require ≥1 shared token AND that the
# shared portion covers ≥50% of the user's tokens, so "row" doesn't
# match "single arm cable row machine" via one stop-token.
needle_toks = _tokens(needle)
if not needle_toks:
return None
# 4. Token overlap, 100% coverage of the user's tokens. So "BB Row" with
# tokens {bb, row} only matches a DB entry that contains both — it never
# silently latches onto a "row" entry that doesn't share the "bb" cue.
best: tuple[float, dict] | None = None
for entry, db_toks in _TOKENS:
if not db_toks:
continue
overlap = needle_toks & db_toks
if not overlap:
if not needle_toks <= db_toks:
continue
coverage = len(overlap) / len(needle_toks)
if coverage < 0.5:
continue
# Score = coverage, tiebreak by DB-name length (shorter wins).
score = coverage - 0.001 * len(entry["name"])
# All user tokens matched; tiebreak by DB-name length (shorter wins,
# i.e. the most specific variant).
score = -len(entry["name"])
if best is None or score > best[0]:
best = (score, entry)

View file

@ -253,6 +253,60 @@ class TestAllExerciseNames:
assert db.get_all_exercise_names() == ["Apple", "Mango", "Zebra"]
# ── exercise aliases ─────────────────────────────────────────────
class TestExerciseAliases:
    """Alias table: seeding, input normalisation and re-seed rules."""

    def test_seed_loaded(self, tmp_db):
        """Seeded shorthand resolves to its canonical name."""
        expected = (
            ("OHP", "Standing Military Press"),
            ("rdl", "Romanian Deadlift"),
            ("Bench", "Barbell Bench Press - Medium Grip"),
        )
        for typed, canonical in expected:
            assert db.resolve_exercise_alias(typed) == canonical

    def test_case_insensitive(self, tmp_db):
        """Case and surrounding whitespace do not affect resolution."""
        expected = (
            ("ohp", "Standing Military Press"),
            ("OHP", "Standing Military Press"),
            (" Squat ", "Barbell Squat"),
        )
        for typed, canonical in expected:
            assert db.resolve_exercise_alias(typed) == canonical

    def test_unknown_returns_none(self, tmp_db):
        """Unknown, empty and None inputs all resolve to None."""
        for typed in ("not-a-thing", "", None):
            assert db.resolve_exercise_alias(typed) is None

    def test_lookup_exercise_uses_aliases(self, tmp_db):
        """lookup_exercise follows the alias before hitting exercise_db."""
        entry = db.lookup_exercise("OHP")
        assert entry is not None
        assert entry["name"] == "Standing Military Press"
        assert "shoulders" in entry["primary_muscles"]

    def test_lookup_exercise_falls_through_when_no_alias(self, tmp_db):
        """Without an alias row the typed name goes to exercise_db as-is."""
        # "Plank" has no alias, but exercise_db knows it under that name.
        entry = db.lookup_exercise("Plank")
        assert entry is not None
        assert entry["name"] == "Plank"

    def test_user_overrides_survive_init(self, tmp_db):
        """A source='user' row outlives re-seeding and beats the seed entry."""
        override = ("ohp", "Push Press")
        with db.get_db() as conn:
            conn.execute(
                "INSERT OR REPLACE INTO exercise_aliases (alias, canonical, source) VALUES (?, ?, 'user')",
                override,
            )
        # Running init_db again triggers the re-seed.
        db.init_db()
        assert db.resolve_exercise_alias("ohp") == "Push Press"

    def test_seed_rows_refreshed_on_reinit(self, tmp_db):
        """A tampered seed row is restored from the seed dict on re-init."""
        with db.get_db() as conn:
            conn.execute(
                "UPDATE exercise_aliases SET canonical = 'WRONG' WHERE alias = 'ohp'"
            )
        # Confirm the corruption is visible before re-seeding repairs it.
        assert db.resolve_exercise_alias("ohp") == "WRONG"
        db.init_db()
        assert db.resolve_exercise_alias("ohp") == "Standing Military Press"
# ── get_last_exercise ────────────────────────────────────────────

View file

@ -44,6 +44,17 @@ class TestLookup:
assert exercise_db.lookup("") is None
assert exercise_db.lookup(" ") is None
def test_short_acronyms_dont_substring_match_characters(self):
    # Regression guard: "RDL" once resolved to "Hurdle Hops" because the
    # letters "rdl" occur inside the compressed name "hurdlehops". Bare
    # acronyms must return None from the strict matcher.
    for acronym in ("RDL", "OHP"):
        assert exercise_db.lookup(acronym) is None
def test_multi_token_requires_full_coverage(self):
    # Regression guard: "BB Row" once resolved to "Sled Row" through the
    # shared "row" token alone. With every typed token required to match,
    # the lookup must return None.
    result = exercise_db.lookup("BB Row")
    assert result is None
def test_returned_shape(self):
m = exercise_db.lookup("Pullups")
# The slim view drops `instructions` and `images`.