From 214596e26f8ac43cdd656c0345535b230fb9f7b9 Mon Sep 17 00:00:00 2001 From: Danny Date: Mon, 1 Jun 2026 10:49:24 +0300 Subject: [PATCH] feat: exercise_aliases table + lookup_exercise() alias-aware wrapper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New SQLite table `exercise_aliases (alias, canonical, source)` seeded with ~40 common gym shorthand entries (OHP, RDL, "Bench", "Squat", plural/singular drifts, slang). Lookups go through this table first, then fall through to the strict exercise_db matcher — so the strict matcher's "false negative for ambiguous single tokens" property is preserved while still resolving every-day vocabulary. Schema decision: every seed row is tagged `source='seed'` and re-seeded on every init_db (deleted-then-reinserted), so editing the seed dict in code is the one source of truth. User-inserted rows are tagged `source='user'` and never touched by re-seeding. Migration path covers existing DBs where the `source` column didn't exist (those rows tagged 'seed' on first migration, then refreshed from the current seed). New helper db.lookup_exercise(name) wraps the alias resolution + the exercise_db.lookup() call. Co-Authored-By: Claude Opus 4.7 (1M context) --- db.py | 128 ++++++++++++++++++++++++++++++++++++++ exercise_db.py | 66 ++++++++++++-------- tests/test_db.py | 54 ++++++++++++++++ tests/test_exercise_db.py | 11 ++++ 4 files changed, 233 insertions(+), 26 deletions(-) diff --git a/db.py b/db.py index 410097b..62db795 100644 --- a/db.py +++ b/db.py @@ -86,8 +86,29 @@ def init_db(): data TEXT NOT NULL DEFAULT '{}', updated_at TEXT NOT NULL DEFAULT (datetime('now')) ); + + -- Maps user-typed exercise slang (lowercased) → canonical name + -- in the Free-Exercise-DB. Lookups in exercise_db go through + -- this table first so "OHP", "RDL", "Bench" etc. resolve to + -- the right entry instead of returning None. 
+ -- `source` distinguishes 'seed' (managed by db.py, refreshed + -- on every init) from 'user' (preserved across re-seeding). + CREATE TABLE IF NOT EXISTS exercise_aliases ( + alias TEXT PRIMARY KEY, + canonical TEXT NOT NULL, + source TEXT NOT NULL DEFAULT 'user' + ); """) + # Migration: existing exercise_aliases rows (from before the `source` + # column existed) were all seed-managed. Tag them so re-seeding + # refreshes them instead of treating them as protected user edits. + alias_cols = {r[1] for r in conn.execute("PRAGMA table_info(exercise_aliases)").fetchall()} + if "source" not in alias_cols: + conn.execute("ALTER TABLE exercise_aliases ADD COLUMN source TEXT NOT NULL DEFAULT 'seed'") + + _seed_exercise_aliases(conn) + # Migrations cols = {r[1] for r in conn.execute("PRAGMA table_info(workouts)").fetchall()} if "raw_text" not in cols: @@ -100,6 +121,113 @@ def init_db(): conn.execute("ALTER TABLE exercises ADD COLUMN sets_detail TEXT") +# Common gym shorthand → canonical name in the Free-Exercise-DB. +# Keys are lowercase. Seed rows (source='seed') are deleted and rewritten +# from this dict on every init_db; to override one at runtime, INSERT OR +# REPLACE the alias with source='user' — user rows are never re-seeded. +_EXERCISE_ALIAS_SEED: dict[str, str] = { + # Acronyms + "ohp": "Standing Military Press", + "rdl": "Romanian Deadlift", + "bb row": "Bent Over Barbell Row", + "db press": "Dumbbell Bench Press", + "db bench": "Dumbbell Bench Press", + + # Single-word generics — pick the most "default" canonical variant. 
+ "bench": "Barbell Bench Press - Medium Grip", + "squat": "Barbell Squat", + "deadlift": "Barbell Deadlift", + "press": "Standing Military Press", + "row": "Bent Over Barbell Row", + "curl": "Barbell Curl", + "shrug": "Barbell Shrug", + "pulldown": "Wide-Grip Lat Pulldown", + "dip": "Parallel Bar Dip", + + # Plural / singular drift + "bench presses": "Barbell Bench Press - Medium Grip", + "chinups": "Chin-Up", + "chin ups": "Chin-Up", + "pullup": "Pullups", + "pushup": "Pushups", + "push-up": "Pushups", + "push up": "Pushups", + "sit-ups": "Sit-Up", + "situp": "Sit-Up", + "tricep pushdown": "Triceps Pushdown", + "tricep pushdowns": "Triceps Pushdown", + "leg curls": "Lying Leg Curls", + "leg extensions": "Leg Extensions", + "lateral raises": "Side Lateral Raise", + "lat raise": "Side Lateral Raise", + "lat raises": "Side Lateral Raise", + "front raises": "Front Dumbbell Raise", + "face pulls": "Face Pull", + "box jumps": "Front Box Jump", + + # Slang / brand variants + "pendlay row": "Bent Over Barbell Row", + "conventional deadlift": "Barbell Deadlift", + "bulgarian split squat": "Bodyweight Squat", # closest single-leg in DB; tweak later + "good morning": "Good Morning", + "good mornings": "Good Morning", + "hip thrust": "Barbell Hip Thrust", + "hip thrusts": "Barbell Hip Thrust", + "calf raises": "Standing Calf Raises", + "calf raise": "Standing Calf Raises", + "skullcrushers": "EZ-Bar Skullcrusher", + "skull crushers": "EZ-Bar Skullcrusher", + "skull crusher": "EZ-Bar Skullcrusher", + "skullcrusher": "EZ-Bar Skullcrusher", +} + + +def _seed_exercise_aliases(conn) -> None: + """Refresh the seed-managed alias rows. Wipes existing seed rows and + rewrites them from the current `_EXERCISE_ALIAS_SEED` dict. Rows with + `source = 'user'` are never touched. + """ + conn.execute("DELETE FROM exercise_aliases WHERE source = 'seed'") + # Skip seed entries whose alias is already claimed by a user row — the + # user's choice wins, no surprise overwrite. 
+ user_aliases = {r[0] for r in conn.execute( + "SELECT alias FROM exercise_aliases WHERE source = 'user'" + ).fetchall()} + rows = [ + (a, c, "seed") + for a, c in _EXERCISE_ALIAS_SEED.items() + if a not in user_aliases + ] + conn.executemany( + "INSERT INTO exercise_aliases (alias, canonical, source) VALUES (?, ?, ?)", + rows, + ) + + +def resolve_exercise_alias(name: str) -> str | None: + """Return the canonical DB name for a slang/alias input, or None.""" + if not name: + return None + with get_db() as conn: + row = conn.execute( + "SELECT canonical FROM exercise_aliases WHERE alias = ?", + (name.strip().lower(),), + ).fetchone() + return row["canonical"] if row else None + + +def lookup_exercise(name: str) -> dict | None: + """Alias-aware Free-Exercise-DB lookup. Resolve user slang to a canonical + name first; then run the exercise_db matcher. Returns the slim entry + (name + primary/secondary muscles + equipment) or None. + """ + import exercise_db + if not name: + return None + canonical = resolve_exercise_alias(name) or name + return exercise_db.lookup(canonical) + + def _save_exercises(conn, workout_id: int, superset_groups: list[list[dict]]): """Insert superset groups and exercises for a workout.""" for group_pos, group in enumerate(superset_groups): diff --git a/exercise_db.py b/exercise_db.py index 0621ab2..3ad1b98 100644 --- a/exercise_db.py +++ b/exercise_db.py @@ -82,7 +82,19 @@ def _slim(entry: dict) -> dict: def lookup(name: str) -> Optional[dict]: - """Return the slim entry for the best name match, or None.""" + """Return the slim entry for the best name match, or None. + + Tiers (priority order): + 1. exact (case-insensitive) + 2. compressed exact — collapses hyphens/spaces ("Pull-ups" → "Pullups") + 3. word-boundary substring (only for multi-token inputs) + 4. token overlap requiring 100% coverage of the user's tokens + (only for multi-token inputs) + + Single-token inputs (e.g. 
"Bench", "Squat", "RDL", "OHP") that don't hit + tier 1 or 2 return None — there's no robust way to disambiguate without + an alias table. + """ if not name: return None needle = name.strip() @@ -93,48 +105,50 @@ def lookup(name: str) -> Optional[dict]: if not compressed: return None - # 1. Exact (case-insensitive) + # 1. Exact (case-insensitive). hit = _BY_LOWER_NAME.get(lower) if hit: return _slim(hit) - # 2. Compressed exact — catches "Pull-ups" → "Pullups", etc. + # 2. Compressed exact — "Pull-ups" → "Pullups". hit = _BY_COMPRESSED.get(compressed) if hit: return _slim(hit) - # 3. Compressed substring (either direction). - substring_candidates: list[dict] = [ - e for e, c in _COMPRESSED if compressed in c or c in compressed - ] + needle_toks = _tokens(needle) + # Below here, partial matches only — and only for multi-token inputs. + # A single token is too easily ambiguous (and short acronyms accidentally + # hit character-level substrings of unrelated names). + if len(needle_toks) < 2: + return None + + # 3. Word-boundary substring (lowercase). "bench press" in "bench press + # with chains" — yes. "rdl" in "hurdle" — no (no word break). + substring_candidates: list[dict] = [] + for entry in ALL: + n = entry.get("name", "") + if not n: + continue + nl = n.lower() + if lower in nl or nl in lower: + substring_candidates.append(entry) if substring_candidates: - # Single-token generics ("Bench", "Squat", "Deadlift") match too many - # specific DB entries. Refuse rather than confidently mislead the - # user — the planned alias table will handle these properly. - needle_toks = _tokens(needle) - if len(needle_toks) == 1 and len(substring_candidates) > 2: - return None + # Prefer the shortest DB name (most specific to the typed input). substring_candidates.sort(key=lambda e: len(e["name"])) return _slim(substring_candidates[0]) - # 4. Token overlap (Jaccard-ish). 
Require ≥1 shared token AND that the - # shared portion covers ≥50% of the user's tokens, so "row" doesn't - # match "single arm cable row machine" via one stop-token. - needle_toks = _tokens(needle) - if not needle_toks: - return None + # 4. Token overlap, 100% coverage of the user's tokens. So "BB Row" with + # tokens {bb, row} only matches a DB entry that contains both — it never + # silently latches onto a "row" entry that doesn't share the "bb" cue. best: tuple[float, dict] | None = None for entry, db_toks in _TOKENS: if not db_toks: continue - overlap = needle_toks & db_toks - if not overlap: + if not needle_toks <= db_toks: continue - coverage = len(overlap) / len(needle_toks) - if coverage < 0.5: - continue - # Score = coverage, tiebreak by DB-name length (shorter wins). - score = coverage - 0.001 * len(entry["name"]) + # All user tokens matched; tiebreak by DB-name length (shorter wins, + # i.e. the most specific variant). + score = -len(entry["name"]) if best is None or score > best[0]: best = (score, entry) diff --git a/tests/test_db.py b/tests/test_db.py index d770e8e..d84ca6d 100644 --- a/tests/test_db.py +++ b/tests/test_db.py @@ -253,6 +253,60 @@ class TestAllExerciseNames: assert db.get_all_exercise_names() == ["Apple", "Mango", "Zebra"] +# ── exercise aliases ───────────────────────────────────────────── + + +class TestExerciseAliases: + def test_seed_loaded(self, tmp_db): + assert db.resolve_exercise_alias("OHP") == "Standing Military Press" + assert db.resolve_exercise_alias("rdl") == "Romanian Deadlift" + assert db.resolve_exercise_alias("Bench") == "Barbell Bench Press - Medium Grip" + + def test_case_insensitive(self, tmp_db): + assert db.resolve_exercise_alias("ohp") == "Standing Military Press" + assert db.resolve_exercise_alias("OHP") == "Standing Military Press" + assert db.resolve_exercise_alias(" Squat ") == "Barbell Squat" + + def test_unknown_returns_none(self, tmp_db): + assert db.resolve_exercise_alias("not-a-thing") is None + assert 
db.resolve_exercise_alias("") is None + assert db.resolve_exercise_alias(None) is None + + def test_lookup_exercise_uses_aliases(self, tmp_db): + info = db.lookup_exercise("OHP") + assert info is not None + assert info["name"] == "Standing Military Press" + assert "shoulders" in info["primary_muscles"] + + def test_lookup_exercise_falls_through_when_no_alias(self, tmp_db): + # No alias for "Plank" → goes straight to exercise_db, which has it. + info = db.lookup_exercise("Plank") + assert info is not None + assert info["name"] == "Plank" + + def test_user_overrides_survive_init(self, tmp_db): + # A row inserted with source='user' is preserved across init_db calls; + # seed rows get refreshed but user rows don't. + with db.get_db() as conn: + conn.execute( + "INSERT OR REPLACE INTO exercise_aliases (alias, canonical, source) VALUES (?, ?, 'user')", + ("ohp", "Push Press"), + ) + db.init_db() # re-runs the seed + assert db.resolve_exercise_alias("ohp") == "Push Press" + + def test_seed_rows_refreshed_on_reinit(self, tmp_db): + # Manually corrupt a seed row → next init_db should rewrite it from + # the seed dict (without needing INSERT OR REPLACE acrobatics). + with db.get_db() as conn: + conn.execute( + "UPDATE exercise_aliases SET canonical = 'WRONG' WHERE alias = 'ohp'" + ) + assert db.resolve_exercise_alias("ohp") == "WRONG" + db.init_db() + assert db.resolve_exercise_alias("ohp") == "Standing Military Press" + + # ── get_last_exercise ──────────────────────────────────────────── diff --git a/tests/test_exercise_db.py b/tests/test_exercise_db.py index ac562dc..ca00209 100644 --- a/tests/test_exercise_db.py +++ b/tests/test_exercise_db.py @@ -44,6 +44,17 @@ class TestLookup: assert exercise_db.lookup("") is None assert exercise_db.lookup(" ") is None + def test_short_acronyms_dont_substring_match_characters(self): + # Regression: "RDL" used to match "Hurdle Hops" because "rdl" + # appears as a character substring inside "hu**rdl**ehops". 
+ assert exercise_db.lookup("RDL") is None + assert exercise_db.lookup("OHP") is None + + def test_multi_token_requires_full_coverage(self): + # Regression: "BB Row" used to match "Sled Row" because both share + # the "row" token. Strict 100% coverage prevents this. + assert exercise_db.lookup("BB Row") is None + def test_returned_shape(self): m = exercise_db.lookup("Pullups") # The slim view drops `instructions` and `images`.