feat: bundle Free-Exercise-DB + name matcher (step 1)
Adds the static exercise reference data (~870 entries, public
domain, source: github.com/yuhonas/free-exercise-db) plus a
conservative name matcher. New endpoint:
GET /api/exercises/lookup?name=<name>
→ {"match": {"name", "primary_muscles", "secondary_muscles",
"equipment", "category", "level", ...}}
→ {"match": null} when nothing plausibly matches.
Matcher tiers (priority order):
1. exact (case-insensitive)
2. compressed exact ("Pull-ups" → "Pullups")
3. compressed substring, with a guard: single-token generics
like "Bench"/"Squat" return null instead of misleading the
user — the planned alias table will handle these properly.
4. token-overlap with ≥50% coverage of the user's tokens.
UI integration ("Trains: chest · shoulders") comes in step 2.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
9e50686983
commit
ebd0016a62
4 changed files with 22824 additions and 0 deletions
22617
data/exercises.json
Normal file
22617
data/exercises.json
Normal file
File diff suppressed because it is too large
Load diff
141
exercise_db.py
Normal file
141
exercise_db.py
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
"""Static exercise reference data from the Free-Exercise-DB.
|
||||
|
||||
Source: https://github.com/yuhonas/free-exercise-db (public domain).
|
||||
Bundled at data/exercises.json (~870 entries). Loaded once at import.
|
||||
|
||||
Exports:
|
||||
- `lookup(name)` — best-effort fuzzy name match → dict with primary/secondary
|
||||
muscles, equipment, etc. Returns None if no plausible match.
|
||||
- `ALL` — the raw list (for ad-hoc queries).
|
||||
|
||||
Matching, in priority order:
|
||||
1. exact case-insensitive name match
|
||||
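2. compressed match (case, spaces and punctuation ignored): exact first, then substring either way — an ambiguous single-word query returns None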
|
||||
3. token overlap covering at least 50% of the query's tokens
|
||||
|
||||
Keep this conservative — a wrong match is worse than no match for the user.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import pathlib
|
||||
import re
|
||||
from typing import Optional
|
||||
|
||||
# Location of the bundled dataset: <directory of this module>/data/exercises.json
_DATA_PATH = pathlib.Path(__file__).parent.joinpath("data", "exercises.json")
|
||||
|
||||
|
||||
def _load(path: Optional[pathlib.Path] = None) -> list[dict]:
    """Read the exercise list from a JSON file.

    Args:
        path: JSON file to read. When omitted, the bundled ``_DATA_PATH``
            is used.

    Returns:
        The parsed list of exercise entries. An empty list is returned when
        the file is missing or unreadable, is not valid UTF-8 JSON, or does
        not contain a JSON array. Failures are logged instead of raised so
        that importing this module does not fail; lookups then simply find
        nothing.
    """
    import logging  # local import: this module has no file-level logging import

    source = _DATA_PATH if path is None else path
    log = logging.getLogger(__name__)
    try:
        # Read as UTF-8 explicitly rather than relying on the platform's
        # default text encoding.
        with source.open(encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError) as exc:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        log.warning("Could not load exercise data from %s: %s", source, exc)
        return []
    if not isinstance(data, list):
        # Everything built on top of this iterates a list of entry dicts.
        log.warning(
            "Exercise data in %s is a %s, expected a list",
            source,
            type(data).__name__,
        )
        return []
    return data
|
||||
|
||||
|
||||
# Every bundled entry, read once when the module is imported.
ALL: list[dict] = _load()

# Lowercased name → entry, used for the case-insensitive exact lookup.
# Entries without a name are left out.
_BY_LOWER_NAME: dict[str, dict] = {
    entry["name"].lower(): entry
    for entry in ALL
    if entry.get("name")
}
|
||||
|
||||
|
||||
# A token is a run of lowercase ASCII letters and digits.
_TOKEN_RE = re.compile(r"[a-z0-9]+")
# Any single character that is not a lowercase ASCII letter or digit.
_NON_ALNUM = re.compile(r"[^a-z0-9]")


def _tokens(s: str) -> set[str]:
    """Return the distinct lowercase alphanumeric words found in *s*."""
    lowered = s.lower()
    return {found.group(0) for found in _TOKEN_RE.finditer(lowered)}


def _compress(s: str) -> str:
    """Lowercase *s* and strip every non-alphanumeric character.

    "Pull-Ups", "Pull Ups" and "Pullups" all become "pullups".
    """
    lowered = s.lower()
    return _NON_ALNUM.sub("", lowered)
|
||||
|
||||
|
||||
# Derived indexes over every named entry, built a single time at import.
# (entry, set of name tokens) pairs for the token-overlap tier.
_TOKENS: list[tuple[dict, set[str]]] = [
    (entry, _tokens(entry["name"])) for entry in ALL if entry.get("name")
]
# (entry, compressed name) pairs for the substring tier.
_COMPRESSED: list[tuple[dict, str]] = [
    (entry, _compress(entry["name"])) for entry in ALL if entry.get("name")
]
# Compressed name → entry for the compressed-exact tier. Built from the same
# pairs in the same order, so a later duplicate key replaces an earlier one.
_BY_COMPRESSED: dict[str, dict] = {
    squeezed: entry for entry, squeezed in _COMPRESSED
}
|
||||
|
||||
|
||||
# Public-facing slim shape — drop instructions/images for now (heavy).
|
||||
def _slim(entry: dict) -> dict:
    """Project a raw DB entry onto the compact shape returned to callers.

    The two muscle lists are renamed to snake_case and default to an empty
    list when missing or empty; the remaining fields are copied under their
    original key and are None when absent. Any other key of *entry* is
    left out.
    """
    slim = {"name": entry.get("name")}
    renamed = (
        ("primary_muscles", "primaryMuscles"),
        ("secondary_muscles", "secondaryMuscles"),
    )
    for out_key, src_key in renamed:
        slim[out_key] = entry.get(src_key) or []
    for key in ("equipment", "category", "level", "force", "mechanic"):
        slim[key] = entry.get(key)
    return slim
|
||||
|
||||
|
||||
def lookup(name: str) -> Optional[dict]:
    """Return the slim entry for the best name match, or None.

    Tiers are tried in order and the first one that produces a result wins:

    1. Exact match on the lowercased, stripped name.
    2. Exact match on the compressed form (lowercase alphanumerics only),
       so "Pull-ups" finds "Pullups".
    3. Compressed substring in either direction. A single-token query with
       more than two candidates is treated as ambiguous and yields None;
       otherwise the candidate with the shortest DB name is returned.
    4. Token overlap: at least half of the query's tokens must appear in
       the DB name. Highest coverage wins; ties go to the shorter DB name.

    Args:
        name: Exercise name as typed by the user.

    Returns:
        The slim dict for the matched entry, or None when *name* is empty,
        contains no ASCII letters or digits, or nothing plausibly matches.
    """
    if not name:
        return None
    needle = name.strip()
    compressed = _compress(needle)
    # Also covers a whitespace-only name: it compresses to "".
    if not compressed:
        return None

    # 1. Exact (case-insensitive)
    hit = _BY_LOWER_NAME.get(needle.lower())
    if hit:
        return _slim(hit)

    # 2. Compressed exact — catches "Pull-ups" → "Pullups", etc.
    hit = _BY_COMPRESSED.get(compressed)
    if hit:
        return _slim(hit)

    # Never empty here: a non-empty compressed form implies at least one token.
    needle_toks = _tokens(needle)

    # 3. Compressed substring (either direction). A DB name that compresses
    # to "" is skipped: the empty string is a substring of every query and
    # would otherwise always count as a candidate.
    substring_candidates: list[dict] = [
        entry
        for entry, db_compressed in _COMPRESSED
        if db_compressed
        and (compressed in db_compressed or db_compressed in compressed)
    ]
    if substring_candidates:
        # Single-token generics ("Bench", "Squat", "Deadlift") match too many
        # specific DB entries. Refuse rather than confidently mislead the
        # user — the planned alias table will handle these properly.
        if len(needle_toks) == 1 and len(substring_candidates) > 2:
            return None
        # Shortest DB name wins; min() keeps the first entry among equals.
        return _slim(min(substring_candidates, key=lambda e: len(e["name"])))

    # 4. Token overlap. Require that the shared tokens cover ≥50% of the
    # user's tokens, so one incidental shared word in a longer query is not
    # enough to produce a match.
    best_key: Optional[tuple] = None
    best_entry: Optional[dict] = None
    for entry, db_toks in _TOKENS:
        shared = len(needle_toks & db_toks)
        if not shared:
            continue
        coverage = shared / len(needle_toks)
        if coverage < 0.5:
            continue
        # Compare coverage first and use name length only to break ties
        # (shorter wins), so a long name can never outrank a better-covered
        # one.
        key = (coverage, -len(entry["name"]))
        if best_key is None or key > best_key:
            best_key, best_entry = key, entry

    return _slim(best_entry) if best_entry is not None else None
|
||||
11
server.py
11
server.py
|
|
@ -18,6 +18,7 @@ from urllib.parse import parse_qs
|
|||
from aiohttp import web
|
||||
|
||||
from db import init_db, save_workout, get_workouts, get_workout_count, get_stats_sql, delete_workout, update_workout, export_workouts, get_user_workout_number, get_all_exercise_names, log_event, get_settings, update_settings, get_last_exercise
|
||||
import exercise_db
|
||||
from parser import parse_workout, format_workout
|
||||
|
||||
logging.basicConfig(
|
||||
|
|
@ -293,6 +294,15 @@ async def api_get_last_exercise(request: web.Request):
|
|||
return web.json_response({"last": last})
|
||||
|
||||
|
||||
@require_auth
async def api_lookup_exercise(request: web.Request):
    """Resolve the ``name`` query parameter against the static exercise data.

    Responds with ``{"match": <entry or null>}``, or with HTTP 400 and
    ``{"error": "Missing name"}`` when ``name`` is absent or blank.
    """
    raw_name = request.query.get("name", "")
    query = raw_name.strip()
    if query:
        return web.json_response({"match": exercise_db.lookup(query)})
    return web.json_response({"error": "Missing name"}, status=400)
|
||||
|
||||
|
||||
@require_auth
|
||||
async def api_get_stats(request: web.Request):
|
||||
"""Return summary stats for the user."""
|
||||
|
|
@ -377,6 +387,7 @@ def create_app() -> web.Application:
|
|||
app.router.add_delete("/api/workouts/{workout_id}", api_delete_workout)
|
||||
app.router.add_get("/api/exercises", api_get_exercise_names)
|
||||
app.router.add_get("/api/exercises/last", api_get_last_exercise)
|
||||
app.router.add_get("/api/exercises/lookup", api_lookup_exercise)
|
||||
app.router.add_get("/api/stats", api_get_stats)
|
||||
app.router.add_get("/api/export/json", api_export_json)
|
||||
app.router.add_get("/api/export/csv", api_export_csv)
|
||||
|
|
|
|||
55
tests/test_exercise_db.py
Normal file
55
tests/test_exercise_db.py
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
"""Tests for the static exercise reference matcher."""
|
||||
import exercise_db
|
||||
|
||||
|
||||
class TestLookup:
    """Behavioural checks for ``exercise_db.lookup`` against the bundled data."""

    def test_loads_bundled_data(self):
        # Free-Exercise-DB ships ~870 entries; just sanity-check it's non-empty.
        assert len(exercise_db.ALL) > 500

    def test_exact_match(self):
        m = exercise_db.lookup("Pullups")
        assert m is not None
        assert m["name"] == "Pullups"
        assert "lats" in m["primary_muscles"]

    def test_case_insensitive(self):
        a = exercise_db.lookup("PULLUPS")
        b = exercise_db.lookup("pullups")
        # Guard first so a miss fails as an assertion, not a TypeError on None.
        assert a is not None
        assert a == b
        assert a["name"] == "Pullups"

    def test_hyphen_matches_compressed(self):
        # User types "Pull-ups", DB has "Pullups". Compressed form catches it.
        m = exercise_db.lookup("Pull-ups")
        assert m is not None
        assert m["name"] == "Pullups"

    def test_multi_word_substring(self):
        m = exercise_db.lookup("Romanian deadlift")
        assert m is not None
        assert m["name"] == "Romanian Deadlift"
        assert "hamstrings" in m["primary_muscles"]

    def test_ambiguous_single_token_returns_none(self):
        # Lots of DB entries contain "Bench" / "Squat" / "Deadlift" as one
        # token. Returning the shortest would mislead ("Bench" → "Bench Dips"
        # → triceps). Refuse instead.
        assert exercise_db.lookup("Bench") is None
        assert exercise_db.lookup("Squat") is None
        assert exercise_db.lookup("Deadlift") is None

    def test_nonsense_returns_none(self):
        assert exercise_db.lookup("flarbenstompf") is None
        assert exercise_db.lookup("") is None
        assert exercise_db.lookup(" ") is None

    def test_returned_shape(self):
        m = exercise_db.lookup("Pullups")
        # Guard first so a miss fails as an assertion, not an AttributeError.
        assert m is not None
        # The slim view drops `instructions` and `images`.
        assert set(m.keys()) >= {
            "name", "primary_muscles", "secondary_muscles",
            "equipment", "category", "level",
        }
        assert "instructions" not in m
        assert "images" not in m
|
||||
Loading…
Add table
Add a link
Reference in a new issue