diff --git a/notebooks/html-parse.ipynb b/notebooks/html-parse.ipynb
new file mode 100644
index 0000000..af5a7c4
--- /dev/null
+++ b/notebooks/html-parse.ipynb
@@ -0,0 +1,378 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "23283830-032b-484a-b599-66ba8e7c2001",
+ "metadata": {},
+ "source": [
+ "# HTML parse\n",
+ "\n",
+ "in this notebook:\n",
+ "* prepare jupyter environment\n",
+ "* download [cable bible](https://amiaopensource.github.io/cable-bible/) index.html file\n",
+ "* parse html"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ae76266b-b137-43ec-80f7-3f2c992215d9",
+ "metadata": {
+ "jp-MarkdownHeadingCollapsed": true,
+ "tags": []
+ },
+ "source": [
+ "## prepare jupyter environment"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "6360c8e3-8958-41c5-938a-f713af2ae715",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Note: you may need to restart the kernel to use updated packages.\n",
+ "Note: you may need to restart the kernel to use updated packages.\n"
+ ]
+ },
+ {
+ "data": {
+ "application/javascript": [
+ "if (!(\"Notification\" in window)) {\n",
+ " alert(\"This browser does not support desktop notifications, so the %%notify magic will not work.\");\n",
+ "} else if (Notification.permission !== 'granted' && Notification.permission !== 'denied') {\n",
+ " Notification.requestPermission(function (permission) {\n",
+ " if(!('permission' in Notification)) {\n",
+ " Notification.permission = permission;\n",
+ " }\n",
+ " })\n",
+ "}\n",
+ "\n",
+ "if(!window.jQuery) {\n",
+ " var jq = document.createElement('script');\n",
+ " jq.src = \"//ajax.googleapis.com/ajax/libs/jquery/2.1.4/jquery.min.js\";\n",
+ " document.getElementsByTagName('head')[0].appendChild(jq);\n",
+ "}\n",
+ "\n",
+ "// Detect if the window is out of focus.\n",
+ "window.jupyterNotifyIsInBackground = undefined;\n",
+ "(function() {\n",
+ " // Check document.hidden support\n",
+ " var hidden;\n",
+ " if (typeof document.hidden !== \"undefined\") { // Opera 12.10 and Firefox 18 and later support\n",
+ " hidden = \"hidden\";\n",
+ " } else if (typeof document.msHidden !== \"undefined\") {\n",
+ " hidden = \"msHidden\";\n",
+ " } else if (typeof document.webkitHidden !== \"undefined\") {\n",
+ " hidden = \"webkitHidden\";\n",
+ " }\n",
+ "\n",
+ " // Set initial background state\n",
+ " if (document[hidden]) {\n",
+ " window.jupyterNotifyIsInBackground = true;\n",
+ " } else {\n",
+ " window.jupyterNotifyIsInBackground = false;\n",
+ " }\n",
+ "\n",
+ " window.addEventListener('blur', function() { window.jupyterNotifyIsInBackground = true; }, false);\n",
+ " window.addEventListener('focus', function() { window.jupyterNotifyIsInBackground = false; }, false);\n",
+ "})();\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "%pip install jupyterlab-vim -q\n",
+ "%pip install git+https://github.com/cphyc/jupyter-notify.git -q\n",
+ "#%reload_ext jupyternotify\n",
+ "%load_ext jupyternotify"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "d5946431-9f33-4caa-835b-ab09fb73d0e2",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Note: you may need to restart the kernel to use updated packages.\n"
+ ]
+ }
+ ],
+ "source": [
+ "%pip install pandas -q \\\n",
+ " html5lib -q \\\n",
+ " beautifulsoup4 -q"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "bcbc54b8-7e77-446c-bb9d-4753bb61bdf9",
+ "metadata": {
+ "jp-MarkdownHeadingCollapsed": true,
+ "tags": []
+ },
+ "source": [
+ "## download cable bible"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "c245e2c1-a5f1-4fa2-9037-7e1773208854",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "mkdir: cannot create directory ‘/tmp/scrape-demo’: File exists\n",
+ " % Total % Received % Xferd Average Speed Time Time Time Current\n",
+ " Dload Upload Total Spent Left Speed\n",
+ "100 150k 100 150k 0 0 1089k 0 --:--:-- --:--:-- --:--:-- 1089k\n",
+ "\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "!mkdir /tmp/scrape-demo\n",
+ "!curl \\\n",
+ " https://raw.githubusercontent.com/amiaopensource/cable-bible/master/index.html \\\n",
+ " -o /tmp/scrape-demo/index.html\n",
+ "!head -2 /tmp/scrape-demo/index.html\n",
+ "#!cp /tmp/scrape-demo/index.html .\n",
+ "#!code ."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2e7e4c6d-6d1b-44fa-8527-dd847cd63895",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "## parse html"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "b28f8b7c-05c0-462f-93ab-10ff0b35176f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from pathlib import Path\n",
+ "from bs4 import BeautifulSoup\n",
+ "html = Path('/tmp/scrape-demo/index.html').read_text()\n",
+ "soup = BeautifulSoup(html, 'html.parser')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "81767551-c9b6-4e98-80cf-1fc49b3a4262",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "central_div = soup.find(\"div\", {\"class\": \"well col-md-8 col-md-offset-0\"})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "e134d6be-e32a-4cd7-91da-9ba359870e3f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "dict_keys(['Video', 'Audio', 'Data', 'Power'])"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "a = {\n",
+ " e.h2.string: e for e\n",
+ " in central_div.find_all('div', id=lambda x: x != 'table_of_contents')\n",
+ " if e.h2 is not None}\n",
+ "a.keys()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "80ba6ebb-d679-4767-8917-348fe441f185",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "dict_keys(['Analog Video', 'Digital Video', 'Integrated Video'])"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "b = {e.h3.string: e for e in\n",
+ " a['Video'].find_all(\"div\", {\"class\": \"well\"})\n",
+ " if e.find_all('h3', id=lambda x: x is not None)}\n",
+ "b.keys()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "d414e989-ba00-4a60-a357-571286f36276",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "dict_keys(['composite', 'component_ypbpr', 's-video', 'yc-688', 'rgbs', 'rgbvh'])"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "c = {\n",
+ " e.h4['id']:e for e in\n",
+ " b['Analog Video'].find_all(\"div\", {\"class\": \"well\"})\n",
+ " if e.h4 is not None}\n",
+ "c.keys()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "97b1f814-c16e-42d3-96ff-b2faa27d5830",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "dict_keys(['Composite RCA', 'Composite BNC', 'Composite UHF', 'Composite F-Type', 'Composite Video Patch (MUSA)', 'Composite 8-pin EIAJ', 'Composite SCART'])"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "d = {e.h3.string: e.p.string for e\n",
+ " in c['composite'].find_all(\"div\", {\"class\": \"modal fade\"})}\n",
+ "d.keys()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "2fca9ace-a92f-4eda-adbe-d20c845f2cd5",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "application/javascript": [
+ "$(document).ready(\n",
+ " function() {\n",
+ " function appendUniqueDiv(){\n",
+ " // append a div with our uuid so we can check that it's already\n",
+ " // been sent and avoid duplicates on page reload\n",
+ " var notifiedDiv = document.createElement(\"div\")\n",
+ " notifiedDiv.id = \"53b8a038-c6fd-4ea1-b291-5403fb3b742d\"\n",
+ " element.append(notifiedDiv)\n",
+ " }\n",
+ "\n",
+ " // only send notifications if the pageload is complete; this will\n",
+ " // help stop extra notifications when a saved notebook is loaded,\n",
+ " // which during testing gives us state \"interactive\", not \"complete\"\n",
+ " if (document.readyState === 'complete') {\n",
+ " // check for the div that signifies that the notification\n",
+ " // was already sent\n",
+ " if (document.getElementById(\"53b8a038-c6fd-4ea1-b291-5403fb3b742d\") === null) {\n",
+ " var notificationPayload = {\"requireInteraction\": false, \"icon\": \"/static/base/images/favicon.ico\", \"body\": \"Cell execution has finished!\", \"only_in_background\": false};\n",
+ "\n",
+ " // We have a notification but the window is active\n",
+ " if (notificationPayload.only_in_background && !window.jupyterNotifyIsInBackground) {\n",
+ " appendUniqueDiv();\n",
+ " return;\n",
+ " }\n",
+ " if (Notification.permission !== 'denied') {\n",
+ " if (Notification.permission !== 'granted') { \n",
+ " Notification.requestPermission(function (permission) {\n",
+ " if(!('permission' in Notification)) {\n",
+ " Notification.permission = permission\n",
+ " }\n",
+ " })\n",
+ " }\n",
+ " if (Notification.permission === 'granted') {\n",
+ " var notification = new Notification(\"Jupyter Notebook\", notificationPayload)\n",
+ " appendUniqueDiv()\n",
+ " notification.onclick = function () {\n",
+ " window.focus();\n",
+ " this.close();\n",
+ " };\n",
+ " } \n",
+ " } \n",
+ " }\n",
+ " }\n",
+ " }\n",
+ ")\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "%notify"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.10"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}