#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0-or-later
# SPDX-FileCopyrightText: 2024-2025 Yamada, M.
# --------------------------------------------------------------
# miz_parser.py -- Mizar dependency-graph visualizer
# --------------------------------------------------------------
# theorem          : yellow   (Th…)
# auto-theorem     : lavender (Th_auto…)
# definition / auto: teal     (Def… / Def_auto…)
# lemma            : coral    (Lm…)
# external article : grey     (names listed in the environ section)
# --------------------------------------------------------------
from __future__ import annotations
import re
import sys
from pathlib import Path
from typing import Iterable

import networkx as nx
from pyvis.network import Network


# ─── color definitions ──────────────────────────────────────
COL_TH = "#ffc107"
COL_AUTO = "#b39ddb"
COL_DEF = "#17a2b8"
COL_LM = "#ff7f50"
COL_EXT = "#9e9e9e"

# ─── basic regular expressions ──────────────────────────────
COMMENT_RE = re.compile(r"::.*?$", re.M)
THEOREM_RE = re.compile(r"\btheorem\b(?:\s+([A-Za-z0-9_]+)\s*:)?", re.I)
DEF_HDR_RE = re.compile(r"\bdefinition\b(?:\s+([A-Za-z0-9_]+)\s*:)?", re.I)

# Def/Lm labels at the start of a line (":Def1:" or "Def1:")
INLINE_HEAD_RE = re.compile(r"^[ \t]*:?\s*((?:Def|Lm)\d+)\s*:", re.I | re.M)

# :Def1: / :Lm3: etc. embedded in the middle of a line
INLINE_INLINE_RE = re.compile(r":\s*((?:Def|Lm)\d+)\s*:", re.I)

BY_RE = re.compile(r"\bby\s+([^;]+);", re.I | re.S)
FROM_RE = re.compile(r"\bfrom\s+([^;]+);", re.I | re.S)

# treat "article:keyword number" as a single token as well
LABEL_RE = re.compile(
    r"[A-Z][A-Za-z0-9_]*:[A-Za-z0-9_]+\s*\d+"  # e.g. TARSKI:def 1
    r"|[A-Z][A-Za-z0-9_]*"                     # bare article name / internal label
)

FOLD_RE = re.compile(r"^[A-Z](\d+)?$")  # A, A1, ... (local labels, absorbed)
INTERNAL_RE = re.compile(r"^(?:Th\d+|Th_auto\d+|Def\d+|Def_auto\d+|Lm\d+)$")
EXT_ART_RE = re.compile(r"^[A-Z][A-Z0-9_]*$")
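

# ─── illustration: how a "by" clause is tokenized ────────────
# A minimal sketch, not used by the parser itself; the justification
# string below is a made-up example, not taken from any real article.
def _demo_reference_tokens() -> None:
    sample = "thus contradiction by TARSKI:def 1, XBOOLE_0:7, Th3;"
    for seg in BY_RE.findall(sample):    # -> "TARSKI:def 1, XBOOLE_0:7, Th3"
        # LABEL_RE keeps "TARSKI:def 1" as one token but reduces
        # "XBOOLE_0:7" to the bare article name "XBOOLE_0"; parse_mizar()
        # keeps only the article part of external references anyway.
        print(LABEL_RE.findall(seg))     # ['TARSKI:def 1', 'XBOOLE_0', 'Th3']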


# ─── extract external articles from the environ section ─────
def externals(txt: str) -> set[str]:
    m = re.search(r"\benviron\b(.*?)(?:\bbegin\b)", txt, re.I | re.S)
    return set() if not m else set(re.findall(r"[A-Z][A-Z0-9_]*", m[1]))


# ─── convert one .miz file into a dependency graph ──────────
def parse_mizar(path: Path) -> nx.DiGraph:
    src = COMMENT_RE.sub("", path.read_text(encoding="utf-8", errors="ignore"))
    ext_art = externals(src)

    # ── theorems: record explicit numbers so auto numbering never collides ──
    explicit_nums, th_matches = set(), []
    for m in THEOREM_RE.finditer(src):
        th_matches.append(m)
        if m.group(1) and m.group(1).startswith("Th") and m.group(1)[2:].isdigit():
            explicit_nums.add(int(m.group(1)[2:]))

    entries, inline_taken = [], set()
    th_auto_i = def_auto_i = 1

    # ── register theorem blocks ──
    for m in th_matches:
        if m.group(1):                         # explicit label
            lbl, kind = m.group(1), "th"
        else:                                  # automatic numbering
            while th_auto_i in explicit_nums:
                th_auto_i += 1
            lbl, kind = f"Th_auto{th_auto_i}", "th_auto"
            th_auto_i += 1
        entries.append(dict(start=m.start(), end=m.end(), label=lbl, kind=kind))

    # ── process definition headers ──
    for m in DEF_HDR_RE.finditer(src):
        hdr_s, hdr_e = m.span()
        # block end = just before the next theorem / definition header
        nxt_th = THEOREM_RE.search(src, hdr_e)
        nxt_def = DEF_HDR_RE.search(src, hdr_e + 1)
        blk_end = min(nxt_th.start() if nxt_th else len(src),
                      nxt_def.start() if nxt_def else len(src))

        # --- line-head labels ---
        for im in INLINE_HEAD_RE.finditer(src[hdr_e:blk_end]):
            abs_pos = hdr_e + im.start()
            lbl = im.group(1)
            inline_taken.add(abs_pos)
            kind = "lemma" if lbl.lower().startswith("lm") else "def"
            entries.append(dict(start=abs_pos, end=abs_pos, label=lbl, kind=kind))

        # --- mid-line labels ---
        for jm in INLINE_INLINE_RE.finditer(src[hdr_e:blk_end]):
            abs_pos = hdr_e + jm.start()
            if abs_pos in inline_taken:        # already taken as a line-head label
                continue
            lbl = jm.group(1)
            inline_taken.add(abs_pos)
            kind = "lemma" if lbl.lower().startswith("lm") else "def"
            entries.append(dict(start=abs_pos, end=abs_pos, label=lbl, kind=kind))

        # assign an auto label if the block contributed no label of its own
        if not any(hdr_s <= e["start"] < blk_end for e in entries):
            lbl = f"Def_auto{def_auto_i}"
            def_auto_i += 1
            entries.append(dict(start=hdr_s, end=hdr_e, label=lbl, kind="def"))

    # ── line-head Def/Lm elsewhere in the file (not yet registered) ──
    for m in INLINE_HEAD_RE.finditer(src):
        if m.start() in inline_taken:
            continue
        lbl = m.group(1)
        kind = "lemma" if lbl.lower().startswith("lm") else "def"
        entries.append(dict(start=m.start(), end=m.end(), label=lbl, kind=kind))
        inline_taken.add(m.start())

    # ── mid-line :DefN: / :LmN: (not yet registered) ──
    for m in INLINE_INLINE_RE.finditer(src):
        if m.start() in inline_taken:
            continue
        lbl = m.group(1)
        kind = "lemma" if lbl.lower().startswith("lm") else "def"
        entries.append(dict(start=m.start(), end=m.end(), label=lbl, kind=kind))

    # ── sort by position & collect the set of internal labels ──
    entries.sort(key=lambda d: d["start"])
    internal_labels = {e["label"] for e in entries}

    # ── graph nodes ──
    G = nx.DiGraph()
    color_of = {"th": COL_TH, "th_auto": COL_AUTO, "def": COL_DEF, "lemma": COL_LM}
    for e in entries:
        G.add_node(e["label"], color=color_of[e["kind"]], label=e["label"])

    # ── reference-edge analysis ──
    for idx, blk in enumerate(entries):
        parent = blk["label"]
        body = src[
            blk["end"]:
            entries[idx + 1]["start"] if idx + 1 < len(entries) else len(src)
        ]

        refs: set[str] = set()
        for seg in BY_RE.findall(body) + FROM_RE.findall(body):
            for tok in LABEL_RE.findall(seg):
                tok = tok.rstrip()
                if FOLD_RE.fullmatch(tok):     # local labels (A, A1, ...)
                    continue

                # ------------ internal labels ------------
                if tok in internal_labels and INTERNAL_RE.fullmatch(tok):
                    refs.add(tok)
                    continue

                # ------------ external references ------------
                art = tok.split(":")[0]        # "XBOOLE_0:7" -> "XBOOLE_0"
                if art in ext_art:
                    refs.add(art)              # drop the number, keep the article name
                    continue

        refs.discard(parent)
        for r in refs:
            if r not in G:
                col = COL_EXT if r in ext_art else COL_TH
                G.add_node(r, color=col, label=r)
            G.add_edge(r, parent)

    return G
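

# ─── usage sketch (illustration only, not called by the CLI) ─
# A minimal, self-contained example of running parse_mizar() on a tiny
# synthetic article; the snippet is an assumption made up for illustration,
# not real MML content.
def _demo_parse_tiny_article() -> None:
    import tempfile

    snippet = (
        "environ vocabularies TARSKI; begin\n"
        "theorem Th1: 1 = 1 by TARSKI:def 1;\n"
    )
    with tempfile.NamedTemporaryFile(
        "w", suffix=".miz", delete=False, encoding="utf-8"
    ) as fh:
        fh.write(snippet)
        tmp = Path(fh.name)
    try:
        g = parse_mizar(tmp)
        # expected: internal node "Th1", external node "TARSKI",
        # and one edge TARSKI -> Th1
        print(sorted(g.nodes), sorted(g.edges))
    finally:
        tmp.unlink()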


# ─── visualization (HTML) ───────────────────────────────────
def visualize(G: nx.DiGraph, outfile: Path) -> None:
    net = Network(height="800px", width="100%", directed=True, notebook=False)
    net.from_nx(G)
    net.barnes_hut(gravity=-8000, central_gravity=0.5)

    # scale every node by its degree so heavily referenced items stand out
    for n in net.nodes:
        n.update({
            "value": max(nx.degree(G, n["id"]), 1),
            "shape": "circle",
            "font": {"vadjust": 0},
        })

    net.show(str(outfile))
    print(f"★ generated: {outfile}")


# ─── CLI ──────────────────────────────────────────────────────
def main(files: Iterable[str]) -> None:
    paths = [Path(p) for p in files]
    if not paths:
        print("usage: python miz_parser.py <file1.miz> [file2.miz ...]")
        sys.exit(1)

    for p in paths:
        if not p.exists():
            print(f"※ file not found: {p}")
            continue
        graph = parse_mizar(p)
        visualize(graph, p.with_stem(p.stem + "_graph").with_suffix(".html"))


if __name__ == "__main__":
    main(sys.argv[1:])