# Source listing: 266 lines, 8.9 KiB, Python
import re
|
||
import json
|
||
|
||
# Hierarchy levels in nesting order: index 0 is the paragraph itself,
# index N (N >= 1) is the level used for an \item at list depth N.
_LEVEL_ORDER = ("par", "ust", "pkt", "ppkt", "lit", "plit", "tir", "lev7", "lev8")

# A '%' starts a comment unless it is escaped.  An even run of backslashes
# directly before it (e.g. the line break '\\') does NOT escape it, so that
# run is captured and put back while the comment itself is dropped.
_COMMENT_RE = re.compile(r'(?<!\\)((?:\\\\)*)%.*$')
_PARAGRAF_RE = re.compile(r'\\paragraf\{([^}]+)\}')
_BEGIN_ENUM_RE = re.compile(r'\\begin\{(long[a-z0-9]*enum|customenum)\}')
_END_ENUM_RE = re.compile(r'\\end\{(long[a-z0-9]*enum|customenum)\}')
# TeX control words consist of letters only, so '\item' followed by a letter
# is a different macro (\itemsep, \itemindent, ...) and must not count.
_ITEM_RE = re.compile(r'\\item(?![A-Za-z])')
_LABEL_RE = re.compile(r'\\label\{(itm:[^}]+)\}')


def _alpha_label(n):
    """Return the alphabetic label for a positive counter: 1 -> 'a', 26 -> 'z', 27 -> 'aa'."""
    letters = []
    while n > 0:
        n, remainder = divmod(n - 1, 26)
        letters.append(chr(ord("a") + remainder))
    return "".join(reversed(letters))


def process_labels_and_create_links(in_file="../main.tex", out_file="../main-struct.json"):
    """
    Parse a TeX file into a tree of paragraphs and nested list items and save it as JSON.

    Comments and blank lines are discarded first. The tree has nine levels:
      - par   (level 0, numbered as '\\S1')
      - ust   (list depth 1)
      - pkt   (list depth 2)
      - ppkt  (list depth 3)
      - lit   (list depth 4)
      - plit  (list depth 5)
      - tir   (list depth 6)
      - lev7  (list depth 7)
      - lev8  (list depth 8)
    An \\item nested deeper than 8 lists, outside any list, or before the first
    \\paragraf is skipped and reported as a warning on stdout.

    Every node written to the JSON file has the fields:
      - type: one of the level names above
      - numbering: e.g. '\\S1 ust. 2 pkt. 1 ppkt. 1 lit. a ...'
      - label: the 'itm:...' value of a \\label{itm:...} found on the same line,
        otherwise ""
      - text: the (comment-stripped, trimmed) source line
      - children: list of nested nodes

    Args:
        in_file: path of the TeX file to read (UTF-8).
        out_file: path of the JSON file to write (UTF-8).

    Returns:
        None. The result is written to ``out_file``.
    """
    # Keep the ORIGINAL line number next to each surviving line so that
    # warnings point at the real position in the input file.
    cleaned_content = []
    with open(in_file, 'r', encoding='utf-8') as file:
        for line_number, raw_line in enumerate(file, start=1):
            line = _COMMENT_RE.sub(r'\1', raw_line).strip()
            if line:
                cleaned_content.append((line_number, line))

    MAX_LIST_DEPTH = len(_LEVEL_ORDER) - 1  # deepest list nesting that has a level name

    counters = {level: 0 for level in _LEVEL_ORDER}
    label_tree = []   # top-level structure that gets serialised
    list_depth = 0    # number of currently open list environments
    errors = []

    def reset_counters(from_level):
        """Zero the counters of every level nested below ``from_level``."""
        for level in _LEVEL_ORDER[_LEVEL_ORDER.index(from_level) + 1:]:
            counters[level] = 0

    def get_current_numbering():
        """Build the numbering string from all counters that are currently > 0."""
        parts = []
        for level in _LEVEL_ORDER:
            value = counters[level]
            if value <= 0:
                continue
            if level == "par":
                parts.append(f"\\S{value}")
            elif level == "lit":
                parts.append(f"lit. {_alpha_label(value)}")
            elif level == "plit":
                parts.append(f"plit. {_alpha_label(value)})")
            else:
                parts.append(f"{level}. {value}")
        return " ".join(parts)

    def get_current_parent():
        """Return the node that should receive the next item, or None if there is no paragraph yet."""
        if not label_tree:
            return None
        parent = label_tree[-1]
        # Depth 1 attaches directly to the paragraph; each further level
        # descends into the most recently added child.
        for _ in range(list_depth - 1):
            if not parent["children"]:
                break
            parent = parent["children"][-1]
        return parent

    for line_number, line in cleaned_content:
        # --- \paragraf{...}: start a new top-level node ---
        if _PARAGRAF_RE.search(line):
            counters["par"] += 1
            reset_counters("par")
            label_tree.append({
                "type": "par",
                "numbering": get_current_numbering(),
                "label": "",
                "text": line,
                "children": [],
            })
            continue

        # --- \begin{...enum}: one list level deeper ---
        if _BEGIN_ENUM_RE.search(line):
            list_depth += 1
            continue

        # --- \end{...enum}: one list level up (unbalanced \end is ignored) ---
        if _END_ENUM_RE.search(line):
            if list_depth > 0:
                list_depth -= 1
            continue

        # --- \item ---
        if not _ITEM_RE.search(line):
            continue

        if list_depth == 0:
            errors.append(f"[Linia {line_number}] \\item poza listą. Ignoruję.")
            continue
        if list_depth > MAX_LIST_DEPTH:
            errors.append(
                f"[Linia {line_number}] Zbyt głębokie zagnieżdżenie (>{MAX_LIST_DEPTH}). Pomijam \\item."
            )
            continue

        level_type = _LEVEL_ORDER[list_depth]
        counters[level_type] += 1
        reset_counters(level_type)

        parent = get_current_parent()
        if parent is None:
            # Previously such items vanished without any trace.
            errors.append(f"[Linia {line_number}] \\item przed pierwszym \\paragraf. Ignoruję.")
            continue

        label_match = _LABEL_RE.search(line)
        parent["children"].append({
            "type": level_type,
            "numbering": get_current_numbering(),
            "label": label_match.group(1) if label_match else "",
            "text": line,
            "children": [],
        })

    # Save the tree as JSON
    with open(out_file, "w", encoding="utf-8") as f:
        json.dump(label_tree, f, ensure_ascii=False, indent=2)

    if errors:
        print("== OSTRZEŻENIA / KOMUNIKATY ==")
        for err in errors:
            print("•", err)
|
||
|
||
|
||
if __name__ == "__main__":
    # Command-line entry point: up to two optional positional arguments,
    # the input TeX path followed by the output JSON path.
    import sys

    cli_args = sys.argv[1:]
    if len(cli_args) > 2:
        print("Użycie: python script.py [plik_wejściowy] [plik_wyjściowy]")
    else:
        # Pair the supplied positionals with their keyword names; anything
        # not supplied falls back to the function's own default.
        keyword_names = ("in_file", "out_file")
        process_labels_and_create_links(**dict(zip(keyword_names, cli_args)))
|
||
|