2025-03-30 14:21:21 +02:00
|
|
|
|
import re
|
|
|
|
|
import json
|
|
|
|
|
|
|
|
|
|
def process_labels_and_create_links(in_file="../main.tex", out_file="../main-struct.json"):
    r"""Parse a TeX file into a JSON tree of paragraphs and nested list items.

    Comments (an unescaped ``%`` up to the end of the line) and blank lines
    are dropped. Every remaining line is then classified, in this order:

    * ``\paragraf{...}``: opens a new top-level ``par`` node and resets all
      list counters;
    * ``\begin{long...enum}`` / ``\begin{customenum}``: one list level deeper;
    * ``\end{long...enum}`` / ``\end{customenum}``: one list level shallower;
    * ``\item``: a node whose type follows from the current list depth
      (1 ``ust``, 2 ``pkt``, 3 ``ppkt``, 4 ``lit``, 5 ``plit``, 6 ``tir``,
      7 ``lev7``, 8 ``lev8``). Items nested deeper than 8 are skipped with a
      warning.

    Each node written to the JSON file has the fields:

    * ``type``: one of the level names above;
    * ``numbering``: the non-zero counters joined with spaces, e.g.
      ``\S1 ust. 2 pkt. 3 ppkt. 1) lit. a plit. a) tir. 1 lev7. 2 lev8. 1``;
    * ``label``: the ``itm:...`` key of a ``\label{itm:...}`` found on the
      same line, otherwise ``""``;
    * ``text``: the stripped source line;
    * ``children``: nested nodes.

    Warnings are collected and printed to stdout after the JSON file has been
    written. Their line numbers refer to lines of ``in_file``.

    Args:
        in_file: Path of the TeX source, read as UTF-8.
        out_file: Path of the JSON file to write (UTF-8, indent 2).

    Returns:
        None.
    """
    # Level names indexed by list depth; index 0 is the paragraph itself.
    level_order = ["par", "ust", "pkt", "ppkt", "lit", "plit", "tir", "lev7", "lev8"]
    max_list_depth = len(level_order) - 1

    # How each level's counter is rendered inside the numbering string.
    formatters = {
        "par": lambda n: f"\\S{n}",
        "ust": lambda n: f"ust. {n}",
        "pkt": lambda n: f"pkt. {n}",
        "ppkt": lambda n: f"ppkt. {n})",
        "lit": lambda n: f"lit. {_alpha_label(n)}",
        "plit": lambda n: f"plit. {_alpha_label(n)})",
        "tir": lambda n: f"tir. {n}",
        "lev7": lambda n: f"lev7. {n}",
        "lev8": lambda n: f"lev8. {n}",
    }

    comment_regex = re.compile(r'(?<!\\)%.*$')
    paragraf_regex = re.compile(r'\\paragraf\{([^}]+)\}')
    begin_enum_regex = re.compile(r'\\begin\{(long[a-z0-9]*enum|customenum)\}')
    end_enum_regex = re.compile(r'\\end\{(long[a-z0-9]*enum|customenum)\}')
    # The lookahead keeps \itemsep, \itemize, \itemindent etc. from being
    # mistaken for an actual \item.
    item_regex = re.compile(r'\\item(?![A-Za-z])')
    label_regex = re.compile(r'\\label\{(itm:[^}]+)\}')

    counters = dict.fromkeys(level_order, 0)
    label_tree = []   # top-level "par" nodes, in document order
    errors = []       # warnings printed once processing is done
    list_depth = 0    # number of currently open list environments

    def reset_counters(from_level):
        """Zero the counters of every level nested below ``from_level``."""
        for level in level_order[level_order.index(from_level) + 1:]:
            counters[level] = 0

    def get_current_numbering():
        """Render all non-zero counters, outermost level first."""
        return " ".join(
            formatters[level](counters[level])
            for level in level_order
            if counters[level] > 0
        )

    def get_current_parent(depth):
        """Return the node a new item at list depth ``depth`` attaches to.

        Starts at the latest paragraph and follows the last child
        ``depth - 1`` times, stopping early when a level has no children.
        Returns None when no paragraph has been opened yet.
        """
        if not label_tree:
            return None
        node = label_tree[-1]
        for _ in range(depth - 1):
            if not node["children"]:
                break
            node = node["children"][-1]
        return node

    with open(in_file, 'r', encoding='utf-8') as file:
        # Enumerate the raw lines so warnings point at the real file line,
        # not at an index into the comment-stripped content.
        for line_number, raw_line in enumerate(file, start=1):
            line = comment_regex.sub('', raw_line).strip()
            if not line:
                continue

            # --- \paragraf{...}: new top-level node ---
            if paragraf_regex.search(line):
                counters["par"] += 1
                reset_counters("par")
                label_tree.append({
                    "type": "par",
                    "numbering": get_current_numbering(),
                    "label": "",
                    "text": line,
                    "children": [],
                })
                continue

            # --- \begin{...enum}: one level deeper ---
            if begin_enum_regex.search(line):
                list_depth += 1
                continue

            # --- \end{...enum}: one level shallower ---
            if end_enum_regex.search(line):
                if list_depth > 0:
                    list_depth -= 1
                else:
                    errors.append(
                        f"[Linia {line_number}] \\end listy bez pasującego \\begin. Ignoruję."
                    )
                continue

            # --- \item: new node at the current depth ---
            if not item_regex.search(line):
                continue

            if list_depth == 0:
                errors.append(f"[Linia {line_number}] \\item poza listą. Ignoruję.")
                continue
            if list_depth > max_list_depth:
                errors.append(
                    f"[Linia {line_number}] Zbyt głębokie zagnieżdżenie (>8). Pomijam \\item."
                )
                continue

            level_type = level_order[list_depth]
            counters[level_type] += 1
            reset_counters(level_type)

            label_match = label_regex.search(line)
            label_str = label_match.group(1) if label_match else ""

            parent = get_current_parent(list_depth)
            if parent is None:
                # There is no paragraph to attach to; report it instead of
                # dropping the item silently.
                errors.append(
                    f"[Linia {line_number}] \\item przed pierwszym \\paragraf. Pomijam."
                )
                continue

            parent["children"].append({
                "type": level_type,
                "numbering": get_current_numbering(),
                "label": label_str,
                "text": line,
                "children": [],
            })

    # Write the tree to the JSON file
    with open(out_file, "w", encoding="utf-8") as f:
        json.dump(label_tree, f, ensure_ascii=False, indent=2)

    if errors:
        print("== OSTRZEŻENIA / KOMUNIKATY ==")
        for err in errors:
            print("•", err)


def _alpha_label(index):
    """Return the lowercase letter label for a 1-based index.

    1 -> 'a', 26 -> 'z', 27 -> 'aa', so labels stay alphabetic past 'z'.
    """
    letters = []
    while index > 0:
        index, remainder = divmod(index - 1, 26)
        letters.append(chr(ord("a") + remainder))
    return "".join(reversed(letters))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    import sys

    # Command line: [input_file] [output_file]; both are optional.
    cli_values = sys.argv[1:]
    if len(cli_values) > 2:
        print("Użycie: python script.py [plik_wejściowy] [plik_wyjściowy]")
    else:
        # Pair the values given with the keyword names in order; anything
        # not supplied falls back to the function's defaults.
        overrides = dict(zip(("in_file", "out_file"), cli_values))
        process_labels_and_create_links(**overrides)
|
|
|
|
|
|