Skip to content

Commit e4e2062

Browse files
feat: finalize GUI simulation and branch-aware analysis upgrades
1 parent 6256821 commit e4e2062

File tree

12 files changed

+2264
-547
lines changed

12 files changed

+2264
-547
lines changed

mock_survey/_check_scale_counts.py

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
import re
2+
from pathlib import Path
3+
4+
import pandas as pd
5+
6+
# Resolve the inputs relative to this script so the check runs from any
# checkout location instead of depending on one machine's drive layout.
BASE_DIR = Path(__file__).resolve().parent
HTML_PATH = BASE_DIR / "index.html"
CSV_PATH = BASE_DIR / "survey_data_collected.csv"


def find_question_ids(html: str, data_type: str) -> list:
    """Return the digit strings N of tags with ``id="qN"`` and *data_type*.

    The ``id`` attribute has to appear before ``data-type`` inside the same
    tag: the pattern never crosses a ``>`` between the two attributes.
    """
    pattern = r'id="q(\d+)"[^>]*data-type="' + re.escape(data_type) + '"'
    return re.findall(pattern, html)


def expand_pipe_columns(df: pd.DataFrame, min_share: float = 0.2) -> pd.DataFrame:
    """Collect the ``Q*`` columns of *df*, splitting pipe-joined ones.

    A column is split on ``|`` into ``<col>_r1``, ``<col>_r2``, ... when at
    least *min_share* of its stringified cells contain a ``|``; any other
    ``Q*`` column is carried over unchanged. Columns whose name does not
    start with ``Q`` are ignored.

    Returns a frame with no columns when *df* has no ``Q*`` column
    (``pd.concat`` would raise on an empty list).
    """
    pieces = []
    for col in df.columns:
        if not col.startswith("Q"):
            continue
        as_text = df[col].astype(str)
        if as_text.str.contains("|", regex=False).mean() >= min_share:
            parts = as_text.str.split("|", expand=True, regex=False)
            parts.columns = [f"{col}_r{i}" for i in range(1, parts.shape[1] + 1)]
            pieces.append(parts)
        else:
            pieces.append(pd.DataFrame({col: df[col]}))
    if not pieces:
        return pd.DataFrame(index=df.index)
    return pd.concat(pieces, axis=1)


def numeric_answer_ratio(expanded: pd.DataFrame) -> pd.Series:
    """Return, for each column, the share of cells that parse as a number.

    The strings ``""``, ``"None"`` and ``"nan"`` are treated as missing, and
    anything ``pd.to_numeric`` cannot parse is coerced to missing as well.
    A frame without columns gives an empty float Series.
    """
    if expanded.shape[1] == 0:
        return pd.Series(dtype=float)
    numeric = expanded.replace({"": None, "None": None, "nan": None}).apply(
        pd.to_numeric, errors="coerce"
    )
    return numeric.notna().mean()


def main() -> None:
    """Print question-type counts from the HTML and answer ratios from the CSV."""
    html = HTML_PATH.read_text(encoding="utf-8", errors="replace")
    scale_qids = find_question_ids(html, "scale_radio")
    matrix_qids = find_question_ids(html, "matrix")
    print("scale_radio_count", len(scale_qids), scale_qids)
    print("matrix_count", len(matrix_qids), matrix_qids)

    df = pd.read_csv(CSV_PATH)
    ratio = numeric_answer_ratio(expand_pipe_columns(df))
    # Items that are numeric for (practically) every row of the CSV.
    common = ratio[ratio >= 0.999].index.tolist()
    print("common_count", len(common))
    print("common_items", common)
    print("top_answer_ratio", ratio.sort_values(ascending=False).head(30).to_dict())


if __name__ == "__main__":
    main()
32+
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import json
2+
import re
3+
from pathlib import Path
4+
5+
import survey_generator as sg
6+
7+
# Regenerate index.html from the questionnaire text next to this script, then
# compare what the parser produced with the question titles found in the HTML.
here = Path(__file__).resolve().parent
survey_txt = here / "问卷.txt"
index_html = here / "index.html"

# NOTE(review): `questions` is used below as a sliceable sequence of objects
# with a dict-style `.get` — confirm against survey_generator.parse_survey.
questions = sg.parse_survey(str(survey_txt))
survey_title = sg.extract_survey_title(str(survey_txt))
sg.generate_html(questions, str(index_html), survey_title=survey_title)

# Fall back to an empty page when the generator did not leave a file behind.
if index_html.exists():
    page = index_html.read_text(encoding="utf-8", errors="replace")
else:
    page = ""
title_pattern = re.compile(r'<div class="question-title">\s*\d+\.\s*(.*?)</div>')
html_titles = title_pattern.findall(page)

# 1-based positions of entries whose text is blank after stripping.
blank_parsed = [
    pos
    for pos, question in enumerate(questions, start=1)
    if not str(question.get("text", "")).strip()
]
blank_titles = [
    pos for pos, title in enumerate(html_titles, start=1) if not str(title).strip()
]

# Only the first 20 entries are kept as a preview on each side.
parsed_preview = []
for question in questions[:20]:
    parsed_preview.append(
        {
            "id": question.get("id"),
            "type": question.get("type"),
            "text": question.get("text"),
        }
    )

report = {
    "parsed_count": len(questions),
    "html_count": len(html_titles),
    "parsed_empty_text": blank_parsed,
    "html_empty_titles": blank_titles,
    "parsed_preview": parsed_preview,
    "html_preview": html_titles[:20],
}

report_path = here / "_mapping_check.json"
report_json = json.dumps(report, ensure_ascii=False, indent=2)
report_path.write_text(report_json, encoding="utf-8")
print("done")

mock_survey/analysis_meta.json

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
{
2+
"run_id": "RUN_20260306_133858",
3+
"seed": 1375906085,
4+
"analysis_settings": {
5+
"scope": "coverage",
6+
"coverage_threshold": 0.6,
7+
"branch_min_sample": 10,
8+
"branch_min_items": 3
9+
},
10+
"sample_counts": {
11+
"planned": 100,
12+
"completed": 100,
13+
"included": 100
14+
},
15+
"item_counts": {
16+
"numericizable": 41,
17+
"true_scale": 18,
18+
"strict_public": 4,
19+
"selected": 4,
20+
"detected": 41
21+
},
22+
"selected_items": [
23+
"Q8",
24+
"Q30",
25+
"Q32",
26+
"Q33"
27+
],
28+
"strict_public_items": [
29+
"Q8",
30+
"Q30",
31+
"Q32",
32+
"Q33"
33+
],
34+
"coverage_ratio": {
35+
"Q1": 0.0,
36+
"Q2": 0.0,
37+
"Q3": 0.0,
38+
"Q4": 0.0,
39+
"Q5": 0.0,
40+
"Q6": 0.0,
41+
"Q7": 0.0,
42+
"Q8": 1.0,
43+
"Q9": 0.0,
44+
"Q10": 0.0,
45+
"Q11": 0.0,
46+
"Q12": 0.0,
47+
"Q13": 0.0,
48+
"Q14": 0.0,
49+
"Q15": 0.0,
50+
"Q16": 0.56,
51+
"Q17": 0.56,
52+
"Q18": 0.56,
53+
"Q19": 0.0,
54+
"Q20": 0.0,
55+
"Q21": 0.0,
56+
"Q22": 0.0,
57+
"Q23": 0.56,
58+
"Q24_r1": 0.56,
59+
"Q24_r2": 0.56,
60+
"Q24_r3": 0.56,
61+
"Q24_r4": 0.56,
62+
"Q24_r5": 0.56,
63+
"Q24_r6": 0.56,
64+
"Q24_r7": 0.56,
65+
"Q24_r8": 0.56,
66+
"Q25": 0.56,
67+
"Q26": 0.56,
68+
"Q27": 0.0,
69+
"Q28": 0.0,
70+
"Q29": 0.0,
71+
"Q30": 1.0,
72+
"Q31": 0.0,
73+
"Q32": 1.0,
74+
"Q33": 1.0,
75+
"Q34": 0.0
76+
},
77+
"branch_sections": [
78+
{
79+
"branch_index": 1,
80+
"trace": "1>2>3>4>5>6>7>8>9>10>11>12>13>14>15>16>17>18>19>20>21>22>23>24>25>26>27>28>29>30>31>32>33>34",
81+
"sample_count": 56,
82+
"item_count": 18,
83+
"strict_item_count": 18,
84+
"selected_item_count": 18,
85+
"selected_items": [
86+
"Q8",
87+
"Q16",
88+
"Q17",
89+
"Q18",
90+
"Q23",
91+
"Q24_r1",
92+
"Q24_r2",
93+
"Q24_r3",
94+
"Q24_r4",
95+
"Q24_r5",
96+
"Q24_r6",
97+
"Q24_r7",
98+
"Q24_r8",
99+
"Q25",
100+
"Q26",
101+
"Q30",
102+
"Q32",
103+
"Q33"
104+
],
105+
"alpha": 0.9776974225885819,
106+
"kmo": 3.882258564807101e-15,
107+
"cfa_available": true,
108+
"cfa_cfi": 0.9417804353593766,
109+
"cfa_rmsea": 0.09349905848262634,
110+
"n_per_item": 3.111111111111111,
111+
"exploratory_only": true,
112+
"excluded_from_overall": true
113+
},
114+
{
115+
"branch_index": 2,
116+
"trace": "1>2>3>4>5>6>7>8>9>29>30>31>32>33>34",
117+
"sample_count": 44,
118+
"item_count": 4,
119+
"strict_item_count": 4,
120+
"selected_item_count": 4,
121+
"selected_items": [
122+
"Q8",
123+
"Q30",
124+
"Q32",
125+
"Q33"
126+
],
127+
"alpha": 0.9568490060248958,
128+
"kmo": 0.8353419890103603,
129+
"cfa_available": true,
130+
"cfa_cfi": 0.9934091102222444,
131+
"cfa_rmsea": 0.12153564274746322,
132+
"n_per_item": 11.0,
133+
"exploratory_only": false,
134+
"excluded_from_overall": false
135+
}
136+
],
137+
"consistency": {
138+
"grade": "A",
139+
"reliability": "达标",
140+
"validity": "达标",
141+
"structural_grade": "B"
142+
},
143+
"target_consistency": {
144+
"grade": "A",
145+
"reliability": "达标",
146+
"validity": "达标",
147+
"structural_grade": "B"
148+
},
149+
"structural_risk": {
150+
"grade": "B",
151+
"score": 5,
152+
"reasons": [
153+
"当前全样本仅纳入4题,难代表整份量表结构。",
154+
"全样本仅覆盖真量表题的22%,分支互斥较强。",
155+
"有1个分支因 n/p<5 仅作探索性参考,不纳入总体结论。"
156+
]
157+
},
158+
"generated_at": "2026-03-06T13:42:54",
159+
"analysis_signature": "6c4f154467d581ffff62bf9daf7b8932733e8837ad9b3868dcb89ac81001ba52"
160+
}

0 commit comments

Comments
 (0)