-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtest_fresh_install.py
More file actions
executable file
·379 lines (292 loc) · 11.5 KB
/
test_fresh_install.py
File metadata and controls
executable file
·379 lines (292 loc) · 11.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
#!/usr/bin/env python3
"""
Test script for MolBox fresh installation.
This script tests basic functionality of MolBox in a fresh environment.
Run this after installing MolBox to verify the installation.
Usage:
python test_fresh_install.py
"""
import sys
import tempfile
from pathlib import Path
def test_imports():
    """Check that the core MolBox names can be imported.

    Returns:
        bool: True when ``MolBox`` and ``PropertyManager`` import cleanly,
        False when the import raises (the error is printed).
    """
    banner = "=" * 70
    print(banner)
    print("Testing imports...")
    print(banner)

    imported = False
    try:
        # The names are imported only to prove they exist; they are not used.
        from molbox import MolBox, PropertyManager  # noqa: F401
        print("✓ All core imports successful")
        imported = True
    except Exception as exc:
        print(f"✗ Import failed: {exc}")
    return imported
def test_basic_save_load():
    """Round-trip a few molecules through a MolBox file.

    Builds four RDKit molecules from SMILES, writes them with
    ``MolBox.save_molecules`` into a temporary directory, reads them back
    with ``MolBox.load_molecules`` and checks that the count matches and
    that no loaded entry is ``None``.

    Returns:
        bool: True if every step succeeds; False if anything raises (the
        error and its traceback are printed).
    """
    banner = "=" * 70
    print("\n" + banner)
    print("Testing basic save/load...")
    print(banner)

    try:
        from molbox import MolBox
        from rdkit import Chem

        # Create test molecules
        molecules = []
        for smi in ('CCO', 'c1ccccc1', 'CC(C)O', 'CCN(CC)CC'):
            molecules.append(Chem.MolFromSmiles(smi))
        expected = len(molecules)

        with tempfile.TemporaryDirectory() as tmpdir:
            box_path = Path(tmpdir) / "test.box"
            box_file = str(box_path)

            # Write, then read back from the same file.
            MolBox.save_molecules(molecules, box_file)
            print(f"✓ Saved {expected} molecules to {box_path.name}")

            loaded_mols = MolBox.load_molecules(box_file)
            n_loaded = len(loaded_mols)
            print(f"✓ Loaded {n_loaded} molecules")

            # Same number of entries, and none of them missing.
            assert n_loaded == expected, \
                f"Expected {expected} molecules, got {n_loaded}"
            missing = [mol for mol in loaded_mols if mol is None]
            assert not missing, "Some molecules failed to load"
            print("✓ All molecules loaded successfully")

        return True

    except Exception as exc:
        import traceback

        print(f"✗ Save/load test failed: {exc}")
        traceback.print_exc()
        return False
def test_save_with_properties():
    """Save molecules together with per-molecule properties and read them back.

    Three molecules are written with two keyword properties (``energy`` and
    ``score``); the metadata table returned by ``MolBox.load_database`` must
    then contain both columns and one row per molecule.

    Returns:
        bool: True if every step succeeds; False if anything raises (the
        error and its traceback are printed).
    """
    print("\n" + "=" * 70)
    print("Testing save with properties...")
    print("=" * 70)

    try:
        from molbox import MolBox
        from rdkit import Chem

        smiles_list = ['CCO', 'c1ccccc1', 'CC(C)O']
        molecules = [Chem.MolFromSmiles(smi) for smi in smiles_list]

        # Property values, one per molecule
        energies = [1.2, 3.4, 5.6]
        scores = [0.8, 0.9, 0.7]

        with tempfile.TemporaryDirectory() as tmpdir:
            filepath = Path(tmpdir) / "test_props.box"

            # Save with properties
            MolBox.save_molecules(
                molecules,
                str(filepath),
                energy=energies,
                score=scores,
            )
            print("✓ Saved molecules with properties")

            # Load metadata (presumably a pandas DataFrame; only `.columns`
            # and `len()` are relied on here)
            df = MolBox.load_database(str(filepath))
            print(f"✓ Loaded metadata: {len(df)} rows, {len(df.columns)} columns")

            # Verify properties
            # NOTE(review): only the presence of the columns is checked; the
            # stored values are not compared against `energies` / `scores`.
            for column in ('energy', 'score'):
                assert column in df.columns, f"Missing '{column}' column"
            assert len(df) == len(molecules), \
                f"Expected {len(molecules)} rows, got {len(df)}"

            print("✓ All properties present and correct")
            # str() each label so a non-string column name cannot break join().
            print(f" Columns: {', '.join(map(str, df.columns[:10]))}")

        return True

    except Exception as e:
        print(f"✗ Properties test failed: {e}")
        import traceback
        traceback.print_exc()
        return False
def test_add_computed_property():
    """Add a computed column to an existing MolBox file and verify it.

    Saves three molecules, asks ``MolBox.add_property`` to compute a
    ``mol_weight`` value for each one via RDKit's ``Descriptors.MolWt``,
    then checks that the column exists in the loaded table and has no nulls.

    Returns:
        bool: True if every step succeeds; False if anything raises (the
        error and its traceback are printed).
    """
    banner = "=" * 70
    print("\n" + banner)
    print("Testing computed property addition...")
    print(banner)

    try:
        from molbox import MolBox
        from rdkit import Chem
        from rdkit.Chem import Descriptors

        def calc_mol_weight(mol):
            """Return the molecular weight RDKit computes for ``mol``."""
            return Descriptors.MolWt(mol)

        molecules = list(map(Chem.MolFromSmiles, ['CCO', 'c1ccccc1', 'CC(C)O']))

        with tempfile.TemporaryDirectory() as tmpdir:
            box_file = str(Path(tmpdir) / "test_computed.box")

            # Save molecules
            MolBox.save_molecules(molecules, box_file)
            print("✓ Saved molecules")

            # Add computed property
            MolBox.add_property(
                box_file,
                "mol_weight",
                property_function=calc_mol_weight,
            )
            print("✓ Added molecular weight property")

            # Verify
            table = MolBox.load_database(box_file)
            assert 'mol_weight' in table.columns, "Missing 'mol_weight' column"
            weights = table['mol_weight']
            assert not weights.isnull().any(), "Some weights are null"

            print("✓ Computed properties added successfully")
            print(f" Weights: {list(weights.values)}")

        return True

    except Exception as exc:
        import traceback

        print(f"✗ Computed property test failed: {exc}")
        traceback.print_exc()
        return False
def test_coordinate_loading():
    """Check that conformer coordinates can be loaded on their own.

    Two molecules get explicit hydrogens and up to three embedded conformers
    each, are saved, and are then read back through
    ``MolBox.load_coordinates``. Each returned entry must be a NumPy array
    whose first axis equals the molecule's conformer count.

    Returns:
        bool: True if every step succeeds; False if anything raises (the
        error and its traceback are printed).
    """
    print("\n" + "=" * 70)
    print("Testing coordinate loading...")
    print("=" * 70)

    try:
        from molbox import MolBox
        from rdkit import Chem
        from rdkit.Chem import AllChem
        import numpy as np

        # Create molecules with 3D coordinates
        smiles_list = ['CCO', 'CC(C)O']
        molecules = []
        for smi in smiles_list:
            mol = Chem.AddHs(Chem.MolFromSmiles(smi))
            # Generate a few conformers (fixed seed keeps the run repeatable)
            AllChem.EmbedMultipleConfs(mol, numConfs=3, randomSeed=42)
            # With zero conformers the per-molecule check below would pass
            # vacuously, so fail loudly here instead.
            if mol.GetNumConformers() == 0:
                raise RuntimeError(
                    f"RDKit did not embed any conformers for {smi!r}"
                )
            molecules.append(mol)

        with tempfile.TemporaryDirectory() as tmpdir:
            filepath = Path(tmpdir) / "test_coords.box"

            # Save molecules
            MolBox.save_molecules(molecules, str(filepath))
            print("✓ Saved molecules with conformers")

            # Load coordinates only
            coords_list = MolBox.load_coordinates(str(filepath))
            print(f"✓ Loaded coordinates for {len(coords_list)} molecules")

            # Verify
            assert len(coords_list) == len(molecules), \
                f"Expected {len(molecules)} coordinate arrays, got {len(coords_list)}"

            for i, coords in enumerate(coords_list):
                assert isinstance(coords, np.ndarray), \
                    f"Coordinates for molecule {i} are not ndarray"
                n_confs = molecules[i].GetNumConformers()
                assert coords.shape[0] == n_confs, \
                    f"Expected {n_confs} conformers, got {coords.shape[0]}"
                # presumably shaped (n_confs, n_atoms, 3) — only axis 0 is
                # actually verified above; TODO confirm against MolBox docs
                print(f" Molecule {i}: {coords.shape[0]} conformers, "
                      f"{coords.shape[1]} atoms, 3D coords")

            print("✓ Coordinate loading successful")

        return True

    except Exception as e:
        print(f"✗ Coordinate loading test failed: {e}")
        import traceback
        traceback.print_exc()
        return False
def test_iteration():
    """Exercise the streaming iterators over a saved MolBox file.

    Saves five molecules, then (1) walks ``MolBox.iterate_molecules`` and
    checks every yielded molecule is not ``None`` and the count matches, and
    (2) walks ``MolBox.iterate_database`` with ``batch_size=2`` and checks
    the batches add up to the same number of rows.

    Returns:
        bool: True if every step succeeds; False if anything raises (the
        error and its traceback are printed).
    """
    banner = "=" * 70
    print("\n" + banner)
    print("Testing iteration...")
    print(banner)

    try:
        from molbox import MolBox
        from rdkit import Chem

        molecules = [
            Chem.MolFromSmiles(smi)
            for smi in ('CCO', 'c1ccccc1', 'CC(C)O', 'CCN(CC)CC', 'CCCC')
        ]
        expected = len(molecules)

        with tempfile.TemporaryDirectory() as tmpdir:
            box_file = str(Path(tmpdir) / "test_iter.box")

            # Save molecules
            MolBox.save_molecules(molecules, box_file)
            print("✓ Saved molecules")

            # Test molecule iteration
            count = 0
            for count, (idx, mol) in enumerate(
                    MolBox.iterate_molecules(box_file), start=1):
                assert mol is not None, f"Molecule at index {idx} is None"
            assert count == expected, \
                f"Expected {expected} iterations, got {count}"
            print(f"✓ Iterated through {count} molecules successfully")

            # Test database iteration: record the size of every batch
            batch_sizes = [
                len(batch_df)
                for batch_df in MolBox.iterate_database(box_file, batch_size=2)
            ]
            batch_count = len(batch_sizes)
            total_rows = sum(batch_sizes)
            assert total_rows == expected, \
                f"Expected {expected} total rows, got {total_rows}"
            print(f"✓ Iterated through database in {batch_count} batches")

        return True

    except Exception as exc:
        import traceback

        print(f"✗ Iteration test failed: {exc}")
        traceback.print_exc()
        return False
def test_dataframe_input():
    """Save molecules straight from a pandas DataFrame of SMILES strings.

    A three-row frame with ``smiles``, ``energy`` and ``name`` columns is
    passed to ``MolBox.save_molecules`` with ``auto_properties=True``; the
    loaded table must then expose ``energy`` and ``name`` and keep the same
    number of rows.

    Returns:
        bool: True if every step succeeds; False if anything raises (the
        error and its traceback are printed).
    """
    banner = "=" * 70
    print("\n" + banner)
    print("Testing DataFrame input...")
    print(banner)

    try:
        from molbox import MolBox
        import pandas as pd

        # Create test DataFrame
        records = {
            'smiles': ['CCO', 'c1ccccc1', 'CC(C)O'],
            'energy': [1.2, 3.4, 5.6],
            'name': ['ethanol', 'benzene', 'isopropanol'],
        }
        df = pd.DataFrame(records)

        with tempfile.TemporaryDirectory() as tmpdir:
            box_file = str(Path(tmpdir) / "test_df.box")

            # Save from DataFrame
            MolBox.save_molecules(
                df,
                box_file,
                smiles_column='smiles',
                auto_properties=True,
            )
            print("✓ Saved from DataFrame")

            # Load and verify
            loaded_df = MolBox.load_database(box_file)
            for column in ('energy', 'name'):
                assert column in loaded_df.columns, \
                    f"Missing '{column}' property"
            assert len(loaded_df) == len(df), \
                f"Expected {len(df)} rows, got {len(loaded_df)}"

            print("✓ DataFrame input successful with auto_properties")
            # Leave out the columns whose names carry the 'MolBox-' prefix.
            user_columns = [
                c for c in loaded_df.columns if not c.startswith('MolBox-')
            ]
            print(f" Properties: {', '.join(user_columns)}")

        return True

    except Exception as exc:
        import traceback

        print(f"✗ DataFrame input test failed: {exc}")
        traceback.print_exc()
        return False
def main():
    """Run every installation check and print a pass/fail summary.

    Each check is called in turn; a check that raises is reported as crashed
    and counted as failed, so the remaining checks still run.

    Returns:
        int: 0 when all checks pass, 1 otherwise (used as the exit status).
    """
    banner = "=" * 70
    print("\n" + banner)
    print("MolBox Fresh Installation Test")
    print(banner)

    tests = (
        ("Imports", test_imports),
        ("Basic Save/Load", test_basic_save_load),
        ("Save with Properties", test_save_with_properties),
        ("Computed Properties", test_add_computed_property),
        ("Coordinate Loading", test_coordinate_loading),
        ("Iteration", test_iteration),
        ("DataFrame Input", test_dataframe_input),
    )

    results = []
    for name, test_func in tests:
        try:
            outcome = test_func()
        except Exception as exc:
            print(f"\n✗ Test '{name}' crashed: {exc}")
            outcome = False
        results.append((name, outcome))

    # Summary
    print("\n" + banner)
    print("Test Summary")
    print(banner)

    for name, outcome in results:
        label = "✓ PASS" if outcome else "✗ FAIL"
        print(f"{label}: {name}")

    total = len(results)
    passed = len([name for name, outcome in results if outcome])
    print(f"\nTotal: {passed}/{total} tests passed")

    if passed != total:
        print(f"\n⚠️ {total - passed} test(s) failed. Please check the output above.")
        return 1

    print("\n🎉 All tests passed! MolBox is ready to use.")
    return 0
if __name__ == "__main__":
    # Hand main()'s status (0 = all passed, 1 = failures) to the shell.
    exit_code = main()
    sys.exit(exit_code)