# Copyright 2022 the Regents of the University of California, Nerfstudio Team and contributors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Helper utils for processing Meshroom data into the nerfstudio format."""

import json
import math
from copy import deepcopy as dc
from pathlib import Path
from typing import Dict, List, Optional

import numpy as np

from nerfstudio.process_data.process_data_utils import CAMERA_MODELS
from nerfstudio.utils.rich_utils import CONSOLE

# Rotation matrix to adjust the coordinate system
ROT_MAT = np.array(
    [
        [1, 0, 0, 0],
        [0, 0, 1, 0],
        [0, -1, 0, 0],
        [0, 0, 0, 1],
    ]
)


def reflect(axis, size=4):
    """Create a reflection matrix along the specified axis."""
    _diag = np.ones(size)
    _diag[axis] = -1
    refl = np.diag(_diag)
    return refl


def Mat2Nerf(mat):
    """Convert a matrix to the NeRF coordinate system."""
    M = np.array(mat)
    M = (M @ reflect(2)) @ reflect(1)
    return M
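
# A small sanity check (derived from the code above, not from external docs):
# Mat2Nerf flips the sign of the Y and Z camera axes, so for the identity pose
# Mat2Nerf(np.eye(4)) equals np.diag([1.0, -1.0, -1.0, 1.0]).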


def closest_point_2_lines(oa, da, ob, db):
    """Find the point closest to both rays of the form o + t * d."""
    da = da / np.linalg.norm(da)
    db = db / np.linalg.norm(db)
    c = np.cross(da, db)
    denom = np.linalg.norm(c) ** 2
    t = ob - oa
    ta = np.linalg.det([t, db, c]) / (denom + 1e-10)
    tb = np.linalg.det([t, da, c]) / (denom + 1e-10)
    if ta > 0:
        ta = 0
    if tb > 0:
        tb = 0
    return (oa + ta * da + ob + tb * db) * 0.5, denom
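
# Worked example (a sketch): for a ray along +X from the origin and a ray along
# +Z from (0, 1, 1), the closest points on the two lines are (0, 0, 0) and
# (0, 1, 0), so
#     closest_point_2_lines(np.zeros(3), np.array([1.0, 0.0, 0.0]),
#                           np.array([0.0, 1.0, 1.0]), np.array([0.0, 0.0, 1.0]))
# returns approximately ((0.0, 0.5, 0.0), 1.0).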


def central_point(out):
    """Find a central point all cameras are looking at."""
    CONSOLE.print("Computing center of attention...")
    totw = 0.0
    totp = np.array([0.0, 0.0, 0.0])
    for f in out["frames"]:
        mf = np.array(f["transform_matrix"])[0:3, :]
        for g in out["frames"]:
            mg = np.array(g["transform_matrix"])[0:3, :]
            p, w = closest_point_2_lines(mf[:, 3], mf[:, 2], mg[:, 3], mg[:, 2])
            if w > 0.01:
                totp += p * w
                totw += w

    if len(out["frames"]) == 0:
        CONSOLE.print("[bold red]No frames found when computing center of attention[/bold red]")
        return totp

    if (totw == 0) and (not totp.any()):
        CONSOLE.print("[bold red]Center of attention is zero[/bold red]")
        return totp

    totp /= totw
    CONSOLE.print(f"The center of attention is: {totp}")

    return totp


def build_sensor(intrinsic):
    """Build camera intrinsics from Meshroom data."""
    out = {}
    out["w"] = float(intrinsic['width'])
    out["h"] = float(intrinsic['height'])

    # Focal length in mm
    focal = float(intrinsic['focalLength'])

    # Sensor width and height in mm
    sensor_width = float(intrinsic['sensorWidth'])
    sensor_height = float(intrinsic['sensorHeight'])

    # Focal length in pixels
    out["fl_x"] = (out["w"] * focal) / sensor_width

    # Check the W/H ratio against the sensor ratio
    if np.isclose((out["w"] / out["h"]), (sensor_width / sensor_height)):
        out["fl_y"] = (out["h"] * focal) / sensor_height
    else:
        CONSOLE.print(
            "[yellow]WARNING: W/H ratio does not match sensor ratio, this is likely a bug from Meshroom. "
            "Will use fl_x to set fl_y.[/yellow]"
        )
        out["fl_y"] = out["fl_x"]

    # Field of view in radians: fov = 2 * atan(size / (2 * focal_length_in_pixels))
    camera_angle_x = math.atan(out["w"] / (out['fl_x'] * 2)) * 2
    camera_angle_y = math.atan(out["h"] / (out['fl_y'] * 2)) * 2

    out["camera_angle_x"] = camera_angle_x
    out["camera_angle_y"] = camera_angle_y

    # The principal point is treated as an offset from the image center,
    # so shift it into absolute pixel coordinates
    out["cx"] = float(intrinsic['principalPoint'][0]) + (out["w"] / 2.0)
    out["cy"] = float(intrinsic['principalPoint'][1]) + (out["h"] / 2.0)

    # Radial distortion coefficients k1, k2, k3 for the radial3 model
    if intrinsic['type'] == 'radial3':
        for i, coef in enumerate(intrinsic['distortionParams']):
            out[f"k{i + 1}"] = float(coef)

    return out
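
# Example (hypothetical values, not taken from a real cameras.sfm): an intrinsic
# with width=4000, height=3000, focalLength=24, sensorWidth=36, sensorHeight=27
# and principalPoint=[0, 0] gives fl_x = fl_y = 4000 * 24 / 36 ≈ 2666.7 pixels,
# cx = 2000.0 and cy = 1500.0.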


def meshroom_to_json(
    image_filename_map: Dict[str, Path],
    json_filename: Path,
    output_dir: Path,
    ply_filename: Optional[Path] = None,
    verbose: bool = False,
) -> List[str]:
    """Convert Meshroom data into a nerfstudio dataset.

    Args:
        image_filename_map: Mapping of original image filenames to their saved locations.
        json_filename: Path to the Meshroom json file.
        output_dir: Path to the output directory.
        ply_filename: Path to the exported ply file.
        verbose: Whether to print verbose output.

    Returns:
        Summary of the conversion.
    """
    summary_log = []

    with open(json_filename, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Create output structure
    out = {}
    out['aabb_scale'] = 16  # Default value

    # Extract transforms from Meshroom data
    transforms = {}
    for pose in data.get('poses', []):
        transform = pose['pose']['transform']
        rot = np.asarray(transform['rotation'])
        rot = rot.reshape(3, 3).astype(float)

        ctr = np.asarray(transform['center'])
        ctr = ctr.astype(float)

        M = np.eye(4)
        M[:3, :3] = rot
        M[:3, 3] = ctr

        M = Mat2Nerf(M.astype(float))
        transforms[pose['poseId']] = np.dot(ROT_MAT, M)

    # Extract intrinsics from Meshroom data
    intrinsics = {}
    for intrinsic in data.get('intrinsics', []):
        intrinsics[intrinsic['intrinsicId']] = build_sensor(intrinsic)

    # Set camera model based on intrinsic type
    if data.get('intrinsics') and 'type' in data['intrinsics'][0]:
        intrinsic_type = data['intrinsics'][0]['type']
        if intrinsic_type in ['radial1', 'radial3']:
            out["camera_model"] = CAMERA_MODELS["perspective"].value
        elif intrinsic_type in ['fisheye', 'fisheye4']:
            out["camera_model"] = CAMERA_MODELS["fisheye"].value
        else:
            # Default to perspective
            out["camera_model"] = CAMERA_MODELS["perspective"].value
    else:
        out["camera_model"] = CAMERA_MODELS["perspective"].value

    # Build frames
    frames = []
    skipped_images = 0

    for view in data.get('views', []):
        # Get the image name from the path
        path = Path(view['path'])
        name = path.stem

        # Check if the image exists in our mapping
        if name not in image_filename_map:
            if verbose:
                CONSOLE.print(f"[yellow]Missing image for {name}, skipping[/yellow]")
            skipped_images += 1
            continue

        # Get poseId and intrinsicId
        poseId = view['poseId']
        intrinsicId = view['intrinsicId']

        # Check if we have the necessary data
        if poseId not in transforms:
            if verbose:
                CONSOLE.print(f"[yellow]PoseId {poseId} not found in transforms, skipping image: {name}[/yellow]")
            skipped_images += 1
            continue

        if intrinsicId not in intrinsics:
            if verbose:
                CONSOLE.print(f"[yellow]IntrinsicId {intrinsicId} not found, skipping image: {name}[/yellow]")
            skipped_images += 1
            continue

        # Create camera data
        camera = {}
        camera.update(dc(intrinsics[intrinsicId]))
        camera['transform_matrix'] = transforms[poseId]
        camera['file_path'] = image_filename_map[name].as_posix()

        frames.append(camera)

    out['frames'] = frames

    # Calculate center point
    center = central_point(out)

    # Adjust camera positions by centering
    for f in out["frames"]:
        f["transform_matrix"][0:3, 3] -= center
        f["transform_matrix"] = f["transform_matrix"].tolist()

    # Include point cloud if provided
    if ply_filename is not None:
        import open3d as o3d

        # Create the applied transform
        applied_transform = np.eye(4)[:3, :]
        applied_transform = applied_transform[np.array([2, 0, 1]), :]
        out["applied_transform"] = applied_transform.tolist()

        # Load and transform point cloud
        pc = o3d.io.read_point_cloud(str(ply_filename))
        points3D = np.asarray(pc.points)
        points3D = np.einsum("ij,bj->bi", applied_transform[:3, :3], points3D) + applied_transform[:3, 3]
        pc.points = o3d.utility.Vector3dVector(points3D)
        o3d.io.write_point_cloud(str(output_dir / "sparse_pc.ply"), pc)
        out["ply_file_path"] = "sparse_pc.ply"
        summary_log.append(f"Imported {ply_filename} as starting points")

    # Write output
    with open(output_dir / "transforms.json", "w", encoding="utf-8") as f:
        json.dump(out, f, indent=4)

    # Add summary info
    if skipped_images == 1:
        summary_log.append(f"{skipped_images} image was skipped due to missing camera pose or intrinsic data.")
    elif skipped_images > 1:
        summary_log.append(f"{skipped_images} images were skipped due to missing camera poses or intrinsic data.")

    summary_log.append(f"Final dataset contains {len(out['frames'])} frames.")

    return summary_log
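
# A minimal usage sketch (the image mapping and paths below are hypothetical, not
# part of the nerfstudio CLI):
#
#     from pathlib import Path
#
#     image_filename_map = {"IMG_0001": Path("images/frame_00001.png")}
#     summary = meshroom_to_json(
#         image_filename_map=image_filename_map,
#         json_filename=Path("cameras.sfm"),
#         output_dir=Path("outputs/my-scene"),
#         ply_filename=None,
#         verbose=True,
#     )
#     for line in summary:
#         CONSOLE.print(line)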