@@ -76,24 +76,63 @@ def extract(self, tarinfo: tarfile.TarInfo, extract_root: Path): # noqa: C901
7676
7777 # prevent traversal attempts through links
7878 if tarinfo .islnk () or tarinfo .issym ():
79- if Path (tarinfo .linkname ).is_absolute ():
80- self .record_problem (
81- tarinfo ,
82- "Absolute path as link target." ,
83- "Converted to extraction relative path." ,
84- )
85- tarinfo .linkname = f"{ extract_root } /{ tarinfo .linkname } "
86-
87- if not is_safe_path (
88- basedir = extract_root ,
89- path = extract_root / Path (tarinfo .name ).parent / tarinfo .linkname ,
90- ):
91- self .record_problem (
92- tarinfo ,
93- "Traversal attempt through link path." ,
94- "Skipped." ,
95- )
96- return
79+ link_target = Path (tarinfo .linkname )
80+
81+ # Check if the link is absolute and make it relative to extract_root
82+ if link_target .is_absolute ():
83+ # Strip leading '/' to make the path relative
84+ rel_target = link_target .relative_to ('/' )
85+
86+ if Path (tarinfo .linkname ).is_absolute ():
87+ self .record_problem (
88+ tarinfo ,
89+ "Absolute path as link target." ,
90+ "Converted to extraction relative path." ,
91+ )
92+ else :
93+ # Directly use the relative link target. If it points to an unsafe path, we'll
94+ # check and fix below
95+ rel_target = link_target
96+
97+ # The symlink will point to our relative target (may be updated below if unsafe)
98+ tarinfo .linkname = rel_target
99+
100+ # Resolve the link target to an absolute path
101+ resolved_path = (extract_root / tarinfo .name ).parent / rel_target
102+
103+ # If the resolved path points outside of extract_root, we need to fix it!
104+ if not is_safe_path (extract_root , resolved_path ):
105+ logger .warning ("Traversal attempt through link path." , src = tarinfo .name , dest = tarinfo .linkname , basedir = extract_root , resovled_path = resolved_path )
106+
107+ for drop_count in range (0 , len (str (rel_target ).split ('/' ))):
108+ new_path = (extract_root / tarinfo .name ).parent / Path ("/" .join (["placeholder" ] * drop_count )) / rel_target
109+ resolved_path = os .path .abspath (new_path )
110+ if str (resolved_path ).startswith (str (extract_root )):
111+ break
112+ else :
113+ # We didn't hit the break, we couldn't resolve the path safely
114+ self .record_problem (
115+ tarinfo ,
116+ "Traversal attempt through link path." ,
117+ "Skipped." ,
118+ )
119+ return
120+
121+ # Double check that it's safe now
122+ if not is_safe_path (extract_root , resolved_path ):
123+ self .record_problem (
124+ tarinfo ,
125+ "Traversal attempt through link path." ,
126+ "Skipped." ,
127+ )
128+ return
129+
130+ # Prepend placeholder directories before rel_target to get a valid path
131+ # within extract_root. This is the relative version of resolved_path.
132+ rel_target = Path ("/" .join (["placeholder" ] * drop_count )) / rel_target
133+ tarinfo .linkname = rel_target
134+
135+ logger .debug ("Creating symlink" , points_to = resolved_path , name = tarinfo .name )
97136
98137 target_path = extract_root / tarinfo .name
99138 # directories are special: we can not set their metadata now + they might also be already existing
0 commit comments