-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathparser.py
More file actions
24 lines (22 loc) · 877 Bytes
/
parser.py
File metadata and controls
24 lines (22 loc) · 877 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
import re
import pandas as pd
import json
def parse_log(filepath):
if filepath.endswith('.json'):
with open(filepath, 'r', encoding='utf-8') as f:
data = json.load(f)
df = pd.DataFrame(data)
if 'timestamp' in df.columns:
df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')
return df
else:
pattern = r'(?P<ip>\d+\.\d+\.\d+\.\d+) - - \[(?P<datetime>[^\]]+)\] "(?P<method>\w+) (?P<url>[^ ]+) HTTP[^"]+" (?P<status>\d+)'
logs = []
with open(filepath, 'r', encoding='utf-8') as file:
for line in file:
match = re.search(pattern, line)
if match:
logs.append(match.groupdict())
df = pd.DataFrame(logs)
df['datetime'] = pd.to_datetime(df['datetime'], format='%d/%b/%Y:%H:%M:%S %z')
return df