From cf6facf0b53c9bca9954cee823f42df42cb5418c Mon Sep 17 00:00:00 2001 From: ouyang1030 Date: Tue, 20 Jan 2026 18:46:39 +0100 Subject: [PATCH 1/2] Update databallpy code with FIFA data support --- .../event_data_parsers/__init__.py | 3 + .../event_data_parsers/fifa_parser.py | 782 ++++++++++++++++++ .../utils/_add_datetime.py | 1 + databallpy/game.py | 7 + databallpy/schemas/event_data.py | 22 +- databallpy/schemas/tracking_data.py | 99 ++- databallpy/utils/constants.py | 1 + ...3\346\215\267\346\226\271\345\274\217.lnk" | Bin 0 -> 1485 bytes databallpy/utils/get_game.py | 16 +- databallpy/visualize.py | 166 +++- 10 files changed, 1043 insertions(+), 54 deletions(-) create mode 100644 databallpy/data_parsers/event_data_parsers/fifa_parser.py create mode 100644 "databallpy/utils/databallpy - \345\277\253\346\215\267\346\226\271\345\274\217.lnk" diff --git a/databallpy/data_parsers/event_data_parsers/__init__.py b/databallpy/data_parsers/event_data_parsers/__init__.py index 349d05e9..4565effc 100644 --- a/databallpy/data_parsers/event_data_parsers/__init__.py +++ b/databallpy/data_parsers/event_data_parsers/__init__.py @@ -16,3 +16,6 @@ from databallpy.data_parsers.event_data_parsers.statsbomb_parser import ( load_statsbomb_event_data, ) +from databallpy.data_parsers.event_data_parsers.fifa_parser import ( + load_fifa_event_data, +) \ No newline at end of file diff --git a/databallpy/data_parsers/event_data_parsers/fifa_parser.py b/databallpy/data_parsers/event_data_parsers/fifa_parser.py new file mode 100644 index 00000000..6530a3a9 --- /dev/null +++ b/databallpy/data_parsers/event_data_parsers/fifa_parser.py @@ -0,0 +1,782 @@ +from ast import In +import json +import numpy as np +import pandas as pd + +from databallpy.data_parsers import Metadata +from databallpy.events import ( + IndividualCloseToBallEvent, + PassEvent, + ShotEvent, + TackleEvent, +) +from databallpy.utils.constants import MISSING_INT +from databallpy.utils.logging import logging_wrapper + + +# FIFA event type mappings to databallpy events +FIFA_TO_DATABALLPY_MAP = { + "pass": "pass", + "assist": "pass", + "cross": "pass", + "attempt_at_goal": "shot", + "own_goal": "own_goal", + "tackle": "tackle", + "no_event": "tackle", +} + +# Shot outcome mappings +SHOT_OUTCOMES = { + "incomplete": "miss_off_target", + "off_target": "miss_off_target", + "on_target": "on_target", + "complete": "goal", + "own_goal": "own_goal" +} + +# Body part mappings +BODY_PART_MAP = { + "right_foot": "right_foot", + "left_foot": "left_foot", + "head": "head", + "hands": "hands", + "body": "other", + "feet": "other", +} + +# Set piece mappings +SET_PIECE_MAP = { + "freekick": "free_kick", + "throwin": "throw_in", + "goalkick": "goal_kick", + "corner": "corner_kick", + "kickoff": "kick_off", + "penalty": "penalty", +} + +# Phase of the game mappings +PHASE_TYPE_MAP = { + "referee_action": "referee_action", + "in_possession": "in_possession", + "out_of_possession": "out_of_possession", + "in_contest": "in_contest", +} + + +@logging_wrapper(__file__) +def load_fifa_event_data( + metadata_loc: str, + events_loc: str, + pitch_dimensions: list = [105.0, 68.0] +) -> tuple[ + pd.DataFrame, Metadata, dict[str, dict[str | int, IndividualCloseToBallEvent]] +]: + """This function retrieves the metadata and event data of a FIFA match. The x + and y coordinates provided have been scaled to the dimensions of the pitch, with + (0, 0) being the center. Additionally, the coordinates have been standardized so + that the home team is represented as playing from left to right for the entire + game, and the away team is represented as playing from right to left. + + Args: + metadata_loc (str): location of the metadata JSON file. + events_loc (str): location of the events JSON file. + pitch_dimensions (list, optional): the length and width of the pitch in meters + + Returns: + Tuple[pd.DataFrame, Metadata, dict]: the event data of the game, the metadata, + and the databallpy_events. + """ + if not isinstance(metadata_loc, str): + raise TypeError(f"metadata_loc should be a string, not a {type(metadata_loc)}") + if not isinstance(events_loc, str): + raise TypeError(f"events_loc should be a string, not a {type(events_loc)}") + if not metadata_loc[-5:] == ".json": + raise ValueError( + f"metadata file should be of .json format, not {metadata_loc.split('.')[-1]}" + ) + if not events_loc[-5:] == ".json": + raise ValueError( + f"events file should be of .json format, not {events_loc.split('.')[-1]}" + ) + + metadata = _load_metadata(metadata_loc, pitch_dimensions=pitch_dimensions) + all_players = pd.concat( + [metadata.home_players, metadata.away_players], ignore_index=True + ) + kickoff_time = metadata.kickoff_utc + + event_data, databallpy_events = _load_event_data( + events_loc, + metadata.home_team_id, + metadata.away_team_id, + pitch_dimensions=pitch_dimensions, + players=all_players, + kickoff_time=kickoff_time, + ) + + home_score, away_score = _get_game_score( + event_data, + metadata.home_team_id, + metadata.away_team_id + ) + + metadata.home_score = home_score + metadata.away_score = away_score + + # Add player names to the event data dataframe + home_players = dict( + zip(metadata.home_players["id"], metadata.home_players["full_name"]) + ) + away_players = dict( + zip(metadata.away_players["id"], metadata.away_players["full_name"]) + ) + + home_mask = (event_data["team_id"] == metadata.home_team_id) & ~pd.isnull( + event_data["player_id"] + ) + away_mask = (event_data["team_id"] == metadata.away_team_id) & ~pd.isnull( + event_data["player_id"] + ) + + event_data.insert(6, "player_name", None) + event_data.loc[home_mask, "player_name"] = event_data.loc[ + home_mask, "player_id" + ].map(home_players) + event_data.loc[away_mask, "player_name"] = event_data.loc[ + away_mask, "player_id" + ].map(away_players) + event_data["player_name"] = event_data["player_name"].replace({np.nan: None}) + + # Rescale the x and y coordinates relative to the pitch dimensions + # The original dimension of the x and y coordinates range from 0 to 1 + event_data.loc[:, ["start_x"]] = ( + event_data.loc[:, ["start_x"]] * pitch_dimensions[0] + ) - (pitch_dimensions[0] / 2.0) + event_data.loc[:, ["start_y"]] = ( + event_data.loc[:, ["start_y"]] * pitch_dimensions[1] + ) - (pitch_dimensions[1] / 2.0) + + # Change direction of play of the away team so it is represented from right to left + event_data.loc[ + event_data["team_id"] == metadata.away_team_id, ["start_x", "start_y"] + ] *= -1 + + return event_data, metadata, databallpy_events + + +@logging_wrapper(__file__) +def _load_metadata(metadata_loc: str, pitch_dimensions: list) -> Metadata: + """Function to load metadata from the FIFA metadata JSON file + + Args: + metadata_loc (str): location of the metadata JSON file + pitch_dimensions (list): the length and width of the pitch in meters + + Returns: + Metadata: all metadata information of the current game + """ + with open(metadata_loc, "r", encoding="utf-8") as f: + metadata_json = json.load(f) + + # Get match information + match_id = metadata_json["match_id"] + country = metadata_json.get("country", "UNKNOWN") + + # Parse kickoff time + kickoff_time = pd.to_datetime(metadata_json["kickoff_utc"], utc=True) + + # Create periods dataframe from phases + periods = { + "period_id": [1, 2, 3, 4, 5], + "start_datetime_ed": [], + "end_datetime_ed": [], + } + + for phase in metadata_json["phases"][:2]: + start_time = kickoff_time + pd.to_timedelta(phase["phase_start"]) + end_time = kickoff_time + pd.to_timedelta(phase["phase_end"]) + periods["start_datetime_ed"].append(start_time) + periods["end_datetime_ed"].append(end_time) + + # Fill remaining periods with NaT + for _ in range(3): + periods["start_datetime_ed"].append(pd.to_datetime("NaT", utc=True)) + periods["end_datetime_ed"].append(pd.to_datetime("NaT", utc=True)) + + # Get player information + home_players = _get_player_info(metadata_json["home_team_players"]) + away_players = _get_player_info(metadata_json["away_team_players"]) + + # Get game scores + home_team_id = metadata_json["home_team_id"] + away_team_id = metadata_json["away_team_id"] + home_score, away_score = np.nan, np.nan + + # Get team formation + home_formation = metadata_json["home_formation"] + away_formation = metadata_json["away_formation"] + + metadata = Metadata( + game_id=match_id, + pitch_dimensions=pitch_dimensions, + periods_frames=pd.DataFrame(periods), + frame_rate=25, + home_team_id=home_team_id, + home_team_name=metadata_json["home_team_name"], + home_players=home_players, + home_score=home_score, + home_formation=home_formation, + away_team_id=away_team_id, + away_team_name=metadata_json["away_team_name"], + away_players=away_players, + away_score=away_score, + away_formation=away_formation, + country=country, + ) + metadata.kickoff_utc = kickoff_time + return metadata + + +def _get_player_info(players_data: list) -> pd.DataFrame: + """Function to loop over all players and save data in a pd.DataFrame + + Args: + players_data (list): for every player a dictionary with info about the player + + Returns: + pd.DataFrame: all information of the players + """ + n = len(players_data) + result_dict = { + "id": [MISSING_INT] * n, + "full_name": [""] * n, + "shirt_num": [MISSING_INT] * n, + "position": ["unspecified"] * n, + } + + for idx, player in enumerate(players_data): + result_dict["id"][idx] = player["player_id"] + result_dict["full_name"][idx] = player["player_name"] + result_dict["shirt_num"][idx] = player["player_shirt_number"] + + return pd.DataFrame(result_dict) + + +@logging_wrapper(__file__) +def _load_event_data( + events_loc: str, + home_team_id: int, + away_team_id: int, + players: pd.DataFrame, + pitch_dimensions: list = [105.0, 68.0], + kickoff_time: pd.Timestamp = None, +) -> tuple[pd.DataFrame, dict[str, dict[str | int, IndividualCloseToBallEvent]]]: + """Function to load FIFA event data from JSON file + + Args: + events_loc (str): location of the events JSON file + home_team_id (int): id of the home team + away_team_id (int): id of the away team + players (pd.DataFrame): dataframe with player information + pitch_dimensions (list, optional): dimensions of the pitch. + Defaults to [105.0, 68.0]. + kickoff_time (pd.Timestamp): kickoff time of the match + + Returns: + pd.DataFrame: all events of the game in a pd dataframe + dict: dict with "shot_events", "dribble_events", "pass_events", "other_events" + as key and a dict with the IndividualCloseToBallEvent instances + """ + offer_events = {} + shot_events = {} + pass_events = {} + other_events = {} + + # Load JSON file + events_list = [] + with open(events_loc, "r", encoding="utf-8") as f: + for line in f: + if line.strip(): + events_list.append(json.loads(line)) + + flip_first_half, flip_second_half = _determine_period_flips( + events_list, home_team_id, away_team_id + ) + + result_dict = { + "event_id": [], + "databallpy_event": [], + "period_id": [], + "minutes": [], + "seconds": [], + "player_id": [], + "team_id": [], + "is_successful": [], + "start_x": [], + "start_y": [], + "datetime": [], + "phase": [], + "original_event_id": [], + "original_event": [], + "original_event_type": [], + "outcome_additional": [], + } + + for i_event, event in enumerate(events_list): + # Skip referee actions and other non-player events + if event["team_id"] == 0 or event["from_player_id"] == 0: + continue + + result_dict["event_id"].append(i_event) + result_dict["original_event_id"].append(event["event_id"]) + result_dict["original_event_type"].append(event["event_type"]) + result_dict["phase"].append(event["category"]) + + event_name = event["event"] + result_dict["original_event"].append(event_name) + result_dict["period_id"].append(event["half_time"]) + + # Convert time from match_time_in_ms + total_seconds = event["match_time_in_ms"] / 1000.0 + minutes = int(total_seconds // 60) + seconds = total_seconds % 60 + result_dict["minutes"].append(minutes) + result_dict["seconds"].append(seconds) + + if kickoff_time is not None: + event_datetime = kickoff_time + pd.to_timedelta(total_seconds, unit='s') + else: + event_datetime = pd.NaT + result_dict["datetime"].append(event_datetime) + + result_dict["player_id"].append( + event["from_player_id"] if event["from_player_id"] != 0 else MISSING_INT + ) + result_dict["team_id"].append(event["team_id"]) + result_dict["outcome_additional"].append(event.get("outcome_additional", "")) + + # Determine success based on outcome + if event_name in ["pass", "tackle"]: + outcome = (event.get("outcome") or "").lower() + result_dict["is_successful"].append( + 1 if ("possession_complete" in outcome or "possession_won" in outcome) else 0 + ) + else: + result_dict["is_successful"].append(None) + + x_norm = event.get("x_location_start") if "x_location_start" in event else event.get("x") + y_norm = event.get("y_location_start") if "y_location_start" in event else event.get("y") + + if x_norm is None or pd.isna(x_norm): + x_norm = 0.5 + if y_norm is None or pd.isna(y_norm): + y_norm = 0.5 + + x_norm = max(0.0, min(1.0, float(x_norm))) + y_norm = max(0.0, min(1.0, float(y_norm))) + + period_id = event.get("half_time") + + if period_id == 1 and flip_first_half: + x_norm = 1.0 - x_norm + y_norm = 1.0 - y_norm + elif period_id == 2 and flip_second_half: + x_norm = 1.0 - x_norm + y_norm = 1.0 - y_norm + + result_dict["start_x"].append(x_norm) + result_dict["start_y"].append(y_norm) + + # Create databallpy event instances + if event_name in ["pass", "assist"]: + pass_events[i_event] = _make_pass_instance( + event, + home_team_id, + away_team_id, + pitch_dimensions=pitch_dimensions, + players=players, + id=i_event, + period_id=period_id, + flip_first_half=flip_first_half, + flip_second_half=flip_second_half, + ) + + if event_name in ["attempt_at_goal", "own_goal"]: + shot_events[i_event] = _make_shot_event_instance( + event, + home_team_id, + away_team_id, + pitch_dimensions=pitch_dimensions, + players=players, + id=i_event, + period_id=period_id, + flip_first_half=flip_first_half, + flip_second_half=flip_second_half, + ) + + if event_name == "tackle": + other_events[i_event] = _make_tackle_event_instance( + event, + home_team_id, + away_team_id, + pitch_dimensions=pitch_dimensions, + players=players, + id=i_event, + period_id=period_id, + flip_first_half=flip_first_half, + flip_second_half=flip_second_half, + ) + + result_dict["databallpy_event"] = [None] * len(result_dict["event_id"]) + event_data = pd.DataFrame(result_dict) + event_data["databallpy_event"] = ( + event_data["original_event"] + .map(FIFA_TO_DATABALLPY_MAP) + .replace([np.nan], [None]) + ) + + # Handle shot success + event_data.loc[ + event_data["original_event"] == "attempt_at_goal", "is_successful" + ] = event_data.loc[ + event_data["original_event"] == "attempt_at_goal" + ].apply( + lambda row: 1 if str(row.get("outcome_additional", "")).lower() == "goal" else 0, + axis=1 + ) + + # Ensure boolean dtype + event_data["is_successful"] = event_data["is_successful"].astype("boolean") + event_data.loc[event_data["period_id"] > 5, "period_id"] = -1 + event_data = event_data.drop(columns=["outcome_additional"]) + + return event_data, { + "shot_events": shot_events, + "pass_events": pass_events, + "offer_events": offer_events, + "other_events": other_events, + } + + +def _make_pass_instance( + event: dict, + home_team_id: int, + away_team_id: int, + players: pd.DataFrame, + pitch_dimensions: list = [105.0, 68.0], + id: int = None, + period_id: int = None, + flip_first_half: bool = False, + flip_second_half: bool = False, +) -> PassEvent: + """Function to create a PassEvent instance from FIFA event data""" + on_ball_info = _get_on_ball_event_info(event) + on_ball_info.update( + _get_close_to_ball_event_info( + event, pitch_dimensions, home_team_id, away_team_id, players, + id, period_id, flip_first_half, flip_second_half + ) + ) + + outcome_str = "successful" if event.get("outcome") == "possession_complete" else "unsuccessful" + + # Get pass_type + line_break = event.get("line_break_direction") + if line_break in ["around", "over", "through"]: + pass_type = "line_break" + elif line_break is not None: + pass_type = str(line_break) + else: + pass_type = "unspecified" + + # Get end coordinates + x_end_norm = event.get("x_location_end") + y_end_norm = event.get("y_location_end") + + if x_end_norm is not None and y_end_norm is not None: + if period_id == 1 and flip_first_half: + x_end_norm = 1.0 - x_end_norm + y_end_norm = 1.0 - y_end_norm + elif period_id == 2 and flip_second_half: + x_end_norm = 1.0 - x_end_norm + y_end_norm = 1.0 - y_end_norm + + x_end = (x_end_norm * pitch_dimensions[0]) - (pitch_dimensions[0] / 2.0) + y_end = (y_end_norm * pitch_dimensions[1]) - (pitch_dimensions[1] / 2.0) + + if event.get("team_id") == away_team_id: + x_end *= -1 + y_end *= -1 + else: + x_end, y_end = np.nan, np.nan + + return PassEvent( + **on_ball_info, + _xt=-1.0, + outcome_str=outcome_str, + end_x=x_end, + end_y=y_end, + pass_type=pass_type, + ) + + +def _make_shot_event_instance( + event: dict, + home_team_id: int, + away_team_id: int, + players: pd.DataFrame, + pitch_dimensions: list = [105.0, 68.0], + id: int = None, + period_id: int = None, + flip_first_half: bool = False, + flip_second_half: bool = False, +) -> ShotEvent: + """Function to create a ShotEvent instance from FIFA event data""" + on_ball_info = _get_on_ball_event_info(event) + on_ball_info.update( + _get_close_to_ball_event_info( + event, pitch_dimensions, home_team_id, away_team_id, players, + id, period_id, flip_first_half, flip_second_half + ) + ) + on_ball_info.pop("outcome") + + event_name = (event.get("event") or "").lower() + outcome_additional = (event.get("outcome_additional") or "").lower() + + if event_name == "own_goal": + shot_outcome = "own_goal" + elif "goal" in outcome_additional: + shot_outcome = "goal" + else: + shot_outcome = "miss_off_target" + + return ShotEvent( + **on_ball_info, + _xt=-1.0, + outcome=shot_outcome == "goal", + outcome_str=shot_outcome, + ) + + +def _make_tackle_event_instance( + event: dict, + home_team_id: int, + away_team_id: int, + players: pd.DataFrame, + pitch_dimensions: list = [105.0, 68.0], + id: int = None, + period_id: int = None, + flip_first_half: bool = False, + flip_second_half: bool = False, +) -> TackleEvent: + """Function to create a TackleEvent instance from FIFA event data""" + close_to_ball_info = _get_close_to_ball_event_info( + event, pitch_dimensions, home_team_id, away_team_id, players, + id, period_id, flip_first_half, flip_second_half + ) + return TackleEvent(**close_to_ball_info) + + +def _get_on_ball_event_info(event: dict) -> dict: + """Function to get the on-ball event data from the event based on + the IndividualOnBallEvent class. + + Args: + event (dict): event from FIFA data + + Returns: + dict: dictionary with body_part, set_piece, and possession_type + """ + # Get body part + body_part = BODY_PART_MAP.get(event.get("body_type", "other"), "unspecified") + + # Get set piece + origin = event.get("origin", "") + set_piece = SET_PIECE_MAP.get(origin, "no_set_piece") + + possession_type = "open_play" + + return { + "body_part": body_part, + "set_piece": set_piece, + "possession_type": possession_type, + } + + +def _get_close_to_ball_event_info( + event: dict, + pitch_dimensions: list, + home_team_id: int, + away_team_id: int, + players: pd.DataFrame, + id: int, + period_id: int, + flip_first_half: bool, + flip_second_half: bool, +) -> dict: + """Function to get the base event data from the event based on + the CloseToBallEvent class. + + Args: + event (dict): event from FIFA data + pitch_dimensions (list): pitch dimensions in x and y direction + home_team_id (int): id of the home team + away_team_id (int): id of the away team + players (pd.DataFrame): dataframe with player information + id (int): event id + period_id (int): period id + flip_first_half (bool): whether to flip first half coordinates + flip_second_half (bool): whether to flip second half coordinates + + Returns: + dict: dictionary with the base event data + """ + + x_norm = event.get("x_location_start") if "x_location_start" in event else event.get("x") + y_norm = event.get("y_location_start") if "y_location_start" in event else event.get("y") + + if x_norm is None or pd.isna(x_norm): + x_norm = 0.5 + if y_norm is None or pd.isna(y_norm): + y_norm = 0.5 + + x_norm = max(0.0, min(1.0, float(x_norm))) + y_norm = max(0.0, min(1.0, float(y_norm))) + + if period_id == 1 and flip_first_half: + x_norm = 1.0 - x_norm + y_norm = 1.0 - y_norm + elif period_id == 2 and flip_second_half: + x_norm = 1.0 - x_norm + y_norm = 1.0 - y_norm + + x_start = (x_norm * pitch_dimensions[0]) - (pitch_dimensions[0] / 2.0) + y_start = (y_norm * pitch_dimensions[1]) - (pitch_dimensions[1] / 2.0) + + if event.get("team_id") == away_team_id: + x_start *= -1 + y_start *= -1 + + # Calculate time + total_seconds = event.get("match_time_in_ms", 0) / 1000.0 + minutes = int(total_seconds // 60) + seconds = int(total_seconds % 60) + + # Get player jersey number + player_id = event.get("from_player_id") + jersey = players.loc[players["id"] == player_id, "shirt_num"].iloc[0] if len( + players[players["id"] == player_id] + ) > 0 else MISSING_INT + + return { + "start_x": x_start, + "start_y": y_start, + "related_event_id": MISSING_INT, + "event_id": id, + "period_id": period_id, + "minutes": minutes, + "seconds": seconds, + "datetime": pd.NaT, + "team_id": event.get("team_id"), + "team_side": "home" if event.get("team_id") != away_team_id else "away", + "pitch_size": pitch_dimensions, + "player_id": player_id, + "jersey": jersey, + "outcome": event.get("outcome") == "possession_complete" if "outcome" in event else True, + } + + +def _get_game_score( + events: pd.DataFrame, + home_team_id: int, + away_team_id: int +) -> tuple[int, int]: + """ + Function to extract game scores by counting goals from event data. + + Args: + events (pd.DataFrame): DataFrame of event data + home_team_id (int): ID of the home team + away_team_id (int): ID of the away team + + Returns: + tuple[int, int]: (home_score, away_score) + """ + home_score = 0 + away_score = 0 + + shot_events = events[ + events["original_event"].isin(["attempt_at_goal", "own_goal"]) + ] + + for _, row in shot_events.iterrows(): + event_name = (row.get("original_event") or "").lower() + team_id = row.get("team_id") + + is_goal = False + is_own_goal = False + + if event_name == "own_goal": + is_own_goal = True + is_goal = True + elif event_name == "attempt_at_goal": + if row.get("is_successful") == True: + is_goal = True + + if is_goal: + if is_own_goal: + # Own goal: goal counts for opponent + if team_id == home_team_id: + away_score += 1 + elif team_id == away_team_id: + home_score += 1 + else: + # Normal goal + if team_id == home_team_id: + home_score += 1 + elif team_id == away_team_id: + away_score += 1 + + return home_score, away_score + + +def _determine_period_flips( + events_list: list, + home_team_id: int, + away_team_id: int +) -> tuple[bool, bool]: + """ + Determine whether to flip coordinates for first and second half + + Args: + events_list: List of all events + home_team_id: Home team ID + away_team_id: Away team ID + + Returns: + tuple[bool, bool]: (flip_first_half, flip_second_half) + """ + flip_first_half = False + flip_second_half = False + + # Find first half kickoff event + for event in events_list: + if (event.get("half_time") == 1 and + event.get("event", "").lower() == "game_period_start"): + team_id = event.get("team_id") + side = (event.get("side") or "").lower() + + # If home team kicks off with side='r', or away team kicks off with side='l', flip first half + if (team_id == home_team_id and side == "r") or \ + (team_id == away_team_id and side == "l"): + flip_first_half = True + + # If home team kicks off with side='l', or away team kicks off with side='r', flip second half + if (team_id == home_team_id and side == "l") or \ + (team_id == away_team_id and side == "r"): + flip_second_half = True + + break + + return flip_first_half, flip_second_half \ No newline at end of file diff --git a/databallpy/data_parsers/tracking_data_parsers/utils/_add_datetime.py b/databallpy/data_parsers/tracking_data_parsers/utils/_add_datetime.py index 9beae849..db6d2e09 100644 --- a/databallpy/data_parsers/tracking_data_parsers/utils/_add_datetime.py +++ b/databallpy/data_parsers/tracking_data_parsers/utils/_add_datetime.py @@ -24,6 +24,7 @@ def _add_datetime( frames_minutes = frames_time.hour * 60 + frames_time.minute dt_start_game_minutes = dt_start_game.hour * 60 + dt_start_game.minute + frames = frames % (frame_rate * 60 * 60 * 24) # if diff in minutes < 10, assume it is a timestamp if abs(dt_start_game_minutes - frames_minutes) <= 10: date = dt_start_game.date() diff --git a/databallpy/game.py b/databallpy/game.py index f7f31f8b..dee33a96 100644 --- a/databallpy/game.py +++ b/databallpy/game.py @@ -520,6 +520,13 @@ def synchronise_tracking_and_event_data( LOGGER.error(message) raise DataBallPyError(message) + for col in ["sync_certainty", "databallpy_event", "event_id"]: + if col in self.tracking_data.columns: + self.tracking_data.drop(columns=[col], inplace=True) + for col in ["tracking_frame", "sync_certainty"]: + if col in self.event_data.columns: + self.event_data.drop(columns=[col], inplace=True) + self.tracking_data = pre_compute_synchronisation_variables( self.tracking_data, self.tracking_data.frame_rate, self.pitch_dimensions ) diff --git a/databallpy/schemas/event_data.py b/databallpy/schemas/event_data.py index b72bf103..a3c7a6bb 100644 --- a/databallpy/schemas/event_data.py +++ b/databallpy/schemas/event_data.py @@ -20,9 +20,25 @@ class EventDataSchema(pa.DataFrameModel): is_successful: pa.typing.Series[pd.BooleanDtype] = pa.Field(nullable=True) start_x: pa.typing.Series[float] = pa.Field(ge=-60, le=60, nullable=True) start_y: pa.typing.Series[float] = pa.Field(ge=-45, le=45, nullable=True) - datetime: pa.typing.Series[pd.Timestamp] = pa.Field( - ge=pd.Timestamp("1975-01-01"), le=pd.Timestamp.now(), coerce=True - ) + + datetime: pa.typing.Series[object] = pa.Field(nullable=True, coerce=True) + + @pa.check("datetime") + def is_timestamp(self, series: pa.typing.Series[object]) -> bool: + return series.dropna().apply(lambda x: isinstance(x, pd.Timestamp)).all() + + @pa.check("datetime") + def after_1975(self, series: pa.typing.Series[object]) -> bool: + return ( + series.dropna() + .apply(lambda x: x >= pd.Timestamp("1975-01-01", tz=x.tzinfo)) + .all() + ) + + @pa.check("datetime") + def before_now(self, series: pa.typing.Series[object]) -> bool: + return series.dropna().apply(lambda x: x <= pd.Timestamp.now(tz=x.tzinfo)).all() + original_event_id: pa.typing.Series[object] = pa.Field(coerce=True) original_event: pa.typing.Series[str] = pa.Field(nullable=True) diff --git a/databallpy/schemas/tracking_data.py b/databallpy/schemas/tracking_data.py index 4e8e5228..69372937 100644 --- a/databallpy/schemas/tracking_data.py +++ b/databallpy/schemas/tracking_data.py @@ -109,9 +109,24 @@ def check_all_locations(df): class TrackingDataSchema(pa.DataFrameModel): frame: pa.typing.Series[int] = pa.Field(unique=True) - datetime: pa.typing.Series[pd.Timestamp] = pa.Field( - ge=pd.Timestamp("1975-01-01"), le=pd.Timestamp.now(), coerce=True, nullable=True - ) + datetime: pa.typing.Series[object] = pa.Field(nullable=True, coerce=True) + + @pa.check("datetime") + def is_timestamp(self, series: pa.typing.Series[object]) -> bool: + return series.dropna().apply(lambda x: isinstance(x, pd.Timestamp)).all() + + @pa.check("datetime") + def after_1975(self, series: pa.typing.Series[object]) -> bool: + return ( + series.dropna() + .apply(lambda x: x >= pd.Timestamp("1975-01-01", tz=x.tzinfo)) + .all() + ) + + @pa.check("datetime") + def before_now(self, series: pa.typing.Series[object]) -> bool: + return series.dropna().apply(lambda x: x <= pd.Timestamp.now(tz=x.tzinfo)).all() + ball_x: pa.typing.Series[float] = pa.Field(ge=-62.5, le=62.5, nullable=True) ball_y: pa.typing.Series[float] = pa.Field(ge=-45, le=45, nullable=True) ball_z: pa.typing.Series[float] = pa.Field(ge=-5, le=45, nullable=True) @@ -814,6 +829,84 @@ def add_team_possession( last_team = "home" if current_team_id == home_team_id else "away" self.loc[start_idx:, "team_possession"] = last_team + def add_dangerous_accessible_space( + self, mask: pd.Series = None, **kwargs + ) -> None | pd.DataFrame: + """Function to add a column 'dangerous_accessible_space' to the tracking data, + indicating the accessible space weighted by the expected value (measured by xG) of the respective location. + + Warning: Can be expensive, only use for frames that are needed. + + SOURCE: + Jonas Bischofberger, Arnold Baca. Dangerous Accessible Space: A Unified Model of Space and Value in Team Sports, + 21 August 2025, PREPRINT (Version 1) available at Research Square [https://doi.org/10.21203/rs.3.rs-6932689/v1] + + Args: + mask (Series): Boolean filter to calculate fewer values. + + Returns: + None + """ + try: + import accessible_space + except ImportError: + raise ImportError( + "This function requires the accessible-space package. Please run `pip install 'accessible-space>=2.0.13'` " + "Or install databallpy using `pip install 'databallpy[accessible-space]'`" + ) + + mask = pd.Series(True, index=self.index) if mask is None else mask + + col_ids = [ + x[:-2] for x in self.columns if x.endswith("_x") and not x.startswith("ball") + ] + if not all([f"{col_id}_vx" in self.columns.to_list() for col_id in col_ids]): + raise ValueError( + "To dangerous accessible space you need to add velocities of all players. Try using the" + " game.tracking_data.add_velocity method to do so." + ) + if "player_possession" not in self.columns.to_list(): + raise ValueError( + "To dangerous accessible space you need to add the inidividual player possession column. Try using the" + " game.tracking_data.add_individual_player_possession method to do so." + ) + + self["team_in_possession"] = ( + self["player_possession"] + .str.startswith("home") + .map({True: "home", False: "away"}) + ) + + td_long = self[mask].to_long_format() + td_long["team"] = td_long["column_id"].str[:4] + + res = accessible_space.interface.get_dangerous_accessible_space( + td_long, + frame_col="frame", + player_col="column_id", + team_col="team", + x_col="x", + y_col="y", + vx_col="vx", + vy_col="vy", + team_in_possession_col="team_in_possession", + period_col="period_id", + player_in_possession_col="player_possession", + ball_player_id="ball", + **kwargs, + ) + + td_long.loc[ + ~pd.isnull(td_long["team_in_possession"]), "dangerous_accessible_space" + ] = res.das + del res + + td_long = td_long[["frame", "dangerous_accessible_space"]].drop_duplicates() + self["dangerous_accessible_space"] = self.merge( + td_long, on="frame", how="left", validate="one_to_one" + )["dangerous_accessible_space"] + self.drop(columns="team_in_possession", inplace=True) + def to_long_format(self) -> pd.DataFrame: """Function that moves from the base format, with a row for every frame, to a long format, with a row for every frame/column_id combination diff --git a/databallpy/utils/constants.py b/databallpy/utils/constants.py index be4b735a..2df7a66c 100644 --- a/databallpy/utils/constants.py +++ b/databallpy/utils/constants.py @@ -74,6 +74,7 @@ "flick_on", "pull_back", "switch_off_play", + "line_break", "unspecified", ] """The pass type strings that are supported in databallpy.""" diff --git "a/databallpy/utils/databallpy - \345\277\253\346\215\267\346\226\271\345\274\217.lnk" "b/databallpy/utils/databallpy - \345\277\253\346\215\267\346\226\271\345\274\217.lnk" new file mode 100644 index 0000000000000000000000000000000000000000..a39d0b691944f773945b153c4c3ffc049cfbd005 GIT binary patch literal 1485 zcmbtUZAg<*6n?ghR+7zJ%c4?>#IlVU2s$vfHT)>?1C>5}E8l&(wzik{CfSduA5oDh z8UBjI#InMWlJZ9%4Ej;@YZ!=zLQua9F$p9huygi0aiYcOzMONZv_CA zPE8z$#5hWBKm#qwUw>R~^H+_l(hM~itQlABDr}9dV~2FEQuG)2W3>)h)t{@P*^_3U zwr5m>Jd^3J8OBQ;dHRp;+ZReY{T-6|gJvLyY;!REW;5C9>-R;@PK>JF$-gUIwKz0Y4!+<#{R5zhEgUp~?f) z6VWOnV?o5m@4zln_U&dt=|CagGy>OH;L-?`Q6V80f{Sb%Vjv)qLp6-_H7cmxM``8} z5G6Sz8TRKF%HAT62|eQMqrv*I(mS?a1>^5MQ?JyA%d200Pw!IY`V01Ur~K=DP6f&h zx^EG7er|s~2ag8_!WSZGMG+nw?dw)jV*xL22pEcI81_zW%w0yOcdTd}t z3$s*bJO7lnkeI@2wr1g~d$zLZn4+D=2Nt4*Ulz&th&YZ1r=7XQox|zRM-ZtR%Zy&3 z{T)OxBs0zbdv)P2sTEedB~V1ZIl!m-GxjGprLw*dCf@BfK9Ue*hXT6X5^= literal 0 HcmV?d00001 diff --git a/databallpy/utils/get_game.py b/databallpy/utils/get_game.py index c6882a5e..ad6c5a4c 100644 --- a/databallpy/utils/get_game.py +++ b/databallpy/utils/get_game.py @@ -15,6 +15,7 @@ load_sportec_event_data, load_sportec_open_event_data, load_statsbomb_event_data, + load_fifa_event_data, ) from databallpy.data_parsers.kloppy_parsers import ( convert_kloppy_event_dataset, @@ -175,7 +176,8 @@ def get_game( event_match_loc=event_match_loc, event_lineup_loc=event_lineup_loc, ) - EventDataSchema.validate(event_data) + if _check_game_class_: + EventDataSchema.validate(event_data) uses_event_data = True event_precise_timestamps = { @@ -186,6 +188,7 @@ def get_game( "sportec": True, "dfl": True, "statsbomb": False, + "fifa":True, } LOGGER.info( @@ -202,8 +205,8 @@ def get_game( ) if not uses_event_data: databallpy_events = {} - - TrackingDataSchema.validate(tracking_data) + if _check_game_class_: + TrackingDataSchema.validate(tracking_data) uses_tracking_data = True if not uses_event_data and not uses_tracking_data: @@ -480,6 +483,7 @@ def load_event_data( "statsbomb", "sportec", "dfl", + "fifa" ]: raise ValueError( f"We do not support '{event_data_provider}' as event data provider yet, " @@ -510,6 +514,11 @@ def load_event_data( match_loc=event_match_loc, lineup_loc=event_lineup_loc, ) + elif event_data_provider == "fifa": + event_data, event_metadata, databallpy_events = load_fifa_event_data( + metadata_loc=event_metadata_loc, + events_loc=event_data_loc + ) elif event_data_provider in ["sportec", "dfl"]: event_data, event_metadata, databallpy_events = load_sportec_event_data( event_data_loc=event_data_loc, metadata_loc=event_metadata_loc @@ -619,6 +628,7 @@ def get_open_game( _periods_changed_playing_direction=(metadata.periods_changed_playing_direction), ) + print(f"saving game at: {save_path}") game.save_game(save_path, verbose=False, allow_overwrite=True) return game diff --git a/databallpy/visualize.py b/databallpy/visualize.py index bd07654d..99b897cf 100644 --- a/databallpy/visualize.py +++ b/databallpy/visualize.py @@ -7,7 +7,7 @@ import matplotlib.pyplot as plt import numpy as np import pandas as pd -from matplotlib.colors import Colormap +from matplotlib.colors import Colormap, to_rgb from tqdm import tqdm from databallpy.game import Game @@ -66,12 +66,7 @@ def plot_soccer_pitch( # Set pitch and line colors ax.set_facecolor(pitch_color) - if pitch_color not in ["white", "w"]: - lc = "whitesmoke" # line color - pc = "w" # 'spot' colors - else: - lc = "k" - pc = "k" + pc = lc = pick_bw_for_contrast(to_rgb(pitch_color)) # All dimensions in meters border_dimen = (3, 3) # include a border arround of the field of width 3m @@ -133,7 +128,7 @@ def plot_soccer_pitch( ax.plot( [s * half_pitch_length, s * half_pitch_length], [-goal_line_width / 2.0, goal_line_width / 2.0], - pc + "s", + pc, markersize=6 * markersize / 20.0, linewidth=linewidth, zorder=zorder - 1, @@ -321,7 +316,7 @@ def plot_events( event_data = event_data.loc[mask] if len(event_data) == 0: - print("No events could be found that game yourrequirements, please try again.") + print("No events could be found that game your requirements, please try again.") return None, None if fig is None and ax is None: @@ -337,14 +332,16 @@ def plot_events( fontsize=14, c=team_colors[0], zorder=2.5, + ha="left", ) ax.text( - game.pitch_dimensions[0] / 2.0 - 15, + game.pitch_dimensions[0] / 2.0 - 2, game.pitch_dimensions[1] / 2.0 + 1.0, game.away_team_name, fontsize=14, c=team_colors[1], zorder=2.5, + ha="right", ) # Check if color_by_col is specified and is a valid column name @@ -455,12 +452,18 @@ def plot_tracking_data( heatmap_overlay=heatmap_overlay, cmap=overlay_cmap, ) - + pitch_color = "mediumseagreen" if fig is None and ax is None: - fig, ax = plot_soccer_pitch(field_dimen=game.pitch_dimensions) + if heatmap_overlay is not None: + pitch_color = plt.get_cmap(overlay_cmap)(0) + fig, ax = plot_soccer_pitch( + field_dimen=game.pitch_dimensions, pitch_color=pitch_color + ) if title: ax.set_title(title) + contrasting_color = pick_bw_for_contrast(to_rgb(pitch_color)) + # Set game name ax.text( game.pitch_dimensions[0] / -2.0 + 2, @@ -469,14 +472,16 @@ def plot_tracking_data( fontsize=14, c=team_colors[0], zorder=2.5, + ha="left", ) ax.text( - game.pitch_dimensions[0] / 2.0 - 15, + game.pitch_dimensions[0] / 2.0 - 2, game.pitch_dimensions[1] / 2.0 + 1.0, game.away_team_name, fontsize=14, c=team_colors[1], zorder=2.5, + ha="right", ) if heatmap_overlay is not None: @@ -485,20 +490,50 @@ def plot_tracking_data( if add_velocities: _, ax = _plot_velocities(ax, td, idx, game, []) - _, ax = _plot_single_frame(ax, td_ht, td_at, idx, team_colors, [], td, game) + _, ax = _plot_single_frame( + ax, td_ht, td_at, idx, team_colors, [], td, game, pitch_color + ) if variable_of_interest is not None: - _, ax = _plot_variable_of_interest(ax, variable_of_interest, [], game) + _, ax = _plot_variable_of_interest( + ax, variable_of_interest, [], game, contrasting_color + ) if add_player_possession: column_id = game.tracking_data.loc[idx, "player_possession"] _, ax = _plot_player_possession(ax, column_id, idx, game, []) if len(events) > 0 and td.loc[idx, "databallpy_event"] in events: - _, ax = _plot_events(ax, td, idx, game, []) + _, ax = _plot_events(ax, td, idx, game, [], contrasting_color) return fig, ax +def pick_bw_for_contrast(rgb): + """ + Determines whether black or white text provides better contrast on a given background color. + + Parameters + ---------- + rgb : tuple or list of float + The background color as a tuple or list of three or four floats (RGB or RGBA), + where each value is in the range [0, 1]. + + Returns + ------- + str + "black" if black text provides better contrast, "white" otherwise. + + Notes + ----- + Uses a relative luminance calculation with gamma correction: + luminance = 0.2126 * (R ** 2.2) + 0.7152 * (G ** 2.2) + 0.0722 * (B ** 2.2) + If luminance > 0.5, returns "black"; otherwise, returns "white". + """ + r, g, b = rgb[:3] + res = 0.2126 * (r**2.2) + 0.7152 * (g**2.2) + 0.0722 * (b**2.2) + return "black" if res > 0.5 else "white" + + @logging_wrapper(__file__) @requires_ffmpeg def save_tracking_video( @@ -580,30 +615,40 @@ def save_tracking_video( ) video_loc = f"{save_folder}/{title}.mp4" - pitch_color = "white" if heatmap_overlay is not None else "mediumseagreen" + pitch_color = ( + plt.get_cmap(overlay_cmap)(0) + if heatmap_overlay is not None + else "mediumseagreen" + ) fig, ax = plot_soccer_pitch( field_dimen=game.pitch_dimensions, pitch_color=pitch_color ) - # Set game name, non variable over time + # Set game name ax.text( game.pitch_dimensions[0] / -2.0 + 2, game.pitch_dimensions[1] / 2.0 + 1.0, game.home_team_name, fontsize=14, - color=team_colors[0], + c=team_colors[0], + zorder=2.5, + ha="left", ) ax.text( - game.pitch_dimensions[0] / 2.0 - 15, + game.pitch_dimensions[0] / 2.0 - 2, game.pitch_dimensions[1] / 2.0 + 1.0, game.away_team_name, fontsize=14, - color=team_colors[1], + c=team_colors[1], + zorder=2.5, + ha="right", ) - indexes = ( td.index if not verbose else tqdm(td.index, desc="Making game clip", leave=False) ) + + contrasting_color = pick_bw_for_contrast(to_rgb(pitch_color)) + # Generate movie with variable info with writer.saving(fig, video_loc, dpi=300): for idx_loc, idx in enumerate(indexes): @@ -624,7 +669,15 @@ def save_tracking_video( ) variable_fig_objs, ax = _plot_single_frame( - ax, td_ht, td_at, idx, team_colors, variable_fig_objs, td, game + ax, + td_ht, + td_at, + idx, + team_colors, + variable_fig_objs, + td, + game, + pitch_color, ) if variable_of_interest is not None: @@ -634,7 +687,7 @@ def save_tracking_video( else variable_of_interest[idx_loc] ) variable_fig_objs, ax = _plot_variable_of_interest( - ax, value, variable_fig_objs, game + ax, value, variable_fig_objs, game, contrasting_color ) if add_player_possession: @@ -645,7 +698,7 @@ def save_tracking_video( if len(events) > 0 and td.loc[idx, "databallpy_event"] in events: variable_fig_objs, ax = _plot_events( - ax, td, idx, game, variable_fig_objs + ax, td, idx, game, variable_fig_objs, contrasting_color ) # 'pause' the clip for 1 second on this event @@ -758,8 +811,8 @@ def _plot_heatmap_overlay( ], origin="lower", cmap=cmap, - alpha=0.5, zorder=-5, + interpolation="bilinear", ) variable_fig_objs.append(fig_obj) @@ -809,6 +862,7 @@ def _plot_single_frame( variable_fig_objs: list, td: pd.DataFrame, game: Game, + pitch_color, ) -> tuple[list, plt.axes]: """Helper function to plot the single frame of the current frame.""" # Scatter plot the teams @@ -836,13 +890,15 @@ def _plot_single_frame( td_team[y] - 0.5, x.split("_")[1], # player number fontsize=9, - c="white", + c=pick_bw_for_contrast(to_rgb(c)), zorder=3.0, ) variable_fig_objs.append(fig_obj) # Plot the ball - fig_obj = ax.scatter(td.loc[idx, "ball_x"], td.loc[idx, "ball_y"], c="black") + fig_obj = ax.scatter( + td.loc[idx, "ball_x"], td.loc[idx, "ball_y"], c="black", zorder=4 + ) variable_fig_objs.append(fig_obj) # Add time info @@ -850,7 +906,7 @@ def _plot_single_frame( -20.5, game.pitch_dimensions[1] / 2.0 + 1.0, td.loc[idx, "gametime_td"], - c="k", + c=pick_bw_for_contrast(to_rgb(pitch_color)), fontsize=14, ) variable_fig_objs.append(fig_obj) @@ -863,6 +919,7 @@ def _plot_variable_of_interest( value: any, variable_fig_objs: list, game: Game, + c: str | tuple, ) -> tuple[list, plt.axes]: """Helper function to plot the variable of interest of the current frame.""" fig_obj = ax.text( @@ -870,6 +927,7 @@ def _plot_variable_of_interest( game.pitch_dimensions[1] / 2.0 + 1.0, str(value), fontsize=14, + color=c, ) variable_fig_objs.append(fig_obj) @@ -883,23 +941,32 @@ def _plot_player_possession( if pd.isnull(column_id) or pd.isnull(game.tracking_data.loc[idx, f"{column_id}_x"]): return variable_fig_objs, ax - circle = plt.Circle( - ( - game.tracking_data.loc[idx, f"{column_id}_x"], - game.tracking_data.loc[idx, f"{column_id}_y"], - ), - radius=1, - color="gold", - fill=False, - ) - fig_obj = ax.add_artist(circle) - variable_fig_objs.append(fig_obj) + for r in range(15): + radius = r / 8 + circle = plt.Circle( + ( + game.tracking_data.loc[idx, f"{column_id}_x"], + game.tracking_data.loc[idx, f"{column_id}_y"], + ), + radius=radius, + color="white", + fill=True, + alpha=0.07, + zorder=2, + ) + fig_obj = ax.add_artist(circle) + variable_fig_objs.append(fig_obj) return variable_fig_objs, ax def _plot_events( - ax: plt.axes, td: pd.DataFrame, idx: int, game: Game, variable_fig_objs: list + ax: plt.axes, + td: pd.DataFrame, + idx: int, + game: Game, + variable_fig_objs: list, + c: str | tuple, ) -> tuple[list, plt.axes]: """Helper function to plot the events of the current frame.""" event = ( @@ -907,13 +974,22 @@ def _plot_events( ) player_name = event["player_name"] event_name = event["databallpy_event"] + text = f"{event_name}: {player_name}" + if len(text) > 30: + target = len(text) // 2 # roughly the middle + spaces = [i for i, c in enumerate(text) if c == " "] + split_index = min(spaces, key=lambda x: abs(x - target)) + text = text[:split_index] + "\n" + text[split_index + 1 :] # Add event text fig_obj = ax.text( - 5, - game.pitch_dimensions[1] / 2.0 + 1, - f"{player_name}: {event_name}", - fontsize=14, + 1, + game.pitch_dimensions[1] / 2.0, + text, + fontsize=8, + color=c, + ha="left", + va="bottom", ) variable_fig_objs.append(fig_obj) From 892d568102ee7072004503fdd1817938402f2b5b Mon Sep 17 00:00:00 2001 From: Alek050 Date: Wed, 28 Jan 2026 13:43:19 +0100 Subject: [PATCH 2/2] fixed linters --- .../event_data_parsers/__init__.py | 6 +- .../event_data_parsers/fifa_parser.py | 216 ++++++++++-------- databallpy/utils/get_game.py | 9 +- 3 files changed, 131 insertions(+), 100 deletions(-) diff --git a/databallpy/data_parsers/event_data_parsers/__init__.py b/databallpy/data_parsers/event_data_parsers/__init__.py index 4565effc..4e15f3b9 100644 --- a/databallpy/data_parsers/event_data_parsers/__init__.py +++ b/databallpy/data_parsers/event_data_parsers/__init__.py @@ -1,3 +1,6 @@ +from databallpy.data_parsers.event_data_parsers.fifa_parser import ( + load_fifa_event_data, +) from databallpy.data_parsers.event_data_parsers.instat_parser import ( load_instat_event_data, ) @@ -16,6 +19,3 @@ from databallpy.data_parsers.event_data_parsers.statsbomb_parser import ( load_statsbomb_event_data, ) -from databallpy.data_parsers.event_data_parsers.fifa_parser import ( - load_fifa_event_data, -) \ No newline at end of file diff --git a/databallpy/data_parsers/event_data_parsers/fifa_parser.py b/databallpy/data_parsers/event_data_parsers/fifa_parser.py index 6530a3a9..d4e58272 100644 --- a/databallpy/data_parsers/event_data_parsers/fifa_parser.py +++ b/databallpy/data_parsers/event_data_parsers/fifa_parser.py @@ -1,5 +1,5 @@ -from ast import In import json + import numpy as np import pandas as pd @@ -13,7 +13,6 @@ from databallpy.utils.constants import MISSING_INT from databallpy.utils.logging import logging_wrapper - # FIFA event type mappings to databallpy events FIFA_TO_DATABALLPY_MAP = { "pass": "pass", @@ -31,7 +30,7 @@ "off_target": "miss_off_target", "on_target": "on_target", "complete": "goal", - "own_goal": "own_goal" + "own_goal": "own_goal", } # Body part mappings @@ -65,9 +64,7 @@ @logging_wrapper(__file__) def load_fifa_event_data( - metadata_loc: str, - events_loc: str, - pitch_dimensions: list = [105.0, 68.0] + metadata_loc: str, events_loc: str, pitch_dimensions: list = [105.0, 68.0] ) -> tuple[ pd.DataFrame, Metadata, dict[str, dict[str | int, IndividualCloseToBallEvent]] ]: @@ -115,9 +112,7 @@ def load_fifa_event_data( ) home_score, away_score = _get_game_score( - event_data, - metadata.home_team_id, - metadata.away_team_id + event_data, metadata.home_team_id, metadata.away_team_id ) metadata.home_score = home_score @@ -181,23 +176,23 @@ def _load_metadata(metadata_loc: str, pitch_dimensions: list) -> Metadata: # Get match information match_id = metadata_json["match_id"] country = metadata_json.get("country", "UNKNOWN") - + # Parse kickoff time kickoff_time = pd.to_datetime(metadata_json["kickoff_utc"], utc=True) - + # Create periods dataframe from phases periods = { "period_id": [1, 2, 3, 4, 5], "start_datetime_ed": [], "end_datetime_ed": [], } - + for phase in metadata_json["phases"][:2]: start_time = kickoff_time + pd.to_timedelta(phase["phase_start"]) end_time = kickoff_time + pd.to_timedelta(phase["phase_end"]) periods["start_datetime_ed"].append(start_time) periods["end_datetime_ed"].append(end_time) - + # Fill remaining periods with NaT for _ in range(3): periods["start_datetime_ed"].append(pd.to_datetime("NaT", utc=True)) @@ -215,7 +210,7 @@ def _load_metadata(metadata_loc: str, pitch_dimensions: list) -> Metadata: # Get team formation home_formation = metadata_json["home_formation"] away_formation = metadata_json["away_formation"] - + metadata = Metadata( game_id=match_id, pitch_dimensions=pitch_dimensions, @@ -224,12 +219,12 @@ def _load_metadata(metadata_loc: str, pitch_dimensions: list) -> Metadata: home_team_id=home_team_id, home_team_name=metadata_json["home_team_name"], home_players=home_players, - home_score=home_score, + home_score=home_score, home_formation=home_formation, away_team_id=away_team_id, away_team_name=metadata_json["away_team_name"], away_players=away_players, - away_score=away_score, + away_score=away_score, away_formation=away_formation, country=country, ) @@ -319,7 +314,7 @@ def _load_event_data( "original_event_id": [], "original_event": [], "original_event_type": [], - "outcome_additional": [], + "outcome_additional": [], } for i_event, event in enumerate(events_list): @@ -335,48 +330,58 @@ def _load_event_data( event_name = event["event"] result_dict["original_event"].append(event_name) result_dict["period_id"].append(event["half_time"]) - + # Convert time from match_time_in_ms total_seconds = event["match_time_in_ms"] / 1000.0 minutes = int(total_seconds // 60) seconds = total_seconds % 60 result_dict["minutes"].append(minutes) result_dict["seconds"].append(seconds) - + if kickoff_time is not None: - event_datetime = kickoff_time + pd.to_timedelta(total_seconds, unit='s') + event_datetime = kickoff_time + pd.to_timedelta(total_seconds, unit="s") else: event_datetime = pd.NaT result_dict["datetime"].append(event_datetime) - + result_dict["player_id"].append( event["from_player_id"] if event["from_player_id"] != 0 else MISSING_INT ) result_dict["team_id"].append(event["team_id"]) result_dict["outcome_additional"].append(event.get("outcome_additional", "")) - + # Determine success based on outcome if event_name in ["pass", "tackle"]: outcome = (event.get("outcome") or "").lower() result_dict["is_successful"].append( - 1 if ("possession_complete" in outcome or "possession_won" in outcome) else 0 + 1 + if ("possession_complete" in outcome or "possession_won" in outcome) + else 0 ) else: result_dict["is_successful"].append(None) - x_norm = event.get("x_location_start") if "x_location_start" in event else event.get("x") - y_norm = event.get("y_location_start") if "y_location_start" in event else event.get("y") - + x_norm = ( + event.get("x_location_start") + if "x_location_start" in event + else event.get("x") + ) + y_norm = ( + event.get("y_location_start") + if "y_location_start" in event + else event.get("y") + ) + if x_norm is None or pd.isna(x_norm): x_norm = 0.5 if y_norm is None or pd.isna(y_norm): y_norm = 0.5 - + x_norm = max(0.0, min(1.0, float(x_norm))) y_norm = max(0.0, min(1.0, float(y_norm))) period_id = event.get("half_time") - + if period_id == 1 and flip_first_half: x_norm = 1.0 - x_norm y_norm = 1.0 - y_norm @@ -438,18 +443,16 @@ def _load_event_data( # Handle shot success event_data.loc[ event_data["original_event"] == "attempt_at_goal", "is_successful" - ] = event_data.loc[ - event_data["original_event"] == "attempt_at_goal" - ].apply( + ] = event_data.loc[event_data["original_event"] == "attempt_at_goal"].apply( lambda row: 1 if str(row.get("outcome_additional", "")).lower() == "goal" else 0, - axis=1 + axis=1, ) # Ensure boolean dtype event_data["is_successful"] = event_data["is_successful"].astype("boolean") event_data.loc[event_data["period_id"] > 5, "period_id"] = -1 - event_data = event_data.drop(columns=["outcome_additional"]) - + event_data = event_data.drop(columns=["outcome_additional"]) + return event_data, { "shot_events": shot_events, "pass_events": pass_events, @@ -473,13 +476,22 @@ def _make_pass_instance( on_ball_info = _get_on_ball_event_info(event) on_ball_info.update( _get_close_to_ball_event_info( - event, pitch_dimensions, home_team_id, away_team_id, players, - id, period_id, flip_first_half, flip_second_half + event, + pitch_dimensions, + home_team_id, + away_team_id, + players, + id, + period_id, + flip_first_half, + flip_second_half, ) ) - outcome_str = "successful" if event.get("outcome") == "possession_complete" else "unsuccessful" - + outcome_str = ( + "successful" if event.get("outcome") == "possession_complete" else "unsuccessful" + ) + # Get pass_type line_break = event.get("line_break_direction") if line_break in ["around", "over", "through"]: @@ -492,7 +504,7 @@ def _make_pass_instance( # Get end coordinates x_end_norm = event.get("x_location_end") y_end_norm = event.get("y_location_end") - + if x_end_norm is not None and y_end_norm is not None: if period_id == 1 and flip_first_half: x_end_norm = 1.0 - x_end_norm @@ -500,10 +512,10 @@ def _make_pass_instance( elif period_id == 2 and flip_second_half: x_end_norm = 1.0 - x_end_norm y_end_norm = 1.0 - y_end_norm - + x_end = (x_end_norm * pitch_dimensions[0]) - (pitch_dimensions[0] / 2.0) y_end = (y_end_norm * pitch_dimensions[1]) - (pitch_dimensions[1] / 2.0) - + if event.get("team_id") == away_team_id: x_end *= -1 y_end *= -1 @@ -535,15 +547,22 @@ def _make_shot_event_instance( on_ball_info = _get_on_ball_event_info(event) on_ball_info.update( _get_close_to_ball_event_info( - event, pitch_dimensions, home_team_id, away_team_id, players, - id, period_id, flip_first_half, flip_second_half + event, + pitch_dimensions, + home_team_id, + away_team_id, + players, + id, + period_id, + flip_first_half, + flip_second_half, ) ) on_ball_info.pop("outcome") event_name = (event.get("event") or "").lower() outcome_additional = (event.get("outcome_additional") or "").lower() - + if event_name == "own_goal": shot_outcome = "own_goal" elif "goal" in outcome_additional: @@ -572,8 +591,15 @@ def _make_tackle_event_instance( ) -> TackleEvent: """Function to create a TackleEvent instance from FIFA event data""" close_to_ball_info = _get_close_to_ball_event_info( - event, pitch_dimensions, home_team_id, away_team_id, players, - id, period_id, flip_first_half, flip_second_half + event, + pitch_dimensions, + home_team_id, + away_team_id, + players, + id, + period_id, + flip_first_half, + flip_second_half, ) return TackleEvent(**close_to_ball_info) @@ -590,13 +616,13 @@ def _get_on_ball_event_info(event: dict) -> dict: """ # Get body part body_part = BODY_PART_MAP.get(event.get("body_type", "other"), "unspecified") - + # Get set piece origin = event.get("origin", "") set_piece = SET_PIECE_MAP.get(origin, "no_set_piece") - + possession_type = "open_play" - + return { "body_part": body_part, "set_piece": set_piece, @@ -633,27 +659,31 @@ def _get_close_to_ball_event_info( dict: dictionary with the base event data """ - x_norm = event.get("x_location_start") if "x_location_start" in event else event.get("x") - y_norm = event.get("y_location_start") if "y_location_start" in event else event.get("y") - + x_norm = ( + event.get("x_location_start") if "x_location_start" in event else event.get("x") + ) + y_norm = ( + event.get("y_location_start") if "y_location_start" in event else event.get("y") + ) + if x_norm is None or pd.isna(x_norm): x_norm = 0.5 if y_norm is None or pd.isna(y_norm): y_norm = 0.5 - + x_norm = max(0.0, min(1.0, float(x_norm))) y_norm = max(0.0, min(1.0, float(y_norm))) - + if period_id == 1 and flip_first_half: x_norm = 1.0 - x_norm y_norm = 1.0 - y_norm elif period_id == 2 and flip_second_half: x_norm = 1.0 - x_norm y_norm = 1.0 - y_norm - + x_start = (x_norm * pitch_dimensions[0]) - (pitch_dimensions[0] / 2.0) y_start = (y_norm * pitch_dimensions[1]) - (pitch_dimensions[1] / 2.0) - + if event.get("team_id") == away_team_id: x_start *= -1 y_start *= -1 @@ -665,9 +695,11 @@ def _get_close_to_ball_event_info( # Get player jersey number player_id = event.get("from_player_id") - jersey = players.loc[players["id"] == player_id, "shirt_num"].iloc[0] if len( - players[players["id"] == player_id] - ) > 0 else MISSING_INT + jersey = ( + players.loc[players["id"] == player_id, "shirt_num"].iloc[0] + if len(players[players["id"] == player_id]) > 0 + else MISSING_INT + ) return { "start_x": x_start, @@ -683,47 +715,45 @@ def _get_close_to_ball_event_info( "pitch_size": pitch_dimensions, "player_id": player_id, "jersey": jersey, - "outcome": event.get("outcome") == "possession_complete" if "outcome" in event else True, + "outcome": event.get("outcome") == "possession_complete" + if "outcome" in event + else True, } def _get_game_score( - events: pd.DataFrame, - home_team_id: int, - away_team_id: int + events: pd.DataFrame, home_team_id: int, away_team_id: int ) -> tuple[int, int]: """ Function to extract game scores by counting goals from event data. - + Args: events (pd.DataFrame): DataFrame of event data home_team_id (int): ID of the home team away_team_id (int): ID of the away team - + Returns: tuple[int, int]: (home_score, away_score) """ home_score = 0 away_score = 0 - - shot_events = events[ - events["original_event"].isin(["attempt_at_goal", "own_goal"]) - ] + + shot_events = events[events["original_event"].isin(["attempt_at_goal", "own_goal"])] for _, row in shot_events.iterrows(): event_name = (row.get("original_event") or "").lower() - team_id = row.get("team_id") - + team_id = row.get("team_id") + is_goal = False is_own_goal = False - + if event_name == "own_goal": is_own_goal = True is_goal = True elif event_name == "attempt_at_goal": - if row.get("is_successful") == True: + if row.get("is_successful"): is_goal = True - + if is_goal: if is_own_goal: # Own goal: goal counts for opponent @@ -737,46 +767,48 @@ def _get_game_score( home_score += 1 elif team_id == away_team_id: away_score += 1 - + return home_score, away_score def _determine_period_flips( - events_list: list, - home_team_id: int, - away_team_id: int + events_list: list, home_team_id: int, away_team_id: int ) -> tuple[bool, bool]: """ Determine whether to flip coordinates for first and second half - + Args: events_list: List of all events home_team_id: Home team ID away_team_id: Away team ID - + Returns: tuple[bool, bool]: (flip_first_half, flip_second_half) """ flip_first_half = False flip_second_half = False - + # Find first half kickoff event for event in events_list: - if (event.get("half_time") == 1 and - event.get("event", "").lower() == "game_period_start"): + if ( + event.get("half_time") == 1 + and event.get("event", "").lower() == "game_period_start" + ): team_id = event.get("team_id") side = (event.get("side") or "").lower() - + # If home team kicks off with side='r', or away team kicks off with side='l', flip first half - if (team_id == home_team_id and side == "r") or \ - (team_id == away_team_id and side == "l"): + if (team_id == home_team_id and side == "r") or ( + team_id == away_team_id and side == "l" + ): flip_first_half = True - + # If home team kicks off with side='l', or away team kicks off with side='r', flip second half - if (team_id == home_team_id and side == "l") or \ - (team_id == away_team_id and side == "r"): + if (team_id == home_team_id and side == "l") or ( + team_id == away_team_id and side == "r" + ): flip_second_half = True - + break - - return flip_first_half, flip_second_half \ No newline at end of file + + return flip_first_half, flip_second_half diff --git a/databallpy/utils/get_game.py b/databallpy/utils/get_game.py index c06b3cfd..36848ca9 100644 --- a/databallpy/utils/get_game.py +++ b/databallpy/utils/get_game.py @@ -9,6 +9,7 @@ from databallpy.data_parsers import Metadata from databallpy.data_parsers.event_data_parsers import ( + load_fifa_event_data, load_instat_event_data, load_metrica_event_data, load_metrica_open_event_data, @@ -17,7 +18,6 @@ load_sportec_event_data, load_sportec_open_event_data, load_statsbomb_event_data, - load_fifa_event_data, ) from databallpy.data_parsers.kloppy_parsers import ( convert_kloppy_event_dataset, @@ -191,7 +191,7 @@ def get_game( "sportec": True, "dfl": True, "statsbomb": False, - "fifa":True, + "fifa": True, } LOGGER.info( @@ -486,7 +486,7 @@ def load_event_data( "statsbomb", "sportec", "dfl", - "fifa" + "fifa", ]: raise ValueError( f"We do not support '{event_data_provider}' as event data provider yet, " @@ -519,8 +519,7 @@ def load_event_data( ) elif event_data_provider == "fifa": event_data, event_metadata, databallpy_events = load_fifa_event_data( - metadata_loc=event_metadata_loc, - events_loc=event_data_loc + metadata_loc=event_metadata_loc, events_loc=event_data_loc ) elif event_data_provider in ["sportec", "dfl"]: event_data, event_metadata, databallpy_events = load_sportec_event_data(