Source code for doeasyeda.create_scatter_plot

import altair as alt

# Define custom error handling
[docs] class DoEasyEDAException(Exception): def __init__(self, message, original_exception): super().__init__(f"{message}: {original_exception}") self.original_exception = original_exception
[docs] def create_scatter_plot(df, x_col, y_col, size=60, color=None, title=None, x_title=None, y_title=None, tooltip=None, interactive=False, width=None, height=None): """ Creates a scatter plot using Altair with customizable options. Parameters ---------- df : pd.DataFrame Dataframe containing the data for the scatter plot. x_col : str The column name to be used for the x-axis. y_col : str The column name to be used for the y-axis. size : int, optional The size of the scatter plot markers (default is 60). color : str, optional The column name to be used for color encoding (default is None). title : str, optional The title of the scatter plot (default is None). x_title : str, optional The title for the x-axis (default is None). y_title : str, optional The title for the y-axis (default is None). tooltip : list of str, optional List of column names to be used for tooltips (default is None). interactive : bool, optional If True, enables interactive features like zooming and panning (default is False). width : int, optional The width of the chart (default is None). height : int, optional The height of the chart (default is None). Returns ------- alt.Chart An Altair Chart object representing the scatter plot. Example ------- >>> data = pd.DataFrame({'gdpPercap': [1000, 2000, 3000, 4000], ... 'lifeExp': [70, 80, 60, 65], ... 'continent': ['Asia', 'Europe', 'Africa', 'Americas']}) >>> create_scatter_plot(data, 'gdpPercap', 'lifeExp', size=60, color='continent', ... title='Life Expectancy vs GDP Per Capita by Continent', ... x_title='GDP Per Capita', y_title='Life Expectancy', interactive=True) """ # Validate dataframe if df.empty: raise ValueError("Empty dataframe received!") # Validate input columns x_col, y_col, tooltip for col in [x_col, y_col] \ + ([color] if color else []) \ + (tooltip if isinstance(tooltip, list) else [tooltip] if tooltip else []): if col not in df.columns: raise ValueError(f"Column '{col}' not found in dataframe!") # More checking can be added here try: # Set Altair base chart chart = alt.Chart(df).mark_circle(size=size) # Set Altair encoding with optional color and tooltip encoding = { 'x': alt.X(x_col, title=x_title), 'y': alt.Y(y_col, title=y_title) } if color: encoding['color'] = color if tooltip: encoding['tooltip'] = tooltip if isinstance(tooltip, list) else [tooltip] # Pass the encoding to chart chart = chart.encode(**encoding) # Set optional title, interactive, width and height if title: chart = chart.properties(title=title) if interactive: chart = chart.interactive() if width and height: chart = chart.properties(width=width, height=height) return chart except Exception as e: # Error Handling raise DoEasyEDAException("Error in creating scatter plot", e)