From d2b5cd83a6487b44e77d94e6cd9952d6516f960f Mon Sep 17 00:00:00 2001 From: Shahmir Varqha Date: Fri, 19 Sep 2025 12:23:12 +0800 Subject: [PATCH 1/4] use polars instead of pd, altair instead of pyplot, add sql guides --- docs/_static/CLAUDE.md | 102 +++++--- .../__snapshots__/prompt.test.ts.snap | 100 +++++--- frontend/src/components/chat/acp/prompt.ts | 100 +++++--- marimo/_server/ai/prompts.py | 33 ++- .../ai/snapshots/chat_system_prompts.txt | 231 +++++++++++++----- tests/_server/ai/snapshots/system_prompts.txt | 22 +- 6 files changed, 395 insertions(+), 193 deletions(-) diff --git a/docs/_static/CLAUDE.md b/docs/_static/CLAUDE.md index 38d8c30322e..8fabf9c0035 100644 --- a/docs/_static/CLAUDE.md +++ b/docs/_static/CLAUDE.md @@ -46,7 +46,7 @@ Marimo's reactivity means: -- Use pandas for data manipulation +- Use polars for data manipulation - Implement proper data validation - Handle missing values appropriately - Use efficient data structures @@ -56,7 +56,7 @@ Marimo's reactivity means: - For matplotlib: use plt.gca() as the last expression instead of plt.show() - For plotly: return the figure object directly -- For altair: return the chart object directly +- For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. - Include proper labels, titles, and color schemes - Make visualizations interactive where appropriate @@ -132,7 +132,8 @@ Common issues and solutions: # Cell 1 import marimo as mo -import matplotlib.pyplot as plt +import altair as alt +import polars as pl import numpy as np # Cell 2 @@ -151,58 +152,64 @@ n_points # Display the slider x = np.random.rand(n_points.value) y = np.random.rand(n_points.value) -plt.figure(figsize=(8, 6)) -plt.scatter(x, y, alpha=0.7) -plt.title(f"Scatter plot with {n_points.value} points") -plt.xlabel("X axis") -plt.ylabel("Y axis") -plt.gca() # Return the current axes to display the plot +df = pl.DataFrame({"x": x, "y": y}) + +chart = alt.Chart(df).mark_circle(opacity=0.7).encode( + x=alt.X('x', title='X axis'), + y=alt.Y('y', title='Y axis') +).properties( + title=f"Scatter plot with {n_points.value} points", + width=400, + height=300 +) + +chart # Cell 1 import marimo as mo -import pandas as pd +import polars as pl from vega_datasets import data # Cell 2 # Load and display dataset with interactive explorer -cars_df = data.cars() +cars_df = pl.DataFrame(data.cars()) mo.ui.data_explorer(cars_df) # Cell 1 import marimo as mo -import pandas as pd -import matplotlib.pyplot as plt -import seaborn as sns +import polars as pl +import altair as alt +from vega_datasets import data # Cell 2 # Load dataset -iris = sns.load_dataset('iris') +iris = pl.DataFrame(data.iris()) # Cell 3 # Create UI elements species_selector = mo.ui.dropdown( - options=["All"] + iris["species"].unique().tolist(), + options=["All"] + iris["species"].unique().to_list(), value="All", label="Species" ) x_feature = mo.ui.dropdown( - options=iris.select_dtypes('number').columns.tolist(), - value="sepal_length", + options=iris.select(pl.col(pl.Float64, pl.Int64)).columns, + value="sepalLength", label="X Feature" ) y_feature = mo.ui.dropdown( - options=iris.select_dtypes('number').columns.tolist(), - value="sepal_width", + options=iris.select(pl.col(pl.Float64, pl.Int64)).columns, + value="sepalWidth", label="Y Feature" ) @@ -214,36 +221,45 @@ mo.hstack([species_selector, x_feature, y_feature]) # Filter data based on selection -filtered_data = iris if species_selector.value == "All" else iris[iris["species"] == species_selector.value] +filtered_data = iris if species_selector.value == "All" else iris.filter(pl.col("species") == species_selector.value) # Create visualization based on UI selections -plt.figure(figsize=(10, 6)) -sns.scatterplot( - data=filtered_data, - x=x_feature.value, - y=y_feature.value, - hue="species" +chart = alt.Chart(filtered_data).mark_circle().encode( + x=alt.X(x_feature.value, title=x_feature.value), + y=alt.Y(y_feature.value, title=y_feature.value), + color='species' +).properties( + title=f"{y_feature.value} vs {x_feature.value}", + width=500, + height=400 ) -plt.title(f"{y_feature.value} vs {x_feature.value}") -plt.gca() + +chart # Cell 1 import marimo as mo import altair as alt -import pandas as pd +import polars as pl # Cell 2 # Load dataset -cars_df = pd.read_csv('') -_chart = alt.Chart(cars_df).mark_point().encode( - x='Horsepower', - y='Miles_per_Gallon', - color='Origin', +weather = pl.read_csv("https://raw.githubusercontent.com/vega/vega-datasets/refs/heads/main/data/weather.csv") +weather_dates = weather.with_columns( + pl.col("date").str.strptime(pl.Date, format="%Y-%m-%d") +) +_chart = ( + alt.Chart(weather_dates) + .mark_point() + .encode( + x="date:T", + y="temp_max", + color="location", + ) ) chart = mo.ui.altair_chart(_chart) @@ -279,16 +295,21 @@ else: # Cell 1 import marimo as mo +import polars as pl # Cell 2 # Load dataset -cars_df = pd.read_csv('') +weather = pl.read_csv('https://raw.githubusercontent.com/vega/vega-datasets/refs/heads/main/data/weather.csv') # Cell 3 -_df = mo.sql("SELECT * from cars_df WHERE Miles_per_Gallon > 20") +seattle_weather_df = mo.sql( + f""" + SELECT * FROM weather WHERE location = 'Seattle'; + """ +) @@ -297,10 +318,11 @@ import marimo as mo # Cell 2 -mo.md(r""" - +mo.md( + r""" The quadratic function $f$ is defined as $$f(x) = x^2.$$ -""") +""" +) diff --git a/frontend/src/components/chat/acp/__tests__/__snapshots__/prompt.test.ts.snap b/frontend/src/components/chat/acp/__tests__/__snapshots__/prompt.test.ts.snap index 7d544e8242e..f171d361a1c 100644 --- a/frontend/src/components/chat/acp/__tests__/__snapshots__/prompt.test.ts.snap +++ b/frontend/src/components/chat/acp/__tests__/__snapshots__/prompt.test.ts.snap @@ -52,7 +52,7 @@ exports[`getAgentPrompt > should generate complete agent prompt with default fil ## Best Practices - - Use pandas for data manipulation + - Use polars for data manipulation - Implement proper data validation - Handle missing values appropriately - Use efficient data structures @@ -62,7 +62,7 @@ exports[`getAgentPrompt > should generate complete agent prompt with default fil - For matplotlib: use plt.gca() as the last expression instead of plt.show() - For plotly: return the figure object directly - - For altair: return the chart object directly + - For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. - Include proper labels, titles, and color schemes - Make visualizations interactive where appropriate @@ -76,7 +76,7 @@ exports[`getAgentPrompt > should generate complete agent prompt with default fil - - When writing duckdb, prefer using marimo's SQL cells, which start with _df = mo.sql(query) + - When writing duckdb, prefer using marimo's SQL cells, which start with df = mo.sql(f"""""") for DuckDB, or df = mo.sql(f"""""", engine=engine) for other SQL engines. - See the SQL with duckdb example for an example on how to do this - Don't add comments in cells that use mo.sql() - Consider using \`vega_datasets\` for common example datasets @@ -146,7 +146,8 @@ exports[`getAgentPrompt > should generate complete agent prompt with default fil @app.cell def _(): import marimo as mo - import matplotlib.pyplot as plt + import altair as alt + import polars as pl import numpy as np return @@ -161,12 +162,18 @@ exports[`getAgentPrompt > should generate complete agent prompt with default fil x = np.random.rand(n_points.value) y = np.random.rand(n_points.value) - plt.figure(figsize=(8, 6)) - plt.scatter(x, y, alpha=0.7) - plt.title(f"Scatter plot with {n_points.value} points") - plt.xlabel("X axis") - plt.ylabel("Y axis") - plt.gca() + df = pl.DataFrame({"x": x, "y": y}) + + chart = alt.Chart(df).mark_circle(opacity=0.7).encode( + x=alt.X('x', title='X axis'), + y=alt.Y('y', title='Y axis') + ).properties( + title=f"Scatter plot with {n_points.value} points", + width=400, + height=300 + ) + + chart return @@ -176,13 +183,13 @@ exports[`getAgentPrompt > should generate complete agent prompt with default fil @app.cell def _(): import marimo as mo - import pandas as pd + import polars as pl from vega_datasets import data return @app.cell def _(): - cars_df = data.cars() + cars_df = pl.DataFrame(data.cars()) mo.ui.data_explorer(cars_df) return @@ -193,31 +200,31 @@ exports[`getAgentPrompt > should generate complete agent prompt with default fil @app.cell def _(): import marimo as mo - import pandas as pd - import matplotlib.pyplot as plt - import seaborn as sns + import polars as pl + import altair as alt + from vega_datasets import data return @app.cell def _(): - iris = sns.load_dataset('iris') + iris = pl.DataFrame(data.iris()) return @app.cell def _(): species_selector = mo.ui.dropdown( - options=["All"] + iris["species"].unique().tolist(), + options=["All"] + iris["species"].unique().to_list(), value="All", label="Species" ) x_feature = mo.ui.dropdown( - options=iris.select_dtypes('number').columns.tolist(), - value="sepal_length", + options=iris.select(pl.col(pl.Float64, pl.Int64)).columns, + value="sepalLength", label="X Feature" ) y_feature = mo.ui.dropdown( - options=iris.select_dtypes('number').columns.tolist(), - value="sepal_width", + options=iris.select(pl.col(pl.Float64, pl.Int64)).columns, + value="sepalWidth", label="Y Feature" ) mo.hstack([species_selector, x_feature, y_feature]) @@ -225,17 +232,19 @@ exports[`getAgentPrompt > should generate complete agent prompt with default fil @app.cell def _(): - filtered_data = iris if species_selector.value == "All" else iris[iris["species"] == species_selector.value] - - plt.figure(figsize=(10, 6)) - sns.scatterplot( - data=filtered_data, - x=x_feature.value, - y=y_feature.value, - hue="species" + filtered_data = iris if species_selector.value == "All" else iris.filter(pl.col("species") == species_selector.value) + + chart = alt.Chart(filtered_data).mark_circle().encode( + x=alt.X(x_feature.value, title=x_feature.value), + y=alt.Y(y_feature.value, title=y_feature.value), + color='species' + ).properties( + title=f"{y_feature.value} vs {x_feature.value}", + width=500, + height=400 ) - plt.title(f"{y_feature.value} vs {x_feature.value}") - plt.gca() + + chart return @@ -260,17 +269,24 @@ exports[`getAgentPrompt > should generate complete agent prompt with default fil def _(): import marimo as mo import altair as alt - import pandas as pd + import polars as pl return @app.cell def _(): # Load dataset - cars_df = pd.read_csv('') - _chart = alt.Chart(cars_df).mark_point().encode( - x='Horsepower', - y='Miles_per_Gallon', - color='Origin', + weather = pl.read_csv("https://raw.githubusercontent.com/vega/vega-datasets/refs/heads/main/data/weather.csv") + weather_dates = weather.with_columns( + pl.col("date").str.strptime(pl.Date, format="%Y-%m-%d") + ) + _chart = ( + alt.Chart(weather_dates) + .mark_point() + .encode( + x="date:T", + y="temp_max", + color="location", + ) ) return @@ -319,17 +335,21 @@ exports[`getAgentPrompt > should generate complete agent prompt with default fil @app.cell def _(): import marimo as mo - import pandas as pd + import polars as pl return @app.cell def _(): - cars_df = pd.read_csv('') + weather = pl.read_csv('https://raw.githubusercontent.com/vega/vega-datasets/refs/heads/main/data/weather.csv') return @app.cell def _(): - _df = mo.sql("SELECT * from cars_df WHERE Miles_per_Gallon > 20") + seattle_weather_df = mo.sql( + f""" + SELECT * FROM weather WHERE location = 'Seattle'; + """ + ) return " diff --git a/frontend/src/components/chat/acp/prompt.ts b/frontend/src/components/chat/acp/prompt.ts index 2f257d18a44..86d99b9d4de 100644 --- a/frontend/src/components/chat/acp/prompt.ts +++ b/frontend/src/components/chat/acp/prompt.ts @@ -52,7 +52,7 @@ export function getAgentPrompt(filename: string) { ## Best Practices - - Use pandas for data manipulation + - Use polars for data manipulation - Implement proper data validation - Handle missing values appropriately - Use efficient data structures @@ -62,7 +62,7 @@ export function getAgentPrompt(filename: string) { - For matplotlib: use plt.gca() as the last expression instead of plt.show() - For plotly: return the figure object directly - - For altair: return the chart object directly + - For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. - Include proper labels, titles, and color schemes - Make visualizations interactive where appropriate @@ -76,7 +76,7 @@ export function getAgentPrompt(filename: string) { - - When writing duckdb, prefer using marimo's SQL cells, which start with _df = mo.sql(query) + - When writing duckdb, prefer using marimo's SQL cells, which start with df = mo.sql(f"""""") for DuckDB, or df = mo.sql(f"""""", engine=engine) for other SQL engines. - See the SQL with duckdb example for an example on how to do this - Don't add comments in cells that use mo.sql() - Consider using \`vega_datasets\` for common example datasets @@ -144,7 +144,8 @@ export function getAgentPrompt(filename: string) { ${formatCells([ ` import marimo as mo - import matplotlib.pyplot as plt + import altair as alt + import polars as pl import numpy as np `, ` @@ -155,12 +156,18 @@ export function getAgentPrompt(filename: string) { x = np.random.rand(n_points.value) y = np.random.rand(n_points.value) - plt.figure(figsize=(8, 6)) - plt.scatter(x, y, alpha=0.7) - plt.title(f"Scatter plot with {n_points.value} points") - plt.xlabel("X axis") - plt.ylabel("Y axis") - plt.gca() + df = pl.DataFrame({"x": x, "y": y}) + + chart = alt.Chart(df).mark_circle(opacity=0.7).encode( + x=alt.X('x', title='X axis'), + y=alt.Y('y', title='Y axis') + ).properties( + title=f"Scatter plot with {n_points.value} points", + width=400, + height=300 + ) + + chart `, ])} @@ -169,11 +176,11 @@ export function getAgentPrompt(filename: string) { ${formatCells([ ` import marimo as mo - import pandas as pd + import polars as pl from vega_datasets import data `, ` - cars_df = data.cars() + cars_df = pl.DataFrame(data.cars()) mo.ui.data_explorer(cars_df) `, ])} @@ -183,43 +190,45 @@ export function getAgentPrompt(filename: string) { ${formatCells([ ` import marimo as mo - import pandas as pd - import matplotlib.pyplot as plt - import seaborn as sns + import polars as pl + import altair as alt + from vega_datasets import data `, ` - iris = sns.load_dataset('iris') + iris = pl.DataFrame(data.iris()) `, ` species_selector = mo.ui.dropdown( - options=["All"] + iris["species"].unique().tolist(), + options=["All"] + iris["species"].unique().to_list(), value="All", label="Species" ) x_feature = mo.ui.dropdown( - options=iris.select_dtypes('number').columns.tolist(), - value="sepal_length", + options=iris.select(pl.col(pl.Float64, pl.Int64)).columns, + value="sepalLength", label="X Feature" ) y_feature = mo.ui.dropdown( - options=iris.select_dtypes('number').columns.tolist(), - value="sepal_width", + options=iris.select(pl.col(pl.Float64, pl.Int64)).columns, + value="sepalWidth", label="Y Feature" ) mo.hstack([species_selector, x_feature, y_feature]) `, ` - filtered_data = iris if species_selector.value == "All" else iris[iris["species"] == species_selector.value] - - plt.figure(figsize=(10, 6)) - sns.scatterplot( - data=filtered_data, - x=x_feature.value, - y=y_feature.value, - hue="species" + filtered_data = iris if species_selector.value == "All" else iris.filter(pl.col("species") == species_selector.value) + + chart = alt.Chart(filtered_data).mark_circle().encode( + x=alt.X(x_feature.value, title=x_feature.value), + y=alt.Y(y_feature.value, title=y_feature.value), + color='species' + ).properties( + title=f"{y_feature.value} vs {x_feature.value}", + width=500, + height=400 ) - plt.title(f"{y_feature.value} vs {x_feature.value}") - plt.gca() + + chart `, ])} @@ -242,14 +251,21 @@ export function getAgentPrompt(filename: string) { ` import marimo as mo import altair as alt - import pandas as pd + import polars as pl `, `# Load dataset - cars_df = pd.read_csv('') - _chart = alt.Chart(cars_df).mark_point().encode( - x='Horsepower', - y='Miles_per_Gallon', - color='Origin', + weather = pl.read_csv("https://raw.githubusercontent.com/vega/vega-datasets/refs/heads/main/data/weather.csv") + weather_dates = weather.with_columns( + pl.col("date").str.strptime(pl.Date, format="%Y-%m-%d") + ) + _chart = ( + alt.Chart(weather_dates) + .mark_point() + .encode( + x="date:T", + y="temp_max", + color="location", + ) ) `, "chart = mo.ui.altair_chart(_chart)\nchart", @@ -278,9 +294,13 @@ export function getAgentPrompt(filename: string) { ${formatCells([ - "import marimo as mo\nimport pandas as pd", - `cars_df = pd.read_csv('')`, - `_df = mo.sql("SELECT * from cars_df WHERE Miles_per_Gallon > 20")`, + "import marimo as mo\n import polars as pl", + `weather = pl.read_csv('https://raw.githubusercontent.com/vega/vega-datasets/refs/heads/main/data/weather.csv')`, + `seattle_weather_df = mo.sql( + f""" + SELECT * FROM weather WHERE location = 'Seattle'; + """ + )`, ])} `; } diff --git a/marimo/_server/ai/prompts.py b/marimo/_server/ai/prompts.py index 44cab42b6fe..ebad135714a 100644 --- a/marimo/_server/ai/prompts.py +++ b/marimo/_server/ai/prompts.py @@ -19,7 +19,7 @@ "python": [ "For matplotlib: use plt.gca() as the last expression instead of plt.show().", "For plotly: return the figure object directly.", - "For altair: return the chart object directly. Add tooltips where appropriate.", + "For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair.", "Include proper labels, titles, and color schemes.", "Make visualizations interactive where appropriate.", "If an import already exists, do not import it again.", @@ -28,6 +28,8 @@ "markdown": [], "sql": [ "The SQL must use duckdb syntax.", + 'SQL cells start with df = mo.sql(f"""""") for DuckDB, or df = mo.sql(f"""""", engine=engine) for other SQL engines.', + "This will automatically display the result in the UI. You do not need to return the dataframe in the cell.", ], } @@ -276,6 +278,14 @@ def get_chat_system_prompt( ## Best Practices + +- Use polars for data manipulation +- Implement proper data validation +- Handle missing values appropriately +- Use efficient data structures +- A variable in the last expression of a cell is automatically displayed as a table + + - Access UI element values with .value attribute (e.g., slider.value) - Create UI elements in one cell and reference them in later cells @@ -323,7 +333,8 @@ def get_chat_system_prompt( import marimo as mo -import matplotlib.pyplot as plt +import altair as alt +import polars as pl import numpy as np # Create a slider and display it @@ -335,12 +346,18 @@ def get_chat_system_prompt( x = np.random.rand(n_points.value) y = np.random.rand(n_points.value) -plt.figure(figsize=(8, 6)) -plt.scatter(x, y, alpha=0.7) -plt.title(f"Scatter plot with {{n_points.value}} points") -plt.xlabel("X axis") -plt.ylabel("Y axis") -plt.gca() # Return the current axes to display the plot +df = pl.DataFrame({{"x": x, "y": y}}) + +chart = alt.Chart(df).mark_circle(opacity=0.7).encode( + x=alt.X('x', title='X axis'), + y=alt.Y('y', title='Y axis') +).properties( + title=f"Scatter plot with {{n_points.value}} points", + width=400, + height=300 +) + +chart """ for language in language_rules: diff --git a/tests/_server/ai/snapshots/chat_system_prompts.txt b/tests/_server/ai/snapshots/chat_system_prompts.txt index 1fd7f8758e3..374f556b4ba 100644 --- a/tests/_server/ai/snapshots/chat_system_prompts.txt +++ b/tests/_server/ai/snapshots/chat_system_prompts.txt @@ -47,6 +47,14 @@ Marimo's reactivity means: ## Best Practices + +- Use polars for data manipulation +- Implement proper data validation +- Handle missing values appropriately +- Use efficient data structures +- A variable in the last expression of a cell is automatically displayed as a table + + - Access UI element values with .value attribute (e.g., slider.value) - Create UI elements in one cell and reference them in later cells @@ -94,7 +102,8 @@ Marimo's reactivity means: import marimo as mo -import matplotlib.pyplot as plt +import altair as alt +import polars as pl import numpy as np # Create a slider and display it @@ -106,18 +115,24 @@ n_points # Display the slider x = np.random.rand(n_points.value) y = np.random.rand(n_points.value) -plt.figure(figsize=(8, 6)) -plt.scatter(x, y, alpha=0.7) -plt.title(f"Scatter plot with {n_points.value} points") -plt.xlabel("X axis") -plt.ylabel("Y axis") -plt.gca() # Return the current axes to display the plot +df = pl.DataFrame({"x": x, "y": y}) + +chart = alt.Chart(df).mark_circle(opacity=0.7).encode( + x=alt.X('x', title='X axis'), + y=alt.Y('y', title='Y axis') +).properties( + title=f"Scatter plot with {n_points.value} points", + width=400, + height=300 +) + +chart ## Rules for python: 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. @@ -125,6 +140,8 @@ plt.gca() # Return the current axes to display the plot ## Rules for sql: 1. The SQL must use duckdb syntax. +2. SQL cells start with df = mo.sql(f"""""") for DuckDB, or df = mo.sql(f"""""", engine=engine) for other SQL engines. +3. This will automatically display the result in the UI. You do not need to return the dataframe in the cell. ==================== with custom rules ==================== @@ -173,6 +190,14 @@ Marimo's reactivity means: ## Best Practices + +- Use polars for data manipulation +- Implement proper data validation +- Handle missing values appropriately +- Use efficient data structures +- A variable in the last expression of a cell is automatically displayed as a table + + - Access UI element values with .value attribute (e.g., slider.value) - Create UI elements in one cell and reference them in later cells @@ -220,7 +245,8 @@ Marimo's reactivity means: import marimo as mo -import matplotlib.pyplot as plt +import altair as alt +import polars as pl import numpy as np # Create a slider and display it @@ -232,18 +258,24 @@ n_points # Display the slider x = np.random.rand(n_points.value) y = np.random.rand(n_points.value) -plt.figure(figsize=(8, 6)) -plt.scatter(x, y, alpha=0.7) -plt.title(f"Scatter plot with {n_points.value} points") -plt.xlabel("X axis") -plt.ylabel("Y axis") -plt.gca() # Return the current axes to display the plot +df = pl.DataFrame({"x": x, "y": y}) + +chart = alt.Chart(df).mark_circle(opacity=0.7).encode( + x=alt.X('x', title='X axis'), + y=alt.Y('y', title='Y axis') +).properties( + title=f"Scatter plot with {n_points.value} points", + width=400, + height=300 +) + +chart ## Rules for python: 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. @@ -251,6 +283,8 @@ plt.gca() # Return the current axes to display the plot ## Rules for sql: 1. The SQL must use duckdb syntax. +2. SQL cells start with df = mo.sql(f"""""") for DuckDB, or df = mo.sql(f"""""", engine=engine) for other SQL engines. +3. This will automatically display the result in the UI. You do not need to return the dataframe in the cell. ## Additional rules: Always be polite. @@ -302,6 +336,14 @@ Marimo's reactivity means: ## Best Practices + +- Use polars for data manipulation +- Implement proper data validation +- Handle missing values appropriately +- Use efficient data structures +- A variable in the last expression of a cell is automatically displayed as a table + + - Access UI element values with .value attribute (e.g., slider.value) - Create UI elements in one cell and reference them in later cells @@ -349,7 +391,8 @@ Marimo's reactivity means: import marimo as mo -import matplotlib.pyplot as plt +import altair as alt +import polars as pl import numpy as np # Create a slider and display it @@ -361,18 +404,24 @@ n_points # Display the slider x = np.random.rand(n_points.value) y = np.random.rand(n_points.value) -plt.figure(figsize=(8, 6)) -plt.scatter(x, y, alpha=0.7) -plt.title(f"Scatter plot with {n_points.value} points") -plt.xlabel("X axis") -plt.ylabel("Y axis") -plt.gca() # Return the current axes to display the plot +df = pl.DataFrame({"x": x, "y": y}) + +chart = alt.Chart(df).mark_circle(opacity=0.7).encode( + x=alt.X('x', title='X axis'), + y=alt.Y('y', title='Y axis') +).properties( + title=f"Scatter plot with {n_points.value} points", + width=400, + height=300 +) + +chart ## Rules for python: 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. @@ -380,6 +429,8 @@ plt.gca() # Return the current axes to display the plot ## Rules for sql: 1. The SQL must use duckdb syntax. +2. SQL cells start with df = mo.sql(f"""""") for DuckDB, or df = mo.sql(f"""""", engine=engine) for other SQL engines. +3. This will automatically display the result in the UI. You do not need to return the dataframe in the cell. ## Available variables from other cells: - variable: `var1`- variable: `var2` @@ -431,6 +482,14 @@ Marimo's reactivity means: ## Best Practices + +- Use polars for data manipulation +- Implement proper data validation +- Handle missing values appropriately +- Use efficient data structures +- A variable in the last expression of a cell is automatically displayed as a table + + - Access UI element values with .value attribute (e.g., slider.value) - Create UI elements in one cell and reference them in later cells @@ -478,7 +537,8 @@ Marimo's reactivity means: import marimo as mo -import matplotlib.pyplot as plt +import altair as alt +import polars as pl import numpy as np # Create a slider and display it @@ -490,18 +550,24 @@ n_points # Display the slider x = np.random.rand(n_points.value) y = np.random.rand(n_points.value) -plt.figure(figsize=(8, 6)) -plt.scatter(x, y, alpha=0.7) -plt.title(f"Scatter plot with {n_points.value} points") -plt.xlabel("X axis") -plt.ylabel("Y axis") -plt.gca() # Return the current axes to display the plot +df = pl.DataFrame({"x": x, "y": y}) + +chart = alt.Chart(df).mark_circle(opacity=0.7).encode( + x=alt.X('x', title='X axis'), + y=alt.Y('y', title='Y axis') +).properties( + title=f"Scatter plot with {n_points.value} points", + width=400, + height=300 +) + +chart ## Rules for python: 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. @@ -509,6 +575,8 @@ plt.gca() # Return the current axes to display the plot ## Rules for sql: 1. The SQL must use duckdb syntax. +2. SQL cells start with df = mo.sql(f"""""") for DuckDB, or df = mo.sql(f"""""", engine=engine) for other SQL engines. +3. This will automatically display the result in the UI. You do not need to return the dataframe in the cell. ## Available variables from other cells: - variable: `df` @@ -566,6 +634,14 @@ Marimo's reactivity means: ## Best Practices + +- Use polars for data manipulation +- Implement proper data validation +- Handle missing values appropriately +- Use efficient data structures +- A variable in the last expression of a cell is automatically displayed as a table + + - Access UI element values with .value attribute (e.g., slider.value) - Create UI elements in one cell and reference them in later cells @@ -613,7 +689,8 @@ Marimo's reactivity means: import marimo as mo -import matplotlib.pyplot as plt +import altair as alt +import polars as pl import numpy as np # Create a slider and display it @@ -625,18 +702,24 @@ n_points # Display the slider x = np.random.rand(n_points.value) y = np.random.rand(n_points.value) -plt.figure(figsize=(8, 6)) -plt.scatter(x, y, alpha=0.7) -plt.title(f"Scatter plot with {n_points.value} points") -plt.xlabel("X axis") -plt.ylabel("Y axis") -plt.gca() # Return the current axes to display the plot +df = pl.DataFrame({"x": x, "y": y}) + +chart = alt.Chart(df).mark_circle(opacity=0.7).encode( + x=alt.X('x', title='X axis'), + y=alt.Y('y', title='Y axis') +).properties( + title=f"Scatter plot with {n_points.value} points", + width=400, + height=300 +) + +chart ## Rules for python: 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. @@ -644,6 +727,8 @@ plt.gca() # Return the current axes to display the plot ## Rules for sql: 1. The SQL must use duckdb syntax. +2. SQL cells start with df = mo.sql(f"""""") for DuckDB, or df = mo.sql(f"""""", engine=engine) for other SQL engines. +3. This will automatically display the result in the UI. You do not need to return the dataframe in the cell. ## Available schema: - Table: df_1 @@ -709,6 +794,14 @@ Marimo's reactivity means: ## Best Practices + +- Use polars for data manipulation +- Implement proper data validation +- Handle missing values appropriately +- Use efficient data structures +- A variable in the last expression of a cell is automatically displayed as a table + + - Access UI element values with .value attribute (e.g., slider.value) - Create UI elements in one cell and reference them in later cells @@ -756,7 +849,8 @@ Marimo's reactivity means: import marimo as mo -import matplotlib.pyplot as plt +import altair as alt +import polars as pl import numpy as np # Create a slider and display it @@ -768,18 +862,24 @@ n_points # Display the slider x = np.random.rand(n_points.value) y = np.random.rand(n_points.value) -plt.figure(figsize=(8, 6)) -plt.scatter(x, y, alpha=0.7) -plt.title(f"Scatter plot with {n_points.value} points") -plt.xlabel("X axis") -plt.ylabel("Y axis") -plt.gca() # Return the current axes to display the plot +df = pl.DataFrame({"x": x, "y": y}) + +chart = alt.Chart(df).mark_circle(opacity=0.7).encode( + x=alt.X('x', title='X axis'), + y=alt.Y('y', title='Y axis') +).properties( + title=f"Scatter plot with {n_points.value} points", + width=400, + height=300 +) + +chart ## Rules for python: 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. @@ -787,6 +887,8 @@ plt.gca() # Return the current axes to display the plot ## Rules for sql: 1. The SQL must use duckdb syntax. +2. SQL cells start with df = mo.sql(f"""""") for DuckDB, or df = mo.sql(f"""""", engine=engine) for other SQL engines. +3. This will automatically display the result in the UI. You do not need to return the dataframe in the cell. import pandas as pd @@ -840,6 +942,14 @@ Marimo's reactivity means: ## Best Practices + +- Use polars for data manipulation +- Implement proper data validation +- Handle missing values appropriately +- Use efficient data structures +- A variable in the last expression of a cell is automatically displayed as a table + + - Access UI element values with .value attribute (e.g., slider.value) - Create UI elements in one cell and reference them in later cells @@ -887,7 +997,8 @@ Marimo's reactivity means: import marimo as mo -import matplotlib.pyplot as plt +import altair as alt +import polars as pl import numpy as np # Create a slider and display it @@ -899,18 +1010,24 @@ n_points # Display the slider x = np.random.rand(n_points.value) y = np.random.rand(n_points.value) -plt.figure(figsize=(8, 6)) -plt.scatter(x, y, alpha=0.7) -plt.title(f"Scatter plot with {n_points.value} points") -plt.xlabel("X axis") -plt.ylabel("Y axis") -plt.gca() # Return the current axes to display the plot +df = pl.DataFrame({"x": x, "y": y}) + +chart = alt.Chart(df).mark_circle(opacity=0.7).encode( + x=alt.X('x', title='X axis'), + y=alt.Y('y', title='Y axis') +).properties( + title=f"Scatter plot with {n_points.value} points", + width=400, + height=300 +) + +chart ## Rules for python: 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. @@ -918,6 +1035,8 @@ plt.gca() # Return the current axes to display the plot ## Rules for sql: 1. The SQL must use duckdb syntax. +2. SQL cells start with df = mo.sql(f"""""") for DuckDB, or df = mo.sql(f"""""", engine=engine) for other SQL engines. +3. This will automatically display the result in the UI. You do not need to return the dataframe in the cell. ## Additional rules: Always be polite. diff --git a/tests/_server/ai/snapshots/system_prompts.txt b/tests/_server/ai/snapshots/system_prompts.txt index fff3e165487..f8db84a6c97 100644 --- a/tests/_server/ai/snapshots/system_prompts.txt +++ b/tests/_server/ai/snapshots/system_prompts.txt @@ -16,7 +16,7 @@ Immediately start with the following format. Do NOT comment on the code, just ou ## Rules for python 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. @@ -54,6 +54,8 @@ Immediately start with the following format. Do NOT comment on the code, just ou ## Rules for sql 1. The SQL must use duckdb syntax. +2. SQL cells start with df = mo.sql(f"""""") for DuckDB, or df = mo.sql(f"""""", engine=engine) for other SQL engines. +3. This will automatically display the result in the UI. You do not need to return the dataframe in the cell. Again, just output the code itself. @@ -88,7 +90,7 @@ Immediately start with the following format. Do NOT comment on the code, just ou ## Rules for python 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. @@ -115,7 +117,7 @@ Immediately start with the following format. Do NOT comment on the code, just ou ## Rules for python 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. @@ -156,7 +158,7 @@ Immediately start with the following format. Do NOT comment on the code, just ou ## Rules for python 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. @@ -193,7 +195,7 @@ print('Hello, world!') ## Rules for python 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. @@ -230,7 +232,7 @@ print('Hello, world!') ## Rules for python 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. @@ -266,7 +268,7 @@ pl.DataFrame() ## Rules for python 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. @@ -295,7 +297,7 @@ Immediately start with the following format. Do NOT comment on the code, just ou ## Rules for python 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. @@ -340,7 +342,7 @@ Separate logic into multiple cells to keep the code organized and readable. ## Rules for python: 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. @@ -348,6 +350,8 @@ Separate logic into multiple cells to keep the code organized and readable. ## Rules for sql: 1. The SQL must use duckdb syntax. +2. SQL cells start with df = mo.sql(f"""""") for DuckDB, or df = mo.sql(f"""""", engine=engine) for other SQL engines. +3. This will automatically display the result in the UI. You do not need to return the dataframe in the cell. ## Available variables from other cells: - variable: `df` From 8aeb1131539e6e2ee6529e3f1cc2c22481976135 Mon Sep 17 00:00:00 2001 From: Shahmir Varqha Date: Fri, 19 Sep 2025 12:26:19 +0800 Subject: [PATCH 2/4] add sql for claude --- docs/_static/CLAUDE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/_static/CLAUDE.md b/docs/_static/CLAUDE.md index 8fabf9c0035..18a9aa504be 100644 --- a/docs/_static/CLAUDE.md +++ b/docs/_static/CLAUDE.md @@ -79,7 +79,7 @@ Marimo's reactivity means: -- When writing duckdb, prefer using marimo's SQL cells, which start with _df = mo.sql(query) +- When writing duckdb, prefer using marimo's SQL cells, which start with df = mo.sql(f"""""") for DuckDB, or df = mo.sql(f"""""", engine=engine) for other SQL engines. - See the SQL with duckdb example for an example on how to do this - Don't add comments in cells that use mo.sql() - Consider using \`vega_datasets\` for common example datasets From 8d14cd6b8957787d0c7909f2a86b5f97aa7b09d7 Mon Sep 17 00:00:00 2001 From: Shahmir Varqha Date: Fri, 19 Sep 2025 14:57:22 +0800 Subject: [PATCH 3/4] better inline example for rules --- marimo/_server/ai/prompts.py | 2 +- .../ai/snapshots/chat_system_prompts.txt | 14 +++++++------- tests/_server/ai/snapshots/system_prompts.txt | 18 +++++++++--------- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/marimo/_server/ai/prompts.py b/marimo/_server/ai/prompts.py index ebad135714a..6f30c432a69 100644 --- a/marimo/_server/ai/prompts.py +++ b/marimo/_server/ai/prompts.py @@ -19,7 +19,7 @@ "python": [ "For matplotlib: use plt.gca() as the last expression instead of plt.show().", "For plotly: return the figure object directly.", - "For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair.", + "For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair (e.g., alt.Chart(df)).", "Include proper labels, titles, and color schemes.", "Make visualizations interactive where appropriate.", "If an import already exists, do not import it again.", diff --git a/tests/_server/ai/snapshots/chat_system_prompts.txt b/tests/_server/ai/snapshots/chat_system_prompts.txt index 374f556b4ba..6894ab2bc57 100644 --- a/tests/_server/ai/snapshots/chat_system_prompts.txt +++ b/tests/_server/ai/snapshots/chat_system_prompts.txt @@ -132,7 +132,7 @@ chart ## Rules for python: 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair (e.g., alt.Chart(df)). 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. @@ -275,7 +275,7 @@ chart ## Rules for python: 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair (e.g., alt.Chart(df)). 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. @@ -421,7 +421,7 @@ chart ## Rules for python: 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair (e.g., alt.Chart(df)). 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. @@ -567,7 +567,7 @@ chart ## Rules for python: 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair (e.g., alt.Chart(df)). 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. @@ -719,7 +719,7 @@ chart ## Rules for python: 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair (e.g., alt.Chart(df)). 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. @@ -879,7 +879,7 @@ chart ## Rules for python: 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair (e.g., alt.Chart(df)). 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. @@ -1027,7 +1027,7 @@ chart ## Rules for python: 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair (e.g., alt.Chart(df)). 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. diff --git a/tests/_server/ai/snapshots/system_prompts.txt b/tests/_server/ai/snapshots/system_prompts.txt index f8db84a6c97..396cb6f08a9 100644 --- a/tests/_server/ai/snapshots/system_prompts.txt +++ b/tests/_server/ai/snapshots/system_prompts.txt @@ -16,7 +16,7 @@ Immediately start with the following format. Do NOT comment on the code, just ou ## Rules for python 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair (e.g., alt.Chart(df)). 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. @@ -90,7 +90,7 @@ Immediately start with the following format. Do NOT comment on the code, just ou ## Rules for python 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair (e.g., alt.Chart(df)). 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. @@ -117,7 +117,7 @@ Immediately start with the following format. Do NOT comment on the code, just ou ## Rules for python 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair (e.g., alt.Chart(df)). 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. @@ -158,7 +158,7 @@ Immediately start with the following format. Do NOT comment on the code, just ou ## Rules for python 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair (e.g., alt.Chart(df)). 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. @@ -195,7 +195,7 @@ print('Hello, world!') ## Rules for python 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair (e.g., alt.Chart(df)). 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. @@ -232,7 +232,7 @@ print('Hello, world!') ## Rules for python 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair (e.g., alt.Chart(df)). 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. @@ -268,7 +268,7 @@ pl.DataFrame() ## Rules for python 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair (e.g., alt.Chart(df)). 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. @@ -297,7 +297,7 @@ Immediately start with the following format. Do NOT comment on the code, just ou ## Rules for python 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair (e.g., alt.Chart(df)). 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. @@ -342,7 +342,7 @@ Separate logic into multiple cells to keep the code organized and readable. ## Rules for python: 1. For matplotlib: use plt.gca() as the last expression instead of plt.show(). 2. For plotly: return the figure object directly. -3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair. +3. For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair (e.g., alt.Chart(df)). 4. Include proper labels, titles, and color schemes. 5. Make visualizations interactive where appropriate. 6. If an import already exists, do not import it again. From af7c2c166955fd4c93d840b458e2861eb92f3994 Mon Sep 17 00:00:00 2001 From: Shahmir Varqha Date: Fri, 19 Sep 2025 20:29:57 +0800 Subject: [PATCH 4/4] change iris vega dataset to huggingface csv --- docs/_static/CLAUDE.md | 21 +++++++++---------- .../__snapshots__/prompt.test.ts.snap | 19 ++++++++--------- frontend/src/components/chat/acp/prompt.ts | 19 ++++++++--------- 3 files changed, 28 insertions(+), 31 deletions(-) diff --git a/docs/_static/CLAUDE.md b/docs/_static/CLAUDE.md index 18a9aa504be..55ee76a789e 100644 --- a/docs/_static/CLAUDE.md +++ b/docs/_static/CLAUDE.md @@ -185,32 +185,31 @@ mo.ui.data_explorer(cars_df) import marimo as mo import polars as pl import altair as alt -from vega_datasets import data # Cell 2 # Load dataset -iris = pl.DataFrame(data.iris()) +iris = pl.read_csv("hf://datasets/scikit-learn/iris/Iris.csv") # Cell 3 # Create UI elements species_selector = mo.ui.dropdown( - options=["All"] + iris["species"].unique().to_list(), + options=["All"] + iris["Species"].unique().to_list(), value="All", - label="Species" + label="Species", ) x_feature = mo.ui.dropdown( options=iris.select(pl.col(pl.Float64, pl.Int64)).columns, - value="sepalLength", - label="X Feature" + value="SepalLengthCm", + label="X Feature", ) y_feature = mo.ui.dropdown( options=iris.select(pl.col(pl.Float64, pl.Int64)).columns, - value="sepalWidth", - label="Y Feature" + value="SepalWidthCm", + label="Y Feature", ) # Display UI elements in a horizontal stack @@ -221,14 +220,14 @@ mo.hstack([species_selector, x_feature, y_feature]) # Filter data based on selection -filtered_data = iris if species_selector.value == "All" else iris.filter(pl.col("species") == species_selector.value) +filtered_data = iris if species_selector.value == "All" else iris.filter(pl.col("Species") == species_selector.value) # Create visualization based on UI selections chart = alt.Chart(filtered_data).mark_circle().encode( x=alt.X(x_feature.value, title=x_feature.value), y=alt.Y(y_feature.value, title=y_feature.value), - color='species' + color='Species' ).properties( title=f"{y_feature.value} vs {x_feature.value}", width=500, @@ -301,7 +300,7 @@ import polars as pl # Load dataset -weather = pl.read_csv('https://raw.githubusercontent.com/vega/vega-datasets/refs/heads/main/data/weather.csv') +weather = pl.read_csv("https://raw.githubusercontent.com/vega/vega-datasets/refs/heads/main/data/weather.csv") # Cell 3 diff --git a/frontend/src/components/chat/acp/__tests__/__snapshots__/prompt.test.ts.snap b/frontend/src/components/chat/acp/__tests__/__snapshots__/prompt.test.ts.snap index f171d361a1c..79773353831 100644 --- a/frontend/src/components/chat/acp/__tests__/__snapshots__/prompt.test.ts.snap +++ b/frontend/src/components/chat/acp/__tests__/__snapshots__/prompt.test.ts.snap @@ -202,42 +202,41 @@ exports[`getAgentPrompt > should generate complete agent prompt with default fil import marimo as mo import polars as pl import altair as alt - from vega_datasets import data return @app.cell def _(): - iris = pl.DataFrame(data.iris()) + iris = pl.read_csv("hf://datasets/scikit-learn/iris/Iris.csv") return @app.cell def _(): species_selector = mo.ui.dropdown( - options=["All"] + iris["species"].unique().to_list(), + options=["All"] + iris["Species"].unique().to_list(), value="All", - label="Species" + label="Species", ) x_feature = mo.ui.dropdown( options=iris.select(pl.col(pl.Float64, pl.Int64)).columns, - value="sepalLength", - label="X Feature" + value="SepalLengthCm", + label="X Feature", ) y_feature = mo.ui.dropdown( options=iris.select(pl.col(pl.Float64, pl.Int64)).columns, - value="sepalWidth", - label="Y Feature" + value="SepalWidthCm", + label="Y Feature", ) mo.hstack([species_selector, x_feature, y_feature]) return @app.cell def _(): - filtered_data = iris if species_selector.value == "All" else iris.filter(pl.col("species") == species_selector.value) + filtered_data = iris if species_selector.value == "All" else iris.filter(pl.col("Species") == species_selector.value) chart = alt.Chart(filtered_data).mark_circle().encode( x=alt.X(x_feature.value, title=x_feature.value), y=alt.Y(y_feature.value, title=y_feature.value), - color='species' + color='Species' ).properties( title=f"{y_feature.value} vs {x_feature.value}", width=500, diff --git a/frontend/src/components/chat/acp/prompt.ts b/frontend/src/components/chat/acp/prompt.ts index 86d99b9d4de..975193b286d 100644 --- a/frontend/src/components/chat/acp/prompt.ts +++ b/frontend/src/components/chat/acp/prompt.ts @@ -192,36 +192,35 @@ export function getAgentPrompt(filename: string) { import marimo as mo import polars as pl import altair as alt - from vega_datasets import data `, ` - iris = pl.DataFrame(data.iris()) + iris = pl.read_csv("hf://datasets/scikit-learn/iris/Iris.csv") `, ` species_selector = mo.ui.dropdown( - options=["All"] + iris["species"].unique().to_list(), + options=["All"] + iris["Species"].unique().to_list(), value="All", - label="Species" + label="Species", ) x_feature = mo.ui.dropdown( options=iris.select(pl.col(pl.Float64, pl.Int64)).columns, - value="sepalLength", - label="X Feature" + value="SepalLengthCm", + label="X Feature", ) y_feature = mo.ui.dropdown( options=iris.select(pl.col(pl.Float64, pl.Int64)).columns, - value="sepalWidth", - label="Y Feature" + value="SepalWidthCm", + label="Y Feature", ) mo.hstack([species_selector, x_feature, y_feature]) `, ` - filtered_data = iris if species_selector.value == "All" else iris.filter(pl.col("species") == species_selector.value) + filtered_data = iris if species_selector.value == "All" else iris.filter(pl.col("Species") == species_selector.value) chart = alt.Chart(filtered_data).mark_circle().encode( x=alt.X(x_feature.value, title=x_feature.value), y=alt.Y(y_feature.value, title=y_feature.value), - color='species' + color='Species' ).properties( title=f"{y_feature.value} vs {x_feature.value}", width=500,