Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 63 additions & 41 deletions docs/_static/CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ Marimo's reactivity means:

<data_handling>

- Use pandas for data manipulation
- Use polars for data manipulation
- Implement proper data validation
- Handle missing values appropriately
- Use efficient data structures
Expand All @@ -56,7 +56,7 @@ Marimo's reactivity means:
<visualization>
- For matplotlib: use plt.gca() as the last expression instead of plt.show()
- For plotly: return the figure object directly
- For altair: return the chart object directly
- For altair: return the chart object directly. Add tooltips where appropriate. You can pass polars dataframes directly to altair.
- Include proper labels, titles, and color schemes
- Make visualizations interactive where appropriate
</visualization>
Expand All @@ -79,7 +79,7 @@ Marimo's reactivity means:
</data_sources>

<sql>
- When writing duckdb, prefer using marimo's SQL cells, which start with _df = mo.sql(query)
- When writing duckdb, prefer using marimo's SQL cells, which start with df = mo.sql(f"""<your query>""") for DuckDB, or df = mo.sql(f"""<your query>""", engine=engine) for other SQL engines.
- See the SQL with duckdb example for an example on how to do this
- Don't add comments in cells that use mo.sql()
- Consider using \`vega_datasets\` for common example datasets
Expand Down Expand Up @@ -132,7 +132,8 @@ Common issues and solutions:
<example title="Basic UI with reactivity">
# Cell 1
import marimo as mo
import matplotlib.pyplot as plt
import altair as alt
import polars as pl
import numpy as np

# Cell 2
Expand All @@ -151,58 +152,64 @@ n_points # Display the slider
x = np.random.rand(n_points.value)
y = np.random.rand(n_points.value)

plt.figure(figsize=(8, 6))
plt.scatter(x, y, alpha=0.7)
plt.title(f"Scatter plot with {n_points.value} points")
plt.xlabel("X axis")
plt.ylabel("Y axis")
plt.gca() # Return the current axes to display the plot
df = pl.DataFrame({"x": x, "y": y})

chart = alt.Chart(df).mark_circle(opacity=0.7).encode(
x=alt.X('x', title='X axis'),
y=alt.Y('y', title='Y axis')
).properties(
title=f"Scatter plot with {n_points.value} points",
width=400,
height=300
)

chart
</example>

<example title="Data explorer">
# Cell 1
import marimo as mo
import pandas as pd
import polars as pl
from vega_datasets import data

# Cell 2

# Load and display dataset with interactive explorer

cars_df = data.cars()
cars_df = pl.DataFrame(data.cars())
mo.ui.data_explorer(cars_df)
</example>

<example title="Multiple UI elements">
# Cell 1
import marimo as mo
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import polars as pl
import altair as alt
from vega_datasets import data
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we can remove vega_datasets and instead do read_cab from either hf:// or GitHub

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I changed one example to use huggingface. We have this under best practices, so I think we can keep the other example to use vega_datasets.

- Consider using `vega_datasets` for common example datasets


# Cell 2

# Load dataset

iris = sns.load_dataset('iris')
iris = pl.DataFrame(data.iris())

# Cell 3

# Create UI elements

species_selector = mo.ui.dropdown(
options=["All"] + iris["species"].unique().tolist(),
options=["All"] + iris["species"].unique().to_list(),
value="All",
label="Species"
)
x_feature = mo.ui.dropdown(
options=iris.select_dtypes('number').columns.tolist(),
value="sepal_length",
options=iris.select(pl.col(pl.Float64, pl.Int64)).columns,
value="sepalLength",
label="X Feature"
)
y_feature = mo.ui.dropdown(
options=iris.select_dtypes('number').columns.tolist(),
value="sepal_width",
options=iris.select(pl.col(pl.Float64, pl.Int64)).columns,
value="sepalWidth",
label="Y Feature"
)

Expand All @@ -214,36 +221,45 @@ mo.hstack([species_selector, x_feature, y_feature])

# Filter data based on selection

filtered_data = iris if species_selector.value == "All" else iris[iris["species"] == species_selector.value]
filtered_data = iris if species_selector.value == "All" else iris.filter(pl.col("species") == species_selector.value)

# Create visualization based on UI selections

plt.figure(figsize=(10, 6))
sns.scatterplot(
data=filtered_data,
x=x_feature.value,
y=y_feature.value,
hue="species"
chart = alt.Chart(filtered_data).mark_circle().encode(
x=alt.X(x_feature.value, title=x_feature.value),
y=alt.Y(y_feature.value, title=y_feature.value),
color='species'
).properties(
title=f"{y_feature.value} vs {x_feature.value}",
width=500,
height=400
)
plt.title(f"{y_feature.value} vs {x_feature.value}")
plt.gca()

chart
</example>

<example title="Interactive chart with Altair">
# Cell 1
import marimo as mo
import altair as alt
import pandas as pd
import polars as pl

# Cell 2

# Load dataset

cars_df = pd.read_csv('<https://raw.githubusercontent.com/vega/vega-datasets/master/data/cars.json>')
_chart = alt.Chart(cars_df).mark_point().encode(
x='Horsepower',
y='Miles_per_Gallon',
color='Origin',
weather = pl.read_csv("https://raw.githubusercontent.com/vega/vega-datasets/refs/heads/main/data/weather.csv")
weather_dates = weather.with_columns(
pl.col("date").str.strptime(pl.Date, format="%Y-%m-%d")
)
_chart = (
alt.Chart(weather_dates)
.mark_point()
.encode(
x="date:T",
y="temp_max",
color="location",
)
)

chart = mo.ui.altair_chart(_chart)
Expand Down Expand Up @@ -279,16 +295,21 @@ else:
<example title="SQL with duckdb">
# Cell 1
import marimo as mo
import polars as pl

# Cell 2

# Load dataset

cars_df = pd.read_csv('<https://raw.githubusercontent.com/vega/vega-datasets/master/data/cars.json>')
weather = pl.read_csv('https://raw.githubusercontent.com/vega/vega-datasets/refs/heads/main/data/weather.csv')

# Cell 3

_df = mo.sql("SELECT * from cars_df WHERE Miles_per_Gallon > 20")
seattle_weather_df = mo.sql(
f"""
SELECT * FROM weather WHERE location = 'Seattle';
"""
)
</example>

<example title="Writing LaTeX in markdown">
Expand All @@ -297,10 +318,11 @@ import marimo as mo

# Cell 2

mo.md(r"""

mo.md(
r"""
The quadratic function $f$ is defined as

$$f(x) = x^2.$$
""")
"""
)
</example>
Loading
Loading