
Commit c7a4085

[Doc] Move examples and further reorganize user guide (vllm-project#18666)

DarkLight1337 authored and amitm02 committed

Signed-off-by: DarkLight1337 <[email protected]>
Signed-off-by: amit <[email protected]>

1 parent 82b8f2c, commit c7a4085

27 files changed: +31 −42 lines

.buildkite/pyproject.toml

Lines changed: 0 additions & 5 deletions
@@ -6,11 +6,6 @@
 
 [tool.ruff]
 line-length = 88
-exclude = [
-    # External file, leaving license intact
-    "examples/other/fp8/quantizer/quantize.py",
-    "vllm/vllm_flash_attn/flash_attn_interface.pyi"
-]
 
 [tool.ruff.lint.per-file-ignores]
 "vllm/third_party/**" = ["ALL"]

.buildkite/test-pipeline.yaml

Lines changed: 1 addition & 1 deletion
@@ -246,7 +246,7 @@ steps:
   - python3 offline_inference/vision_language.py --seed 0
   - python3 offline_inference/vision_language_embedding.py --seed 0
   - python3 offline_inference/vision_language_multi_image.py --seed 0
-  - VLLM_USE_V1=0 python3 other/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 other/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
+  - VLLM_USE_V1=0 python3 others/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 others/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors
   - python3 offline_inference/encoder_decoder.py
   - python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0
   - python3 offline_inference/basic/classify.py

.gitignore

Lines changed: 1 addition & 1 deletion
@@ -146,7 +146,7 @@ venv.bak/
 
 # mkdocs documentation
 /site
-docs/getting_started/examples
+docs/examples
 
 # mypy
 .mypy_cache/

benchmarks/pyproject.toml

Lines changed: 0 additions & 5 deletions
@@ -6,11 +6,6 @@
 
 [tool.ruff]
 line-length = 88
-exclude = [
-    # External file, leaving license intact
-    "examples/other/fp8/quantizer/quantize.py",
-    "vllm/vllm_flash_attn/flash_attn_interface.pyi"
-]
 
 [tool.ruff.lint.per-file-ignores]
 "vllm/third_party/**" = ["ALL"]

docs/.nav.yml

Lines changed: 4 additions & 5 deletions
@@ -5,11 +5,9 @@ nav:
   - getting_started/quickstart.md
   - getting_started/installation
   - Examples:
-    - Offline Inference: getting_started/examples/offline_inference
-    - Online Serving: getting_started/examples/online_serving
-    - Others:
-      - LMCache: getting_started/examples/lmcache
-      - getting_started/examples/other/*
+    - Offline Inference: examples/offline_inference
+    - Online Serving: examples/online_serving
+    - Others: examples/others
   - Quick Links:
     - User Guide: usage/README.md
     - Developer Guide: contributing/README.md
@@ -19,6 +17,7 @@ nav:
   - Releases: https://github.com/vllm-project/vllm/releases
 - User Guide:
   - Summary: usage/README.md
+  - usage/v1_guide.md
   - General:
     - usage/*
   - Inference and Serving:

docs/configuration/README.md

Lines changed: 7 additions & 2 deletions
@@ -1,4 +1,9 @@
 # Configuration Options
 
-This section lists the most common options for running the vLLM engine.
-For a full list, refer to the [configuration][configuration] page.
+This section lists the most common options for running vLLM.
+
+There are three main levels of configuration, from highest priority to lowest priority:
+
+- [Request parameters][completions-api] and [input arguments][sampling-params]
+- [Engine arguments](./engine_args.md)
+- [Environment variables](./env_vars.md)
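A hedged sketch of how those three configuration levels layer in practice (the model name, port, and option values below are illustrative placeholders, not taken from this commit):

```shell
# Lowest priority: environment variables configure the vLLM runtime as a whole.
export VLLM_LOGGING_LEVEL=DEBUG

# Middle priority: engine arguments set server-wide defaults at launch, e.g.
#   vllm serve facebook/opt-125m --max-model-len 2048

# Highest priority: request parameters override those defaults per request, e.g.
#   curl http://localhost:8000/v1/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "facebook/opt-125m", "prompt": "Hello", "max_tokens": 16}'
```

A setting given at a higher level wins only for its own scope: the engine argument applies to every request served by that process, while the request parameter applies to a single call.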
File renamed without changes.

docs/design/v1/metrics.md

Lines changed: 2 additions & 2 deletions
@@ -61,7 +61,7 @@ These are documented under [Inferencing and Serving -> Production Metrics](../..
 
 ### Grafana Dashboard
 
-vLLM also provides [a reference example](https://docs.vllm.ai/en/latest/getting_started/examples/prometheus_grafana.html) for how to collect and store these metrics using Prometheus and visualize them using a Grafana dashboard.
+vLLM also provides [a reference example](https://docs.vllm.ai/en/latest/examples/prometheus_grafana.html) for how to collect and store these metrics using Prometheus and visualize them using a Grafana dashboard.
 
 The subset of metrics exposed in the Grafana dashboard gives us an indication of which metrics are especially important:
 
@@ -673,7 +673,7 @@ v0 has support for OpenTelemetry tracing:
 - [OpenTelemetry blog
   post](https://opentelemetry.io/blog/2024/llm-observability/)
 - [User-facing
-  docs](https://docs.vllm.ai/en/latest/getting_started/examples/opentelemetry.html)
+  docs](https://docs.vllm.ai/en/latest/examples/opentelemetry.html)
 - [Blog
   post](https://medium.com/@ronen.schaffer/follow-the-trail-supercharging-vllm-with-opentelemetry-distributed-tracing-aa655229b46f)
 - [IBM product

docs/mkdocs/hooks/generate_examples.py

Lines changed: 1 addition & 1 deletion
@@ -9,7 +9,7 @@
 ROOT_DIR = Path(__file__).parent.parent.parent.parent
 ROOT_DIR_RELATIVE = '../../../../..'
 EXAMPLE_DIR = ROOT_DIR / "examples"
-EXAMPLE_DOC_DIR = ROOT_DIR / "docs/getting_started/examples"
+EXAMPLE_DOC_DIR = ROOT_DIR / "docs/examples"
 print(ROOT_DIR.resolve())
 print(EXAMPLE_DIR.resolve())
 print(EXAMPLE_DOC_DIR.resolve())
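The effect of this one-line change on the generated docs tree can be sketched as follows (the `ROOT_DIR` value here is a stand-in for illustration; the real hook derives it from `__file__`):

```python
from pathlib import Path

# Stand-in for the repository root; not the hook's real computed path.
ROOT_DIR = Path("vllm")

# Before this commit, generated example pages were written under
# docs/getting_started/examples; afterwards they live one level up.
old_doc_dir = ROOT_DIR / "docs/getting_started/examples"
new_doc_dir = ROOT_DIR / "docs/examples"

print(old_doc_dir)  # vllm/docs/getting_started/examples
print(new_doc_dir)  # vllm/docs/examples
```

This is why every `getting_started/examples/...` URL elsewhere in the commit becomes `examples/...`: the hook's output directory determines where mkdocs publishes the pages.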

docs/models/extensions/tensorizer.md

Lines changed: 1 addition & 1 deletion
@@ -10,7 +10,7 @@ shorter Pod startup times and CPU memory usage. Tensor encryption is also suppor
 
 For more information on CoreWeave's Tensorizer, please refer to
 [CoreWeave's Tensorizer documentation](https://github.com/coreweave/tensorizer). For more information on serializing a vLLM model, as well a general usage guide to using Tensorizer with vLLM, see
-the [vLLM example script](https://docs.vllm.ai/en/latest/getting_started/examples/tensorize_vllm_model.html).
+the [vLLM example script](https://docs.vllm.ai/en/latest/examples/tensorize_vllm_model.html).
 
 !!! note
     Note that to use this feature you will need to install `tensorizer` by running `pip install vllm[tensorizer]`.
