diff --git a/README.md b/README.md index e0e2446e..3d907919 100644 --- a/README.md +++ b/README.md @@ -58,37 +58,40 @@ Check Helm's [official docs](https://helm.sh/docs/intro/using_helm/) for more gu ## Values Below are the values you can set. -| Key | Description | Type | Default | -|----------------------------------------|-------------------------------------------------------------------------------------------------------------------|--------------|---------------------------------------------| -| `modelArtifacts.name` | name of model in the form namespace/modelId. Required. | string | N/A | -| `modelArtifacts.uri` | Model artifacts URI. Current formats supported include `hf://`, `pvc://`, and `oci://` | string | N/A | -| `modelArtifacts.size` | Size used to create an emptyDir volume for downloading the model. | string | N/A | -| `modelArtifacts.authSecretName` | The name of the Secret containing `HF_TOKEN` for `hf://` artifacts that require a token for downloading a model. | string | N/A | -| `modelArtifacts.mountPath` | Path to mount the volume created to store models | string | /model-cache | -| `multinode` | Determines whether to create P/D using Deployments (false) or LeaderWorkerSets (true) | bool | `false` | -| `routing.servicePort` | The port the routing proxy sidecar listens on.
If there is no sidecar, this is the port the request goes to. | int | N/A | -| `routing.proxy.image` | Image used for the sidecar | string | `ghcr.io/llm-d/llm-d-routing-sidecar:0.0.6` | -| `routing.proxy.targetPort` | The port the vLLM decode container listens on.
If proxy is present, it will forward request to this port. | string | N/A | -| `routing.proxy.debugLevel` | Debug level of the routing proxy | int | 5 | -| `routing.proxy.parentRefs[*].name` | The name of the inference gateway | string | N/A | -| `decode.create` | If true, creates decode Deployment or LeaderWorkerSet | List | `true` | -| `decode.annotations` | Annotations that should be added to the Deployment or LeaderWorkerSet | Dict | {} | -| `decode.tolerations` | Tolerations that should be added to the Deployment or LeaderWorkerSet | List | [] | -| `decode.replicas` | Number of replicas for decode pods | int | 1 | -| `decode.extraConfig` | Extra pod configuration | dict | {} | -| `decode.containers[*].name` | Name of the container for the decode deployment/LWS | string | N/A | -| `decode.containers[*].image` | Image of the container for the decode deployment/LWS | string | N/A | -| `decode.containers[*].args` | List of arguments for the decode container. | List[string] | [] | -| `decode.containers[*].modelCommand` | Nature of the command. One of `vllmServe`, `imageDefault` or `custom` | string | `imageDefault` | -| `decode.containers[*].command` | List of commands for the decode container. | List[string] | [] | -| `decode.containers[*].ports` | List of ports for the decode container. | List[Port] | [] | -| `decode.containers[*].extraConfig` | Extra container configuration | dict | {} | -| `decode.parallelism.data` | Amount of data parallelism | int | 1 | -| `decode.parallelism.tensor` | Amount of tensor parallelism | int | 1 | -| `decode.acceleratorTypes.labelKey` | Key of label on node that identifies the hosted GPU type | string | N/A | -| `decode.acceleratorTypes.labelValue` | Value of label on node that identifies type of hosted GPU | string | N/A | -| `prefill` | Same fields supported in `decode` | See above | See above | -| `extraObjects` | Additional Kubernetes objects to be deployed alongside the main application | List | [] | +| Key | Description | Type | Default | +|----------------------------------------|-------------------------------------------------------------------------------------------------------------------|-----------------|---------------------------------------------| +| `modelArtifacts.name` | name of model in the form namespace/modelId. Required. | string | N/A | +| `modelArtifacts.uri` | Model artifacts URI. Current formats supported include `hf://`, `pvc://`, and `oci://` | string | N/A | +| `modelArtifacts.size` | Size used to create an emptyDir volume for downloading the model. | string | N/A | +| `modelArtifacts.authSecretName` | The name of the Secret containing `HF_TOKEN` for `hf://` artifacts that require a token for downloading a model. | string | N/A | +| `modelArtifacts.mountPath` | Path to mount the volume created to store models | string | /model-cache | +| `multinode` | Determines whether to create P/D using Deployments (false) or LeaderWorkerSets (true) | bool | `false` | +| `routing.servicePort` | The port the routing proxy sidecar listens on.
If there is no sidecar, this is the port the request goes to. | int | N/A | +| `routing.proxy.image` | Image used for the sidecar | string | `ghcr.io/llm-d/llm-d-routing-sidecar:0.0.6` | +| `routing.proxy.targetPort` | The port the vLLM decode container listens on.
If proxy is present, it will forward requests to this port. | string | N/A | +| `routing.proxy.debugLevel` | Debug level of the routing proxy | int | 5 | +| `routing.proxy.parentRefs[*].name` | The name of the inference gateway | string | N/A | +| `decode.create` | If true, creates decode Deployment or LeaderWorkerSet | bool | `true` | +| `decode.annotations` | Annotations that should be added to the Deployment or LeaderWorkerSet | Dict | {} | +| `decode.tolerations` | Tolerations that should be added to the Deployment or LeaderWorkerSet | List | [] | +| `decode.replicas` | Number of replicas for decode pods | int | 1 | +| `decode.extraConfig` | Extra pod configuration | dict | {} | +| `decode.containers[*].name` | Name of the container for the decode deployment/LWS | string | N/A | +| `decode.containers[*].image` | Image of the container for the decode deployment/LWS | string | N/A | +| `decode.containers[*].args` | List of arguments for the decode container. | List[string] | [] | +| `decode.containers[*].modelCommand` | Nature of the command. One of `vllmServe`, `imageDefault` or `custom` | string | `imageDefault` | +| `decode.containers[*].command` | List of commands for the decode container. | List[string] | [] | +| `decode.containers[*].ports` | List of ports for the decode container. | List[Port] | [] | +| `decode.containers[*].extraConfig` | Extra container configuration | dict | {} | +| `decode.initContainers` | List of initContainers that should be added (in addition to the routing proxy, if enabled) | List[Container] | N/A | +| `decode.parallelism.tensor` | Amount of tensor parallelism | int | 1 | +| `decode.parallelism.data` | Amount of data parallelism | int | 1 | +| `decode.parallelism.dataLocal` | Amount of data parallelism local to each worker | int | 1 | +| `decode.parallelism.workers` | Number of workers over which data parallelism is implemented | int | 1 | +| `decode.acceleratorTypes.labelKey` | Key of label on node that identifies the hosted GPU type | string | N/A | +| `decode.acceleratorTypes.labelValue` | Value of label on node that identifies type of hosted GPU | string | N/A | +| `prefill` | Same fields supported in `decode` | See above | See above | +| `extraObjects` | Additional Kubernetes objects to be deployed alongside the main application | List | [] | ## Contribute diff --git a/charts/llm-d-modelservice/Chart.yaml b/charts/llm-d-modelservice/Chart.yaml index bec901fd..87359da7 100644 --- a/charts/llm-d-modelservice/Chart.yaml +++ b/charts/llm-d-modelservice/Chart.yaml @@ -13,7 +13,7 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: "v0.3.4" +version: "v0.3.5" # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using.
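To make the new README fields concrete, here is a minimal, hypothetical `values.yaml` sketch using the `parallelism.dataLocal`, `parallelism.workers`, and `initContainers` keys documented in the table above. The model name, image, and sizes are placeholders, not values taken from this repository.

```yaml
# Illustrative only -- model, image, and sizes are placeholders.
modelArtifacts:
  name: example-org/example-model          # namespace/modelId
  uri: hf://example-org/example-model
  size: 50Gi

multinode: true                            # render LeaderWorkerSets instead of Deployments

decode:
  replicas: 1
  parallelism:
    tensor: 2        # tensor parallelism per rank
    dataLocal: 4     # data-parallel ranks local to each worker
    workers: 2       # workers per replica; total data parallelism = dataLocal * workers = 8
  initContainers:    # added in addition to the routing proxy, if enabled
    - name: prefetch-model
      image: busybox:1.36
      command: ["sh", "-c", "echo prefetching model artifacts"]
```

Under the helpers introduced in the `_helpers.tpl` changes below, these values would be expected to resolve to a LeaderWorkerSet of size 2 per replica, with dataLocal × tensor = 8 accelerators requested per worker.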
diff --git a/charts/llm-d-modelservice/templates/_helpers.tpl b/charts/llm-d-modelservice/templates/_helpers.tpl index ba6d287e..3477979b 100644 --- a/charts/llm-d-modelservice/templates/_helpers.tpl +++ b/charts/llm-d-modelservice/templates/_helpers.tpl @@ -122,14 +122,87 @@ initContainers: {{- end }} {{- end }} -{{/* Desired P/D tensor parallelism -- user set or defaults to 1 */}} +{{/* Desired tensor parallelism -- +- if tensor set, return it +- else return 1 +*/}} {{- define "llm-d-modelservice.tensorParallelism" -}} -{{- if and . .tensor }}{{ .tensor }}{{ else }}1{{ end }} +{{- if and . .tensor -}} +{{ .tensor }} +{{- else -}} +1 +{{- end -}} {{- end }} -{{/* Desired P/D data parallelism -- user set or defaults to 1 */}} +{{/* +Desired data parallelism -- +- if data set, return it +- else if dataLocal and workers set, return dataLocal * workers +- else if dataLocal set, return dataLocal (w = 1) +- else return 1 (dpl = 1, w = 1) +*/}} {{- define "llm-d-modelservice.dataParallelism" -}} -{{- if and . .data }}{{ .data }}{{ else }}1{{ end }} +{{- if and . .data -}} +{{ .data }} +{{- else if and . .dataLocal .workers -}} +{{ mul .dataLocal .workers }} +{{- else if and . .dataLocal -}} +{{ .dataLocal }} +{{- else -}} +1 +{{- end -}} +{{- end }} + +{{/* +Desired data local parallelism -- +- if dataLocal set, return it +- else if data and workers set, return data / workers +- else if data set, return data (w = 1) +- else return 1 (dp = 1, w = 1) +*/}} +{{- define "llm-d-modelservice.dataLocalParallelism" -}} +{{- if and . .dataLocal -}} +{{ .dataLocal }} +{{- else if and . .data .workers -}} +{{ $result := div (int .data) (int .workers) }} +{{- if ne (int .data) (mul $result .workers) -}} +{{- fail "parallelism.data must be a multiple of parallelism.workers" -}} +{{- else -}} +{{ $result }} +{{- end -}} +{{- else if and . .data -}} +{{ .data }} +{{- else -}} +1 +{{- end -}} +{{- end }} + +{{/* +Desired number of workers -- +- if workers set, return it +- else if data and dataLocal set, return data / dataLocal +- else return 1 (dp = 1, dpl = 1) +*/}} +{{- define "llm-d-modelservice.numWorkers" -}} +{{- if and . .workers -}} +{{ .workers }} +{{- else if and . .data .dataLocal -}} +{{ $result := div (int .data) (int .dataLocal) }} +{{- if ne (int .data) (mul $result .dataLocal) -}} +{{- fail "parallelism.data must be a multiple of parallelism.dataLocal" -}} +{{- else -}} +{{ $result }} +{{- end -}} +{{- else -}} +1 +{{- end -}} +{{- end }} + +{{/* +Required number of GPU per worker -- dpl * tp +*/}} +{{- define "llm-d-modelservice.numGpuPerWorker" -}} +{{ mul (include "llm-d-modelservice.dataLocalParallelism" .) (include "llm-d-modelservice.tensorParallelism" .) }} {{- end }} {{/* @@ -172,21 +245,21 @@ nvidia.com/gpu {{/* P/D deployment container resources */}} {{- define "llm-d-modelservice.resources" -}} -{{- $tensorParallelism := int (include "llm-d-modelservice.tensorParallelism" .parallelism) -}} +{{- $numGpus := int (include "llm-d-modelservice.numGpuPerWorker" .parallelism) -}} {{- $acceleratorResource := include "llm-d-modelservice.acceleratorResource" . 
-}} {{- $limits := dict }} {{- if and .resources .resources.limits }} {{- $limits = deepCopy .resources.limits }} {{- end }} -{{- if and (ge (int $tensorParallelism) 1) (ne $acceleratorResource "") }} -{{- $limits = mergeOverwrite $limits (dict $acceleratorResource (toString $tensorParallelism)) }} +{{- if and (ge (int $numGpus) 1) (ne $acceleratorResource "") }} +{{- $limits = mergeOverwrite $limits (dict $acceleratorResource (toString $numGpus)) }} {{- end }} {{- $requests := dict }} {{- if and .resources .resources.requests }} {{- $requests = deepCopy .resources.requests }} {{- end }} -{{- if and (ge (int $tensorParallelism) 1) (ne $acceleratorResource "") }} -{{- $requests = mergeOverwrite $requests (dict $acceleratorResource (toString $tensorParallelism)) }} +{{- if and (ge (int $numGpus) 1) (ne $acceleratorResource "") }} +{{- $requests = mergeOverwrite $requests (dict $acceleratorResource (toString $numGpus)) }} {{- end }} resources: limits: @@ -417,6 +490,16 @@ args: - --tensor-parallel-size - {{ $tensorParallelism | quote }} {{- end }} + {{- $dataParallelism := int (include "llm-d-modelservice.dataParallelism" .parallelism) -}} + {{- if gt (int $dataParallelism) 1 }} + - --data-parallel-size + - {{ $dataParallelism | quote }} + {{- end }} + {{- $dataLocalParallelism := int (include "llm-d-modelservice.dataLocalParallelism" .parallelism) -}} + {{- if gt (int $dataLocalParallelism) 1 }} + - --data-parallel-size-local + - {{ $dataLocalParallelism | quote }} + {{- end }} - --served-model-name - {{ .Values.modelArtifacts.name | quote }} {{- with .container.args }} @@ -435,6 +518,16 @@ args: - --tensor-parallel-size - {{ $tensorParallelism | quote }} {{- end }} + {{- $dataParallelism := int (include "llm-d-modelservice.dataParallelism" .parallelism) -}} + {{- if gt (int $dataParallelism) 1 }} + - --data-parallel-size + - {{ $dataParallelism | quote }} + {{- end }} + {{- $dataLocalParallelism := int (include "llm-d-modelservice.dataLocalParallelism" .parallelism) -}} + {{- if gt (int $dataLocalParallelism) 1 }} + - --data-parallel-size-local + - {{ $dataLocalParallelism | quote }} + {{- end }} - --served-model-name - {{ .Values.modelArtifacts.name | quote }} {{- with .container.args }} @@ -516,4 +609,6 @@ context is a dict with helm root context plus: value: {{ include "llm-d-modelservice.dataParallelism" .parallelism | quote }} - name: TP_SIZE value: {{ include "llm-d-modelservice.tensorParallelism" .parallelism | quote }} +- name: DP_SIZE_LOCAL + value: {{ include "llm-d-modelservice.dataLocalParallelism" .parallelism | quote }} {{- end }} {{- /* define "llm-d-modelservice.parallelismEnv" */}} diff --git a/charts/llm-d-modelservice/templates/decode-lws.yaml b/charts/llm-d-modelservice/templates/decode-lws.yaml index d6b28555..2172169b 100644 --- a/charts/llm-d-modelservice/templates/decode-lws.yaml +++ b/charts/llm-d-modelservice/templates/decode-lws.yaml @@ -20,7 +20,7 @@ spec: replicas: {{ ternary .Values.decode.replicas 1 (hasKey .Values.decode "replicas") }} {{- end }} leaderWorkerTemplate: - size: {{ int (include "llm-d-modelservice.dataParallelism" .Values.decode.parallelism) }} + size: {{ int (include "llm-d-modelservice.numWorkers" .Values.decode.parallelism) }} {{- if .Values.decode.subGroupPolicy }} subGroupPolicy: {{- toYaml .Values.decode.subGroupPolicy | nindent 6 }} diff --git a/charts/llm-d-modelservice/templates/prefill-lws.yaml b/charts/llm-d-modelservice/templates/prefill-lws.yaml index 94e4f3ae..3ce797c4 100644 --- 
a/charts/llm-d-modelservice/templates/prefill-lws.yaml +++ b/charts/llm-d-modelservice/templates/prefill-lws.yaml @@ -20,7 +20,7 @@ spec: replicas: {{ ternary .Values.prefill.replicas 1 (hasKey .Values.prefill "replicas") }} {{- end }} leaderWorkerTemplate: - size: {{ int (include "llm-d-modelservice.dataParallelism" .Values.prefill.parallelism) }} + size: {{ int (include "llm-d-modelservice.numWorkers" .Values.prefill.parallelism) }} {{- if .Values.prefill.subGroupPolicy }} subGroupPolicy: {{- toYaml .Values.prefill.subGroupPolicy | nindent 6 }} diff --git a/examples/output-cpu.yaml b/examples/output-cpu.yaml index a7c8cf6c..3ff57c0b 100644 --- a/examples/output-cpu.yaml +++ b/examples/output-cpu.yaml @@ -6,7 +6,7 @@ kind: ServiceAccount metadata: name: cpu-sim-llm-d-modelservice labels: - helm.sh/chart: llm-d-modelservice-v0.3.4 + helm.sh/chart: llm-d-modelservice-v0.3.5 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm --- @@ -16,7 +16,7 @@ kind: Deployment metadata: name: cpu-sim-llm-d-modelservice-decode labels: - helm.sh/chart: llm-d-modelservice-v0.3.4 + helm.sh/chart: llm-d-modelservice-v0.3.5 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm spec: @@ -76,6 +76,8 @@ spec: value: "1" - name: TP_SIZE value: "1" + - name: DP_SIZE_LOCAL + value: "1" - name: HF_HOME value: /model-cache @@ -99,7 +101,7 @@ kind: Deployment metadata: name: cpu-sim-llm-d-modelservice-prefill labels: - helm.sh/chart: llm-d-modelservice-v0.3.4 + helm.sh/chart: llm-d-modelservice-v0.3.5 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm spec: @@ -142,6 +144,8 @@ spec: value: "1" - name: TP_SIZE value: "1" + - name: DP_SIZE_LOCAL + value: "1" - name: HF_HOME value: /model-cache diff --git a/examples/output-dra.yaml b/examples/output-dra.yaml index e773db26..e545b657 100644 --- a/examples/output-dra.yaml +++ b/examples/output-dra.yaml @@ -6,7 +6,7 @@ kind: ServiceAccount metadata: name: dra-llm-d-modelservice labels: - helm.sh/chart: llm-d-modelservice-v0.3.4 + helm.sh/chart: llm-d-modelservice-v0.3.5 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm --- @@ -16,7 +16,7 @@ kind: Deployment metadata: name: dra-llm-d-modelservice-decode labels: - helm.sh/chart: llm-d-modelservice-v0.3.4 + helm.sh/chart: llm-d-modelservice-v0.3.5 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm spec: @@ -78,6 +78,8 @@ spec: value: "1" - name: TP_SIZE value: "1" + - name: DP_SIZE_LOCAL + value: "1" - name: HF_HUB_CACHE value: /model-cache/ diff --git a/examples/output-gaudi.yaml b/examples/output-gaudi.yaml new file mode 100644 index 00000000..b2897a89 --- /dev/null +++ b/examples/output-gaudi.yaml @@ -0,0 +1,101 @@ +# generated by generate-example-output.sh +--- +# Source: llm-d-modelservice/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: gaudi-llm-d-modelservice + labels: + helm.sh/chart: llm-d-modelservice-v0.3.5 + app.kubernetes.io/version: "v0.2.0" + app.kubernetes.io/managed-by: Helm +--- +# Source: llm-d-modelservice/templates/decode-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: gaudi-llm-d-modelservice-decode + labels: + helm.sh/chart: llm-d-modelservice-v0.3.5 + app.kubernetes.io/version: "v0.2.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + llm-d.ai/inferenceServing: "true" + llm-d.ai/model: random_model + llm-d.ai/role: decode + template: + metadata: + labels: + llm-d.ai/inferenceServing: "true" + llm-d.ai/model: 
random_model + llm-d.ai/role: decode + spec: + + + serviceAccountName: gaudi-llm-d-modelservice + + volumes: + - emptyDir: {} + name: metrics-volume + - name: model-storage + persistentVolumeClaim: + claimName: model-pvc + readOnly: true + containers: + - name: vllm + image: opea/vllm-gaudi:1.22.0 + + args: + - --model + - meta-llama/Llama-3.1-8B-Instruct + - --port + - "8000" + - --served-model-name + - "meta-llama/Llama-3.1-8B-Instruct" + + - --block-size=128 + - --max-num-seqs=256 + - --max-seq-len-to-capture=2048 + - --max-model-len=2048 + - --max-num-batched-token=16000 + env: + - name: OMPI_MCA_btl_vader_single_copy_mechanism + value: none + - name: HABANA_LOGS + value: /tmp/habana_logs + - name: VLLM_SKIP_WARMUP + value: "true" + - name: DO_NOT_TRACK + value: "1" + - name: VLLM_USE_V1 + value: "1" + - name: DP_SIZE + value: "1" + - name: TP_SIZE + value: "1" + - name: DP_SIZE_LOCAL + value: "1" + + - name: HF_HUB_CACHE + value: /model-cache/ + - name: HF_TOKEN + valueFrom: + secretKeyRef: + name: llm-d-hf-token + key: HF_TOKEN + ports: + - containerPort: 8200 + protocol: TCP + + resources: + limits: + habana.ai/gaudi: "1" + requests: + habana.ai/gaudi: "1" + + volumeMounts: + - name: model-storage + mountPath: /model-cache diff --git a/examples/output-pd.yaml b/examples/output-pd.yaml index 3a9294f8..a5ecd9ba 100644 --- a/examples/output-pd.yaml +++ b/examples/output-pd.yaml @@ -6,7 +6,7 @@ kind: ServiceAccount metadata: name: pd-llm-d-modelservice labels: - helm.sh/chart: llm-d-modelservice-v0.3.4 + helm.sh/chart: llm-d-modelservice-v0.3.5 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm --- @@ -16,7 +16,7 @@ kind: Deployment metadata: name: pd-llm-d-modelservice-decode labels: - helm.sh/chart: llm-d-modelservice-v0.3.4 + helm.sh/chart: llm-d-modelservice-v0.3.5 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm spec: @@ -92,6 +92,8 @@ spec: value: "1" - name: TP_SIZE value: "1" + - name: DP_SIZE_LOCAL + value: "1" - name: HF_HOME value: /model-cache @@ -121,7 +123,7 @@ kind: Deployment metadata: name: pd-llm-d-modelservice-prefill labels: - helm.sh/chart: llm-d-modelservice-v0.3.4 + helm.sh/chart: llm-d-modelservice-v0.3.5 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm spec: @@ -180,6 +182,8 @@ spec: value: "1" - name: TP_SIZE value: "1" + - name: DP_SIZE_LOCAL + value: "1" - name: HF_HOME value: /model-cache diff --git a/examples/output-pvc-hf.yaml b/examples/output-pvc-hf.yaml index cb3d97b9..76c318cb 100644 --- a/examples/output-pvc-hf.yaml +++ b/examples/output-pvc-hf.yaml @@ -6,7 +6,7 @@ kind: ServiceAccount metadata: name: pvc-hf-llm-d-modelservice labels: - helm.sh/chart: llm-d-modelservice-v0.3.4 + helm.sh/chart: llm-d-modelservice-v0.3.5 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm --- @@ -16,7 +16,7 @@ kind: Deployment metadata: name: pvc-hf-llm-d-modelservice-decode labels: - helm.sh/chart: llm-d-modelservice-v0.3.4 + helm.sh/chart: llm-d-modelservice-v0.3.5 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm spec: @@ -92,6 +92,8 @@ spec: value: "1" - name: TP_SIZE value: "1" + - name: DP_SIZE_LOCAL + value: "1" - name: HF_HUB_CACHE value: /model-cache/path/to/hf_hub_cache @@ -121,7 +123,7 @@ kind: Deployment metadata: name: pvc-hf-llm-d-modelservice-prefill labels: - helm.sh/chart: llm-d-modelservice-v0.3.4 + helm.sh/chart: llm-d-modelservice-v0.3.5 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm spec: @@ -180,6 +182,8 @@ spec: value: 
"1" - name: TP_SIZE value: "1" + - name: DP_SIZE_LOCAL + value: "1" - name: HF_HUB_CACHE value: /model-cache/path/to/hf_hub_cache diff --git a/examples/output-pvc.yaml b/examples/output-pvc.yaml index 2a7dd867..0e5ff307 100644 --- a/examples/output-pvc.yaml +++ b/examples/output-pvc.yaml @@ -6,7 +6,7 @@ kind: ServiceAccount metadata: name: pvc-llm-d-modelservice labels: - helm.sh/chart: llm-d-modelservice-v0.3.4 + helm.sh/chart: llm-d-modelservice-v0.3.5 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm --- @@ -16,7 +16,7 @@ kind: Deployment metadata: name: pvc-llm-d-modelservice-decode labels: - helm.sh/chart: llm-d-modelservice-v0.3.4 + helm.sh/chart: llm-d-modelservice-v0.3.5 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm spec: @@ -92,6 +92,8 @@ spec: value: "1" - name: TP_SIZE value: "1" + - name: DP_SIZE_LOCAL + value: "1" ports: - containerPort: 8200 @@ -119,7 +121,7 @@ kind: Deployment metadata: name: pvc-llm-d-modelservice-prefill labels: - helm.sh/chart: llm-d-modelservice-v0.3.4 + helm.sh/chart: llm-d-modelservice-v0.3.5 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm spec: @@ -178,6 +180,8 @@ spec: value: "1" - name: TP_SIZE value: "1" + - name: DP_SIZE_LOCAL + value: "1" ports: - containerPort: 8000 diff --git a/examples/output-requester.yaml b/examples/output-requester.yaml index 29456785..28339e62 100644 --- a/examples/output-requester.yaml +++ b/examples/output-requester.yaml @@ -6,7 +6,7 @@ kind: ServiceAccount metadata: name: requester-llm-d-modelservice labels: - helm.sh/chart: llm-d-modelservice-v0.3.4 + helm.sh/chart: llm-d-modelservice-v0.3.5 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm --- @@ -81,6 +81,8 @@ spec: value: "1" - name: TP_SIZE value: "1" + - name: DP_SIZE_LOCAL + value: "1" - name: HF_HOME value: /model-cache @@ -138,7 +140,7 @@ kind: Deployment metadata: name: requester-llm-d-modelservice-prefill labels: - helm.sh/chart: llm-d-modelservice-v0.3.4 + helm.sh/chart: llm-d-modelservice-v0.3.5 app.kubernetes.io/version: "v0.2.0" app.kubernetes.io/managed-by: Helm spec: @@ -197,6 +199,8 @@ spec: value: "1" - name: TP_SIZE value: "1" + - name: DP_SIZE_LOCAL + value: "1" - name: HF_HOME value: /model-cache diff --git a/examples/output-xpu-pd.yaml b/examples/output-xpu-pd.yaml new file mode 100644 index 00000000..2ce8dd30 --- /dev/null +++ b/examples/output-xpu-pd.yaml @@ -0,0 +1,271 @@ +# generated by generate-example-output.sh +--- +# Source: llm-d-modelservice/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: xpu-pd-llm-d-modelservice + labels: + helm.sh/chart: llm-d-modelservice-v0.3.5 + app.kubernetes.io/version: "v0.2.0" + app.kubernetes.io/managed-by: Helm +--- +# Source: llm-d-modelservice/templates/decode-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: xpu-pd-llm-d-modelservice-decode + labels: + helm.sh/chart: llm-d-modelservice-v0.3.5 + app.kubernetes.io/version: "v0.2.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + llm-d.ai/inferenceServing: "true" + llm-d.ai/model: microsoft-dialogpt-large + llm-d.ai/role: decode + template: + metadata: + labels: + llm-d.ai/inferenceServing: "true" + llm-d.ai/model: microsoft-dialogpt-large + llm-d.ai/role: decode + spec: + initContainers: + - name: routing-proxy + args: + - --port=8000 + - --vllm-port=8200 + - --connector=nixlv2 + - -v=5 + - --secure-proxy=false + image: ghcr.io/llm-d/llm-d-routing-sidecar:v0.2.0 + 
imagePullPolicy: Always + ports: + - containerPort: 8000 + resources: {} + restartPolicy: Always + securityContext: + allowPrivilegeEscalation: false + runAsNonRoot: true + + serviceAccountName: xpu-pd-llm-d-modelservice + + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: accelerator + operator: In + values: + - intel-xpu + volumes: + - emptyDir: {} + name: metrics-volume + - name: model-storage + emptyDir: + sizeLimit: 10Gi + + containers: + - name: vllm + image: ghcr.io/llm-d/llm-d-xpu:v0.2.0 + imagePullPolicy: Never + + command: + - python3 + - -m + - vllm.entrypoints.openai.api_server + args: + - --model + - microsoft/DialoGPT-large + - --enforce-eager + - --tensor-parallel-size + - "1" + - --port + - "8200" + - --host + - 0.0.0.0 + - --kv-transfer-config + - '{"kv_connector":"NixlConnector", "kv_role":"kv_consumer"}' + env: + - name: ZE_AFFINITY_MASK + value: "0" + - name: ZE_ENABLE_PCI_ID_DEVICE_ORDER + value: "1" + - name: VLLM_NIXL_SIDE_CHANNEL_HOST + valueFrom: + fieldRef: + fieldPath: status.podIP + - name: VLLM_NIXL_SIDE_CHANNEL_PORT + value: "5557" + - name: VLLM_LOGGING_LEVEL + value: DEBUG + - name: TORCH_LLM_ALLREDUCE + value: "1" + - name: VLLM_USE_V1 + value: "1" + - name: CCL_ZE_IPC_EXCHANGE + value: pidfd + - name: VLLM_ALLOW_LONG_MAX_MODEL_LEN + value: "1" + - name: VLLM_WORKER_MULTIPROC_METHOD + value: spawn + - name: DP_SIZE + value: "1" + - name: TP_SIZE + value: "1" + - name: DP_SIZE_LOCAL + value: "1" + + - name: HF_HOME + value: /model-cache + + - name: VLLM_USE_V1 + value: "1" + - name: TORCH_LLM_ALLREDUCE + value: "1" + - name: VLLM_WORKER_MULTIPROC_METHOD + value: "spawn" + ports: + - containerPort: 8200 + protocol: TCP + - containerPort: 5557 + protocol: TCP + + resources: + limits: + cpu: "8" + gpu.intel.com/i915: "1" + memory: 24Gi + requests: + cpu: "4" + gpu.intel.com/i915: "1" + memory: 12Gi + + volumeMounts: + - name: model-storage + mountPath: /model-cache +--- +# Source: llm-d-modelservice/templates/prefill-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: xpu-pd-llm-d-modelservice-prefill + labels: + helm.sh/chart: llm-d-modelservice-v0.3.5 + app.kubernetes.io/version: "v0.2.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + llm-d.ai/inferenceServing: "true" + llm-d.ai/model: microsoft-dialogpt-large + llm-d.ai/role: prefill + template: + metadata: + labels: + llm-d.ai/inferenceServing: "true" + llm-d.ai/model: microsoft-dialogpt-large + llm-d.ai/role: prefill + spec: + + serviceAccountName: xpu-pd-llm-d-modelservice + + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: accelerator + operator: In + values: + - intel-xpu + volumes: + - emptyDir: {} + name: metrics-volume + - name: model-storage + emptyDir: + sizeLimit: 10Gi + + containers: + - name: vllm + image: ghcr.io/llm-d/llm-d-xpu:v0.2.0 + + command: + - python3 + - -m + - vllm.entrypoints.openai.api_server + args: + - --model + - microsoft/DialoGPT-large + - --enforce-eager + - --tensor-parallel-size + - "1" + - --port + - "8000" + - --host + - 0.0.0.0 + - --kv-transfer-config + - '{"kv_connector":"NixlConnector", "kv_role":"kv_producer"}' + env: + - name: ZE_AFFINITY_MASK + value: "1" + - name: ZE_ENABLE_PCI_ID_DEVICE_ORDER + value: "1" + - name: VLLM_NIXL_SIDE_CHANNEL_PORT + value: "5557" + - name: VLLM_NIXL_SIDE_CHANNEL_HOST + valueFrom: + fieldRef: + fieldPath: status.podIP 
+ - name: VLLM_LOGGING_LEVEL + value: DEBUG + - name: TORCH_LLM_ALLREDUCE + value: "1" + - name: VLLM_USE_V1 + value: "1" + - name: CCL_ZE_IPC_EXCHANGE + value: pidfd + - name: VLLM_ALLOW_LONG_MAX_MODEL_LEN + value: "1" + - name: VLLM_WORKER_MULTIPROC_METHOD + value: spawn + - name: DP_SIZE + value: "1" + - name: TP_SIZE + value: "1" + - name: DP_SIZE_LOCAL + value: "1" + + - name: HF_HOME + value: /model-cache + + - name: VLLM_USE_V1 + value: "1" + - name: TORCH_LLM_ALLREDUCE + value: "1" + - name: VLLM_WORKER_MULTIPROC_METHOD + value: "spawn" + ports: + - containerPort: 8000 + protocol: TCP + - containerPort: 5557 + protocol: TCP + + resources: + limits: + cpu: "16" + gpu.intel.com/i915: "1" + memory: 32Gi + requests: + cpu: "8" + gpu.intel.com/i915: "1" + memory: 16Gi + + volumeMounts: + - name: model-storage + mountPath: /model-cache diff --git a/examples/output-xpu.yaml b/examples/output-xpu.yaml new file mode 100644 index 00000000..148fbc61 --- /dev/null +++ b/examples/output-xpu.yaml @@ -0,0 +1,109 @@ +# generated by generate-example-output.sh +--- +# Source: llm-d-modelservice/templates/serviceaccount.yaml +apiVersion: v1 +kind: ServiceAccount +metadata: + name: xpu-llm-d-modelservice + labels: + helm.sh/chart: llm-d-modelservice-v0.3.5 + app.kubernetes.io/version: "v0.2.0" + app.kubernetes.io/managed-by: Helm +--- +# Source: llm-d-modelservice/templates/decode-deployment.yaml +apiVersion: apps/v1 +kind: Deployment +metadata: + name: xpu-llm-d-modelservice-decode + labels: + helm.sh/chart: llm-d-modelservice-v0.3.5 + app.kubernetes.io/version: "v0.2.0" + app.kubernetes.io/managed-by: Helm +spec: + replicas: 1 + selector: + matchLabels: + llm-d.ai/inferenceServing: "true" + llm-d.ai/model: deepseek-ai-deepSeek-r1-distill-qwen-1-5B + llm-d.ai/role: decode + template: + metadata: + labels: + llm-d.ai/inferenceServing: "true" + llm-d.ai/model: deepseek-ai-deepSeek-r1-distill-qwen-1-5B + llm-d.ai/role: decode + spec: + + + serviceAccountName: xpu-llm-d-modelservice + + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: accelerator + operator: In + values: + - intel-xpu + - intel-gpu-max + volumes: + - emptyDir: {} + name: metrics-volume + - name: model-storage + emptyDir: + sizeLimit: 10Gi + + containers: + - name: vllm + image: ghcr.io/llm-d/llm-d-xpu:v0.2.0 + + args: + - --model + - deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B + - --port + - "8000" + - --served-model-name + - "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" + + - --enforce-eager + - --dtype + - float16 + - --disable-sliding-window + - --gpu-memory-util + - "0.9" + - --no-enable-prefix-caching + - --max-num-batched-tokens + - "4096" + - --disable-log-requests + - --max-model-len + - "4096" + - --block-size + - "64" + env: + - name: DP_SIZE + value: "1" + - name: TP_SIZE + value: "1" + - name: DP_SIZE_LOCAL + value: "1" + + - name: HF_HOME + value: /model-cache + ports: + - containerPort: 8200 + protocol: TCP + + resources: + limits: + cpu: "8" + gpu.intel.com/xe: "1" + memory: 24Gi + requests: + cpu: "4" + gpu.intel.com/xe: "1" + memory: 12Gi + + volumeMounts: + - name: model-storage + mountPath: /model-cache diff --git a/examples/values-cpu.yaml b/examples/values-cpu.yaml index c79a7e9f..561d6cdf 100644 --- a/examples/values-cpu.yaml +++ b/examples/values-cpu.yaml @@ -20,6 +20,9 @@ routing: proxy: secure: false +accelerator: + type: cpu + # Decode pod configuation decode: replicas: 1 diff --git a/examples/values-pd.yaml b/examples/values-pd.yaml 
index 7c1f8355..18097ee5 100644 --- a/examples/values-pd.yaml +++ b/examples/values-pd.yaml @@ -57,11 +57,9 @@ decode: limits: memory: 16Gi cpu: "16" - nvidia.com/gpu: "1" requests: cpu: "16" memory: 16Gi - nvidia.com/gpu: "1" mountModelVolume: true # Prefill pod configuation @@ -98,9 +96,7 @@ prefill: limits: memory: 16Gi cpu: "16" - nvidia.com/gpu: "1" requests: cpu: "16" memory: 16Gi - nvidia.com/gpu: "1" mountModelVolume: true diff --git a/examples/values-xpu-pd.yaml b/examples/values-xpu-pd.yaml index 74e4f8b2..42006cae 100644 --- a/examples/values-xpu-pd.yaml +++ b/examples/values-xpu-pd.yaml @@ -145,11 +145,9 @@ prefill: limits: memory: 32Gi cpu: "16" - gpu.intel.com/i915: "1" requests: cpu: "8" memory: 16Gi - gpu.intel.com/i915: "1" mountModelVolume: true acceleratorTypes: diff --git a/examples/values-xpu.yaml b/examples/values-xpu.yaml index 82719ebf..6b71b07d 100644 --- a/examples/values-xpu.yaml +++ b/examples/values-xpu.yaml @@ -53,11 +53,9 @@ decode: limits: memory: 24Gi cpu: "8" - gpu.intel.com/xe: "1" requests: cpu: "4" memory: 12Gi - gpu.intel.com/xe: "1" mountModelVolume: true diff --git a/hack/generate-example-output.sh b/hack/generate-example-output.sh index 8646f406..3b852c03 100755 --- a/hack/generate-example-output.sh +++ b/hack/generate-example-output.sh @@ -19,9 +19,21 @@ generate_output() { # Generate output-cpu.yaml (Simulated CPU deployment) generate_output "cpu-sim" "examples/values-cpu.yaml" "examples/output-cpu.yaml" +# Generate output-dra.yaml (Dynamic Resource Allocation deployment for Intel B50 GPU device) +generate_output "dra" "examples/values-dra.yaml" "examples/output-dra.yaml" '--set modelArtifacts.uri=pvc+hf://model-pvc/meta-llama/Llama-3.1-8B-Instruct' + +# Generate output-gaudi.yaml (Intel Gaudi) +generate_output "gaudi" "examples/values-gaudi.yaml" "examples/output-gaudi.yaml" + # Generate output-pd.yaml (PD deployment) generate_output "pd" "examples/values-pd.yaml" "examples/output-pd.yaml" +# Generate output-xpu-pd.yaml (Intel i915) +generate_output "xpu-pd" "examples/values-xpu-pd.yaml" "examples/output-xpu-pd.yaml" + +# Generate output-xpu.yaml (Intel Xe GPU and affinity) +generate_output "xpu" "examples/values-xpu.yaml" "examples/output-xpu.yaml" + # Generate output-requester.yaml (Requester deployment) generate_output "requester" "examples/values-requester.yaml" "examples/output-requester.yaml" @@ -30,6 +42,3 @@ generate_output "pvc" "examples/values-pd.yaml" "examples/output-pvc.yaml" '--se # Generate output-pvc-hf.yaml (PVC HuggingFace model deployment) generate_output "pvc-hf" "examples/values-pd.yaml" "examples/output-pvc-hf.yaml" '--set modelArtifacts.uri=pvc+hf://pvc-name/path/to/hf_hub_cache/facebook/opt-125m' - -# Generate output-dra.yaml (Dynamic Resource Allocation deployment for Intel B50 GPU device) -generate_output "dra" "examples/values-dra.yaml" "examples/output-dra.yaml" '--set modelArtifacts.uri=pvc+hf://model-pvc/meta-llama/Llama-3.1-8B-Instruct'
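As a sanity check on the parallelism helpers added above, here is a hedged, worked example of how a hypothetical `parallelism` block should resolve in the rendered decode manifests. The input values are not from this repository's example files, and the results are derived by hand from the helper definitions.

```yaml
# Hypothetical input (not one of this repo's example values files):
# decode:
#   parallelism:
#     tensor: 2
#     data: 8
#     workers: 4
#
# Hand-derived resolution per the new helpers:
#   tensorParallelism    = 2                      # tensor as given
#   dataParallelism      = 8                      # data as given
#   dataLocalParallelism = data / workers = 2     # templating fails if data is not a multiple of workers
#   numWorkers           = 4                      # workers as given
#   numGpuPerWorker      = dataLocalParallelism * tensorParallelism = 4
#
# Expected to surface in the rendered decode pod roughly as:
env:
  - name: DP_SIZE
    value: "8"
  - name: TP_SIZE
    value: "2"
  - name: DP_SIZE_LOCAL
    value: "2"
resources:
  limits:
    nvidia.com/gpu: "4"    # numGpuPerWorker, assuming the default NVIDIA accelerator resource
  requests:
    nvidia.com/gpu: "4"
# and, when multinode is true, leaderWorkerTemplate.size: 4 (numWorkers)
```

For the vLLM-serve style commands, `--data-parallel-size 8` and `--data-parallel-size-local 2` would also be appended to the container args, since the new arg blocks only emit those flags when the respective values exceed 1.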