
Commit a115250

Re-integrate HPU after upstream refactors (vllm-project#20)
* Fix setup.py for HPU
* Fix vllm._C import ops -> vllm.hpu import ops
* more of the same thing
* re-add hpex rmsnorm and rope; but rope is crashing
* remove unnecessary comments
* add vllm/hpu files
* add hpu autodetection
* Add HabanaAttention stub
* revert accidental changes
* revert non-habana backend attention changes
* add habana attention/worker/executor, sampling fails now
* Restore unnecessarily changed files
* enable HabanaMemoryProfiler
* Make sampler pass
* restore habana fused rope
* prefill is now working!!!
* fix prefill padding; decode is now working!!!!!
* revert accidental changes
* remove unused stuff in habana_paged_attn.py
* remove diagnostic stuff from llm_engine.py
* use HabanaExecutorAsync in async_llm_engine.py
* add habana copyright headers to habana_*.py files
* fix prefill attention conformance
* minor naming fixes
* remove naive attention from habana_attn (it never worked anyway)
* re-enable profile run
* Add fake HPUGraph support
* add more metrics
* indentation fix
* ~~recipe cache metrics don't work lalalala~~
* i'm done with metrics for now
* fix corner case in which hl-smi is not available but synapse is
* FIXME: temporary setup.py workaround
* WIP: add tensor parallelism stubs
* habana worker cleanup
* tensor parallelism is now working
* remove unused files
* remove unused func
* add hpugraphrunner
* improve hpu layernorm
* Port pipelined PA
* Port context length bucketing
* remove cudagraphrunner from hpu runner
* restore HPUGraphRunner back from FakeHPUGraphRunner
* handle rotary embeddings properly on gaudi3
* oopsie! captured_block_counts was incorrect!
* captured_block_counts.append doesn't do anything
* Restore habana_main KV cache memory layout
* fix memory profiler
* overhaul hpugraph capture
* memory profiling overhaul
* format memory properly in model warmup
* add graph compilation profiler for graph capture phase
* roll back log lvl on graph capture message
* Remove unnecessary view on residual connection in RMSNorm (vllm-project#25)

---------

Co-authored-by: madamczykhabana <[email protected]>
1 parent: 01bfb22 · commit: a115250

36 files changed: +4,045 / −113 lines

pyproject.toml

Lines changed: 0 additions & 57 deletions
This file was deleted.

requirements-hpu.txt

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
+cmake>=3.21
+ninja # For faster builds.
+psutil
+ray == 2.9.3
+sentencepiece # Required for LLaMA tokenizer.
+numpy
+fastapi
+uvicorn[standard]
+pydantic >= 2.0 # Required for OpenAI server.
+prometheus_client >= 0.18.0
+pynvml == 11.5.0
+triton >= 2.1.0
+outlines == 0.0.34
+pandas
+tabulate

setup.py

Lines changed: 35 additions & 5 deletions
@@ -174,8 +174,19 @@ def build_extensions(self) -> None:
         subprocess.check_call(['cmake', *build_args], cwd=self.build_temp)
 
 
+def _is_hpu() -> bool:
+    return True
+    is_hpu_available = True
+    try:
+        subprocess.run(["hl-smi"], capture_output=True, check=True)
+    except (FileNotFoundError, PermissionError, subprocess.CalledProcessError):
+        if not os.path.exists('/dev/hl0') and not os.path.exists('/dev/hl_controlD0'):
+            is_hpu_available = False
+    return is_hpu_available
+
+
 def _is_cuda() -> bool:
-    return torch.version.cuda is not None and not _is_neuron()
+    return torch.version.cuda is not None and not _is_neuron() and not _is_hpu()
 
 
 def _is_hip() -> bool:
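Note that `_is_hpu()` returns `True` unconditionally, so the detection code after it is unreachable; the commit message flags this as "FIXME: temporary setup.py workaround". As a minimal sketch, this is what the fallback detection amounts to once the early return is dropped (the `hl-smi` probe and the `/dev/hl*` device-node paths are taken straight from the diff above):

```python
import os
import subprocess


def _is_hpu() -> bool:
    # Probe the Habana SMI tool; a clean exit implies a working HPU stack.
    try:
        subprocess.run(["hl-smi"], capture_output=True, check=True)
        return True
    except (FileNotFoundError, PermissionError, subprocess.CalledProcessError):
        # hl-smi is missing or unusable: fall back to checking for the
        # Gaudi device nodes directly.
        return os.path.exists("/dev/hl0") or os.path.exists("/dev/hl_controlD0")
```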
@@ -190,7 +201,6 @@ def _is_neuron() -> bool:
         torch_neuronx_installed = False
     return torch_neuronx_installed
 
-
 def _install_punica() -> bool:
     return bool(int(os.getenv("VLLM_INSTALL_PUNICA_KERNELS", "0")))
 
@@ -265,6 +275,17 @@ def find_version(filepath: str) -> str:
         return version_match.group(1)
     raise RuntimeError("Unable to find version string.")
 
+def get_gaudi_sw_version():
+    """
+    Returns the driver version.
+    """
+    # Enable console printing for `hl-smi` check
+    output = subprocess.run(
+        "hl-smi", shell=True, text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env={"ENABLE_CONSOLE": "true"}
+    )
+    if output.returncode == 0 and output.stdout:
+        return output.stdout.split("\n")[2].replace(" ", "").split(":")[1][:-1].split("-")[0]
+    return "0.0.0" # when hl-smi is not available
 
 def get_vllm_version() -> str:
     version = find_version(get_path("vllm", "__init__.py"))
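`get_gaudi_sw_version()` scrapes the version from the third line of `hl-smi` output via a chain of string operations, falling back to `"0.0.0"` when the tool is unavailable. A worked example of that chain on a made-up line shaped the way the code expects (the actual `hl-smi` banner format is an assumption here):

```python
# Hypothetical third line of `hl-smi` output; the real format is not verified.
line = "| Driver Version:      1.15.0-fw-49.0.0.0          |"

step = line.replace(" ", "")  # '|DriverVersion:1.15.0-fw-49.0.0.0|'
step = step.split(":")[1]     # '1.15.0-fw-49.0.0.0|'
step = step[:-1]              # '1.15.0-fw-49.0.0.0'  (drop the trailing '|')
print(step.split("-")[0])     # '1.15.0'
```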
@@ -286,6 +307,12 @@ def get_vllm_version() -> str:
         if neuron_version != MAIN_CUDA_VERSION:
             neuron_version_str = neuron_version.replace(".", "")[:3]
             version += f"+neuron{neuron_version_str}"
+    elif _is_hpu():
+        # Get the Intel Gaudi Software Suite version
+        gaudi_sw_version = str(get_gaudi_sw_version())
+        if gaudi_sw_version != MAIN_CUDA_VERSION:
+            gaudi_sw_version = gaudi_sw_version.replace(".", "")[:3]
+            version += f"+gaudi{gaudi_sw_version}"
     else:
         raise RuntimeError("Unknown runtime environment")
 
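The new branch mirrors the Neuron one: the Gaudi software version is normalized and appended as a local version suffix. A quick illustration with hypothetical values (both version strings are assumptions):

```python
# Hypothetical inputs: the vLLM base version and a Gaudi SW suite version.
version = "0.4.0"
gaudi_sw_version = "1.15.0"

suffix = gaudi_sw_version.replace(".", "")[:3]  # '1150' truncated to '115'
print(f"{version}+gaudi{suffix}")               # 0.4.0+gaudi115
```

The comparison against `MAIN_CUDA_VERSION` is inherited from the Neuron code path just above, so the suffix is skipped only in the unlikely case that the Gaudi version string equals it.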

@@ -318,9 +345,12 @@ def get_requirements() -> List[str]:
     elif _is_neuron():
         with open(get_path("requirements-neuron.txt")) as f:
             requirements = f.read().strip().split("\n")
+    elif _is_hpu():
+        with open(get_path("requirements-hpu.txt")) as f:
+            requirements = f.read().strip().split("\n")
     else:
         raise ValueError(
-            "Unsupported platform, please use CUDA, ROCM or Neuron.")
+            "Unsupported platform, please use CUDA, ROCM, Neuron or HPU.")
 
     return requirements
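As with Neuron, the HPU requirements file is read and split into one entry per line. A tiny illustration, with the file body abbreviated from the `requirements-hpu.txt` diff above:

```python
# Abbreviated requirements-hpu.txt contents from the diff above.
contents = "cmake>=3.21\nninja # For faster builds.\npsutil\n"
requirements = contents.strip().split("\n")
print(requirements)
# ['cmake>=3.21', 'ninja # For faster builds.', 'psutil']
```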

@@ -333,7 +363,7 @@ def get_requirements() -> List[str]:
 if _install_punica():
     ext_modules.append(CMakeExtension(name="vllm._punica_C"))
 
-if not _is_neuron():
+if not (_is_neuron() or _is_hpu()):
     ext_modules.append(CMakeExtension(name="vllm._C"))
 
 package_data = {
@@ -369,6 +399,6 @@ def get_requirements() -> List[str]:
     python_requires=">=3.8",
     install_requires=get_requirements(),
     ext_modules=ext_modules,
-    cmdclass={"build_ext": cmake_build_ext} if not _is_neuron() else {},
+    cmdclass={"build_ext": cmake_build_ext} if not _is_neuron() or _is_hpu() else {},
     package_data=package_data,
 )
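One thing worth flagging: unlike the parenthesized `ext_modules` guard above, this conditional expression parses as `(not _is_neuron()) or _is_hpu()`, since `not` binds tighter than `or`. On an HPU build it therefore still selects `cmake_build_ext`, even though no `vllm._C` extension is registered there. A quick check of the two forms (whether the asymmetry is intentional isn't stated in the commit):

```python
is_neuron, is_hpu = False, True  # an HPU build

# As written in the cmdclass expression: `not` binds tighter than `or`.
print(not is_neuron or is_hpu)    # True  -> cmake_build_ext is selected
# The parenthesized form used for the ext_modules guard.
print(not (is_neuron or is_hpu))  # False -> cmake_build_ext would be skipped
```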
