Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Generic EasyBuild easyblock used to install this Python package.
easyblock = 'PythonPackage'

name = 'torchtext'
version = '0.14.1'
# PyTorch version this build is paired with; reused in the dependency list below.
local_pytorch_version = '1.12.0'
versionsuffix = '-PyTorch-' + local_pytorch_version

homepage = 'https://github.com/pytorch/text'
description = "Data loaders and abstractions for text and NLP"

toolchain = {'name': 'foss', 'version': '2022a'}

# PyPI only ships wheels for this package, so the source tarball is fetched
# from the GitHub release archive instead.
source_urls = ['https://github.com/pytorch/text/archive/']
sources = ['v%(version)s.tar.gz']
patches = ['torchtext-0.14.1_deps_fix.patch']
# SHA256 checksums, keyed by file name, for the source tarball and the patch.
checksums = [
    {'v0.14.1.tar.gz': 'fd1ef3da7d9c20408c740f7dc7d02ad52a6048b46368355a1a7326d3bc4f2e63'},
    {'torchtext-0.14.1_deps_fix.patch': '3830747bfb3624a94efa8c8132b1764277b38f3b80cc9f452fba2475465d7ec1'},
]

# Build-time-only dependencies, as (name, version) pairs.
builddependencies = [
    ('RE2', '2022-06-01'),
    ('binutils', '2.38'),
    ('CMake', '3.24.3'),
]

# Runtime dependencies, as (name, version) pairs. The PyTorch entry takes its
# version from local_pytorch_version so it always matches the versionsuffix.
dependencies = [
    ('Python', '3.10.4'),
    ('SciPy-bundle', '2022.05'),
    ('SentencePiece', '0.1.97'),
    ('tqdm', '4.64.0'),
    ('PyTorch', local_pytorch_version),
    ('double-conversion', '3.2.0'),
    ('utf8proc', '2.7.0'),
]

# Installation settings for the PythonPackage easyblock. Each value must be a
# plain boolean: a trailing comma would turn it into the one-element tuple (True,).
# Fail the installation if pip attempts to download a dependency.
download_dep_fail = True
# Install with pip.
use_pip = True
# Run 'pip check' as part of the sanity check.
sanity_pip_check = True

# Use the RE2 and SentencePiece provided by EasyBuild rather than the bundled
# copies: before installing, delete every setup.py line that mentions
# 'third_party' or that starts with '_init_submodule'.
preinstallopts = (
    "sed -i '/third_party/d;/^_init_submodule/d' setup.py"
    " && "  # separator so the install command is chained after the sed
)

moduleclass = 'tools'
54 changes: 54 additions & 0 deletions easybuild/easyconfigs/t/torchtext/torchtext-0.14.1_deps_fix.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
The error "string_view has not been declared in std" appears when not using C++17
diff -u text/CMakeLists.txt.orig text/CMakeLists.txt
--- text/CMakeLists.txt.orig 2023-03-08 14:33:35.618931003 +0100
+++ text/CMakeLists.txt 2023-03-08 14:35:03.169072623 +0100
@@ -24,10 +24,10 @@
if(env_cxx_standard GREATER -1)
message(
WARNING "C++ standard version definition detected in environment variable."
- "PyTorch requires -std=c++14. Please remove -std=c++ settings in your environment.")
+ "PyTorch requires -std=c++17. Please remove -std=c++ settings in your environment.")
endif()

-set(CMAKE_CXX_STANDARD 14)
+set(CMAKE_CXX_STANDARD 17)
set(CMAKE_C_STANDARD 11)

set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
@@ -63,5 +63,10 @@

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TORCH_COMPILED_WITH_CXX_ABI} -Wall ${TORCH_CXX_FLAGS}")

-add_subdirectory(third_party)
+find_package(double-conversion)
+find_package(re2)
+find_library(SENTENCEPIECE_LIBRARY sentencepiece PATHS $ENV{EBROOTSENTENCEPIECE}/lib64)
+find_library(SENTENCEPIECE_TRAIN_LIBRARY sentencepiece_train PATHS $ENV{EBROOTSENTENCEPIECE}/lib64)
+find_library(UTF8PROC_LIBRARY utf8proc PATHS $ENV{UTF8PROC}/lib64)
+
add_subdirectory(torchtext/csrc)
diff -u text/torchtext/csrc/CMakeLists.txt.orig text/torchtext/csrc/CMakeLists.txt
--- text/torchtext/csrc/CMakeLists.txt.orig 2023-03-08 14:11:53.387582628 +0100
+++ text/torchtext/csrc/CMakeLists.txt 2023-03-08 14:16:11.159140070 +0100
@@ -18,10 +18,6 @@
set(
LIBTORCHTEXT_INCLUDE_DIRS
${PROJECT_SOURCE_DIR}
- ${PROJECT_SOURCE_DIR}/third_party/sentencepiece/src
- $<TARGET_PROPERTY:re2,INCLUDE_DIRECTORIES>
- $<TARGET_PROPERTY:double-conversion,INCLUDE_DIRECTORIES>
- $<TARGET_PROPERTY:utf8proc,INCLUDE_DIRECTORIES>
${TORCH_INSTALL_PREFIX}/include
${TORCH_INSTALL_PREFIX}/include/torch/csrc/api/include
)
@@ -119,10 +115,6 @@
set(
EXTENSION_INCLUDE_DIRS
${PROJECT_SOURCE_DIR}
- ${PROJECT_SOURCE_DIR}/third_party/sentencepiece/src
- $<TARGET_PROPERTY:re2,INCLUDE_DIRECTORIES>
- $<TARGET_PROPERTY:double-conversion,INCLUDE_DIRECTORIES>
- $<TARGET_PROPERTY:utf8proc,INCLUDE_DIRECTORIES>
${TORCH_INSTALL_PREFIX}/include
${TORCH_INSTALL_PREFIX}/include/torch/csrc/api/include
)