@@ 6,7 6,7 @@ is later corrected. codegen_external.py is patched to avoid duplicate
functions and add the static keyword as in the existing generated file.
diff --git a/tools/gen_flatbuffers.sh b/tools/gen_flatbuffers.sh
-index cc0263dbbf..ac34e84b82 100644
+index cc0263dbb..ac34e84b8 100644
--- a/tools/gen_flatbuffers.sh
+++ b/tools/gen_flatbuffers.sh
@@ -1,13 +1,13 @@
@@ 32,10 32,10 @@ index cc0263dbbf..ac34e84b82 100644
-c "$ROOT/torch/csrc/jit/serialization/mobile_bytecode.fbs"
echo '// @generated' >> "$ROOT/torch/csrc/jit/serialization/mobile_bytecode_generated.h"
diff --git a/torch/csrc/jit/tensorexpr/codegen_external.py b/torch/csrc/jit/tensorexpr/codegen_external.py
-index 120520b139..0c8587f02d 100644
+index 5dcf1b284..0e20b0c10 100644
--- a/torch/csrc/jit/tensorexpr/codegen_external.py
+++ b/torch/csrc/jit/tensorexpr/codegen_external.py
-@@ -16,9 +16,14 @@ def gen_external(native_functions_path, tags_path, external_path):
+@@ -21,9 +21,14 @@ def gen_external(native_functions_path, tags_path, external_path):
native_functions = parse_native_yaml(native_functions_path, tags_path)
func_decls = []
func_registrations = []
@@ 51,7 51,7 @@ index 120520b139..0c8587f02d 100644
args = schema.arguments
# Only supports extern calls for functions with out variants
if not schema.is_out_fn():
-@@ -48,7 +53,7 @@ def gen_external(native_functions_path, tags_path, external_path):
+@@ -63,7 +68,7 @@ def gen_external(native_functions_path, tags_path, external_path):
# print(tensor_decls, name, arg_names)
func_decl = f"""\
@@ 61,7 61,7 @@ index 120520b139..0c8587f02d 100644
void** buf_data,
int64_t* buf_ranks,
diff --git a/torchgen/decompositions/gen_jit_decompositions.py b/torchgen/decompositions/gen_jit_decompositions.py
-index 7cfbb803f9..2e69bb1868 100644
+index b42948045..e1cfc73a5 100644
--- a/torchgen/decompositions/gen_jit_decompositions.py
+++ b/torchgen/decompositions/gen_jit_decompositions.py
@@ -1,8 +1,12 @@
@@ 76,9 76,9 @@ index 7cfbb803f9..2e69bb1868 100644
+else:
+ decomposition_table = {}
- # from torchgen.code_template import CodeTemplate
-@@ -85,7 +89,7 @@ def write_decomposition_util_file(path: str) -> None:
+ # from torchgen.code_template import CodeTemplate
+@@ -86,7 +90,7 @@ def write_decomposition_util_file(path: str) -> None:
def main() -> None:
@@ 88,40 88,41 @@ index 7cfbb803f9..2e69bb1868 100644
write_decomposition_util_file(str(upgrader_path))
diff --git a/torchgen/operator_versions/gen_mobile_upgraders.py b/torchgen/operator_versions/gen_mobile_upgraders.py
-index e5287cffc5..57f3c38096 100644
+index 845034cb7..a1c5767c2 100644
--- a/torchgen/operator_versions/gen_mobile_upgraders.py
+++ b/torchgen/operator_versions/gen_mobile_upgraders.py
-@@ -2,10 +2,12 @@
- import os
+@@ -6,10 +6,13 @@ import os
from enum import Enum
+ from operator import itemgetter
from pathlib import Path
+import sys
- from typing import Any, Dict, List
+ from typing import Any
-import torch
-from torch.jit.generate_bytecode import generate_upgraders_bytecode
+if len(sys.argv) < 2 or sys.argv[1] != "dummy":
+ import torch
+ from torch.jit.generate_bytecode import generate_upgraders_bytecode
-
++
from torchgen.code_template import CodeTemplate
from torchgen.operator_versions.gen_mobile_upgraders_constant import (
-@@ -262,7 +264,10 @@ def construct_register_size(register_size_from_yaml: int) -> str:
+ MOBILE_UPGRADERS_HEADER_DESCRIPTION,
+@@ -263,7 +266,10 @@ def construct_register_size(register_size_from_yaml: int) -> str:
def construct_version_maps(
- upgrader_bytecode_function_to_index_map: Dict[str, Any]
+ upgrader_bytecode_function_to_index_map: dict[str, Any],
) -> str:
- version_map = torch._C._get_operator_version_map()
+ if len(sys.argv) < 2 or sys.argv[1] != "dummy":
+ version_map = torch._C._get_operator_version_map()
+ else:
+ version_map = {}
- sorted_version_map_ = sorted(version_map.items(), key=lambda item: item[0]) # type: ignore[no-any-return]
- sorted_version_map = {name: lst for name, lst in sorted_version_map_}
+ sorted_version_map_ = sorted(version_map.items(), key=itemgetter(0)) # type: ignore[no-any-return]
+ sorted_version_map = dict(sorted_version_map_)
-@@ -379,7 +384,10 @@ def sort_upgrader(upgrader_list: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+@@ -375,7 +381,10 @@ def sort_upgrader(upgrader_list: list[dict[str, Any]]) -> list[dict[str, Any]]:
- def main() -> None:
+ def main() -> None:
- upgrader_list = generate_upgraders_bytecode()
+ if len(sys.argv) < 2 or sys.argv[1] != "dummy":
+ upgrader_list = generate_upgraders_bytecode()
@@ 131,16 132,24 @@ index e5287cffc5..57f3c38096 100644
for up in sorted_upgrader_list:
print("after sort upgrader : ", next(iter(up)))
diff --git a/torchgen/shape_functions/gen_jit_shape_functions.py b/torchgen/shape_functions/gen_jit_shape_functions.py
-index c6336a6951..34e394d818 100644
+index 56a3d8bf0..ffd0785fd 100644
--- a/torchgen/shape_functions/gen_jit_shape_functions.py
+++ b/torchgen/shape_functions/gen_jit_shape_functions.py
-@@ -18,16 +18,20 @@ you are in the root directory of the Pytorch git repo"""
+@@ -1,6 +1,7 @@
+ #!/usr/bin/env python3
+ import os
+ import sys
++import importlib
+ from importlib.util import module_from_spec, spec_from_file_location
+ from itertools import chain
+ from pathlib import Path
+@@ -18,17 +19,21 @@ you are in the root directory of the Pytorch git repo"""
if not file_path.exists():
- raise Exception(err_msg)
+ raise Exception(err_msg) # noqa: TRY002
--spec = importlib.util.spec_from_file_location(module_name, file_path)
+-spec = spec_from_file_location(module_name, file_path)
-assert spec is not None
--module = importlib.util.module_from_spec(spec)
+-module = module_from_spec(spec)
-sys.modules[module_name] = module
-assert spec.loader is not None
-assert module is not None
@@ 148,6 157,7 @@ index c6336a6951..34e394d818 100644
-
-bounded_compute_graph_mapping = module.bounded_compute_graph_mapping
-shape_compute_graph_mapping = module.shape_compute_graph_mapping
+-
+if len(sys.argv) < 2 or sys.argv[1] != "dummy":
+ spec = importlib.util.spec_from_file_location(module_name, file_path)
+ assert spec is not None
@@ 159,9 169,10 @@ index c6336a6951..34e394d818 100644
+
+ bounded_compute_graph_mapping = module.bounded_compute_graph_mapping
+ shape_compute_graph_mapping = module.shape_compute_graph_mapping
++
+else:
+ bounded_compute_graph_mapping = {}
+ shape_compute_graph_mapping = {}
-
SHAPE_HEADER = r"""
+ /**
@@ 1,16 1,14 @@
Patch build files to also system libraries instead of bundled ones for the
libraries not supported or working only by specifying USE_SYSTEM_LIBS. This
includes using the clog, cpuinfo, fbgemm, foxi, fp16, fxdiv, googletest,
-ideep, miniz, nnpack, oneapi-dnnl, pocketfft, pthreadpool, qnnpack,
+httplib, ideep, miniz, nnpack, oneapi-dnnl, pocketfft, pthreadpool,
qnnpack-pytorch, tensorpipe, valgrind and xnnpack packages.
-For QNNPACK, two versions were bundled and are required: The upstream one and
-an internal fork (now in the package qnnpack-pytorch).
diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt
-index 96fc297..7f27b66 100644
+index 085af373e..3287429b4 100644
--- a/aten/src/ATen/CMakeLists.txt
+++ b/aten/src/ATen/CMakeLists.txt
-@@ -362,9 +362,9 @@ if(AT_NNPACK_ENABLED)
+@@ -468,9 +468,9 @@ if(AT_NNPACK_ENABLED)
list(APPEND ATen_CPU_DEPENDENCY_LIBS nnpack) # cpuinfo is added below
endif()
@@ 21,13 19,13 @@ index 96fc297..7f27b66 100644
+ list(APPEND ATen_CPU_DEPENDENCY_LIBS DNNL::dnnl)
+endif(USE_MKLDNN)
- list(APPEND ATen_CPU_DEPENDENCY_LIBS cpuinfo)
-
+ if(USE_MKLDNN_ACL)
+ list(APPEND ATen_CPU_INCLUDE ${ACL_INCLUDE_DIRS})
diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
-index 221e3f3..417f601 100644
+index d2d23b7ab..1a7e5a042 100644
--- a/caffe2/CMakeLists.txt
+++ b/caffe2/CMakeLists.txt
-@@ -110,9 +110,6 @@ if(NOT MSVC AND USE_XNNPACK)
+@@ -91,9 +91,6 @@ if(NOT MSVC AND USE_XNNPACK)
if(NOT TARGET fxdiv)
set(FXDIV_BUILD_TESTS OFF CACHE BOOL "")
set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "")
@@ 37,7 35,7 @@ index 221e3f3..417f601 100644
endif()
endif()
-@@ -975,7 +972,6 @@ elseif(USE_CUDA)
+@@ -1135,7 +1132,6 @@ if(USE_XPU)
endif()
if(NOT MSVC AND USE_XNNPACK)
@@ 45,15 43,26 @@ index 221e3f3..417f601 100644
endif()
# ==========================================================
-@@ -1314,6 +1310,7 @@ target_link_libraries(torch_cpu PUBLIC c10)
+@@ -1254,8 +1250,8 @@ endif()
+ target_include_directories(torch_cpu PRIVATE
+ ${TORCH_ROOT}/third_party/cpp-httplib)
+
+-target_include_directories(torch_cpu PRIVATE
+- ${TORCH_ROOT}/third_party/nlohmann/include)
++find_package(httplib REQUIRED)
++target_link_libraries(torch_cpu PUBLIC httplib::httplib)
+
+ install(DIRECTORY "${TORCH_SRC_DIR}/csrc"
+ DESTINATION ${TORCH_INSTALL_INCLUDE_DIR}/torch
+@@ -1494,6 +1490,7 @@ target_link_libraries(torch_cpu PUBLIC c10)
target_link_libraries(torch_cpu PUBLIC ${Caffe2_PUBLIC_DEPENDENCY_LIBS})
target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_LIBS})
target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_WHOLE_LINK_LIBS})
+target_link_libraries(torch_cpu PRIVATE miniz clog)
- target_include_directories(torch_cpu INTERFACE $<INSTALL_INTERFACE:include>)
- target_include_directories(torch_cpu PRIVATE ${Caffe2_CPU_INCLUDE})
- target_include_directories(torch_cpu SYSTEM PRIVATE "${Caffe2_DEPENDENCY_INCLUDE}")
-@@ -1570,7 +1567,7 @@ if(BUILD_STATIC_RUNTIME_BENCHMARK)
+ if(USE_MPI)
+ target_link_libraries(torch_cpu PRIVATE MPI::MPI_CXX)
+ endif()
+@@ -1728,7 +1725,7 @@ if(BUILD_STATIC_RUNTIME_BENCHMARK)
add_executable(static_runtime_bench "${STATIC_RUNTIME_BENCHMARK_SRCS}")
add_executable(static_runtime_test "${STATIC_RUNTIME_TEST_SRCS}")
target_link_libraries(static_runtime_bench torch_library benchmark)
@@ 61,8 70,8 @@ index 221e3f3..417f601 100644
+ target_link_libraries(static_runtime_test torch_library gtest_main gtest)
endif()
- if(BUILD_TENSOREXPR_BENCHMARK)
-@@ -1601,7 +1598,7 @@ if(BUILD_MOBILE_TEST)
+ if(BUILD_MOBILE_BENCHMARK)
+@@ -1747,7 +1744,7 @@ if(BUILD_MOBILE_TEST)
foreach(test_src ${ATen_MOBILE_TEST_SRCS})
get_filename_component(test_name ${test_src} NAME_WE)
add_executable(${test_name} "${test_src}")
@@ 71,32 80,61 @@ index 221e3f3..417f601 100644
target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
target_include_directories(${test_name} PRIVATE ${ATen_CPU_INCLUDE})
-@@ -1622,13 +1619,13 @@ if(BUILD_TEST)
+@@ -1768,7 +1765,7 @@ if(BUILD_TEST)
if(NOT MSVC)
add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}" ../aten/src/ATen/native/quantized/AffineQuantizerBase.cpp)
# TODO: Get rid of c10 dependency (which is only needed for the implementation of AT_ERROR)
-- target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main)
-+ target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main gtest)
+- target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main nlohmann)
++ target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main gtest nlohmann)
if(USE_FBGEMM)
target_link_libraries(${test_name}_${CPU_CAPABILITY} fbgemm)
endif()
+@@ -1782,7 +1779,7 @@ if(BUILD_TEST)
+ endif()
else()
add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}")
-- target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library gtest_main)
-+ target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library gtest_main gtest)
+- target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library sleef gtest_main)
++ target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library sleef gtest_main gtest)
endif()
target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<INSTALL_INTERFACE:include>)
target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
-@@ -1645,7 +1642,7 @@ if(BUILD_TEST)
+@@ -1799,7 +1796,7 @@ if(BUILD_TEST)
foreach(test_src ${Caffe2_CPU_TEST_SRCS})
get_filename_component(test_name ${test_src} NAME_WE)
add_executable(${test_name} "${test_src}")
- target_link_libraries(${test_name} torch_library gtest_main)
+ target_link_libraries(${test_name} torch_library gtest_main gtest)
- target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
- target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
- target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
-@@ -1703,7 +1700,7 @@ if(BUILD_TEST)
+ if(NOT MSVC)
+ target_link_libraries(${test_name} stdc++)
+ endif()
+@@ -1823,7 +1820,7 @@ if(BUILD_TEST)
+ add_executable(${test_name} "${test_src}")
+ find_library(metal NAMES Metal)
+ find_library(foundation NAMES Foundation)
+- target_link_libraries(${test_name} torch_library gtest_main ${metal} ${foundation})
++ target_link_libraries(${test_name} torch_library gtest_main gtest ${metal} ${foundation})
+ target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
+ target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
+ target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
+@@ -1843,7 +1840,7 @@ if(BUILD_TEST)
+ foreach(test_src ${Caffe2_GPU_TEST_SRCS})
+ get_filename_component(test_name ${test_src} NAME_WE)
+ add_executable(${test_name} "${test_src}")
+- target_link_libraries(${test_name} torch_library gtest_main)
++ target_link_libraries(${test_name} torch_library gtest_main gtest)
+ if(USE_CUDNN AND ${test_name} MATCHES "cudnn")
+ target_link_libraries(${test_name} torch::cudnn)
+ endif()
+@@ -1865,7 +1862,7 @@ if(BUILD_TEST)
+ foreach(test_src ${Caffe2_XPU_TEST_SRCS})
+ get_filename_component(test_name ${test_src} NAME_WE)
+ add_executable(${test_name} "${test_src}")
+- target_link_libraries(${test_name} torch_library gtest_main)
++ target_link_libraries(${test_name} torch_library gtest_main gtest)
+ target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
+ target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
+ add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
+@@ -1880,7 +1877,7 @@ if(BUILD_TEST)
foreach(test_src ${Caffe2_VULKAN_TEST_SRCS})
get_filename_component(test_name ${test_src} NAME_WE)
add_executable(${test_name} "${test_src}")
@@ 105,23 143,32 @@ index 221e3f3..417f601 100644
target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
+@@ -1899,7 +1896,7 @@ if(BUILD_TEST)
+ foreach(test_src ${Caffe2_HIP_TEST_SRCS})
+ get_filename_component(test_name ${test_src} NAME_WE)
+ add_executable(${test_name} "${test_src}")
+- target_link_libraries(${test_name} torch_library gtest_main)
++ target_link_libraries(${test_name} torch_library gtest_main gtest)
+ target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
+ target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE} ${Caffe2_HIP_INCLUDE})
+ target_compile_options(${test_name} PRIVATE ${HIP_CXX_FLAGS})
diff --git a/caffe2/serialize/CMakeLists.txt b/caffe2/serialize/CMakeLists.txt
-index 1552b59..67e1a9a 100644
+index ebbff0f29..dcded2590 100644
--- a/caffe2/serialize/CMakeLists.txt
+++ b/caffe2/serialize/CMakeLists.txt
@@ -2,7 +2,6 @@ file(GLOB tmp *_test.cc)
set(Caffe2_CPU_TEST_SRCS ${Caffe2_CPU_TEST_SRCS} ${tmp})
list(APPEND Caffe2_CPU_SRCS
-- ${PROJECT_SOURCE_DIR}/third_party/miniz-2.1.0/miniz.c
+- ${PROJECT_SOURCE_DIR}/third_party/miniz-3.0.2/miniz.c
${CMAKE_CURRENT_SOURCE_DIR}/inline_container.cc
${CMAKE_CURRENT_SOURCE_DIR}/istream_adapter.cc
${CMAKE_CURRENT_SOURCE_DIR}/file_adapter.cc
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
-index 8c0e3c2..d65576a 100644
+index be45936a8..bb1aa1cc1 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
-@@ -298,7 +298,7 @@ endif()
+@@ -276,7 +276,7 @@ endif()
# --- [ PocketFFT
set(AT_POCKETFFT_ENABLED 0)
if(NOT AT_MKL_ENABLED)
@@ 130,27 177,7 @@ index 8c0e3c2..d65576a 100644
if(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}")
message(FATAL_ERROR "pocketfft directory not found, expected ${POCKETFFT_INCLUDE_DIR}")
elif(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}/pocketfft_hdronly.h")
-@@ -501,19 +501,6 @@ if(USE_QNNPACK)
- set(QNNPACK_BUILD_TESTS OFF CACHE BOOL "")
- set(QNNPACK_BUILD_BENCHMARKS OFF CACHE BOOL "")
- set(QNNPACK_LIBRARY_TYPE "static" CACHE STRING "")
-- add_subdirectory(
-- "${QNNPACK_SOURCE_DIR}"
-- "${CONFU_DEPENDENCIES_BINARY_DIR}/QNNPACK")
--
-- # TODO: See https://github.com/pytorch/pytorch/issues/56285
-- if(CMAKE_CXX_COMPILER_ID MATCHES "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
-- target_compile_options(qnnpack PRIVATE -Wno-deprecated-declarations)
-- endif()
--
-- # We build static versions of QNNPACK and pthreadpool but link
-- # them into a shared library for Caffe2, so they need PIC.
-- set_property(TARGET qnnpack PROPERTY POSITION_INDEPENDENT_CODE ON)
-- set_property(TARGET cpuinfo PROPERTY POSITION_INDEPENDENT_CODE ON)
-
- if(QNNPACK_CUSTOM_THREADPOOL)
- target_compile_definitions(
-@@ -562,13 +549,6 @@ if(USE_PYTORCH_QNNPACK)
+@@ -460,15 +460,6 @@ if(USE_PYTORCH_QNNPACK)
set(PYTORCH_QNNPACK_BUILD_TESTS OFF CACHE BOOL "")
set(PYTORCH_QNNPACK_BUILD_BENCHMARKS OFF CACHE BOOL "")
set(PYTORCH_QNNPACK_LIBRARY_TYPE "static" CACHE STRING "")
@@ 161,10 188,33 @@ index 8c0e3c2..d65576a 100644
- # them into a shared library for Caffe2, so they need PIC.
- set_property(TARGET pytorch_qnnpack PROPERTY POSITION_INDEPENDENT_CODE ON)
- set_property(TARGET cpuinfo PROPERTY POSITION_INDEPENDENT_CODE ON)
+- # QNNPACK depends on gemmlowp headers
+- target_include_directories(pytorch_qnnpack PRIVATE "${CAFFE2_THIRD_PARTY_ROOT}/gemmlowp")
+ endif()
- if(PYTORCH_QNNPACK_CUSTOM_THREADPOOL)
- target_compile_definitions(
-@@ -750,11 +730,6 @@ if(BUILD_TEST OR BUILD_MOBILE_BENCHMARK OR BUILD_MOBILE_TEST)
+ list(APPEND Caffe2_DEPENDENCY_LIBS pytorch_qnnpack)
+@@ -558,16 +549,15 @@ if(USE_XNNPACK AND NOT USE_SYSTEM_XNNPACK)
+ list(APPEND Caffe2_DEPENDENCY_LIBS XNNPACK microkernels-prod)
+ elseif(NOT TARGET XNNPACK AND USE_SYSTEM_XNNPACK)
+ add_library(XNNPACK SHARED IMPORTED)
+- add_library(microkernels-prod SHARED IMPORTED)
++ add_library(microkernels-prod INTERFACE IMPORTED)
+ find_library(XNNPACK_LIBRARY XNNPACK)
+- find_library(microkernels-prod_LIBRARY microkernels-prod)
+ set_property(TARGET XNNPACK PROPERTY IMPORTED_LOCATION "${XNNPACK_LIBRARY}")
+- set_property(TARGET microkernels-prod PROPERTY IMPORTED_LOCATION "${microkernels-prod_LIBRARY}")
+- if(NOT XNNPACK_LIBRARY or NOT microkernels-prod_LIBRARY)
++ set_property(TARGET microkernels-prod PROPERTY INTERFACE_LINK_LIBRARIES XNNPACK)
++ if(NOT XNNPACK_LIBRARY)
+ message(FATAL_ERROR "Cannot find XNNPACK")
+ endif()
+ message("-- Found XNNPACK: ${XNNPACK_LIBRARY}")
+- list(APPEND Caffe2_DEPENDENCY_LIBS XNNPACK microkernels-prod)
++ list(APPEND Caffe2_DEPENDENCY_LIBS XNNPACK)
+ endif()
+
+ # ---[ Vulkan deps
+@@ -650,11 +640,6 @@ if(BUILD_TEST OR BUILD_MOBILE_BENCHMARK OR BUILD_MOBILE_TEST)
# this shouldn't be necessary anymore.
get_property(INC_DIR_temp DIRECTORY PROPERTY INCLUDE_DIRECTORIES)
set_property(DIRECTORY PROPERTY INCLUDE_DIRECTORIES "")
@@ 176,9 226,9 @@ index 8c0e3c2..d65576a 100644
# We will not need to test benchmark lib itself.
set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable benchmark testing as we don't need it.")
-@@ -829,16 +804,6 @@ if(USE_FBGEMM)
- else()
- set(FBGEMM_LIBRARY_TYPE "static" CACHE STRING "")
+@@ -732,16 +717,6 @@ if(USE_FBGEMM)
+ if(USE_ASAN)
+ set(USE_SANITIZER "address,undefined" CACHE STRING "-fsanitize options for FBGEMM")
endif()
- add_subdirectory("${FBGEMM_SOURCE_DIR}")
- set_property(TARGET fbgemm_generic PROPERTY POSITION_INDEPENDENT_CODE ON)
@@ 190,44 240,39 @@ index 8c0e3c2..d65576a 100644
- target_compile_options_if_supported(asmjit -Wno-deprecated-copy)
- target_compile_options_if_supported(asmjit -Wno-unused-but-set-variable)
- endif()
+ if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+ target_compile_options_if_supported(asmjit -Wno-extra-semi)
+ target_compile_options_if_supported(fbgemm -Wno-extra-semi)
+@@ -829,7 +804,7 @@ if(NOT TARGET fp16 AND NOT USE_SYSTEM_FP16)
+ "${CONFU_DEPENDENCIES_BINARY_DIR}/FP16")
endif()
-
- if(USE_FBGEMM)
-@@ -1001,7 +966,7 @@ if(NOT TARGET fp16 AND NOT USE_SYSTEM_FP16)
- "${FP16_SOURCE_DIR}"
- "${CONFU_DEPENDENCIES_BINARY_DIR}/FP16")
elseif(NOT TARGET fp16 AND USE_SYSTEM_FP16)
- add_library(fp16 STATIC "/usr/include/fp16.h")
+ add_library(fp16 STATIC "#FP16_INCLUDE_DIR")
set_target_properties(fp16 PROPERTIES LINKER_LANGUAGE C)
endif()
list(APPEND Caffe2_DEPENDENCY_LIBS fp16)
-@@ -1395,7 +1360,6 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE)
-
- # Tensorpipe uses cuda_add_library
- torch_update_find_cuda_flags()
+@@ -1170,10 +1145,9 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE)
+ message(WARNING "Archived TensorPipe forces CMake compatibility mode")
+ set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
+ endif()
- add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/tensorpipe)
-
- list(APPEND Caffe2_DEPENDENCY_LIBS tensorpipe)
- if(USE_CUDA)
-@@ -1551,7 +1515,6 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
- set_target_properties(onnx_proto PROPERTIES CXX_STANDARD 17)
+ # Suppress warning to unblock libnop comiplation by clang-17
+ # See https://github.com/pytorch/pytorch/issues/151316
+ target_compile_options_if_supported(tensorpipe -Wno-missing-template-arg-list-after-template-kw)
+ if(CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
+ unset(CMAKE_POLICY_VERSION_MINIMUM)
endif()
- endif()
-- add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/foxi EXCLUDE_FROM_ALL)
-
- add_definitions(-DONNX_NAMESPACE=${ONNX_NAMESPACE})
- if(NOT USE_SYSTEM_ONNX)
-@@ -1582,7 +1545,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
+@@ -1340,7 +1314,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
endif()
set_property(TARGET onnx_proto PROPERTY IMPORTED_LOCATION ${ONNX_PROTO_LIBRARY})
message("-- Found onnx: ${ONNX_LIBRARY} ${ONNX_PROTO_LIBRARY}")
- list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx)
+ list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx onnx_optimizer)
endif()
- include_directories(${FOXI_INCLUDE_DIRS})
- list(APPEND Caffe2_DEPENDENCY_LIBS foxi_loader)
-@@ -1752,9 +1715,8 @@ if(NOT INTERN_BUILD_MOBILE)
+ # Recover the build shared libs option.
+ set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS})
+@@ -1500,9 +1474,8 @@ if(NOT INTERN_BUILD_MOBILE)
endif()
if(USE_MKLDNN)
include(${CMAKE_CURRENT_LIST_DIR}/public/mkldnn.cmake)
@@ 235,10 280,10 @@ index 8c0e3c2..d65576a 100644
+ if(DNNL_FOUND)
set(AT_MKLDNN_ENABLED 1)
- include_directories(AFTER SYSTEM ${MKLDNN_INCLUDE_DIR})
- if(BUILD_CAFFE2_OPS)
- list(APPEND Caffe2_DEPENDENCY_LIBS caffe2::mkldnn)
- endif(BUILD_CAFFE2_OPS)
-@@ -1819,7 +1781,7 @@ endif()
+ else()
+ message(WARNING "MKLDNN could not be found.")
+ caffe2_update_option(USE_MKLDNN OFF)
+@@ -1583,7 +1556,7 @@ endif()
#
set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE)
@@ 247,7 292,7 @@ index 8c0e3c2..d65576a 100644
# Disable compiler feature checks for `fmt`.
#
-@@ -1828,7 +1790,6 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
+@@ -1592,7 +1565,6 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
# CMAKE_CXX_FLAGS in ways that break feature checks. Since we already know
# `fmt` is compatible with a superset of the compilers that PyTorch is, it
# shouldn't be too bad to just disable the checks.
@@ 256,7 301,7 @@ index 8c0e3c2..d65576a 100644
list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)
set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libs" FORCE)
diff --git a/cmake/External/nnpack.cmake b/cmake/External/nnpack.cmake
-index a41343c..6075bdd 100644
+index 8a4a310d6..f413d2e61 100644
--- a/cmake/External/nnpack.cmake
+++ b/cmake/External/nnpack.cmake
@@ -40,7 +40,7 @@ endif()
@@ 268,7 313,7 @@ index a41343c..6075bdd 100644
message(STATUS "Brace yourself, we are building NNPACK")
set(CAFFE2_THIRD_PARTY_ROOT ${PROJECT_SOURCE_DIR}/third_party)
-@@ -114,6 +114,5 @@ endif()
+@@ -94,6 +94,5 @@ endif()
# (4) Catch-all: not supported.
##############################################################################
@@ 278,7 323,7 @@ index a41343c..6075bdd 100644
+set(NNPACK_FOUND TRUE)
+set(USE_NNPACK ON)
diff --git a/cmake/public/mkldnn.cmake b/cmake/public/mkldnn.cmake
-index 50404d3..ca067f0 100644
+index 87935625f..9f8fa3df8 100644
--- a/cmake/public/mkldnn.cmake
+++ b/cmake/public/mkldnn.cmake
@@ -4,7 +4,7 @@ if(CPU_AARCH64)
@@ 290,105 335,93 @@ index 50404d3..ca067f0 100644
if(NOT TARGET caffe2::mkldnn)
add_library(caffe2::mkldnn INTERFACE IMPORTED)
-@@ -15,7 +15,7 @@ set_property(
+@@ -15,4 +15,4 @@ set_property(
${MKLDNN_INCLUDE_DIR})
set_property(
TARGET caffe2::mkldnn PROPERTY INTERFACE_LINK_LIBRARIES
- ${MKLDNN_LIBRARIES})
+ DNNL::dnnl)
- if(BUILD_ONEDNN_GRAPH)
- if(NOT TARGET caffe2::dnnl_graph)
- add_library(caffe2::dnnl_graph INTERFACE IMPORTED)
diff --git a/setup.py b/setup.py
-index 34b2854..5db117f 100644
+index 61ee9363f..3691cc35c 100644
--- a/setup.py
+++ b/setup.py
-@@ -418,13 +418,9 @@ def build_deps():
+@@ -508,13 +508,9 @@ def build_deps():
# Windows has very poor support for them.
sym_files = [
- 'tools/shared/_utils_internal.py',
-- 'torch/utils/benchmark/utils/valgrind_wrapper/callgrind.h',
-- 'torch/utils/benchmark/utils/valgrind_wrapper/valgrind.h',
+ "tools/shared/_utils_internal.py",
+- "torch/utils/benchmark/utils/valgrind_wrapper/callgrind.h",
+- "torch/utils/benchmark/utils/valgrind_wrapper/valgrind.h",
]
orig_files = [
- 'torch/_utils_internal.py',
-- 'third_party/valgrind-headers/callgrind.h',
-- 'third_party/valgrind-headers/valgrind.h',
+ "torch/_utils_internal.py",
+- "third_party/valgrind-headers/callgrind.h",
+- "third_party/valgrind-headers/valgrind.h",
]
for sym_file, orig_file in zip(sym_files, orig_files):
same = False
diff --git a/test/cpp/c10d/CMakeLists.txt b/test/cpp/c10d/CMakeLists.txt
-index 89c6b91..0c60d08 100644
+index 5b423241d..e069accd6 100644
--- a/test/cpp/c10d/CMakeLists.txt
+++ b/test/cpp/c10d/CMakeLists.txt
-@@ -16,14 +16,14 @@ function(c10d_add_test test_src)
- add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
+@@ -26,17 +26,17 @@ function(c10d_add_test test_src)
+ endif()
endfunction()
--c10d_add_test(FileStoreTest.cpp torch_cpu gtest_main)
--c10d_add_test(TCPStoreTest.cpp torch_cpu gtest_main)
-+c10d_add_test(FileStoreTest.cpp torch_cpu gtest_main gtest)
-+c10d_add_test(TCPStoreTest.cpp torch_cpu gtest_main gtest)
- if(INSTALL_TEST)
- install(TARGETS FileStoreTest DESTINATION bin)
- install(TARGETS TCPStoreTest DESTINATION bin)
- endif()
+-c10d_add_test(BackoffTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST OFF)
+-c10d_add_test(FileStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST})
+-c10d_add_test(TCPStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST})
++c10d_add_test(BackoffTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST OFF)
++c10d_add_test(FileStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
++c10d_add_test(TCPStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
if(NOT WIN32)
-- c10d_add_test(HashStoreTest.cpp torch_cpu gtest_main)
-+ c10d_add_test(HashStoreTest.cpp torch_cpu gtest_main gtest)
- if(INSTALL_TEST)
- install(TARGETS HashStoreTest DESTINATION bin)
- endif()
-@@ -31,11 +31,11 @@ endif()
+- c10d_add_test(HashStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST})
++ c10d_add_test(HashStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
+ endif()
if(USE_CUDA)
if(USE_GLOO AND USE_C10D_GLOO)
-- c10d_add_test(ProcessGroupGlooTest.cpp torch_cpu c10d_cuda_test gtest_main)
-+ c10d_add_test(ProcessGroupGlooTest.cpp torch_cpu c10d_cuda_test gtest_main gtest)
- if(INSTALL_TEST)
- install(TARGETS ProcessGroupGlooTest DESTINATION bin)
- endif()
-- c10d_add_test(ProcessGroupGlooAsyncTest.cpp torch_cpu c10d_cuda_test gtest_main)
-+ c10d_add_test(ProcessGroupGlooAsyncTest.cpp torch_cpu c10d_cuda_test gtest_main gtest)
+- c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main INSTALL_TEST ${INSTALL_TEST})
+- c10d_add_test(ProcessGroupGlooAsyncTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main INSTALL_TEST ${INSTALL_TEST})
++ c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
++ c10d_add_test(ProcessGroupGlooAsyncTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
endif()
if(USE_NCCL AND USE_C10D_NCCL)
# NCCL is a private dependency of libtorch, but the tests include some
-@@ -44,10 +44,10 @@ if(USE_CUDA)
+@@ -45,10 +45,10 @@ if(USE_CUDA)
# a private dependency of the tests as well.
c10d_add_test(
ProcessGroupNCCLTest.cpp
-- torch_cpu c10d_cuda_test gtest_main __caffe2_nccl)
-+ torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl)
+- LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_nccl INSTALL_TEST ${INSTALL_TEST})
++ LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl INSTALL_TEST ${INSTALL_TEST})
c10d_add_test(
ProcessGroupNCCLErrorsTest.cpp
-- torch_cpu c10d_cuda_test gtest_main __caffe2_nccl)
-+ torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl)
+- LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_nccl INSTALL_TEST ${INSTALL_TEST})
++ LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl INSTALL_TEST ${INSTALL_TEST})
if(INSTALL_TEST)
- install(TARGETS ProcessGroupNCCLTest DESTINATION bin)
- install(TARGETS ProcessGroupNCCLErrorsTest DESTINATION bin)
-@@ -61,7 +61,7 @@ if(USE_CUDA)
+ install(TARGETS c10d_cuda_test DESTINATION lib)
+ endif()
+@@ -60,14 +60,14 @@ if(USE_CUDA)
# a private dependency of the tests as well.
c10d_add_test(
ProcessGroupUCCTest.cpp
-- torch_cpu c10d_cuda_test gtest_main __caffe2_ucc)
-+ torch_cpu c10d_cuda_test gtest_main gtest __caffe2_ucc)
+- LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_ucc INSTALL_TEST ${INSTALL_TEST})
++ LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest __caffe2_ucc INSTALL_TEST ${INSTALL_TEST})
if(INSTALL_TEST)
- install(TARGETS ProcessGroupUCCTest DESTINATION bin)
install(TARGETS c10d_cuda_test DESTINATION lib)
-@@ -69,7 +69,7 @@ if(USE_CUDA)
+ endif()
endif()
else()
if(USE_GLOO AND USE_C10D_GLOO)
-- c10d_add_test(ProcessGroupGlooTest.cpp torch_cpu gtest_main)
-+ c10d_add_test(ProcessGroupGlooTest.cpp torch_cpu gtest_main gtest)
+- c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST OFF)
++ c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST OFF)
endif()
endif()
diff --git a/test/cpp/tensorexpr/CMakeLists.txt b/test/cpp/tensorexpr/CMakeLists.txt
-index 7dff706..90b1003 100644
+index 9c409e078..6cddd8de4 100644
--- a/test/cpp/tensorexpr/CMakeLists.txt
+++ b/test/cpp/tensorexpr/CMakeLists.txt
-@@ -54,7 +54,7 @@ target_include_directories(tutorial_tensorexpr PRIVATE ${ATen_CPU_INCLUDE})
+@@ -51,7 +51,7 @@ target_include_directories(tutorial_tensorexpr PRIVATE ${ATen_CPU_INCLUDE})
# pthreadpool header. For some build environment we need add the dependency
# explicitly.
if(USE_PTHREADPOOL)
@@ 396,4 429,17 @@ index 7dff706..90b1003 100644
+ target_link_libraries(test_tensorexpr PRIVATE pthreadpool)
endif()
if(USE_CUDA)
- target_link_libraries(test_tensorexpr PRIVATE
+ target_compile_definitions(test_tensorexpr PRIVATE USE_CUDA)
+diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt
+index 8b8ebdc6e..034b5e56c 100644
+--- a/torch/CMakeLists.txt
++++ b/torch/CMakeLists.txt
+@@ -82,8 +82,6 @@ set(TORCH_PYTHON_LINK_LIBRARIES
+ Python::Module
+ pybind::pybind11
+ opentelemetry::api
+- httplib
+- nlohmann
+ shm
+ fmt::fmt-header-only
+ ATEN_CPU_FILES_GEN_LIB)