~ruther/guix-local

81d309de8745605eb76b85e6c445b896c2ba10f3 — Ayan Das 1 year, 1 month ago 785d771
gnu: python-pytorch: Update to 2.7.0.

* gnu/packages/machine-learning.scm (python-pytorch): Update to 2.7.0.
[source]: Add substitution for additional miniz.h include patterns.
[arguments]: Add 'skip-nccl-call' phase to use system NCCL.
* gnu/packages/patches/python-pytorch-fix-codegen-2.7.0.patch,
gnu/packages/patches/python-pytorch-runpath-2.7.0.patch,
gnu/packages/patches/python-pytorch-system-libraries-2.7.0.patch,
gnu/packages/patches/python-pytorch-without-kineto-2.7.0.patch: New files.
* gnu/local.mk (dist_patch_DATA): Add them.

Signed-off-by: Ayan Das <bvits@riseup.net>
Signed-off-by: Ludovic Courtès <ludo@gnu.org>
M gnu/local.mk => gnu/local.mk +4 -0
@@ 2149,11 2149,15 @@ dist_patch_DATA =						\
  %D%/packages/patches/python-pyan3-fix-absolute-path-bug.patch \
  %D%/packages/patches/python-pyan3-fix-positional-arguments.patch \
  %D%/packages/patches/python-pytorch-fix-codegen.patch		\
  %D%/packages/patches/python-pytorch-fix-codegen-2.7.0.patch		\
  %D%/packages/patches/python-pytorch-for-r-torch-fix-codegen.patch \
  %D%/packages/patches/python-pytorch-for-r-torch-system-libraries.patch \
  %D%/packages/patches/python-pytorch-runpath.patch		\
  %D%/packages/patches/python-pytorch-runpath-2.7.0.patch		\
  %D%/packages/patches/python-pytorch-system-libraries.patch	\
  %D%/packages/patches/python-pytorch-system-libraries-2.7.0.patch	\
  %D%/packages/patches/python-pytorch-without-kineto.patch	\
  %D%/packages/patches/python-pytorch-without-kineto-2.7.0.patch	\
  %D%/packages/patches/python-robotframework-sshlibrary-rf5-compat.patch \
  %D%/packages/patches/python-unittest2-python3-compat.patch	\
  %D%/packages/patches/python-unittest2-remove-argparse.patch	\

M gnu/packages/machine-learning.scm => gnu/packages/machine-learning.scm +16 -8
@@ 4956,7 4956,7 @@ PyTorch.")
        (base32
         "0hdpkhcjry22fjx2zg2r48v7f4ljrclzj0li2pgk76kvyblfbyvm"))))))

(define %python-pytorch-version "2.5.1")
(define %python-pytorch-version "2.7.0")

(define %python-pytorch-src
  (origin


@@ 4967,14 4967,14 @@ PyTorch.")
    (file-name (git-file-name "python-pytorch" %python-pytorch-version))
    (sha256
     (base32
      "052cvagpmm9y7jspjpcyysx8yc5fhxnjl8rcz6nndis06v8dcj8s"))
    (patches (search-patches "python-pytorch-system-libraries.patch"
                             "python-pytorch-runpath.patch"
                             "python-pytorch-without-kineto.patch"
      "19prdpzx34n8y2q6wx9dn9vyms6zidjvfgh58d28rfcf5z7z5ra5"))
    (patches (search-patches "python-pytorch-system-libraries-2.7.0.patch"
                             "python-pytorch-runpath-2.7.0.patch"
                             "python-pytorch-without-kineto-2.7.0.patch"
                             ;; Some autogeneration scripts depend on the
                             ;; compiled PyTorch library. Therefore, we create
                             ;; dummy versions which are regenerated later.
                             "python-pytorch-fix-codegen.patch"))
                             "python-pytorch-fix-codegen-2.7.0.patch"))
    (modules '((guix build utils)))
    (snippet
     '(begin


@@ 5124,8 5124,10 @@ PyTorch.")
          (add-before 'build 'use-system-libraries
            (lambda _
              (substitute* '("caffe2/serialize/crc.cc"
                             "caffe2/serialize/inline_container.cc")
                (("\"miniz\\.h\"") "<miniz/miniz.h>"))
                             "caffe2/serialize/inline_container.cc"
                             "torch/csrc/inductor/aoti_package/model_package_loader.cpp")
                (("\"miniz\\.h\"") "<miniz/miniz.h>")
                (("<miniz\\.h>") "<miniz/miniz.h>"))
              (substitute* "aten/src/ATen/native/vulkan/api/Allocator.h"
                (("<include/vk_mem_alloc.h>")
                 "<vk_mem_alloc.h>"))


@@ 5162,6 5164,12 @@ PyTorch.")
              (substitute* '("requirements.txt" "setup.py")
                (("sympy==1\\.13\\.1")
                 "sympy>=1.13.1"))))
          (add-after 'use-system-libraries 'skip-nccl-call
            (lambda _
              ;; Comment out the `checkout_nccl()` invocation in build_pytorch().
              (substitute* "tools/build_pytorch_libs.py"
                (("^[[:blank:]]*checkout_nccl\\(\\)" all)
                 (string-append "# " all "  # Guix: use system NCCL\n")))))
          ;; PyTorch is still built with AVX2 and AVX-512 support selected at
          ;; runtime, but these dependencies require it (nnpack only for
          ;; x86_64).

A gnu/packages/patches/python-pytorch-fix-codegen-2.7.0.patch => gnu/packages/patches/python-pytorch-fix-codegen-2.7.0.patch +178 -0
@@ 0,0 1,178 @@
This patch fixes some scripts for generating source files.  For
gen_jit_decompositions.py, gen_mobile_upgraders.py and
gen_jit_shape_functions.py, which depend on the compiled PyTorch library, the
option to generate "dummy" source files is added for the initial build, which
is later corrected.  codegen_external.py is patched to avoid duplicate
functions and add the static keyword as in the existing generated file.

diff --git a/tools/gen_flatbuffers.sh b/tools/gen_flatbuffers.sh
index cc0263dbb..ac34e84b8 100644
--- a/tools/gen_flatbuffers.sh
+++ b/tools/gen_flatbuffers.sh
@@ -1,13 +1,13 @@
 #!/bin/bash
 ROOT=$(pwd)
-FF_LOCATION="$ROOT/third_party/flatbuffers"
-cd "$FF_LOCATION" || exit
-mkdir build
-cd build || exit
-cmake ..
-cmake --build . --target flatc
-mkdir -p "$ROOT/build/torch/csrc/jit/serialization"
-./flatc --cpp --gen-mutable --scoped-enums \
+#FF_LOCATION="$ROOT/third_party/flatbuffers"
+#cd "$FF_LOCATION" || exit
+#mkdir build
+#cd build || exit
+#cmake ..
+#cmake --build . --target flatc
+#mkdir -p "$ROOT/build/torch/csrc/jit/serialization"
+flatc --cpp --gen-mutable --scoped-enums \
      -o "$ROOT/torch/csrc/jit/serialization" \
      -c "$ROOT/torch/csrc/jit/serialization/mobile_bytecode.fbs"
 echo '// @generated' >> "$ROOT/torch/csrc/jit/serialization/mobile_bytecode_generated.h"
diff --git a/torch/csrc/jit/tensorexpr/codegen_external.py b/torch/csrc/jit/tensorexpr/codegen_external.py
index 5dcf1b284..0e20b0c10 100644
--- a/torch/csrc/jit/tensorexpr/codegen_external.py
+++ b/torch/csrc/jit/tensorexpr/codegen_external.py
@@ -21,9 +21,14 @@ def gen_external(native_functions_path, tags_path, external_path):
     native_functions = parse_native_yaml(native_functions_path, tags_path)
     func_decls = []
     func_registrations = []
-    for func in native_functions:
+    done_names = set()
+    for func in native_functions[0]:
         schema = func.func
         name = schema.name.name.base
+        if name in done_names:
+            continue
+        else:
+            done_names.add(name)
         args = schema.arguments
         # Only supports extern calls for functions with out variants
         if not schema.is_out_fn():
@@ -63,7 +68,7 @@ def gen_external(native_functions_path, tags_path, external_path):
 
         # print(tensor_decls, name, arg_names)
         func_decl = f"""\
-void nnc_aten_{name}(
+static void nnc_aten_{name}(
     int64_t bufs_num,
     void** buf_data,
     int64_t* buf_ranks,
diff --git a/torchgen/decompositions/gen_jit_decompositions.py b/torchgen/decompositions/gen_jit_decompositions.py
index b42948045..e1cfc73a5 100644
--- a/torchgen/decompositions/gen_jit_decompositions.py
+++ b/torchgen/decompositions/gen_jit_decompositions.py
@@ -1,8 +1,12 @@
 #!/usr/bin/env python3
 import os
 from pathlib import Path
+import sys
 
-from torch.jit._decompositions import decomposition_table
+if len(sys.argv) < 2 or sys.argv[1] != "dummy":
+    from torch.jit._decompositions import decomposition_table
+else:
+    decomposition_table = {}
 
 
 # from torchgen.code_template import CodeTemplate
@@ -86,7 +90,7 @@ def write_decomposition_util_file(path: str) -> None:
 
 
 def main() -> None:
-    pytorch_dir = Path(__file__).resolve().parents[3]
+    pytorch_dir = Path(__file__).resolve().parents[2]
     upgrader_path = pytorch_dir / "torch" / "csrc" / "jit" / "runtime"
     write_decomposition_util_file(str(upgrader_path))
 
diff --git a/torchgen/operator_versions/gen_mobile_upgraders.py b/torchgen/operator_versions/gen_mobile_upgraders.py
index 845034cb7..a1c5767c2 100644
--- a/torchgen/operator_versions/gen_mobile_upgraders.py
+++ b/torchgen/operator_versions/gen_mobile_upgraders.py
@@ -6,10 +6,13 @@ import os
 from enum import Enum
 from operator import itemgetter
 from pathlib import Path
+import sys
 from typing import Any
 
-import torch
-from torch.jit.generate_bytecode import generate_upgraders_bytecode
+if len(sys.argv) < 2 or sys.argv[1] != "dummy":
+    import torch
+    from torch.jit.generate_bytecode import generate_upgraders_bytecode
+
 from torchgen.code_template import CodeTemplate
 from torchgen.operator_versions.gen_mobile_upgraders_constant import (
     MOBILE_UPGRADERS_HEADER_DESCRIPTION,
@@ -263,7 +266,10 @@ def construct_register_size(register_size_from_yaml: int) -> str:
 def construct_version_maps(
     upgrader_bytecode_function_to_index_map: dict[str, Any],
 ) -> str:
-    version_map = torch._C._get_operator_version_map()
+    if len(sys.argv) < 2 or sys.argv[1] != "dummy":
+        version_map = torch._C._get_operator_version_map()
+    else:
+        version_map = {}
     sorted_version_map_ = sorted(version_map.items(), key=itemgetter(0))  # type: ignore[no-any-return]
     sorted_version_map = dict(sorted_version_map_)
 
@@ -375,7 +381,10 @@ def sort_upgrader(upgrader_list: list[dict[str, Any]]) -> list[dict[str, Any]]:
 
 
 def main() -> None:
-    upgrader_list = generate_upgraders_bytecode()
+    if len(sys.argv) < 2 or sys.argv[1] != "dummy":
+        upgrader_list = generate_upgraders_bytecode()
+    else:
+        upgrader_list = []
     sorted_upgrader_list = sort_upgrader(upgrader_list)
     for up in sorted_upgrader_list:
         print("after sort upgrader : ", next(iter(up)))
diff --git a/torchgen/shape_functions/gen_jit_shape_functions.py b/torchgen/shape_functions/gen_jit_shape_functions.py
index 56a3d8bf0..ffd0785fd 100644
--- a/torchgen/shape_functions/gen_jit_shape_functions.py
+++ b/torchgen/shape_functions/gen_jit_shape_functions.py
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 import os
 import sys
+import importlib
 from importlib.util import module_from_spec, spec_from_file_location
 from itertools import chain
 from pathlib import Path
@@ -18,17 +19,21 @@ you are in the root directory of the Pytorch git repo"""
 if not file_path.exists():
     raise Exception(err_msg)  # noqa: TRY002
 
-spec = spec_from_file_location(module_name, file_path)
-assert spec is not None
-module = module_from_spec(spec)
-sys.modules[module_name] = module
-assert spec.loader is not None
-assert module is not None
-spec.loader.exec_module(module)
-
-bounded_compute_graph_mapping = module.bounded_compute_graph_mapping
-shape_compute_graph_mapping = module.shape_compute_graph_mapping
-
+if len(sys.argv) < 2 or sys.argv[1] != "dummy":
+    spec = importlib.util.spec_from_file_location(module_name, file_path)
+    assert spec is not None
+    module = importlib.util.module_from_spec(spec)
+    sys.modules[module_name] = module
+    assert spec.loader is not None
+    assert module is not None
+    spec.loader.exec_module(module)
+
+    bounded_compute_graph_mapping = module.bounded_compute_graph_mapping
+    shape_compute_graph_mapping = module.shape_compute_graph_mapping
+
+else:
+    bounded_compute_graph_mapping = {}
+    shape_compute_graph_mapping = {}
 
 SHAPE_HEADER = r"""
 /**

A gnu/packages/patches/python-pytorch-runpath-2.7.0.patch => gnu/packages/patches/python-pytorch-runpath-2.7.0.patch +30 -0
@@ 0,0 1,30 @@
Libraries (such as 'libtorch_cpu.so') and executables (such as 'torch_shm_manager')
get installed, quite surprisingly, to 'lib/python3.8/site-packages/{bin,lib}'.
Make sure RUNPATH matches that.

diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index be45936a8..7b19e5359 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -4,7 +4,7 @@ if(APPLE)
   set(CMAKE_MACOSX_RPATH ON)
   set(_rpath_portable_origin "@loader_path")
 else()
-  set(_rpath_portable_origin $ORIGIN)
+  set(_rpath_portable_origin $ORIGIN/../lib)
 endif(APPLE)
 # Use separate rpaths during build and install phases
 set(CMAKE_SKIP_BUILD_RPATH  FALSE)
diff --git a/functorch/CMakeLists.txt b/functorch/CMakeLists.txt
index bdfa4bfe4..2a75e3825 100644
--- a/functorch/CMakeLists.txt
+++ b/functorch/CMakeLists.txt
@@ -26,7 +26,7 @@ target_link_libraries(${PROJECT_NAME} PRIVATE pybind::pybind11)
 
 set_target_properties(${PROJECT_NAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY
       ${CMAKE_BINARY_DIR}/functorch)
-set_target_properties(${PROJECT_NAME} PROPERTIES INSTALL_RPATH "${_rpath_portable_origin}/../torch/lib")
+set_target_properties(${PROJECT_NAME} PROPERTIES INSTALL_RPATH "$ORIGIN/../torch/lib")
 
 # Copy-pasted prefix/suffix logic for Python extensions from
 # https://github.com/pytorch/pytorch/blob/33bb8ae350611760139457b85842b1d7edf9aa11/caffe2/CMakeLists.txt#L1975

A gnu/packages/patches/python-pytorch-system-libraries-2.7.0.patch => gnu/packages/patches/python-pytorch-system-libraries-2.7.0.patch +442 -0
@@ 0,0 1,442 @@
Patch build files to also use system libraries instead of bundled ones for the
libraries not supported or working only by specifying USE_SYSTEM_LIBS.  This
includes using the clog, cpuinfo, fbgemm, foxi, fp16, fxdiv, googletest,
httplib, ideep, miniz, nnpack, oneapi-dnnl, pocketfft, pthreadpool,
qnnpack-pytorch, tensorpipe, valgrind and xnnpack packages.

diff --git a/aten/src/ATen/CMakeLists.txt b/aten/src/ATen/CMakeLists.txt
index 085af373e..3287429b4 100644
--- a/aten/src/ATen/CMakeLists.txt
+++ b/aten/src/ATen/CMakeLists.txt
@@ -468,9 +468,9 @@ if(AT_NNPACK_ENABLED)
   list(APPEND ATen_CPU_DEPENDENCY_LIBS nnpack) # cpuinfo is added below
 endif()
 
-if(MKLDNN_FOUND)
-  list(APPEND ATen_CPU_DEPENDENCY_LIBS ${MKLDNN_LIBRARIES})
-endif(MKLDNN_FOUND)
+if(USE_MKLDNN)
+  list(APPEND ATen_CPU_DEPENDENCY_LIBS DNNL::dnnl)
+endif(USE_MKLDNN)
 
 if(USE_MKLDNN_ACL)
     list(APPEND ATen_CPU_INCLUDE ${ACL_INCLUDE_DIRS})
diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt
index d2d23b7ab..1a7e5a042 100644
--- a/caffe2/CMakeLists.txt
+++ b/caffe2/CMakeLists.txt
@@ -91,9 +91,6 @@ if(NOT MSVC AND USE_XNNPACK)
   if(NOT TARGET fxdiv)
     set(FXDIV_BUILD_TESTS OFF CACHE BOOL "")
     set(FXDIV_BUILD_BENCHMARKS OFF CACHE BOOL "")
-    add_subdirectory(
-      "${FXDIV_SOURCE_DIR}"
-      "${CMAKE_BINARY_DIR}/FXdiv")
   endif()
 endif()
 
@@ -1135,7 +1132,6 @@ if(USE_XPU)
 endif()
 
 if(NOT MSVC AND USE_XNNPACK)
-  TARGET_LINK_LIBRARIES(torch_cpu PRIVATE fxdiv)
 endif()
 
 # ==========================================================
@@ -1254,8 +1250,8 @@ endif()
 target_include_directories(torch_cpu PRIVATE
   ${TORCH_ROOT}/third_party/cpp-httplib)
 
-target_include_directories(torch_cpu PRIVATE
-  ${TORCH_ROOT}/third_party/nlohmann/include)
+find_package(httplib REQUIRED)
+target_link_libraries(torch_cpu PUBLIC httplib::httplib)
 
 install(DIRECTORY "${TORCH_SRC_DIR}/csrc"
   DESTINATION ${TORCH_INSTALL_INCLUDE_DIR}/torch
@@ -1494,6 +1490,7 @@ target_link_libraries(torch_cpu PUBLIC c10)
 target_link_libraries(torch_cpu PUBLIC ${Caffe2_PUBLIC_DEPENDENCY_LIBS})
 target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_LIBS})
 target_link_libraries(torch_cpu PRIVATE ${Caffe2_DEPENDENCY_WHOLE_LINK_LIBS})
+target_link_libraries(torch_cpu PRIVATE miniz clog)
 if(USE_MPI)
   target_link_libraries(torch_cpu PRIVATE MPI::MPI_CXX)
 endif()
@@ -1728,7 +1725,7 @@ if(BUILD_STATIC_RUNTIME_BENCHMARK)
   add_executable(static_runtime_bench "${STATIC_RUNTIME_BENCHMARK_SRCS}")
   add_executable(static_runtime_test "${STATIC_RUNTIME_TEST_SRCS}")
   target_link_libraries(static_runtime_bench torch_library benchmark)
-  target_link_libraries(static_runtime_test torch_library gtest_main)
+  target_link_libraries(static_runtime_test torch_library gtest_main gtest)
 endif()
 
 if(BUILD_MOBILE_BENCHMARK)
@@ -1747,7 +1744,7 @@ if(BUILD_MOBILE_TEST)
   foreach(test_src ${ATen_MOBILE_TEST_SRCS})
     get_filename_component(test_name ${test_src} NAME_WE)
     add_executable(${test_name} "${test_src}")
-    target_link_libraries(${test_name} torch_library gtest_main)
+    target_link_libraries(${test_name} torch_library gtest_main gtest)
     target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
     target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
     target_include_directories(${test_name} PRIVATE ${ATen_CPU_INCLUDE})
@@ -1768,7 +1765,7 @@ if(BUILD_TEST)
         if(NOT MSVC)
           add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}" ../aten/src/ATen/native/quantized/AffineQuantizerBase.cpp)
           # TODO: Get rid of c10 dependency (which is only needed for the implementation of AT_ERROR)
-          target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main nlohmann)
+          target_link_libraries(${test_name}_${CPU_CAPABILITY} c10 sleef gtest_main gtest nlohmann)
           if(USE_FBGEMM)
             target_link_libraries(${test_name}_${CPU_CAPABILITY} fbgemm)
           endif()
@@ -1782,7 +1779,7 @@ if(BUILD_TEST)
           endif()
         else()
           add_executable(${test_name}_${CPU_CAPABILITY} "${test_src}")
-          target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library sleef gtest_main)
+          target_link_libraries(${test_name}_${CPU_CAPABILITY} torch_library sleef gtest_main gtest)
         endif()
         target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<INSTALL_INTERFACE:include>)
         target_include_directories(${test_name}_${CPU_CAPABILITY} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
@@ -1799,7 +1796,7 @@ if(BUILD_TEST)
   foreach(test_src ${Caffe2_CPU_TEST_SRCS})
     get_filename_component(test_name ${test_src} NAME_WE)
     add_executable(${test_name} "${test_src}")
-    target_link_libraries(${test_name} torch_library gtest_main)
+    target_link_libraries(${test_name} torch_library gtest_main gtest)
     if(NOT MSVC)
       target_link_libraries(${test_name} stdc++)
     endif()
@@ -1823,7 +1820,7 @@ if(BUILD_TEST)
       add_executable(${test_name} "${test_src}")
       find_library(metal NAMES Metal)
       find_library(foundation NAMES Foundation)
-      target_link_libraries(${test_name} torch_library gtest_main ${metal} ${foundation})
+      target_link_libraries(${test_name} torch_library gtest_main gtest ${metal} ${foundation})
       target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
       target_include_directories(${test_name} PRIVATE $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/include>)
       target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
@@ -1843,7 +1840,7 @@ if(BUILD_TEST)
     foreach(test_src ${Caffe2_GPU_TEST_SRCS})
       get_filename_component(test_name ${test_src} NAME_WE)
       add_executable(${test_name} "${test_src}")
-      target_link_libraries(${test_name} torch_library gtest_main)
+      target_link_libraries(${test_name} torch_library gtest_main gtest)
       if(USE_CUDNN AND ${test_name} MATCHES "cudnn")
         target_link_libraries(${test_name} torch::cudnn)
       endif()
@@ -1865,7 +1862,7 @@ if(BUILD_TEST)
     foreach(test_src ${Caffe2_XPU_TEST_SRCS})
       get_filename_component(test_name ${test_src} NAME_WE)
       add_executable(${test_name} "${test_src}")
-      target_link_libraries(${test_name} torch_library gtest_main)
+      target_link_libraries(${test_name} torch_library gtest_main gtest)
       target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
       target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
       add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
@@ -1880,7 +1877,7 @@ if(BUILD_TEST)
     foreach(test_src ${Caffe2_VULKAN_TEST_SRCS})
       get_filename_component(test_name ${test_src} NAME_WE)
       add_executable(${test_name} "${test_src}")
-      target_link_libraries(${test_name} torch_library gtest_main)
+      target_link_libraries(${test_name} torch_library gtest_main gtest)
       target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
       target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE})
       add_test(NAME ${test_name} COMMAND $<TARGET_FILE:${test_name}>)
@@ -1899,7 +1896,7 @@ if(BUILD_TEST)
     foreach(test_src ${Caffe2_HIP_TEST_SRCS})
       get_filename_component(test_name ${test_src} NAME_WE)
       add_executable(${test_name} "${test_src}")
-      target_link_libraries(${test_name} torch_library gtest_main)
+      target_link_libraries(${test_name} torch_library gtest_main gtest)
       target_include_directories(${test_name} PRIVATE $<INSTALL_INTERFACE:include>)
       target_include_directories(${test_name} PRIVATE ${Caffe2_CPU_INCLUDE} ${Caffe2_HIP_INCLUDE})
       target_compile_options(${test_name} PRIVATE ${HIP_CXX_FLAGS})
diff --git a/caffe2/serialize/CMakeLists.txt b/caffe2/serialize/CMakeLists.txt
index ebbff0f29..dcded2590 100644
--- a/caffe2/serialize/CMakeLists.txt
+++ b/caffe2/serialize/CMakeLists.txt
@@ -2,7 +2,6 @@ file(GLOB tmp *_test.cc)
 
 set(Caffe2_CPU_TEST_SRCS ${Caffe2_CPU_TEST_SRCS} ${tmp})
 list(APPEND Caffe2_CPU_SRCS
-  ${PROJECT_SOURCE_DIR}/third_party/miniz-3.0.2/miniz.c
   ${CMAKE_CURRENT_SOURCE_DIR}/inline_container.cc
   ${CMAKE_CURRENT_SOURCE_DIR}/istream_adapter.cc
   ${CMAKE_CURRENT_SOURCE_DIR}/file_adapter.cc
diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake
index be45936a8..bb1aa1cc1 100644
--- a/cmake/Dependencies.cmake
+++ b/cmake/Dependencies.cmake
@@ -276,7 +276,7 @@ endif()
 # --- [ PocketFFT
 set(AT_POCKETFFT_ENABLED 0)
 if(NOT AT_MKL_ENABLED)
-  set(POCKETFFT_INCLUDE_DIR "${Torch_SOURCE_DIR}/third_party/pocketfft/")
+  set(POCKETFFT_INCLUDE_DIR "#POCKETFFT_INCLUDE_DIR")
   if(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}")
     message(FATAL_ERROR "pocketfft directory not found, expected ${POCKETFFT_INCLUDE_DIR}")
   elif(NOT EXISTS "${POCKETFFT_INCLUDE_DIR}/pocketfft_hdronly.h")
@@ -460,15 +460,6 @@ if(USE_PYTORCH_QNNPACK)
       set(PYTORCH_QNNPACK_BUILD_TESTS OFF CACHE BOOL "")
       set(PYTORCH_QNNPACK_BUILD_BENCHMARKS OFF CACHE BOOL "")
       set(PYTORCH_QNNPACK_LIBRARY_TYPE "static" CACHE STRING "")
-      add_subdirectory(
-        "${PYTORCH_QNNPACK_SOURCE_DIR}"
-        "${CONFU_DEPENDENCIES_BINARY_DIR}/pytorch_qnnpack")
-      # We build static versions of QNNPACK and pthreadpool but link
-      # them into a shared library for Caffe2, so they need PIC.
-      set_property(TARGET pytorch_qnnpack PROPERTY POSITION_INDEPENDENT_CODE ON)
-      set_property(TARGET cpuinfo PROPERTY POSITION_INDEPENDENT_CODE ON)
-      # QNNPACK depends on gemmlowp headers
-      target_include_directories(pytorch_qnnpack PRIVATE "${CAFFE2_THIRD_PARTY_ROOT}/gemmlowp")
     endif()
 
     list(APPEND Caffe2_DEPENDENCY_LIBS pytorch_qnnpack)
@@ -558,16 +549,15 @@ if(USE_XNNPACK AND NOT USE_SYSTEM_XNNPACK)
   list(APPEND Caffe2_DEPENDENCY_LIBS XNNPACK microkernels-prod)
 elseif(NOT TARGET XNNPACK AND USE_SYSTEM_XNNPACK)
   add_library(XNNPACK SHARED IMPORTED)
-  add_library(microkernels-prod SHARED IMPORTED)
+  add_library(microkernels-prod INTERFACE IMPORTED)
   find_library(XNNPACK_LIBRARY XNNPACK)
-  find_library(microkernels-prod_LIBRARY microkernels-prod)
   set_property(TARGET XNNPACK PROPERTY IMPORTED_LOCATION "${XNNPACK_LIBRARY}")
-  set_property(TARGET microkernels-prod PROPERTY IMPORTED_LOCATION "${microkernels-prod_LIBRARY}")
-  if(NOT XNNPACK_LIBRARY or NOT microkernels-prod_LIBRARY)
+  set_property(TARGET microkernels-prod PROPERTY INTERFACE_LINK_LIBRARIES XNNPACK)
+  if(NOT XNNPACK_LIBRARY)
     message(FATAL_ERROR "Cannot find XNNPACK")
   endif()
   message("-- Found XNNPACK: ${XNNPACK_LIBRARY}")
-  list(APPEND Caffe2_DEPENDENCY_LIBS XNNPACK microkernels-prod)
+  list(APPEND Caffe2_DEPENDENCY_LIBS XNNPACK)
 endif()
 
 # ---[ Vulkan deps
@@ -650,11 +640,6 @@ if(BUILD_TEST OR BUILD_MOBILE_BENCHMARK OR BUILD_MOBILE_TEST)
   # this shouldn't be necessary anymore.
   get_property(INC_DIR_temp DIRECTORY PROPERTY INCLUDE_DIRECTORIES)
   set_property(DIRECTORY PROPERTY INCLUDE_DIRECTORIES "")
-  add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/../third_party/googletest)
-  set_property(DIRECTORY PROPERTY INCLUDE_DIRECTORIES ${INC_DIR_temp})
-
-  include_directories(BEFORE SYSTEM ${CMAKE_CURRENT_LIST_DIR}/../third_party/googletest/googletest/include)
-  include_directories(BEFORE SYSTEM ${CMAKE_CURRENT_LIST_DIR}/../third_party/googletest/googlemock/include)
 
   # We will not need to test benchmark lib itself.
   set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "Disable benchmark testing as we don't need it.")
@@ -732,16 +717,6 @@ if(USE_FBGEMM)
     if(USE_ASAN)
       set(USE_SANITIZER "address,undefined" CACHE STRING "-fsanitize options for FBGEMM")
     endif()
-    add_subdirectory("${FBGEMM_SOURCE_DIR}")
-    set_property(TARGET fbgemm_generic PROPERTY POSITION_INDEPENDENT_CODE ON)
-    set_property(TARGET fbgemm_avx2 PROPERTY POSITION_INDEPENDENT_CODE ON)
-    set_property(TARGET fbgemm_avx512 PROPERTY POSITION_INDEPENDENT_CODE ON)
-    set_property(TARGET fbgemm PROPERTY POSITION_INDEPENDENT_CODE ON)
-    if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 13.0.0)
-      # See https://github.com/pytorch/pytorch/issues/74352
-      target_compile_options_if_supported(asmjit -Wno-deprecated-copy)
-      target_compile_options_if_supported(asmjit -Wno-unused-but-set-variable)
-    endif()
     if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
       target_compile_options_if_supported(asmjit -Wno-extra-semi)
       target_compile_options_if_supported(fbgemm -Wno-extra-semi)
@@ -829,7 +804,7 @@ if(NOT TARGET fp16 AND NOT USE_SYSTEM_FP16)
       "${CONFU_DEPENDENCIES_BINARY_DIR}/FP16")
   endif()
 elseif(NOT TARGET fp16 AND USE_SYSTEM_FP16)
-  add_library(fp16 STATIC "/usr/include/fp16.h")
+  add_library(fp16 STATIC "#FP16_INCLUDE_DIR")
   set_target_properties(fp16 PROPERTIES LINKER_LANGUAGE C)
 endif()
 list(APPEND Caffe2_DEPENDENCY_LIBS fp16)
@@ -1170,7 +1145,6 @@ if(USE_DISTRIBUTED AND USE_TENSORPIPE)
       message(WARNING "Archived TensorPipe forces CMake compatibility mode")
       set(CMAKE_POLICY_VERSION_MINIMUM 3.5)
     endif()
-    add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/tensorpipe)
     if(CMAKE_VERSION VERSION_GREATER_EQUAL "4.0.0")
       unset(CMAKE_POLICY_VERSION_MINIMUM)
     endif()
@@ -1340,7 +1314,7 @@ if(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO AND NOT INTERN_DISABLE_ONNX)
     endif()
     set_property(TARGET onnx_proto PROPERTY IMPORTED_LOCATION ${ONNX_PROTO_LIBRARY})
     message("-- Found onnx: ${ONNX_LIBRARY} ${ONNX_PROTO_LIBRARY}")
-    list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx)
+    list(APPEND Caffe2_DEPENDENCY_LIBS onnx_proto onnx onnx_optimizer)
   endif()
   # Recover the build shared libs option.
   set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS})
@@ -1500,9 +1474,8 @@ if(NOT INTERN_BUILD_MOBILE)
   endif()
   if(USE_MKLDNN)
     include(${CMAKE_CURRENT_LIST_DIR}/public/mkldnn.cmake)
-    if(MKLDNN_FOUND)
+    if(DNNL_FOUND)
       set(AT_MKLDNN_ENABLED 1)
-      include_directories(AFTER SYSTEM ${MKLDNN_INCLUDE_DIR})
     else()
       message(WARNING "MKLDNN could not be found.")
       caffe2_update_option(USE_MKLDNN OFF)
@@ -1583,7 +1556,7 @@ endif()
 #
 set(TEMP_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
 set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libs" FORCE)
-add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
+find_package(fmt)
 
 # Disable compiler feature checks for `fmt`.
 #
@@ -1592,7 +1565,6 @@ add_subdirectory(${PROJECT_SOURCE_DIR}/third_party/fmt)
 # CMAKE_CXX_FLAGS in ways that break feature checks. Since we already know
 # `fmt` is compatible with a superset of the compilers that PyTorch is, it
 # shouldn't be too bad to just disable the checks.
-set_target_properties(fmt-header-only PROPERTIES INTERFACE_COMPILE_FEATURES "")
 
 list(APPEND Caffe2_DEPENDENCY_LIBS fmt::fmt-header-only)
 set(BUILD_SHARED_LIBS ${TEMP_BUILD_SHARED_LIBS} CACHE BOOL "Build shared libs" FORCE)
diff --git a/cmake/External/nnpack.cmake b/cmake/External/nnpack.cmake
index 8a4a310d6..f413d2e61 100644
--- a/cmake/External/nnpack.cmake
+++ b/cmake/External/nnpack.cmake
@@ -40,7 +40,7 @@ endif()
 # (3) Android, iOS, Linux, macOS - supported
 ##############################################################################
 
-if(ANDROID OR IOS OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux" OR ${CMAKE_SYSTEM_NAME} STREQUAL "Darwin")
+if(FALSE)
   message(STATUS "Brace yourself, we are building NNPACK")
   set(CAFFE2_THIRD_PARTY_ROOT ${PROJECT_SOURCE_DIR}/third_party)
 
@@ -94,6 +94,5 @@ endif()
 # (4) Catch-all: not supported.
 ##############################################################################
 
-message(WARNING "Unknown platform - I don't know how to build NNPACK. "
-                "See cmake/External/nnpack.cmake for details.")
-set(USE_NNPACK OFF)
+set(NNPACK_FOUND TRUE)
+set(USE_NNPACK ON)
diff --git a/cmake/public/mkldnn.cmake b/cmake/public/mkldnn.cmake
index 87935625f..9f8fa3df8 100644
--- a/cmake/public/mkldnn.cmake
+++ b/cmake/public/mkldnn.cmake
@@ -4,7 +4,7 @@ if(CPU_AARCH64)
   include(${CMAKE_CURRENT_LIST_DIR}/ComputeLibrary.cmake)
 endif()
 
-find_package(MKLDNN QUIET)
+find_package(DNNL REQUIRED)
 
 if(NOT TARGET caffe2::mkldnn)
   add_library(caffe2::mkldnn INTERFACE IMPORTED)
@@ -15,4 +15,4 @@ set_property(
   ${MKLDNN_INCLUDE_DIR})
 set_property(
   TARGET caffe2::mkldnn PROPERTY INTERFACE_LINK_LIBRARIES
-  ${MKLDNN_LIBRARIES})
+  DNNL::dnnl)
diff --git a/setup.py b/setup.py
index 61ee9363f..3691cc35c 100644
--- a/setup.py
+++ b/setup.py
@@ -508,13 +508,9 @@ def build_deps():
     # Windows has very poor support for them.
     sym_files = [
         "tools/shared/_utils_internal.py",
-        "torch/utils/benchmark/utils/valgrind_wrapper/callgrind.h",
-        "torch/utils/benchmark/utils/valgrind_wrapper/valgrind.h",
     ]
     orig_files = [
         "torch/_utils_internal.py",
-        "third_party/valgrind-headers/callgrind.h",
-        "third_party/valgrind-headers/valgrind.h",
     ]
     for sym_file, orig_file in zip(sym_files, orig_files):
         same = False
diff --git a/test/cpp/c10d/CMakeLists.txt b/test/cpp/c10d/CMakeLists.txt
index 5b423241d..e069accd6 100644
--- a/test/cpp/c10d/CMakeLists.txt
+++ b/test/cpp/c10d/CMakeLists.txt
@@ -26,17 +26,17 @@ function(c10d_add_test test_src)
   endif()
 endfunction()
 
-c10d_add_test(BackoffTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST OFF)
-c10d_add_test(FileStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST})
-c10d_add_test(TCPStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST})
+c10d_add_test(BackoffTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST OFF)
+c10d_add_test(FileStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
+c10d_add_test(TCPStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
 if(NOT WIN32)
-  c10d_add_test(HashStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST ${INSTALL_TEST})
+  c10d_add_test(HashStoreTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
 endif()
 
 if(USE_CUDA)
   if(USE_GLOO AND USE_C10D_GLOO)
-    c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main INSTALL_TEST ${INSTALL_TEST})
-    c10d_add_test(ProcessGroupGlooAsyncTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main INSTALL_TEST ${INSTALL_TEST})
+    c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
+    c10d_add_test(ProcessGroupGlooAsyncTest.cpp LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest INSTALL_TEST ${INSTALL_TEST})
   endif()
   if(USE_NCCL AND USE_C10D_NCCL)
     # NCCL is a private dependency of libtorch, but the tests include some
@@ -45,10 +45,10 @@ if(USE_CUDA)
     # a private dependency of the tests as well.
     c10d_add_test(
       ProcessGroupNCCLTest.cpp
-      LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_nccl INSTALL_TEST ${INSTALL_TEST})
+      LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl INSTALL_TEST ${INSTALL_TEST})
     c10d_add_test(
       ProcessGroupNCCLErrorsTest.cpp
-      LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_nccl INSTALL_TEST ${INSTALL_TEST})
+      LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest __caffe2_nccl INSTALL_TEST ${INSTALL_TEST})
     if(INSTALL_TEST)
       install(TARGETS c10d_cuda_test DESTINATION lib)
     endif()
@@ -60,14 +60,14 @@ if(USE_CUDA)
     # a private dependency of the tests as well.
     c10d_add_test(
       ProcessGroupUCCTest.cpp
-      LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main __caffe2_ucc INSTALL_TEST ${INSTALL_TEST})
+      LINK_LIBRARIES torch_cpu c10d_cuda_test gtest_main gtest __caffe2_ucc INSTALL_TEST ${INSTALL_TEST})
     if(INSTALL_TEST)
       install(TARGETS c10d_cuda_test DESTINATION lib)
     endif()
   endif()
 else()
   if(USE_GLOO AND USE_C10D_GLOO)
-    c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu gtest_main INSTALL_TEST OFF)
+    c10d_add_test(ProcessGroupGlooTest.cpp LINK_LIBRARIES torch_cpu gtest_main gtest INSTALL_TEST OFF)
   endif()
 endif()
 
diff --git a/test/cpp/tensorexpr/CMakeLists.txt b/test/cpp/tensorexpr/CMakeLists.txt
index 9c409e078..6cddd8de4 100644
--- a/test/cpp/tensorexpr/CMakeLists.txt
+++ b/test/cpp/tensorexpr/CMakeLists.txt
@@ -51,7 +51,7 @@ target_include_directories(tutorial_tensorexpr PRIVATE ${ATen_CPU_INCLUDE})
 # pthreadpool header. For some build environment we need add the dependency
 # explicitly.
 if(USE_PTHREADPOOL)
-  target_link_libraries(test_tensorexpr PRIVATE pthreadpool_interface)
+  target_link_libraries(test_tensorexpr PRIVATE pthreadpool)
 endif()
 if(USE_CUDA)
   target_compile_definitions(test_tensorexpr PRIVATE USE_CUDA)
diff --git a/torch/CMakeLists.txt b/torch/CMakeLists.txt
index 8b8ebdc6e..034b5e56c 100644
--- a/torch/CMakeLists.txt
+++ b/torch/CMakeLists.txt
@@ -82,8 +82,6 @@ set(TORCH_PYTHON_LINK_LIBRARIES
     Python::Module
     pybind::pybind11
     opentelemetry::api
-    httplib
-    nlohmann
     shm
     fmt::fmt-header-only
     ATEN_CPU_FILES_GEN_LIB)

A gnu/packages/patches/python-pytorch-without-kineto-2.7.0.patch => gnu/packages/patches/python-pytorch-without-kineto-2.7.0.patch +64 -0
@@ 0,0 1,64 @@
Even when building without Kineto, the <ActivityType.h> header is still
included and the ActivityType enum is used. This patch was copied from
https://github.com/pytorch/pytorch/pull/111048 and adapted.

diff --git a/torch/csrc/profiler/kineto_shim.h b/torch/csrc/profiler/kineto_shim.h
index c4efd7785..2caef1f1e 100644
--- a/torch/csrc/profiler/kineto_shim.h
+++ b/torch/csrc/profiler/kineto_shim.h
@@ -12,7 +12,55 @@
 #undef USE_KINETO
 #endif
 
+#ifdef USE_KINETO
 #include <ActivityType.h>
+#else // !USE_KINETO
+namespace libkineto {
+// Fallback definition used when USE_KINETO is undefined; copied from the <ActivityType.h> header.
+/*
+ * Copyright (c) Meta Platforms, Inc. and affiliates.
+ * All rights reserved.
+ *
+ * This source code is licensed under the BSD-style license found in the
+ * LICENSE file in the root directory of this source tree.
+ */
+
+// Note : All activity types are not enabled by default. Please add them
+// at correct position in the enum
+enum class ActivityType {
+    // Activity types enabled by default
+    CPU_OP = 0, // cpu side ops
+    USER_ANNOTATION,
+    GPU_USER_ANNOTATION,
+    GPU_MEMCPY,
+    GPU_MEMSET,
+    CONCURRENT_KERNEL, // on-device kernels
+    EXTERNAL_CORRELATION,
+    CUDA_RUNTIME, // host side cuda runtime events
+    CUDA_DRIVER, // host side cuda driver events
+    CPU_INSTANT_EVENT, // host side point-like events
+    PYTHON_FUNCTION,
+    OVERHEAD, // CUPTI induced overhead events sampled from its overhead API.
+
+    // Optional Activity types
+    CUDA_SYNC, // synchronization events between runtime and kernels
+    GLOW_RUNTIME, // host side glow runtime events
+    MTIA_RUNTIME, // host side MTIA runtime events
+    CUDA_PROFILER_RANGE, // CUPTI Profiler range for performance metrics
+    MTIA_CCP_EVENTS, // MTIA ondevice CCP events
+    HPU_OP, // HPU host side runtime event
+    XPU_RUNTIME, // host side xpu runtime events
+    MTIA_WORKLOADD,
+
+    PRIVATEUSE1_RUNTIME,
+    PRIVATEUSE1_DRIVER,
+
+    ENUM_COUNT, // This is to add buffer and not used for any profiling logic. Add your new type before it.
+    OPTIONAL_ACTIVITY_TYPE_START = CUDA_SYNC, // alias: shares the value of CUDA_SYNC, the first optional type
+};
+} // namespace libkineto
+
+#endif // USE_KINETO
 
 #include <torch/csrc/Export.h>
 #include <torch/csrc/profiler/api.h>