diff --git a/pytorch-C.patch b/pytorch-C.patch
deleted file mode 100644
index 26cc0296cad8362e5aae8f4bae9e832ac6e57ac5..0000000000000000000000000000000000000000
--- a/pytorch-C.patch
+++ /dev/null
@@ -1,19 +0,0 @@
---- torch/CMakeLists.txt.python	2022-10-11 18:48:56.534889586 +0300
-+++ torch/CMakeLists.txt	2022-10-11 18:52:32.368263631 +0300
-@@ -279,6 +279,16 @@
-     list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_NCCL)
- endif()
- 
-+add_library(_C SHARED ${TORCH_SRC_DIR}/csrc/stub.c)
-+target_link_libraries(_C ${PYTHON_LIBRARIES} torch_python)
-+target_compile_definitions(_C PRIVATE ${TORCH_PYTHON_COMPILE_DEFINITIONS})
-+target_compile_options(_C PRIVATE ${TORCH_PYTHON_COMPILE_OPTIONS})
-+target_include_directories(_C PUBLIC ${TORCH_PYTHON_INCLUDE_DIRECTORIES})
-+set_target_properties(_C PROPERTIES PREFIX "")
-+if (TORCH_PYTHON_LINK_FLAGS)
-+    set_target_properties(_C PROPERTIES LINK_FLAGS "${TORCH_PYTHON_LINK_FLAGS}")
-+endif()
-+
- if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
-   # Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80947 in EmbeddingBag.cpp
-   set_source_files_properties(${TORCH_SRC_DIR}/csrc/utils/throughput_benchmark.cpp PROPERTIES COMPILE_FLAGS -Wno-attributes)
diff --git a/pytorch-cuda12.patch b/pytorch-cuda12.patch
deleted file mode 100644
index fb0f0386646c4dc0db5231b3669c5f7ab90f7387..0000000000000000000000000000000000000000
--- a/pytorch-cuda12.patch
+++ /dev/null
@@ -1,89 +0,0 @@
-diff --git a/aten/src/ATen/native/nested/cuda/NestedTensorMatmul.cu b/aten/src/ATen/native/nested/cuda/NestedTensorMatmul.cu
-index 22cf38f..9b82341 100644
---- a/aten/src/ATen/native/nested/cuda/NestedTensorMatmul.cu
-+++ b/aten/src/ATen/native/nested/cuda/NestedTensorMatmul.cu
-@@ -1,3 +1,6 @@
-+#undef __CUDA_NO_HALF_OPERATORS__
-+#undef __CUDA_NO_HALF_CONVERSIONS__
-+#undef __CUDA_NO_HALF2_OPERATORS__
- #include <type_traits>
- 
- #include <ATen/ATen.h>
-diff --git a/aten/src/ATen/native/nested/cuda/NestedTensorTransformerFunctions.cu b/aten/src/ATen/native/nested/cuda/NestedTensorTransformerFunctions.cu
-index 56cac2a..933e4bf 100644
---- a/aten/src/ATen/native/nested/cuda/NestedTensorTransformerFunctions.cu
-+++ b/aten/src/ATen/native/nested/cuda/NestedTensorTransformerFunctions.cu
-@@ -1,3 +1,7 @@
-+#undef __CUDA_NO_HALF_OPERATORS__
-+#undef __CUDA_NO_HALF_CONVERSIONS__
-+#undef __CUDA_NO_HALF2_OPERATORS__
-+
- #include <type_traits>
- 
- #include <ATen/ATen.h>
-diff --git a/aten/src/ATen/native/transformers/cuda/attention.cu b/aten/src/ATen/native/transformers/cuda/attention.cu
-index 56a4e49..14ac1b1 100644
---- a/aten/src/ATen/native/transformers/cuda/attention.cu
-+++ b/aten/src/ATen/native/transformers/cuda/attention.cu
-@@ -1,3 +1,6 @@
-+#undef __CUDA_NO_HALF_OPERATORS__
-+#undef __CUDA_NO_HALF_CONVERSIONS__
-+#undef __CUDA_NO_HALF2_OPERATORS__
- #include <type_traits>
- 
- #include <ATen/ATen.h>
-diff --git a/aten/src/ATen/native/transformers/cuda/attention_backward.cu b/aten/src/ATen/native/transformers/cuda/attention_backward.cu
-index 62d4de2..5ca325c 100644
---- a/aten/src/ATen/native/transformers/cuda/attention_backward.cu
-+++ b/aten/src/ATen/native/transformers/cuda/attention_backward.cu
-@@ -1,3 +1,6 @@
-+#undef __CUDA_NO_HALF_OPERATORS__
-+#undef __CUDA_NO_HALF_CONVERSIONS__
-+#undef __CUDA_NO_HALF2_OPERATORS__
- #include <type_traits>
- 
- #include <ATen/ATen.h>
-diff --git a/aten/src/ATen/native/transformers/cuda/mem_eff_attention/kernel_backward.h b/aten/src/ATen/native/transformers/cuda/mem_eff_attention/kernel_backward.h
-index e629aaa..5169b2b 100644
---- a/aten/src/ATen/native/transformers/cuda/mem_eff_attention/kernel_backward.h
-+++ b/aten/src/ATen/native/transformers/cuda/mem_eff_attention/kernel_backward.h
-@@ -1,3 +1,6 @@
-+#undef __CUDA_NO_HALF_OPERATORS__
-+#undef __CUDA_NO_HALF_CONVERSIONS__
-+#undef __CUDA_NO_HALF2_OPERATORS__
- #pragma once
- #include <ATen/ATen.h>
- #include <cmath>
-diff --git a/aten/src/ATen/native/transformers/cuda/mem_eff_attention/kernel_forward.h b/aten/src/ATen/native/transformers/cuda/mem_eff_attention/kernel_forward.h
-index 5207daa..319526e 100644
---- a/aten/src/ATen/native/transformers/cuda/mem_eff_attention/kernel_forward.h
-+++ b/aten/src/ATen/native/transformers/cuda/mem_eff_attention/kernel_forward.h
-@@ -1,3 +1,6 @@
-+#undef __CUDA_NO_HALF_OPERATORS__
-+#undef __CUDA_NO_HALF_CONVERSIONS__
-+#undef __CUDA_NO_HALF2_OPERATORS__
- #include <ATen/ATen.h>
- #include <cmath>
- #include <vector>
-diff --git a/aten/src/ATen/native/transformers/cuda/flash_attn/fmha_bwd_launch_template.h b/aten/src/ATen/native/transformers/cuda/flash_attn/fmha_bwd_launch_template.h
-index f2730b6..67562d9 100644
---- a/aten/src/ATen/native/transformers/cuda/flash_attn/fmha_bwd_launch_template.h
-+++ b/aten/src/ATen/native/transformers/cuda/flash_attn/fmha_bwd_launch_template.h
-@@ -1,3 +1,6 @@
-+#undef __CUDA_NO_HALF_OPERATORS__
-+#undef __CUDA_NO_HALF_CONVERSIONS__
-+#undef __CUDA_NO_HALF2_OPERATORS__
- // Copyright (c) 2022, Tri Dao.
- 
- #pragma once
-diff --git a/aten/src/ATen/native/transformers/cuda/flash_attn/fmha_fwd_launch_template.h b/aten/src/ATen/native/transformers/cuda/flash_attn/fmha_fwd_launch_template.h
-index dc98732..23fd90c 100644
---- a/aten/src/ATen/native/transformers/cuda/flash_attn/fmha_fwd_launch_template.h
-+++ b/aten/src/ATen/native/transformers/cuda/flash_attn/fmha_fwd_launch_template.h
-@@ -1,3 +1,6 @@
-+#undef __CUDA_NO_HALF_OPERATORS__
-+#undef __CUDA_NO_HALF_CONVERSIONS__
-+#undef __CUDA_NO_HALF2_OPERATORS__
- // Copyright (c) 2022, Tri Dao.
- 
- #pragma once
diff --git a/pytorch-gcc11.patch b/pytorch-gcc11.patch
deleted file mode 100644
index 3ed573d446994f3cc74ad54b4bb1047f90b543f4..0000000000000000000000000000000000000000
--- a/pytorch-gcc11.patch
+++ /dev/null
@@ -1,27 +0,0 @@
---- pytorch/caffe2/operators/conv_pool_op_base.h.orig	2020-12-19 02:12:49.218610548 +0200
-+++ pytorch/caffe2/operators/conv_pool_op_base.h	2020-12-18 20:28:25.278808495 +0200
-@@ -642,6 +642,7 @@
-   }
- 
-   virtual ~ConvPoolOpBase() {}
-+  StorageOrder order_;
- 
-  protected:
-   LegacyPadding legacy_pad_;
-@@ -654,7 +655,6 @@
-   bool float16_compute_;
- 
-   int group_;
--  StorageOrder order_;
-   bool shared_buffer_;
-   Workspace* ws_;
- 
---- pytorch/c10/util/hash.h.orig	2020-12-20 02:32:12.985897928 +0200
-+++ pytorch/c10/util/hash.h	2020-12-20 02:32:27.560746725 +0200
-@@ -2,6 +2,7 @@
- 
- #include <functional>
- #include <vector>
-+#include <cstddef>
- #include <c10/util/complex.h>
- namespace c10 {
diff --git a/pytorch-onednn.patch b/pytorch-onednn.patch
deleted file mode 100644
index 4cda8a40c7bde40bb7ae7d35113925bd109a4b91..0000000000000000000000000000000000000000
--- a/pytorch-onednn.patch
+++ /dev/null
@@ -1,29 +0,0 @@
-diff --git a/aten/src/ATen/Version.cpp b/aten/src/ATen/Version.cpp
-index 0c0ea61c..cff309a9 100644
---- a/aten/src/ATen/Version.cpp
-+++ b/aten/src/ATen/Version.cpp
-@@ -6,7 +6,7 @@
- #endif
- 
- #if AT_MKLDNN_ENABLED()
--#include <mkldnn.hpp>
-+#include <dnnl.hpp>
- #include <ideep.hpp>
- #endif
- 
-@@ -42,12 +42,12 @@ std::string get_mkldnn_version() {
-     // Apparently no way to get ideep version?
-     // https://github.com/intel/ideep/issues/29
-     {
--      const mkldnn_version_t* ver = mkldnn_version();
--      ss << "Intel(R) MKL-DNN v" << ver->major << "." << ver->minor << "." << ver->patch
-+      const dnnl_version_t* ver = dnnl_version();
-+      ss << "Intel(R) ONEDNN v" << ver->major << "." << ver->minor << "." << ver->patch
-          << " (Git Hash " << ver->hash << ")";
-     }
-   #else
--    ss << "MKLDNN not found";
-+    ss << "ONEDNN not found";
-   #endif
-   return ss.str();
- }
diff --git a/pytorch-quant-cpp.patch b/pytorch-quant-cpp.patch
deleted file mode 100644
index e20123fb2d6e1aa91b732e355561193a2d568321..0000000000000000000000000000000000000000
--- a/pytorch-quant-cpp.patch
+++ /dev/null
@@ -1,83 +0,0 @@
-diff --git a/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear.cpp b/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear.cpp
-index ec1700665..03b1f7b3b 100644
---- a/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear.cpp
-+++ b/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear.cpp
-@@ -238,12 +238,12 @@ class QLinearInt8 final {
- };
- 
- TORCH_LIBRARY_IMPL(sparse, QuantizedCPU, m) {
--  m.impl(
--      TORCH_SELECTIVE_NAME("sparse::qlinear"),
--      TORCH_FN(QLinearInt8<false>::run));
--  m.impl(
--      TORCH_SELECTIVE_NAME("sparse::qlinear_relu"),
--      TORCH_FN(QLinearInt8<true>::run));
-+//  m.impl(
-+//      TORCH_SELECTIVE_NAME("sparse::qlinear"),
-+//      TORCH_FN(QLinearInt8<false>::run));
-+//  m.impl(
-+//      TORCH_SELECTIVE_NAME("sparse::qlinear_relu"),
-+//      TORCH_FN(QLinearInt8<true>::run));
- }
- 
- } // namespace
-diff --git a/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_dynamic.cpp b/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_dynamic.cpp
-index 8d17b4285..9fde9ab37 100644
---- a/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_dynamic.cpp
-+++ b/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_dynamic.cpp
-@@ -178,12 +178,12 @@ class QLinearDynamicInt8 final {
- };
- 
- TORCH_LIBRARY_IMPL(sparse, CPU, m) {
--  m.impl(
--      TORCH_SELECTIVE_NAME("sparse::qlinear_dynamic"),
--      TORCH_FN(QLinearDynamicInt8<false>::run));
--  m.impl(
--      TORCH_SELECTIVE_NAME("sparse::qlinear_relu_dynamic"),
--      TORCH_FN(QLinearDynamicInt8<true>::run));
-+//  m.impl(
-+//      TORCH_SELECTIVE_NAME("sparse::qlinear_dynamic"),
-+//      TORCH_FN(QLinearDynamicInt8<false>::run));
-+//  m.impl(
-+//      TORCH_SELECTIVE_NAME("sparse::qlinear_relu_dynamic"),
-+//      TORCH_FN(QLinearDynamicInt8<true>::run));
- }
- 
- } // namespace
-diff --git a/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_prepack.cpp b/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_prepack.cpp
-index 7c780806c..9d4b59dc4 100644
---- a/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_prepack.cpp
-+++ b/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_prepack.cpp
-@@ -230,9 +230,9 @@ class QLinearPackWeightInt8 final {
- };
- 
- TORCH_LIBRARY_IMPL(sparse, QuantizedCPU, m) {
--  m.impl(
--      TORCH_SELECTIVE_NAME("sparse::qlinear_prepack"),
--      TORCH_FN(QLinearPackWeightInt8::run));
-+//  m.impl(
-+//      TORCH_SELECTIVE_NAME("sparse::qlinear_prepack"),
-+//      TORCH_FN(QLinearPackWeightInt8::run));
- }
- }  // namespace
- }}  // namespace ao::sparse
-diff --git a/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_unpack.cpp b/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_unpack.cpp
-index 2c47f3ace..42affc71c 100644
---- a/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_unpack.cpp
-+++ b/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_unpack.cpp
-@@ -68,10 +68,10 @@ class QLinearUnpackWeightInt8 final {
-   }
- };
- 
--TORCH_LIBRARY_IMPL(sparse, QuantizedCPU, m) {
--  m.impl(
--      TORCH_SELECTIVE_NAME("sparse::qlinear_unpack"),
--      TORCH_FN(QLinearUnpackWeightInt8::run));
--}
-+//TORCH_LIBRARY_IMPL(sparse, QuantizedCPU, m) {
-+//  m.impl(
-+//      TORCH_SELECTIVE_NAME("sparse::qlinear_unpack"),
-+//      TORCH_FN(QLinearUnpackWeightInt8::run));
-+//}
- }  // namespace
- }}  // namespace ao::sparse
diff --git a/pytorch-xnnpack.patch b/pytorch-xnnpack.patch
deleted file mode 100644
index 95fa4a5bf5a58a63959432da042f5bb7dd37234b..0000000000000000000000000000000000000000
--- a/pytorch-xnnpack.patch
+++ /dev/null
@@ -1,211 +0,0 @@
-diff --git a/aten/src/ATen/native/quantized/cpu/XnnpackUtils.h b/aten/src/ATen/native/quantized/cpu/XnnpackUtils.h
-index fdc2190..3ee69d7 100644
---- a/aten/src/ATen/native/quantized/cpu/XnnpackUtils.h
-+++ b/aten/src/ATen/native/quantized/cpu/XnnpackUtils.h
-@@ -100,6 +100,7 @@ enum xnn_status xnnp_create_convolution2d_nhwc(
-         op_max,         /* int8_t output_max                    */
-         flags,          /* uint32_t flags                       */
-         nullptr,        /* xnn_caches_t caches                  */
-+        nullptr,        /* xnn_weights_cache                    */
-         op);            /* xnn_operator_t* deconvolution_op_out */
- 
-   }
-@@ -132,6 +133,7 @@ enum xnn_status xnnp_create_convolution2d_nhwc(
-         op_max,         /* int8_t output_max                  */
-         flags,          /* uint32_t flags                     */
-         nullptr,        /* xnn_caches_t caches                */
-+        nullptr,        /* xnn_weights_cache                    */
-         op);            /* xnn_operator_t* convolution_op_out */
-   } else { /* per_channel */
-     return xnn_create_convolution2d_nhwc_qc8(
-@@ -161,6 +163,7 @@ enum xnn_status xnnp_create_convolution2d_nhwc(
-         op_max,         /* int8_t output_max                  */
-         flags,          /* uint32_t flags                     */
-         nullptr,        /* xnn_caches_t caches                */
-+        nullptr,        /* xnn_weights_cache                    */
-         op);            /* xnn_operator_t* convolution_op_out */
-   }
- }
-@@ -198,21 +201,23 @@ enum xnn_status xnnp_setup_convolution2d_nhwc(
-   if (!per_channel) {
-     return xnn_setup_convolution2d_nhwc_qs8(
-         op,       /* xnn_operator_t convolution_op */
--        batch,    /* size_t batch_size             */
--        in_h,     /* size_t input_height           */
--        in_w,     /* size_t input_width            */
-+//        batch,    /* size_t batch_size             */
-+//        in_h,     /* size_t input_height           */
-+//        in_w,     /* size_t input_width            */
-         inp,      /* const int8_t* input           */
--        outp,     /* int8_t* output                */
--        pt_pool); /* pthreadpool_t threadpool      */
-+        outp     /* int8_t* output                */
-+//        pt_pool
-+        ); /* pthreadpool_t threadpool      */
-   } else { /* per_channel */
-     return xnn_setup_convolution2d_nhwc_qc8(
-         op,       /* xnn_operator_t convolution_op */
--        batch,    /* size_t batch_size             */
--        in_h,     /* size_t input_height           */
--        in_w,     /* size_t input_width            */
-+//        batch,    /* size_t batch_size             */
-+//        in_h,     /* size_t input_height           */
-+//        in_w,     /* size_t input_width            */
-         inp,      /* const int8_t* input           */
--        outp,     /* int8_t* output                */
--        pt_pool); /* pthreadpool_t threadpool      */
-+        outp     /* int8_t* output                */
-+//        pt_pool
-+        ); /* pthreadpool_t threadpool      */
-   }
- }
- 
-@@ -258,6 +263,7 @@ enum xnn_status xnnp_create_fully_connected_nc(
-       output_max,              /* int8_t output_max                      */
-       flags,                   /* uint32_t flags                         */
-       nullptr,                 /* xnn_caches_t caches                    */
-+      nullptr,                 /* xnn_weights_cache                    */
-       fully_connected_op_out); /* xnn_operator_t* fully_connected_op_out */
- }
- 
-diff --git a/aten/src/ATen/native/quantized/cpu/BinaryOps.cpp b/aten/src/ATen/native/quantized/cpu/BinaryOps.cpp
-index 1d1a77a..bbda905 100644
---- a/aten/src/ATen/native/quantized/cpu/BinaryOps.cpp
-+++ b/aten/src/ATen/native/quantized/cpu/BinaryOps.cpp
-@@ -269,14 +269,15 @@ enum xnn_status xnnp_setup_add_nd(
-     pthreadpool_t pt_pool) {
-   return xnn_setup_add_nd_qs8(
-       op,             /* xnn_operator_t add_op      */
--      a_shape.size(), /* size_t num_input1_dims     */
--      a_shape.data(), /* const size_t* input1_shape */
--      b_shape.size(), /* size_t num_input2_dims     */
--      b_shape.data(), /* const size_t* input2_shape */
-+//      a_shape.size(), /* size_t num_input1_dims     */
-+//      a_shape.data(), /* const size_t* input1_shape */
-+//      b_shape.size(), /* size_t num_input2_dims     */
-+//      b_shape.data(), /* const size_t* input2_shape */
-       da,             /* const int8_t* input1       */
-       db,             /* const int8_t* input2       */
--      dc,             /* int8_t* output             */
--      pt_pool);       /* pthreadpool_t threadpool   */
-+      dc             /* int8_t* output             */
-+//      pt_pool
-+      );       /* pthreadpool_t threadpool   */
- }
- 
- template <typename scalar_t, bool ReLUFused = false>
-diff --git a/aten/src/ATen/native/quantized/cpu/qmul.cpp b/aten/src/ATen/native/quantized/cpu/qmul.cpp
-index aa6ad0e..7318be3 100644
---- a/aten/src/ATen/native/quantized/cpu/qmul.cpp
-+++ b/aten/src/ATen/native/quantized/cpu/qmul.cpp
-@@ -142,14 +142,15 @@ Tensor _mul_out_xnnpack(
-   // set up operator
-   status = xnn_setup_multiply_nd_qs8(
-       xnnp_qmul_operator.get(),
--      self_shape.size(),
--      self_shape.data(),
--      other_shape.size(),
--      other_shape.data(),
-+//      self_shape.size(),
-+//      self_shape.data(),
-+//      other_shape.size(),
-+//      other_shape.data(),
-       reinterpret_cast<const underlying_t*>(self_contig.data_ptr<scalar_t>()),
-       reinterpret_cast<const underlying_t*>(other_contig.data_ptr<scalar_t>()),
--      reinterpret_cast<underlying_t*>(out.data_ptr<scalar_t>()),
--      caffe2::pthreadpool_());
-+      reinterpret_cast<underlying_t*>(out.data_ptr<scalar_t>())
-+//      caffe2::pthreadpool_()
-+      );
- 
-   TORCH_CHECK(
-       status == xnn_status_success,
-diff --git a/aten/src/ATen/native/xnnpack/Activation.cpp b/aten/src/ATen/native/xnnpack/Activation.cpp
-index 664be58..ce39cdb 100644
---- a/aten/src/ATen/native/xnnpack/Activation.cpp
-+++ b/aten/src/ATen/native/xnnpack/Activation.cpp
-@@ -37,10 +37,11 @@ Tensor& hardswish_impl(Tensor& input, Tensor& output) {
- 
-   const xnn_status setup_status = xnn_setup_hardswish_nc_f32(
-     hardswish_op,
--    input.numel(),  // Batch
-+//    input.numel(),  // Batch
-     input.data_ptr<float>(),
--    output.data_ptr<float>(),
--    caffe2::pthreadpool_());  // threadpool
-+    output.data_ptr<float>()
-+//    caffe2::pthreadpool_()
-+    );  // threadpool
- 
-   TORCH_CHECK(
-     xnn_status_success == setup_status,
-diff --git a/aten/src/ATen/native/xnnpack/ChannelShuffle.cpp b/aten/src/ATen/native/xnnpack/ChannelShuffle.cpp
-index 8b20eca..d41f244 100644
---- a/aten/src/ATen/native/xnnpack/ChannelShuffle.cpp
-+++ b/aten/src/ATen/native/xnnpack/ChannelShuffle.cpp
-@@ -82,10 +82,11 @@ Tensor channel_shuffle(
- 
-   const xnn_status setup_status = xnn_setup_channel_shuffle_nc_x32(
-       channel_shuffle_op,                                           // operator
--      batch_size,                                                   // batch_size
-+//      batch_size,                                                   // batch_size
-       input_padded_contig_nhwc.data_ptr<float>(),                   // input
--      output_padded_contig_nhwc.data_ptr<float>(),                  // output
--      caffe2::pthreadpool_());                                      // threadpool
-+      output_padded_contig_nhwc.data_ptr<float>()                  // output
-+//      caffe2::pthreadpool_()
-+  );                                      // threadpool
- 
-   TORCH_CHECK(
-       xnn_status_success == setup_status,
-diff --git a/aten/src/ATen/native/xnnpack/Convolution.cpp b/aten/src/ATen/native/xnnpack/Convolution.cpp
-index cf9d180..48a5267 100644
---- a/aten/src/ATen/native/xnnpack/Convolution.cpp
-+++ b/aten/src/ATen/native/xnnpack/Convolution.cpp
-@@ -237,6 +237,7 @@ ContextConv2D create(
-       output_max,                                                     // output_max
-       0u,                                                             // flags
-       nullptr,                                                        // xnn_caches_t
-+      nullptr,                                                        // xnn_weights_cache
-       &convolution_op);                                               // operator
-   } else {
-     for (const auto i : c10::irange(4)) {
-@@ -266,6 +267,7 @@ ContextConv2D create(
-       output_max,                                                     // output_max
-       0u,                                                             // flags
-       nullptr,                                                        // xnn_caches_t
-+      nullptr,                                                        // xnn_weights_cache
-       &convolution_op);                                               // operator
-   }
- 
-@@ -353,12 +355,13 @@ Tensor run(
-   } else {
-     setup_status = xnn_setup_convolution2d_nhwc_f32(
-       context.op.get(),                                      // operator
--      padded_input_nhwc.size(Layout::Activation4D::batch),   // batch_size
--      padded_input_nhwc.size(Layout::Activation4D::height),  // input_height
--      padded_input_nhwc.size(Layout::Activation4D::width),   // input_width
-+//      padded_input_nhwc.size(Layout::Activation4D::batch),   // batch_size
-+//      padded_input_nhwc.size(Layout::Activation4D::height),  // input_height
-+//      padded_input_nhwc.size(Layout::Activation4D::width),   // input_width
-       padded_input_nhwc.data_ptr<float>(),                   // input
--      output.data_ptr<float>(),                              // output
--      caffe2::pthreadpool_());
-+      output.data_ptr<float>()                              // output
-+//      caffe2::pthreadpool_()
-+);
-   }
- 
-   TORCH_CHECK(
-diff --git a/aten/src/ATen/native/xnnpack/Linear.cpp b/aten/src/ATen/native/xnnpack/Linear.cpp
-index 37e3c6e..0bca1ae 100644
---- a/aten/src/ATen/native/xnnpack/Linear.cpp
-+++ b/aten/src/ATen/native/xnnpack/Linear.cpp
-@@ -98,6 +98,7 @@ ContextLinear create(
-       output_max,                                                     // output_max
-       0u,                                                             // flags
-       nullptr,                                                        // xnn_caches_t
-+      nullptr,                                                        // xnn_weights_cache
-       &linear_op);                                                    // operator
- 
-   TORCH_CHECK(
diff --git a/pytorch.spec b/pytorch.spec
index 3fde7793df5b219295d787fd912e84e575000ffe..3104847d3ba08ff6ef47c484a4f4ae2f1d7dbe39 100644
--- a/pytorch.spec
+++ b/pytorch.spec
@@ -1,14 +1,13 @@
-%define anolis_release 2
+%define anolis_release 3
+
 %global vcu_maj 12
 %global vcu_min 1
-
-# features
-%define use_dnnl   0
-%define use_magma  1
-# ext libs
-%define ext_fmt    1
-%define ext_onnx   1
-%define ext_kineto 0 
+%global _lto_cflags %{nil}
+%global __cmake_in_source_build 1
+%undefine _hardened_build
+%undefine _annotated_build
+%undefine _find_debuginfo_dwz_opts
+%undefine _missing_build_ids_terminate_build
 
 Name:           pytorch
 Version:        2.0.1 
@@ -17,85 +16,12 @@ Summary:        PyTorch Neural Network Package
 License:        BSD
 
 URL:            https://pytorch.org
+Source0:        https://github.com/pytorch/pytorch/releases/download/v%{version}/pytorch-v%{version}.tar.gz
 
-Source0:        pytorch-v2.0.1.tar.gz
-
-Patch1:         pytorch-C.patch
-Patch2:         pytorch-gcc11.patch
-Patch3:         pytorch-quant-cpp.patch
-Patch4:         pytorch-xnnpack.patch
-Patch5:         pytorch-cuda12.patch
-
-BuildRequires:  git doxygen python3-devel pybind11-devel
+BuildRequires:  python3-devel cmake gcc-c++
 BuildRequires:  python3-typing-extensions python3-pyyaml python3-setuptools
-BuildRequires:  xnnpack-devel sleef-devel nnpack-devel tbb-devel foxi-devel
-BuildRequires:  cpuinfo-devel psimd-devel qnnpack-devel
-BuildRequires:  mesa-libGLU-devel ocl-icd-devel libuv-devel rdma-core-devel miniz-devel
-BuildRequires:  hiredis-devel snappy-devel openblas-devel libzstd-devel leveldb-devel
-BuildRequires:  lmdb-devel peachpy-python3 python3-pybind11 python3-six python3-numpy
-BuildRequires:  nnpack-devel gmp-devel mpfr-devel eigen3-devel >= 3.3.9
-BuildRequires:  fp16-devel fxdiv-devel zeromq-devel numactl-devel
-BuildRequires:  glog-devel gflags-devel openblas-openmp protobuf-devel protobuf-compiler
-BuildRequires:  pthreadpool-devel opencv-devel fftw-devel flatbuffers-devel /usr/bin/flatc
-
-BuildRequires:  rocksdb-devel
-
-BuildRequires:  asmjit-devel
-
-%ifarch x86_64
-BuildRequires:  fbgemm-devel
-%endif
-
-%define ext_fmt 0
-
-%if %{use_dnnl}
-BuildRequires:  onednn-devel ideep-devel
-%endif
-
-%if %{ext_fmt}
-BuildRequires:  fmt-devel
-%endif
-
-%if %{ext_onnx}
-BuildRequires:  onnx-devel onnx-optimizer-devel
-%endif
-
-%if %{ext_kineto}
-BuildRequires:  kineto-devel
-%endif
-
-%define have_cuda 1
-%define have_tensorrt 0
-%define have_cuda_gcc 0 
-%global toolchain gcc
-
-%define gpu_target_arch "6.0 6.1 7.0 7.5 8.0 8.6"
-
-%global _lto_cflags %{nil}
-%global debug_package %{nil}
-%global __cmake_in_source_build 1
-%undefine _hardened_build
-%undefine _annotated_build
-%undefine _find_debuginfo_dwz_opts
-%undefine _missing_build_ids_terminate_build
-
-%bcond_without cuda
-%if %{without cuda}
-%global have_cuda 0
-%endif
+BuildRequires:  python3-six python3-numpy
 
-%if "%{toolchain}" == "gcc"
-BuildRequires:  gcc-c++
-%else
-BuildRequires:  clang
-%endif
-
-%if %{have_cuda}
-%if %{have_cuda_gcc}
-%if "%{toolchain}" == "gcc"
-BuildRequires:  cuda-gcc-c++
-%endif
-%endif
 BuildRequires:  cuda-nvcc-%{vcu_maj}-%{vcu_min}
 BuildRequires:  cuda-nvtx-%{vcu_maj}-%{vcu_min}
 BuildRequires:  cuda-cupti-%{vcu_maj}-%{vcu_min}
@@ -112,9 +38,9 @@ BuildRequires:  libcusolver-devel-%{vcu_maj}-%{vcu_min}
 BuildRequires:  libnvjitlink-devel-%{vcu_maj}-%{vcu_min}
 BuildRequires:  libnccl-devel
 BuildRequires:  libcudnn-devel
-%if %{use_magma}
-BuildRequires:  magma-devel
-%endif
+BuildRequires:  magma-devel numactl-devel
+BuildRequires:  chrpath
+
 Requires:  cuda-cudart-%{vcu_maj}-%{vcu_min}
 Requires:  cuda-nvrtc-%{vcu_maj}-%{vcu_min}
 Requires:  cuda-nvtx-%{vcu_maj}-%{vcu_min}
@@ -124,13 +50,9 @@ Requires:  libcurand-%{vcu_maj}-%{vcu_min}
 Requires:  libcusparse-%{vcu_maj}-%{vcu_min}
 Requires:  libcusolver-%{vcu_maj}-%{vcu_min}
 Requires:  libnvjitlink-%{vcu_maj}-%{vcu_min}
-%endif
-
-BuildRequires:  gloo-devel
 
-%if %{have_tensorrt}
-BuildRequires:  libnvinfer-plugin-devel libnvonnxparsers-devel
-%endif
+Provides:  pytorch-python3 = %{version}-%{release}
+Obsoletes: pytorch-python3 < %{version}-%{release}
 
 %description
 PyTorch is a python package that provides two high-level
@@ -143,422 +65,78 @@ Requires:       %{name} = %{version}-%{release}
 %description    devel
 This package contains development files for pythorch.
 
-%package        python3
-Summary:        Python files for pytorch
-Provides:       python%{python3_version}dist(torch) = %{version}
-Requires:       %{name} = %{version}-%{release}
-
-%description    python3
-This package contains python files for pythorch.
-
-
 %prep
-%setup -n %{name}-v%{version}
-%global _default_patch_fuzz 100
-%patch1 -p0 -b .python~
-# % patch2 -p1 -b .gcc11~
-# % patch3 -p1 -b .cpp~
-%patch4 -p1 -b .xnn~
-%patch5 -p1 -b .cu12~
-
-# python version
-sed -i -e 's|VERSION_LESS 3.10)|VERSION_LESS 3.6)|g' cmake/Dependencies.cmake
-sed -i -e 's|PY_MAJOR_VERSION == 3|PY_MAJOR_VERSION == 3 \&\& PY_MINOR_VERSION > 6|' torch/csrc/dynamo/eval_frame.c
-
-# c++std 17
-sed -i 's|CMAKE_CXX_STANDARD 14|CMAKE_CXX_STANDARD 17|' CMakeLists.txt
-
-# external fbgemm qnnpack gloo
-sed -i -e 's|torch_cpu PUBLIC c10|torch_cpu PUBLIC c10 qnnpack gloo gloo_cuda |' caffe2/CMakeLists.txt
-# external pybind11
-sed -i -e 's|USE_SYSTEM_BIND11|USE_SYSTEM_PYBIND11|g' cmake/Dependencies.cmake
-
-%if %{use_dnnl}
-# external mkl-dnn
-rm -rf cmake/Modules/FindMKLDNN.cmake
-echo 'set(DNNL_USE_NATIVE_ARCH ${USE_NATIVE_ARCH})' > cmake/public/mkldnn.cmake
-echo 'set(CAFFE2_USE_MKLDNN ON)' >> cmake/public/mkldnn.cmake
-echo 'find_package(DNNL REQUIRED)' >> cmake/public/mkldnn.cmake
-echo 'set(MKLDNN_FOUND ON)' >> cmake/public/mkldnn.cmake
-echo 'add_library(caffe2::mkldnn ALIAS DNNL::dnnl)' >> cmake/public/mkldnn.cmake
-# external dnnl
-sed -i -e 's|torch_cpu PUBLIC c10|torch_cpu PUBLIC c10 dnnl|' caffe2/CMakeLists.txt
-%endif
-
-# external pthreadpool
-rm -rf third_party/pthreadpool/*
-touch third_party/pthreadpool/CMakeLists.txt
-
-# openblas openmp first
-sed -i -e 's|NAMES openblas|NAMES openblaso openblas|' cmake/Modules/FindOpenBLAS.cmake
-
-# use external zstd
-sed -i -e 's|USE_ZSTD|NOT_USE_ZSTD|g' cmake/Dependencies.cmake
-sed -i -e 's|add_subdirectory(zstd)|list(APPEND Caffe2_PUBLIC_DEPENDENCY_LIBS zstd)|g' caffe2/share/contrib/CMakeLists.txt
-
-# use external onnx
-%if %{ext_onnx}
-sed -i -e 's|Caffe2_DEPENDENCY_LIBS onnx_proto onnx|Caffe2_DEPENDENCY_LIBS onnx_proto onnx onnx_optimizer|' cmake/Dependencies.cmake
-%endif
-
-# external tensorpipe
-mkdir -p third_party/tensorpipe
-echo '' >> third_party/tensorpipe/CMakeLists.txt
-sed -i '/add_dependencies(tensorpipe_agent tensorpipe)/d' caffe2/CMakeLists.txt
-
-# external nnpack
-echo '' > cmake/External/nnpack.cmake
-echo 'set(NNPACK_FOUND TRUE)' >> cmake/External/nnpack.cmake
-
-# external cpuinfo
-sed -i '/TARGET cpuinfo PROPERTY/d' cmake/Dependencies.cmake
-
-# external fp16
-sed -i '/APPEND Caffe2_DEPENDENCY_LIBS fp16/d' cmake/Dependencies.cmake
-
-# external qnnpack
-mkdir -p third_party/QNNPACK
-echo '' >> third_party/QNNPACK/CMakeLists.txt
-sed -i '/TARGET qnnpack PROPERTY/d' cmake/Dependencies.cmake
-sed -i -e '/target_compile_options(qnnpack/d' cmake/Dependencies.cmake
-#sed -i 's/QNNPACK_LIBRARY_TYPE \"static\"/QNNPACK_LIBRARY_TYPE \"shared\"/g' cmake/Dependencies.cmake
-
-# external psimd
-mkdir -p third_party/psimd
-echo '' >> third_party/psimd/CMakeLists.txt
-sed -i '/pytorch_qnnpack PRIVATE psimd/d' aten/src/ATen/native/quantized/cpu/qnnpack/CMakeLists.txt
-
-# external fxdiv
-sed -i '/NOT TARGET fxdiv/,/endif/d' caffe2/CMakeLists.txt
-sed -i '/torch_cpu PRIVATE fxdiv/d' caffe2/CMakeLists.txt
-sed -i '/pytorch_qnnpack PRIVATE fxdiv/d' aten/src/ATen/native/quantized/cpu/qnnpack/CMakeLists.txt
-
-# external fbgemm
-mkdir -p third_party/fbgemm
-echo '' > third_party/fbgemm/CMakeLists.txt
-sed -i '/(TARGET fbgemm/d' cmake/Dependencies.cmake
-sed -i 's|caffe2_fakelowp_ops fbgemm cpuinfo|caffe2_fakelowp_ops|' caffe2/contrib/fakelowp/CMakeLists.txt
-sed -i 's|caffe2_dnnlowp_avx2_ops fbgemm|caffe2_dnnlowp_avx2_ops|' caffe2/quantization/server/CMakeLists.txt
-
-# external foxi
-mkdir -p third_party/foxi
-echo '' > third_party/foxi/CMakeLists.txt
-
-# external gloo
-#sed -i '/c10d gloo/d' torch/lib/c10d/CMakeLists.txt
-
-# external kineto
-%if %{ext_kineto}
-sed -i '/if(NOT TARGET kineto)/,/endif()/d' cmake/Dependencies.cmake
-sed -i 's|libkineto/include|libkineto/include\n/usr/include/kineto|' torch/CMakeLists.txt
-sed -i 's|libkineto/include|libkineto/include\n/usr/include/kineto|' caffe2/CMakeLists.txt
-%endif
-
-# external fmt
-%if %{ext_fmt}
-sed -i 's|add_subdirectory(.*/fmt)|find_package(fmt REQUIRED)|g' cmake/Dependencies.cmake
-sed -i '/fmt-header-only PROPERTIES/d' cmake/Dependencies.cmake
-%endif
-
-# external miniz
-#sed -i '/miniz.c/d' caffe2/serialize/CMakeLists.txt
-
-# external tbb
-#sed -i '/^if(TBB_BUILD/,/^endif()/d' aten/src/ATen/cpu/tbb/CMakeLists.txt
-
-# external tensorrt
-mkdir -p third_party/onnx-tensorrt
-echo '' > third_party/onnx-tensorrt/CMakeLists.txt
-sed -i '/nvonnxparser_static/d' cmake/Dependencies.cmake
-sed -i 's|onnx_trt_library|nvonnxparser_static|g' cmake/Dependencies.cmake
-
-# flatbuffers
-#rm -rf torch/csrc/jit/serialization/mobile_bytecode_generated.h
-#flatc --cpp --gen-mutable --scoped-enums \
-#      -o torch/csrc/jit/serialization \
-#      -c torch/csrc/jit/serialization/mobile_bytecode.fbs
-#echo '// @generated' >> torch/csrc/jit/serialization/mobile_bytecode_generated.h
-
-# rocksdb shared
-sed -i '/find_package(RocksDB CONFIG)/d' modules/rocksdb/CMakeLists.txt
-sed -i 's|RocksDB::rocksdb|RocksDB::rocksdb-shared|' modules/rocksdb/CMakeLists.txt
-
-# no cmake cuda locals
-mv -f  cmake/Modules_CUDA_fix/FindCUDNN.cmake cmake/Modules
-rm -rf cmake/Modules_CUDA_fix
-find . -type d -name "FindCUDA" -exec rm -rf {} \;
-sed -i -e '/install/{:a;/COMPONENT/bb;N;ba;:b;/Modules_CUDA_fix/d;}' CMakeLists.txt
-
-# disable AVX2
-#sed -i -e 's|AVX2_FOUND|AVX2_NONE_FOUND|g' cmake/Codegen.cmake
-
-# remove export deps
-sed -i '/install(EXPORT Caffe2Targets/,/dev)/d' CMakeLists.txt
-
-# systeminc
-sed -i 's|SYSTEM ||g' c10/CMakeLists.txt
-sed -i 's|SYSTEM ||g' torch/CMakeLists.txt
-sed -i 's|SYSTEM ||g' caffe2/CMakeLists.txt
-sed -i 's|BEFORE SYSTEM ||g' cmake/ProtoBuf.cmake
-sed -i 's|AFTER SYSTEM ||g' cmake/Dependencies.cmake
-sed -i 's|BEFORE SYSTEM ||g' cmake/Dependencies.cmake
-sed -i 's|SYSTEM ||g' cmake/Dependencies.cmake
-
-# gcc13
-sed -i '1i #include <stdexcept>' c10/util/Registry.h
-sed -i '1i #include <cstdint>' c10/core/DispatchKey.h
-sed -i '1i #include <stdexcept>' torch/csrc/jit/runtime/logging.cpp
-sed -i '1i #include <stdexcept>' torch/csrc/lazy/core/multi_wait.cpp
-sed -i '1i #include "stdint.h"' torch/csrc/jit/passes/quantization/quantization_type.h
+%setup -q -n %{name}-v%{version}
 
 
 %build
-mkdir build
-pushd build
-export ONNX_ML=0
-export BUILD_SPLIT_CUDA=ON
-export REL_WITH_DEB_INFO=1
-export TORCH_NVCC_FLAGS="-DCUDA_HAS_FP16"
-export PYTHON_EXECUTABLE="%{__python3}"
-%global build_ldflags %(echo "%{build_ldflags}" -Wl,-lstdc++)
-%global optflags %(echo "%{optflags} -w -fpermissive -Wno-sign-compare -Wno-deprecated-declarations -Wno-nonnull -DEIGEN_HAS_CXX11_MATH=1" | sed 's|-g||')
-# -DUSE_NATIVE_ARCH=ON
-export LDFLAGS="-Wl,-lstdc++"
-export CFLAGS="${CFLAGS} -fPIC"
-export LD_LIBRARY_PATH=/usr/local/cuda-%{vcu_maj}.%{vcu_min}/lib64
-%cmake .. -Wno-dev \
-       -DCMAKE_SKIP_RPATH=ON \
-       -DCMAKE_VERBOSE_MAKEFILE=OFF \
-       -DCMAKE_BUILD_TYPE=Release \
-       -DCMAKE_NO_SYSTEM_FROM_IMPORTED=ON \
-       -DCMAKE_SKIP_RULE_DEPENDENCY=ON \
-       -DCMAKE_SUPPRESS_REGENERATION=ON \
-       -DUSE_CCACHE=OFF \
-       -DHAVE_SOVERSION=ON \
-       -DUSE_NATIVE_ARCH=OFF \
-       -DUSE_DISTRIBUTED=ON \
-       -DBUILD_DOCS=OFF \
-       -DBUILD_PYTHON=ON \
-       -DBUILD_FUNCTORCH=OFF \
-       -DBUILD_CAFFE2=ON \
-       -DBUILD_BINARY=OFF \
-       -DBUILD_BENCHMARK=OFF \
-       -DBUILD_CUSTOM_PROTOBUF=OFF \
-       -DBUILDING_WITH_TORCH_LIBS=ON \
-       -DPYTHON_EXECUTABLE="%{__python3}" \
-       -DPYBIND11_PYTHON_VERSION="%{python3_version}" \
-       -DCAFFE2_LINK_LOCAL_PROTOBUF=OFF \
-       -DONNX_ML=OFF \
-       -DUSE_GLOG=ON \
-       -DUSE_GFLAGS=ON \
-       -DUSE_OPENMP=ON \
-       -DUSE_KINETO=ON \
-       -DUSE_BREAKPAD=OFF \
-%if %{ext_onnx}
-       -DUSE_SYSTEM_ONNX=ON \
-%else
-       -DUSE_SYSTEM_ONNX=OFF \
-%endif
-       -DUSE_SYSTEM_GLOO=ON \
-       -DUSE_SYSTEM_PYBIND11=ON \
-       -DUSE_SYSTEM_EIGEN_INSTALL=ON \
-%if %{have_cuda}
-       -DUSE_CUDA=ON \
-       -DUSE_CUDNN=ON \
-       -DUSE_NVRTC=OFF \
-       -DUSE_CUPTI_SO=ON \
-       -DUSE_FAST_NVCC=ON \
-       -DUSE_SYSTEM_NCCL=ON \
-       -DCMAKE_CUDA_FLAGS="-fPIC" \
-       -DCUDA_PROPAGATE_HOST_FLAGS=OFF \
-       -DTORCH_CUDA_ARCH_LIST=%{gpu_target_arch} \
-       -DCUDA_TOOLKIT_ROOT_DIR="/usr/local/cuda-%{vcu_maj}.%{vcu_min}" \
-       -DCMAKE_CUDA_COMPILER="/usr/local/cuda-%{vcu_maj}.%{vcu_min}/bin/nvcc" \
-       -DCUDA_NVCC_FLAGS="--compiler-options;-fPIC;-Wno-deprecated-gpu-targets;-allow-unsupported-compiler;--fatbin-options;-compress-all" \
-       -DCMAKE_CUDA_FLAGS="--compiler-options -fPIC -Wno-deprecated-gpu-targets -allow-unsupported-compiler --fatbin-options -compress-all" \
-       -DNCCL_INCLUDE_DIR="%{_includedir}/nccl" \
-%if %{use_magma}
-       -DUSE_MAGMA=ON \
-%else
-       -DUSE_MAGMA=OFF \
-%endif
-       -DBUILD_SPLIT_CUDA=ON \
-%if %{have_tensorrt}
-       -DUSE_TENSORRT=ON \
-%else
-       -DUSE_TENSORRT=OFF \
-%endif
-%endif
-       -DBLAS="OpenBLAS" \
-       -DUSE_MPI=OFF \
-       -DUSE_OBSERVERS=OFF \
-       -DUSE_ASAN=OFF \
-       -DUSE_ROCM=OFF \
-%if %{use_dnnl}
-       -DUSE_MKLDNN=ON \
-%else
-       -DUSE_MKLDNN=OFF \
-%endif
-%ifarch x86_64
-       -DUSE_FBGEMM=ON \
-%else
-       -DUSE_FBGEMM=OFF \
-%endif
-       -DUSE_NNPACK=ON \
-       -DUSE_QNNPACK=ON \
-       -DUSE_PYTORCH_QNNPACK=ON \
-       -DUSE_SYSTEM_FP16=ON \
-       -DUSE_SYSTEM_PSIMD=ON \
-       -DUSE_SYSTEM_SLEEF=ON \
-       -DUSE_SYSTEM_FXDIV=ON \
-       -DUSE_SYSTEM_XNNPACK=ON \
-       -DUSE_SYSTEM_CPUINFO=ON \
-       -DUSE_SYSTEM_PTHREADPOOL=ON \
-       -DUSE_TENSORPIPE=ON \
-       -DUSE_FAKELOWP=OFF \
-       -DUSE_OPENCL=OFF \
-       -DUSE_GLOO=ON \
-       -DUSE_ZMQ=ON \
-       -DUSE_ZSTD=ON \
-       -DUSE_LMDB=ON \
-       -DUSE_REDIS=ON \
-       -DUSE_LEVELDB=ON \
-       -DUSE_ROCKSDB=OFF \
-       -DUSE_FFMPEG=OFF \
-       -DUSE_OPENCV=ON \
-       -DUSE_METAL=OFF \
-       -DUSE_TBB=OFF \
-       -DUSE_LLVM=OFF \
-       -DATEN_NO_TEST=ON
-
-make %{?_smp_mflags}
-popd
+export BUILD_TEST=False
+export PYTORCH_BUILD_VERSION=%{version}
+export PYTORCH_BUILD_NUMBER=1
+export CUDAARCHS="all"
+export CMAKE_CUDA_COMPILER=/usr/local/cuda-%{vcu_maj}.%{vcu_min}/bin/nvcc
+export TORCH_CUDA_ARCH_LIST="6.0;6.1;7.0;7.5;8.0;8.6"
+export CFLAGS="${CFLAGS} -Wno-maybe-uninitialized -Wno-uninitialized -Wno-free-nonheap-object -Wno-restrict"
+export CXXFLAGS=$CFLAGS
 
+python3 setup.py build
 
 %install
-
-#
-# install libraries
-#
-
-pushd build
-export PYTHON_EXECUTABLE="%{__python3}"
-make install DESTDIR=%{buildroot}
-
-mkdir -p %{buildroot}%{_libdir}
-find %{buildroot}/ -name "*.a" -type f -prune -exec rm -rf '{}' '+'
-rm -rf %{buildroot}/usr/lib/python*
-mv -f %{buildroot}/usr/lib/* %{buildroot}%{_libdir}/
-popd
-install -D -pm 755 build/lib/libnnapi_backend.so %{buildroot}/%{_libdir}/
-
-mkdir -p %{buildroot}/%{python3_sitearch}/torch/bin
-install -D -pm 644 build/lib/_C.so %{buildroot}/%{python3_sitearch}/torch/
-#install -D -pm 644 build/lib/_dl.so %{buildroot}/%{python3_sitearch}/torch/
-mkdir -p %{buildroot}/%{_includedir}/THC/
-install -D -pm 644 aten/src/THC/THCDeviceUtils.cuh %{buildroot}/%{_includedir}/THC/
-
-# symlinks
-ln -sf %{_includedir} %{buildroot}/%{python3_sitearch}/torch/include
-ln -sf %{_libdir} %{buildroot}/%{python3_sitearch}/torch/lib
-ln -sf %{_bindir}/torch_shm_manager %{buildroot}/%{python3_sitearch}/torch/bin/torch_shm_manager
-
-#
-# install python bits
-#
-
-# caffe2
-pushd build
-for f in `find . -name '*.py' | grep -v experiments | grep -v third_party | grep -v _test.py | grep -v docs | grep -v  examples`;
-do
-  install -D -pm 644 $f %{buildroot}/%{python3_sitearch}/$f
-done
-popd
-
-# torch
-for f in `find ./torch/ -name '*.py'`;
-do
-  install -D -pm 644 $f %{buildroot}/%{python3_sitearch}/$f
-done
-# torchgen
-for f in `find ./torchgen/ -name '*.py'`;
-do
-  install -D -pm 644 $f %{buildroot}/%{python3_sitearch}/$f
-done
-
-
-# version.py
-cuver=$(/usr/local/cuda/bin/nvcc --version | grep release | cut -d',' -f2 | awk '{print $2}')
-echo '__version__ = "%{version}"' > %{buildroot}/%{python3_sitearch}/torch/version.py
-echo 'debug = False' >> %{buildroot}/%{python3_sitearch}/torch/version.py
-echo "cuda = \"$cuver\"" >> %{buildroot}/%{python3_sitearch}/torch/version.py
-echo 'hip = None' >> %{buildroot}/%{python3_sitearch}/torch/version.py
-
-# install path
-mv -f %{buildroot}/%{_builddir}/pytorch-v%{version}/nvfuser/nvfuser.so \
-      %{buildroot}/%{_libdir}/
-mv -f %{buildroot}/%{_builddir}/pytorch-v%{version}/torch/lib/libnvfuser_codegen.so \
-      %{buildroot}/%{_libdir}/
-
-# remove junk
-rm -rf %{buildroot}/%{_includedir}/clog.h || true
-rm -rf %{buildroot}/%{_builddir}/pytorch/test || true
-rm -rf %{buildroot}/%{_builddir}/pytorch/nvfuser || true
-
-# egg info
-%{python3} setup.py egg_info
-cp -r torch.egg-info %{buildroot}%{python3_sitearch}/
-sed -i 's|[<=>].*||g' %{buildroot}%{python3_sitearch}/*.egg-info/requires.txt
-sed -i '/triton/d' %{buildroot}%{python3_sitearch}/*.egg-info/requires.txt
-# strip elf
-set +x
-find %{buildroot} -type f -print | LC_ALL=C sort |
-  file -N -f - | sed -n -e 's/^\(.*\):[ \t]*.*ELF.*, not stripped.*/\1/p' |
-  xargs --no-run-if-empty stat -c '%h %D_%i %n' |
-  while read nlinks inum f; do
-      echo "Stripping: $f"
-      strip -s $f
-  done
-set -x
-
+export BUILD_TEST=False
+export PYTORCH_BUILD_VERSION=%{version}
+export PYTORCH_BUILD_NUMBER=1
+export CUDAARCHS="all"
+export CMAKE_CUDA_COMPILER=/usr/local/cuda-%{vcu_maj}.%{vcu_min}/bin/nvcc
+export TORCH_CUDA_ARCH_LIST="6.0;6.1;7.0;7.5;8.0;8.6"
+export CFLAGS="${CFLAGS} -Wno-maybe-uninitialized -Wno-uninitialized -Wno-free-nonheap-object -Wno-restrict"
+export CXXFLAGS=$CFLAGS
+
+mkdir -p %{buildroot}/usr
+python3 setup.py install --prefix %{buildroot}/usr
+
+chrpath -d %{buildroot}/%{python3_sitearch}/torch/lib/*
+chrpath -d %{buildroot}/%{python3_sitearch}/torch/bin/*
+chrpath -d %{buildroot}/%{python3_sitearch}/nvfuser/*.so
+chrpath -d %{buildroot}/%{python3_sitearch}/functorch/*.so
+
+mkdir -p %{buildroot}/etc/ld.so.conf.d
+echo "%{python3_sitearch}/torch/lib" > %{buildroot}/etc/ld.so.conf.d/torch.conf 
+
+%ldconfig_scriptlets
+
+%pretrans -p <lua>
+path = "%{python3_sitearch}/torch/lib"  -- older builds shipped this path as a symlink to the system libdir
+st = posix.stat(path)
+if st and st.type == "link" then
+  os.remove(path)  -- drop the stale symlink so the real directory can be installed in its place
+end
+
+%pretrans devel -p <lua>
+path = "%{python3_sitearch}/torch/include"  -- older builds shipped this path as a symlink to the system include dir
+st = posix.stat(path)
+if st and st.type == "link" then
+  os.remove(path)  -- drop the stale symlink so the real directory can be installed in its place
+end
 
 %files
 %doc README.md
 %doc CONTRIBUTING.md
 %license LICENSE
 %{_bindir}/*
-%{_libdir}/libshm.so.*
-%{_libdir}/libc10.so.*
-%{_libdir}/libc10_cuda.so
-%{_libdir}/libtorch.so.*
-%{_libdir}/libtorch_cpu.so.*
-%{_libdir}/libtorch_cuda.so
-%{_libdir}/libtorch_global_deps.so.*
-%{_libdir}/libcaffe2_observers.so.*
-%{_libdir}/libcaffe2_detectron_ops_gpu.so*
-%{_libdir}/libcaffe2_nvrtc.so
-%{_libdir}/libnnapi_backend.so
-%{_libdir}/libshm.so
-%{_libdir}/libc10.so
-%{_libdir}/libtorch.so
-%{_libdir}/libtorch_cpu.so
-%{_libdir}/libtorch_global_deps.so
-%{_libdir}/libcaffe2_observers.so
-%{_libdir}/libtorch_cuda_linalg.so
-%{_libdir}/nvfuser.so
-%{_libdir}/libnvfuser_codegen.so
-
-
-%files devel
-%{_includedir}/*
-%{_datadir}/*
-
-%files python3
 %{python3_sitearch}/*
-%{_libdir}/libtorch_python.so*
+%exclude %{python3_sitearch}/torch/include
+%exclude %{python3_sitearch}/torch/share
+/etc/ld.so.conf.d/torch.conf
 
+%files devel
+%license LICENSE
+%{python3_sitearch}/torch/include
+%{python3_sitearch}/torch/share
 
 %changelog
+* Tue Aug 29 2023 Chunmei Xu <xuchunmei@linux.alibaba.com> - 2.0.1-3
+- refactor spec file
+
 * Tue Jul 18 2023 Chunmei Xu <xuchunmei@linux.alibaba.com> - 2.0.1-2
 - build with cuda support