diff --git a/pytorch-C.patch b/pytorch-C.patch deleted file mode 100644 index 26cc0296cad8362e5aae8f4bae9e832ac6e57ac5..0000000000000000000000000000000000000000 --- a/pytorch-C.patch +++ /dev/null @@ -1,19 +0,0 @@ ---- torch/CMakeLists.txt.python 2022-10-11 18:48:56.534889586 +0300 -+++ torch/CMakeLists.txt 2022-10-11 18:52:32.368263631 +0300 -@@ -279,6 +279,16 @@ - list(APPEND TORCH_PYTHON_COMPILE_DEFINITIONS USE_NCCL) - endif() - -+add_library(_C SHARED ${TORCH_SRC_DIR}/csrc/stub.c) -+target_link_libraries(_C ${PYTHON_LIBRARIES} torch_python) -+target_compile_definitions(_C PRIVATE ${TORCH_PYTHON_COMPILE_DEFINITIONS}) -+target_compile_options(_C PRIVATE ${TORCH_PYTHON_COMPILE_OPTIONS}) -+target_include_directories(_C PUBLIC ${TORCH_PYTHON_INCLUDE_DIRECTORIES}) -+set_target_properties(_C PROPERTIES PREFIX "") -+if (TORCH_PYTHON_LINK_FLAGS) -+ set_target_properties(_C PROPERTIES LINK_FLAGS "${TORCH_PYTHON_LINK_FLAGS}") -+endif() -+ - if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU") - # Workaround for https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80947 in EmbeddingBag.cpp - set_source_files_properties(${TORCH_SRC_DIR}/csrc/utils/throughput_benchmark.cpp PROPERTIES COMPILE_FLAGS -Wno-attributes) diff --git a/pytorch-cuda12.patch b/pytorch-cuda12.patch deleted file mode 100644 index fb0f0386646c4dc0db5231b3669c5f7ab90f7387..0000000000000000000000000000000000000000 --- a/pytorch-cuda12.patch +++ /dev/null @@ -1,89 +0,0 @@ -diff --git a/aten/src/ATen/native/nested/cuda/NestedTensorMatmul.cu b/aten/src/ATen/native/nested/cuda/NestedTensorMatmul.cu -index 22cf38f..9b82341 100644 ---- a/aten/src/ATen/native/nested/cuda/NestedTensorMatmul.cu -+++ b/aten/src/ATen/native/nested/cuda/NestedTensorMatmul.cu -@@ -1,3 +1,6 @@ -+#undef __CUDA_NO_HALF_OPERATORS__ -+#undef __CUDA_NO_HALF_CONVERSIONS__ -+#undef __CUDA_NO_HALF2_OPERATORS__ - #include - - #include -diff --git a/aten/src/ATen/native/nested/cuda/NestedTensorTransformerFunctions.cu 
b/aten/src/ATen/native/nested/cuda/NestedTensorTransformerFunctions.cu -index 56cac2a..933e4bf 100644 ---- a/aten/src/ATen/native/nested/cuda/NestedTensorTransformerFunctions.cu -+++ b/aten/src/ATen/native/nested/cuda/NestedTensorTransformerFunctions.cu -@@ -1,3 +1,7 @@ -+#undef __CUDA_NO_HALF_OPERATORS__ -+#undef __CUDA_NO_HALF_CONVERSIONS__ -+#undef __CUDA_NO_HALF2_OPERATORS__ -+ - #include - - #include -diff --git a/aten/src/ATen/native/transformers/cuda/attention.cu b/aten/src/ATen/native/transformers/cuda/attention.cu -index 56a4e49..14ac1b1 100644 ---- a/aten/src/ATen/native/transformers/cuda/attention.cu -+++ b/aten/src/ATen/native/transformers/cuda/attention.cu -@@ -1,3 +1,6 @@ -+#undef __CUDA_NO_HALF_OPERATORS__ -+#undef __CUDA_NO_HALF_CONVERSIONS__ -+#undef __CUDA_NO_HALF2_OPERATORS__ - #include - - #include -diff --git a/aten/src/ATen/native/transformers/cuda/attention_backward.cu b/aten/src/ATen/native/transformers/cuda/attention_backward.cu -index 62d4de2..5ca325c 100644 ---- a/aten/src/ATen/native/transformers/cuda/attention_backward.cu -+++ b/aten/src/ATen/native/transformers/cuda/attention_backward.cu -@@ -1,3 +1,6 @@ -+#undef __CUDA_NO_HALF_OPERATORS__ -+#undef __CUDA_NO_HALF_CONVERSIONS__ -+#undef __CUDA_NO_HALF2_OPERATORS__ - #include - - #include -diff --git a/aten/src/ATen/native/transformers/cuda/mem_eff_attention/kernel_backward.h b/aten/src/ATen/native/transformers/cuda/mem_eff_attention/kernel_backward.h -index e629aaa..5169b2b 100644 ---- a/aten/src/ATen/native/transformers/cuda/mem_eff_attention/kernel_backward.h -+++ b/aten/src/ATen/native/transformers/cuda/mem_eff_attention/kernel_backward.h -@@ -1,3 +1,6 @@ -+#undef __CUDA_NO_HALF_OPERATORS__ -+#undef __CUDA_NO_HALF_CONVERSIONS__ -+#undef __CUDA_NO_HALF2_OPERATORS__ - #pragma once - #include - #include -diff --git a/aten/src/ATen/native/transformers/cuda/mem_eff_attention/kernel_forward.h b/aten/src/ATen/native/transformers/cuda/mem_eff_attention/kernel_forward.h -index 
5207daa..319526e 100644 ---- a/aten/src/ATen/native/transformers/cuda/mem_eff_attention/kernel_forward.h -+++ b/aten/src/ATen/native/transformers/cuda/mem_eff_attention/kernel_forward.h -@@ -1,3 +1,6 @@ -+#undef __CUDA_NO_HALF_OPERATORS__ -+#undef __CUDA_NO_HALF_CONVERSIONS__ -+#undef __CUDA_NO_HALF2_OPERATORS__ - #include - #include - #include -diff --git a/aten/src/ATen/native/transformers/cuda/flash_attn/fmha_bwd_launch_template.h b/aten/src/ATen/native/transformers/cuda/flash_attn/fmha_bwd_launch_template.h -index f2730b6..67562d9 100644 ---- a/aten/src/ATen/native/transformers/cuda/flash_attn/fmha_bwd_launch_template.h -+++ b/aten/src/ATen/native/transformers/cuda/flash_attn/fmha_bwd_launch_template.h -@@ -1,3 +1,6 @@ -+#undef __CUDA_NO_HALF_OPERATORS__ -+#undef __CUDA_NO_HALF_CONVERSIONS__ -+#undef __CUDA_NO_HALF2_OPERATORS__ - // Copyright (c) 2022, Tri Dao. - - #pragma once -diff --git a/aten/src/ATen/native/transformers/cuda/flash_attn/fmha_fwd_launch_template.h b/aten/src/ATen/native/transformers/cuda/flash_attn/fmha_fwd_launch_template.h -index dc98732..23fd90c 100644 ---- a/aten/src/ATen/native/transformers/cuda/flash_attn/fmha_fwd_launch_template.h -+++ b/aten/src/ATen/native/transformers/cuda/flash_attn/fmha_fwd_launch_template.h -@@ -1,3 +1,6 @@ -+#undef __CUDA_NO_HALF_OPERATORS__ -+#undef __CUDA_NO_HALF_CONVERSIONS__ -+#undef __CUDA_NO_HALF2_OPERATORS__ - // Copyright (c) 2022, Tri Dao. 
- - #pragma once diff --git a/pytorch-gcc11.patch b/pytorch-gcc11.patch deleted file mode 100644 index 3ed573d446994f3cc74ad54b4bb1047f90b543f4..0000000000000000000000000000000000000000 --- a/pytorch-gcc11.patch +++ /dev/null @@ -1,27 +0,0 @@ ---- pytorch/caffe2/operators/conv_pool_op_base.h.orig 2020-12-19 02:12:49.218610548 +0200 -+++ pytorch/caffe2/operators/conv_pool_op_base.h 2020-12-18 20:28:25.278808495 +0200 -@@ -642,6 +642,7 @@ - } - - virtual ~ConvPoolOpBase() {} -+ StorageOrder order_; - - protected: - LegacyPadding legacy_pad_; -@@ -654,7 +655,6 @@ - bool float16_compute_; - - int group_; -- StorageOrder order_; - bool shared_buffer_; - Workspace* ws_; - ---- pytorch/c10/util/hash.h.orig 2020-12-20 02:32:12.985897928 +0200 -+++ pytorch/c10/util/hash.h 2020-12-20 02:32:27.560746725 +0200 -@@ -2,6 +2,7 @@ - - #include - #include -+#include - #include - namespace c10 { diff --git a/pytorch-onednn.patch b/pytorch-onednn.patch deleted file mode 100644 index 4cda8a40c7bde40bb7ae7d35113925bd109a4b91..0000000000000000000000000000000000000000 --- a/pytorch-onednn.patch +++ /dev/null @@ -1,29 +0,0 @@ -diff --git a/aten/src/ATen/Version.cpp b/aten/src/ATen/Version.cpp -index 0c0ea61c..cff309a9 100644 ---- a/aten/src/ATen/Version.cpp -+++ b/aten/src/ATen/Version.cpp -@@ -6,7 +6,7 @@ - #endif - - #if AT_MKLDNN_ENABLED() --#include -+#include - #include - #endif - -@@ -42,12 +42,12 @@ std::string get_mkldnn_version() { - // Apparently no way to get ideep version? - // https://github.com/intel/ideep/issues/29 - { -- const mkldnn_version_t* ver = mkldnn_version(); -- ss << "Intel(R) MKL-DNN v" << ver->major << "." << ver->minor << "." << ver->patch -+ const dnnl_version_t* ver = dnnl_version(); -+ ss << "Intel(R) ONEDNN v" << ver->major << "." << ver->minor << "." 
<< ver->patch - << " (Git Hash " << ver->hash << ")"; - } - #else -- ss << "MKLDNN not found"; -+ ss << "ONEDNN not found"; - #endif - return ss.str(); - } diff --git a/pytorch-quant-cpp.patch b/pytorch-quant-cpp.patch deleted file mode 100644 index e20123fb2d6e1aa91b732e355561193a2d568321..0000000000000000000000000000000000000000 --- a/pytorch-quant-cpp.patch +++ /dev/null @@ -1,83 +0,0 @@ -diff --git a/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear.cpp b/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear.cpp -index ec1700665..03b1f7b3b 100644 ---- a/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear.cpp -+++ b/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear.cpp -@@ -238,12 +238,12 @@ class QLinearInt8 final { - }; - - TORCH_LIBRARY_IMPL(sparse, QuantizedCPU, m) { -- m.impl( -- TORCH_SELECTIVE_NAME("sparse::qlinear"), -- TORCH_FN(QLinearInt8::run)); -- m.impl( -- TORCH_SELECTIVE_NAME("sparse::qlinear_relu"), -- TORCH_FN(QLinearInt8::run)); -+// m.impl( -+// TORCH_SELECTIVE_NAME("sparse::qlinear"), -+// TORCH_FN(QLinearInt8::run)); -+// m.impl( -+// TORCH_SELECTIVE_NAME("sparse::qlinear_relu"), -+// TORCH_FN(QLinearInt8::run)); - } - - } // namespace -diff --git a/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_dynamic.cpp b/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_dynamic.cpp -index 8d17b4285..9fde9ab37 100644 ---- a/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_dynamic.cpp -+++ b/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_dynamic.cpp -@@ -178,12 +178,12 @@ class QLinearDynamicInt8 final { - }; - - TORCH_LIBRARY_IMPL(sparse, CPU, m) { -- m.impl( -- TORCH_SELECTIVE_NAME("sparse::qlinear_dynamic"), -- TORCH_FN(QLinearDynamicInt8::run)); -- m.impl( -- TORCH_SELECTIVE_NAME("sparse::qlinear_relu_dynamic"), -- TORCH_FN(QLinearDynamicInt8::run)); -+// m.impl( -+// TORCH_SELECTIVE_NAME("sparse::qlinear_dynamic"), -+// TORCH_FN(QLinearDynamicInt8::run)); -+// m.impl( -+// TORCH_SELECTIVE_NAME("sparse::qlinear_relu_dynamic"), 
-+// TORCH_FN(QLinearDynamicInt8::run)); - } - - } // namespace -diff --git a/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_prepack.cpp b/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_prepack.cpp -index 7c780806c..9d4b59dc4 100644 ---- a/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_prepack.cpp -+++ b/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_prepack.cpp -@@ -230,9 +230,9 @@ class QLinearPackWeightInt8 final { - }; - - TORCH_LIBRARY_IMPL(sparse, QuantizedCPU, m) { -- m.impl( -- TORCH_SELECTIVE_NAME("sparse::qlinear_prepack"), -- TORCH_FN(QLinearPackWeightInt8::run)); -+// m.impl( -+// TORCH_SELECTIVE_NAME("sparse::qlinear_prepack"), -+// TORCH_FN(QLinearPackWeightInt8::run)); - } - } // namespace - }} // namespace ao::sparse -diff --git a/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_unpack.cpp b/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_unpack.cpp -index 2c47f3ace..42affc71c 100644 ---- a/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_unpack.cpp -+++ b/aten/src/ATen/native/ao_sparse/quantized/cpu/qlinear_unpack.cpp -@@ -68,10 +68,10 @@ class QLinearUnpackWeightInt8 final { - } - }; - --TORCH_LIBRARY_IMPL(sparse, QuantizedCPU, m) { -- m.impl( -- TORCH_SELECTIVE_NAME("sparse::qlinear_unpack"), -- TORCH_FN(QLinearUnpackWeightInt8::run)); --} -+//TORCH_LIBRARY_IMPL(sparse, QuantizedCPU, m) { -+// m.impl( -+// TORCH_SELECTIVE_NAME("sparse::qlinear_unpack"), -+// TORCH_FN(QLinearUnpackWeightInt8::run)); -+//} - } // namespace - }} // namespace ao::sparse diff --git a/pytorch-xnnpack.patch b/pytorch-xnnpack.patch deleted file mode 100644 index 95fa4a5bf5a58a63959432da042f5bb7dd37234b..0000000000000000000000000000000000000000 --- a/pytorch-xnnpack.patch +++ /dev/null @@ -1,211 +0,0 @@ -diff --git a/aten/src/ATen/native/quantized/cpu/XnnpackUtils.h b/aten/src/ATen/native/quantized/cpu/XnnpackUtils.h -index fdc2190..3ee69d7 100644 ---- a/aten/src/ATen/native/quantized/cpu/XnnpackUtils.h -+++ 
b/aten/src/ATen/native/quantized/cpu/XnnpackUtils.h -@@ -100,6 +100,7 @@ enum xnn_status xnnp_create_convolution2d_nhwc( - op_max, /* int8_t output_max */ - flags, /* uint32_t flags */ - nullptr, /* xnn_caches_t caches */ -+ nullptr, /* xnn_weights_cache */ - op); /* xnn_operator_t* deconvolution_op_out */ - - } -@@ -132,6 +133,7 @@ enum xnn_status xnnp_create_convolution2d_nhwc( - op_max, /* int8_t output_max */ - flags, /* uint32_t flags */ - nullptr, /* xnn_caches_t caches */ -+ nullptr, /* xnn_weights_cache */ - op); /* xnn_operator_t* convolution_op_out */ - } else { /* per_channel */ - return xnn_create_convolution2d_nhwc_qc8( -@@ -161,6 +163,7 @@ enum xnn_status xnnp_create_convolution2d_nhwc( - op_max, /* int8_t output_max */ - flags, /* uint32_t flags */ - nullptr, /* xnn_caches_t caches */ -+ nullptr, /* xnn_weights_cache */ - op); /* xnn_operator_t* convolution_op_out */ - } - } -@@ -198,21 +201,23 @@ enum xnn_status xnnp_setup_convolution2d_nhwc( - if (!per_channel) { - return xnn_setup_convolution2d_nhwc_qs8( - op, /* xnn_operator_t convolution_op */ -- batch, /* size_t batch_size */ -- in_h, /* size_t input_height */ -- in_w, /* size_t input_width */ -+// batch, /* size_t batch_size */ -+// in_h, /* size_t input_height */ -+// in_w, /* size_t input_width */ - inp, /* const int8_t* input */ -- outp, /* int8_t* output */ -- pt_pool); /* pthreadpool_t threadpool */ -+ outp /* int8_t* output */ -+// pt_pool -+ ); /* pthreadpool_t threadpool */ - } else { /* per_channel */ - return xnn_setup_convolution2d_nhwc_qc8( - op, /* xnn_operator_t convolution_op */ -- batch, /* size_t batch_size */ -- in_h, /* size_t input_height */ -- in_w, /* size_t input_width */ -+// batch, /* size_t batch_size */ -+// in_h, /* size_t input_height */ -+// in_w, /* size_t input_width */ - inp, /* const int8_t* input */ -- outp, /* int8_t* output */ -- pt_pool); /* pthreadpool_t threadpool */ -+ outp /* int8_t* output */ -+// pt_pool -+ ); /* pthreadpool_t threadpool */ - } - } - 
-@@ -258,6 +263,7 @@ enum xnn_status xnnp_create_fully_connected_nc( - output_max, /* int8_t output_max */ - flags, /* uint32_t flags */ - nullptr, /* xnn_caches_t caches */ -+ nullptr, /* xnn_weights_cache */ - fully_connected_op_out); /* xnn_operator_t* fully_connected_op_out */ - } - -diff --git a/aten/src/ATen/native/quantized/cpu/BinaryOps.cpp b/aten/src/ATen/native/quantized/cpu/BinaryOps.cpp -index 1d1a77a..bbda905 100644 ---- a/aten/src/ATen/native/quantized/cpu/BinaryOps.cpp -+++ b/aten/src/ATen/native/quantized/cpu/BinaryOps.cpp -@@ -269,14 +269,15 @@ enum xnn_status xnnp_setup_add_nd( - pthreadpool_t pt_pool) { - return xnn_setup_add_nd_qs8( - op, /* xnn_operator_t add_op */ -- a_shape.size(), /* size_t num_input1_dims */ -- a_shape.data(), /* const size_t* input1_shape */ -- b_shape.size(), /* size_t num_input2_dims */ -- b_shape.data(), /* const size_t* input2_shape */ -+// a_shape.size(), /* size_t num_input1_dims */ -+// a_shape.data(), /* const size_t* input1_shape */ -+// b_shape.size(), /* size_t num_input2_dims */ -+// b_shape.data(), /* const size_t* input2_shape */ - da, /* const int8_t* input1 */ - db, /* const int8_t* input2 */ -- dc, /* int8_t* output */ -- pt_pool); /* pthreadpool_t threadpool */ -+ dc /* int8_t* output */ -+// pt_pool -+ ); /* pthreadpool_t threadpool */ - } - - template -diff --git a/aten/src/ATen/native/quantized/cpu/qmul.cpp b/aten/src/ATen/native/quantized/cpu/qmul.cpp -index aa6ad0e..7318be3 100644 ---- a/aten/src/ATen/native/quantized/cpu/qmul.cpp -+++ b/aten/src/ATen/native/quantized/cpu/qmul.cpp -@@ -142,14 +142,15 @@ Tensor _mul_out_xnnpack( - // set up operator - status = xnn_setup_multiply_nd_qs8( - xnnp_qmul_operator.get(), -- self_shape.size(), -- self_shape.data(), -- other_shape.size(), -- other_shape.data(), -+// self_shape.size(), -+// self_shape.data(), -+// other_shape.size(), -+// other_shape.data(), - reinterpret_cast(self_contig.data_ptr()), - reinterpret_cast(other_contig.data_ptr()), -- 
reinterpret_cast(out.data_ptr()), -- caffe2::pthreadpool_()); -+ reinterpret_cast(out.data_ptr()) -+// caffe2::pthreadpool_() -+ ); - - TORCH_CHECK( - status == xnn_status_success, -diff --git a/aten/src/ATen/native/xnnpack/Activation.cpp b/aten/src/ATen/native/xnnpack/Activation.cpp -index 664be58..ce39cdb 100644 ---- a/aten/src/ATen/native/xnnpack/Activation.cpp -+++ b/aten/src/ATen/native/xnnpack/Activation.cpp -@@ -37,10 +37,11 @@ Tensor& hardswish_impl(Tensor& input, Tensor& output) { - - const xnn_status setup_status = xnn_setup_hardswish_nc_f32( - hardswish_op, -- input.numel(), // Batch -+// input.numel(), // Batch - input.data_ptr(), -- output.data_ptr(), -- caffe2::pthreadpool_()); // threadpool -+ output.data_ptr() -+// caffe2::pthreadpool_() -+ ); // threadpool - - TORCH_CHECK( - xnn_status_success == setup_status, -diff --git a/aten/src/ATen/native/xnnpack/ChannelShuffle.cpp b/aten/src/ATen/native/xnnpack/ChannelShuffle.cpp -index 8b20eca..d41f244 100644 ---- a/aten/src/ATen/native/xnnpack/ChannelShuffle.cpp -+++ b/aten/src/ATen/native/xnnpack/ChannelShuffle.cpp -@@ -82,10 +82,11 @@ Tensor channel_shuffle( - - const xnn_status setup_status = xnn_setup_channel_shuffle_nc_x32( - channel_shuffle_op, // operator -- batch_size, // batch_size -+// batch_size, // batch_size - input_padded_contig_nhwc.data_ptr(), // input -- output_padded_contig_nhwc.data_ptr(), // output -- caffe2::pthreadpool_()); // threadpool -+ output_padded_contig_nhwc.data_ptr() // output -+// caffe2::pthreadpool_() -+ ); // threadpool - - TORCH_CHECK( - xnn_status_success == setup_status, -diff --git a/aten/src/ATen/native/xnnpack/Convolution.cpp b/aten/src/ATen/native/xnnpack/Convolution.cpp -index cf9d180..48a5267 100644 ---- a/aten/src/ATen/native/xnnpack/Convolution.cpp -+++ b/aten/src/ATen/native/xnnpack/Convolution.cpp -@@ -237,6 +237,7 @@ ContextConv2D create( - output_max, // output_max - 0u, // flags - nullptr, // xnn_caches_t -+ nullptr, // xnn_weights_cache - 
&convolution_op); // operator - } else { - for (const auto i : c10::irange(4)) { -@@ -266,6 +267,7 @@ ContextConv2D create( - output_max, // output_max - 0u, // flags - nullptr, // xnn_caches_t -+ nullptr, // xnn_weights_cache - &convolution_op); // operator - } - -@@ -353,12 +355,13 @@ Tensor run( - } else { - setup_status = xnn_setup_convolution2d_nhwc_f32( - context.op.get(), // operator -- padded_input_nhwc.size(Layout::Activation4D::batch), // batch_size -- padded_input_nhwc.size(Layout::Activation4D::height), // input_height -- padded_input_nhwc.size(Layout::Activation4D::width), // input_width -+// padded_input_nhwc.size(Layout::Activation4D::batch), // batch_size -+// padded_input_nhwc.size(Layout::Activation4D::height), // input_height -+// padded_input_nhwc.size(Layout::Activation4D::width), // input_width - padded_input_nhwc.data_ptr(), // input -- output.data_ptr(), // output -- caffe2::pthreadpool_()); -+ output.data_ptr() // output -+// caffe2::pthreadpool_() -+); - } - - TORCH_CHECK( -diff --git a/aten/src/ATen/native/xnnpack/Linear.cpp b/aten/src/ATen/native/xnnpack/Linear.cpp -index 37e3c6e..0bca1ae 100644 ---- a/aten/src/ATen/native/xnnpack/Linear.cpp -+++ b/aten/src/ATen/native/xnnpack/Linear.cpp -@@ -98,6 +98,7 @@ ContextLinear create( - output_max, // output_max - 0u, // flags - nullptr, // xnn_caches_t -+ nullptr, // xnn_weights_cache - &linear_op); // operator - - TORCH_CHECK( diff --git a/pytorch.spec b/pytorch.spec index 3fde7793df5b219295d787fd912e84e575000ffe..3104847d3ba08ff6ef47c484a4f4ae2f1d7dbe39 100644 --- a/pytorch.spec +++ b/pytorch.spec @@ -1,14 +1,13 @@ -%define anolis_release 2 +%define anolis_release 3 + %global vcu_maj 12 %global vcu_min 1 - -# features -%define use_dnnl 0 -%define use_magma 1 -# ext libs -%define ext_fmt 1 -%define ext_onnx 1 -%define ext_kineto 0 +%global _lto_cflags %{nil} +%global __cmake_in_source_build 1 +%undefine _hardened_build +%undefine _annotated_build +%undefine _find_debuginfo_dwz_opts +%undefine 
_missing_build_ids_terminate_build Name: pytorch Version: 2.0.1 @@ -17,85 +16,12 @@ Summary: PyTorch Neural Network Package License: BSD URL: https://pytorch.org +Source0: https://github.com/pytorch/pytorch/releases/download/v%{version}/pytorch-v%{version}.tar.gz -Source0: pytorch-v2.0.1.tar.gz - -Patch1: pytorch-C.patch -Patch2: pytorch-gcc11.patch -Patch3: pytorch-quant-cpp.patch -Patch4: pytorch-xnnpack.patch -Patch5: pytorch-cuda12.patch - -BuildRequires: git doxygen python3-devel pybind11-devel +BuildRequires: python3-devel cmake gcc-c++ BuildRequires: python3-typing-extensions python3-pyyaml python3-setuptools -BuildRequires: xnnpack-devel sleef-devel nnpack-devel tbb-devel foxi-devel -BuildRequires: cpuinfo-devel psimd-devel qnnpack-devel -BuildRequires: mesa-libGLU-devel ocl-icd-devel libuv-devel rdma-core-devel miniz-devel -BuildRequires: hiredis-devel snappy-devel openblas-devel libzstd-devel leveldb-devel -BuildRequires: lmdb-devel peachpy-python3 python3-pybind11 python3-six python3-numpy -BuildRequires: nnpack-devel gmp-devel mpfr-devel eigen3-devel >= 3.3.9 -BuildRequires: fp16-devel fxdiv-devel zeromq-devel numactl-devel -BuildRequires: glog-devel gflags-devel openblas-openmp protobuf-devel protobuf-compiler -BuildRequires: pthreadpool-devel opencv-devel fftw-devel flatbuffers-devel /usr/bin/flatc - -BuildRequires: rocksdb-devel - -BuildRequires: asmjit-devel - -%ifarch x86_64 -BuildRequires: fbgemm-devel -%endif - -%define ext_fmt 0 - -%if %{use_dnnl} -BuildRequires: onednn-devel ideep-devel -%endif - -%if %{ext_fmt} -BuildRequires: fmt-devel -%endif - -%if %{ext_onnx} -BuildRequires: onnx-devel onnx-optimizer-devel -%endif - -%if %{ext_kineto} -BuildRequires: kineto-devel -%endif - -%define have_cuda 1 -%define have_tensorrt 0 -%define have_cuda_gcc 0 -%global toolchain gcc - -%define gpu_target_arch "6.0 6.1 7.0 7.5 8.0 8.6" - -%global _lto_cflags %{nil} -%global debug_package %{nil} -%global __cmake_in_source_build 1 -%undefine _hardened_build 
-%undefine _annotated_build -%undefine _find_debuginfo_dwz_opts -%undefine _missing_build_ids_terminate_build - -%bcond_without cuda -%if %{without cuda} -%global have_cuda 0 -%endif +BuildRequires: python3-six python3-numpy -%if "%{toolchain}" == "gcc" -BuildRequires: gcc-c++ -%else -BuildRequires: clang -%endif - -%if %{have_cuda} -%if %{have_cuda_gcc} -%if "%{toolchain}" == "gcc" -BuildRequires: cuda-gcc-c++ -%endif -%endif BuildRequires: cuda-nvcc-%{vcu_maj}-%{vcu_min} BuildRequires: cuda-nvtx-%{vcu_maj}-%{vcu_min} BuildRequires: cuda-cupti-%{vcu_maj}-%{vcu_min} @@ -112,9 +38,9 @@ BuildRequires: libcusolver-devel-%{vcu_maj}-%{vcu_min} BuildRequires: libnvjitlink-devel-%{vcu_maj}-%{vcu_min} BuildRequires: libnccl-devel BuildRequires: libcudnn-devel -%if %{use_magma} -BuildRequires: magma-devel -%endif +BuildRequires: magma-devel numactl-devel +BuildRequires: chrpath + Requires: cuda-cudart-%{vcu_maj}-%{vcu_min} Requires: cuda-nvrtc-%{vcu_maj}-%{vcu_min} Requires: cuda-nvtx-%{vcu_maj}-%{vcu_min} @@ -124,13 +50,9 @@ Requires: libcurand-%{vcu_maj}-%{vcu_min} Requires: libcusparse-%{vcu_maj}-%{vcu_min} Requires: libcusolver-%{vcu_maj}-%{vcu_min} Requires: libnvjitlink-%{vcu_maj}-%{vcu_min} -%endif - -BuildRequires: gloo-devel -%if %{have_tensorrt} -BuildRequires: libnvinfer-plugin-devel libnvonnxparsers-devel -%endif +Provides: pytorch-python3 = %{version}-%{release} +Obsoletes: pytorch-python3 < %{version}-%{release} %description PyTorch is a python package that provides two high-level @@ -143,422 +65,78 @@ Requires: %{name} = %{version}-%{release} %description devel This package contains development files for pythorch. -%package python3 -Summary: Python files for pytorch -Provides: python%{python3_version}dist(torch) = %{version} -Requires: %{name} = %{version}-%{release} - -%description python3 -This package contains python files for pythorch. 
- - %prep -%setup -n %{name}-v%{version} -%global _default_patch_fuzz 100 -%patch1 -p0 -b .python~ -# % patch2 -p1 -b .gcc11~ -# % patch3 -p1 -b .cpp~ -%patch4 -p1 -b .xnn~ -%patch5 -p1 -b .cu12~ - -# python version -sed -i -e 's|VERSION_LESS 3.10)|VERSION_LESS 3.6)|g' cmake/Dependencies.cmake -sed -i -e 's|PY_MAJOR_VERSION == 3|PY_MAJOR_VERSION == 3 \&\& PY_MINOR_VERSION > 6|' torch/csrc/dynamo/eval_frame.c - -# c++std 17 -sed -i 's|CMAKE_CXX_STANDARD 14|CMAKE_CXX_STANDARD 17|' CMakeLists.txt - -# external fbgemm qnnpack gloo -sed -i -e 's|torch_cpu PUBLIC c10|torch_cpu PUBLIC c10 qnnpack gloo gloo_cuda |' caffe2/CMakeLists.txt -# external pybind11 -sed -i -e 's|USE_SYSTEM_BIND11|USE_SYSTEM_PYBIND11|g' cmake/Dependencies.cmake - -%if %{use_dnnl} -# external mkl-dnn -rm -rf cmake/Modules/FindMKLDNN.cmake -echo 'set(DNNL_USE_NATIVE_ARCH ${USE_NATIVE_ARCH})' > cmake/public/mkldnn.cmake -echo 'set(CAFFE2_USE_MKLDNN ON)' >> cmake/public/mkldnn.cmake -echo 'find_package(DNNL REQUIRED)' >> cmake/public/mkldnn.cmake -echo 'set(MKLDNN_FOUND ON)' >> cmake/public/mkldnn.cmake -echo 'add_library(caffe2::mkldnn ALIAS DNNL::dnnl)' >> cmake/public/mkldnn.cmake -# external dnnl -sed -i -e 's|torch_cpu PUBLIC c10|torch_cpu PUBLIC c10 dnnl|' caffe2/CMakeLists.txt -%endif - -# external pthreadpool -rm -rf third_party/pthreadpool/* -touch third_party/pthreadpool/CMakeLists.txt - -# openblas openmp first -sed -i -e 's|NAMES openblas|NAMES openblaso openblas|' cmake/Modules/FindOpenBLAS.cmake - -# use external zstd -sed -i -e 's|USE_ZSTD|NOT_USE_ZSTD|g' cmake/Dependencies.cmake -sed -i -e 's|add_subdirectory(zstd)|list(APPEND Caffe2_PUBLIC_DEPENDENCY_LIBS zstd)|g' caffe2/share/contrib/CMakeLists.txt - -# use external onnx -%if %{ext_onnx} -sed -i -e 's|Caffe2_DEPENDENCY_LIBS onnx_proto onnx|Caffe2_DEPENDENCY_LIBS onnx_proto onnx onnx_optimizer|' cmake/Dependencies.cmake -%endif - -# external tensorpipe -mkdir -p third_party/tensorpipe -echo '' >> third_party/tensorpipe/CMakeLists.txt 
-sed -i '/add_dependencies(tensorpipe_agent tensorpipe)/d' caffe2/CMakeLists.txt - -# external nnpack -echo '' > cmake/External/nnpack.cmake -echo 'set(NNPACK_FOUND TRUE)' >> cmake/External/nnpack.cmake - -# external cpuinfo -sed -i '/TARGET cpuinfo PROPERTY/d' cmake/Dependencies.cmake - -# external fp16 -sed -i '/APPEND Caffe2_DEPENDENCY_LIBS fp16/d' cmake/Dependencies.cmake - -# external qnnpack -mkdir -p third_party/QNNPACK -echo '' >> third_party/QNNPACK/CMakeLists.txt -sed -i '/TARGET qnnpack PROPERTY/d' cmake/Dependencies.cmake -sed -i -e '/target_compile_options(qnnpack/d' cmake/Dependencies.cmake -#sed -i 's/QNNPACK_LIBRARY_TYPE \"static\"/QNNPACK_LIBRARY_TYPE \"shared\"/g' cmake/Dependencies.cmake - -# external psimd -mkdir -p third_party/psimd -echo '' >> third_party/psimd/CMakeLists.txt -sed -i '/pytorch_qnnpack PRIVATE psimd/d' aten/src/ATen/native/quantized/cpu/qnnpack/CMakeLists.txt - -# external fxdiv -sed -i '/NOT TARGET fxdiv/,/endif/d' caffe2/CMakeLists.txt -sed -i '/torch_cpu PRIVATE fxdiv/d' caffe2/CMakeLists.txt -sed -i '/pytorch_qnnpack PRIVATE fxdiv/d' aten/src/ATen/native/quantized/cpu/qnnpack/CMakeLists.txt - -# external fbgemm -mkdir -p third_party/fbgemm -echo '' > third_party/fbgemm/CMakeLists.txt -sed -i '/(TARGET fbgemm/d' cmake/Dependencies.cmake -sed -i 's|caffe2_fakelowp_ops fbgemm cpuinfo|caffe2_fakelowp_ops|' caffe2/contrib/fakelowp/CMakeLists.txt -sed -i 's|caffe2_dnnlowp_avx2_ops fbgemm|caffe2_dnnlowp_avx2_ops|' caffe2/quantization/server/CMakeLists.txt - -# external foxi -mkdir -p third_party/foxi -echo '' > third_party/foxi/CMakeLists.txt - -# external gloo -#sed -i '/c10d gloo/d' torch/lib/c10d/CMakeLists.txt - -# external kineto -%if %{ext_kineto} -sed -i '/if(NOT TARGET kineto)/,/endif()/d' cmake/Dependencies.cmake -sed -i 's|libkineto/include|libkineto/include\n/usr/include/kineto|' torch/CMakeLists.txt -sed -i 's|libkineto/include|libkineto/include\n/usr/include/kineto|' caffe2/CMakeLists.txt -%endif - -# external fmt 
-%if %{ext_fmt} -sed -i 's|add_subdirectory(.*/fmt)|find_package(fmt REQUIRED)|g' cmake/Dependencies.cmake -sed -i '/fmt-header-only PROPERTIES/d' cmake/Dependencies.cmake -%endif - -# external miniz -#sed -i '/miniz.c/d' caffe2/serialize/CMakeLists.txt - -# external tbb -#sed -i '/^if(TBB_BUILD/,/^endif()/d' aten/src/ATen/cpu/tbb/CMakeLists.txt - -# external tensorrt -mkdir -p third_party/onnx-tensorrt -echo '' > third_party/onnx-tensorrt/CMakeLists.txt -sed -i '/nvonnxparser_static/d' cmake/Dependencies.cmake -sed -i 's|onnx_trt_library|nvonnxparser_static|g' cmake/Dependencies.cmake - -# flatbuffers -#rm -rf torch/csrc/jit/serialization/mobile_bytecode_generated.h -#flatc --cpp --gen-mutable --scoped-enums \ -# -o torch/csrc/jit/serialization \ -# -c torch/csrc/jit/serialization/mobile_bytecode.fbs -#echo '// @generated' >> torch/csrc/jit/serialization/mobile_bytecode_generated.h - -# rocksdb shared -sed -i '/find_package(RocksDB CONFIG)/d' modules/rocksdb/CMakeLists.txt -sed -i 's|RocksDB::rocksdb|RocksDB::rocksdb-shared|' modules/rocksdb/CMakeLists.txt - -# no cmake cuda locals -mv -f cmake/Modules_CUDA_fix/FindCUDNN.cmake cmake/Modules -rm -rf cmake/Modules_CUDA_fix -find . 
-type d -name "FindCUDA" -exec rm -rf {} \; -sed -i -e '/install/{:a;/COMPONENT/bb;N;ba;:b;/Modules_CUDA_fix/d;}' CMakeLists.txt - -# disable AVX2 -#sed -i -e 's|AVX2_FOUND|AVX2_NONE_FOUND|g' cmake/Codegen.cmake - -# remove export deps -sed -i '/install(EXPORT Caffe2Targets/,/dev)/d' CMakeLists.txt - -# systeminc -sed -i 's|SYSTEM ||g' c10/CMakeLists.txt -sed -i 's|SYSTEM ||g' torch/CMakeLists.txt -sed -i 's|SYSTEM ||g' caffe2/CMakeLists.txt -sed -i 's|BEFORE SYSTEM ||g' cmake/ProtoBuf.cmake -sed -i 's|AFTER SYSTEM ||g' cmake/Dependencies.cmake -sed -i 's|BEFORE SYSTEM ||g' cmake/Dependencies.cmake -sed -i 's|SYSTEM ||g' cmake/Dependencies.cmake - -# gcc13 -sed -i '1i #include ' c10/util/Registry.h -sed -i '1i #include ' c10/core/DispatchKey.h -sed -i '1i #include ' torch/csrc/jit/runtime/logging.cpp -sed -i '1i #include ' torch/csrc/lazy/core/multi_wait.cpp -sed -i '1i #include "stdint.h"' torch/csrc/jit/passes/quantization/quantization_type.h +%setup -q -n %{name}-v%{version} %build -mkdir build -pushd build -export ONNX_ML=0 -export BUILD_SPLIT_CUDA=ON -export REL_WITH_DEB_INFO=1 -export TORCH_NVCC_FLAGS="-DCUDA_HAS_FP16" -export PYTHON_EXECUTABLE="%{__python3}" -%global build_ldflags %(echo "%{build_ldflags}" -Wl,-lstdc++) -%global optflags %(echo "%{optflags} -w -fpermissive -Wno-sign-compare -Wno-deprecated-declarations -Wno-nonnull -DEIGEN_HAS_CXX11_MATH=1" | sed 's|-g||') -# -DUSE_NATIVE_ARCH=ON -export LDFLAGS="-Wl,-lstdc++" -export CFLAGS="${CFLAGS} -fPIC" -export LD_LIBRARY_PATH=/usr/local/cuda-%{vcu_maj}.%{vcu_min}/lib64 -%cmake .. 
-Wno-dev \ - -DCMAKE_SKIP_RPATH=ON \ - -DCMAKE_VERBOSE_MAKEFILE=OFF \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_NO_SYSTEM_FROM_IMPORTED=ON \ - -DCMAKE_SKIP_RULE_DEPENDENCY=ON \ - -DCMAKE_SUPPRESS_REGENERATION=ON \ - -DUSE_CCACHE=OFF \ - -DHAVE_SOVERSION=ON \ - -DUSE_NATIVE_ARCH=OFF \ - -DUSE_DISTRIBUTED=ON \ - -DBUILD_DOCS=OFF \ - -DBUILD_PYTHON=ON \ - -DBUILD_FUNCTORCH=OFF \ - -DBUILD_CAFFE2=ON \ - -DBUILD_BINARY=OFF \ - -DBUILD_BENCHMARK=OFF \ - -DBUILD_CUSTOM_PROTOBUF=OFF \ - -DBUILDING_WITH_TORCH_LIBS=ON \ - -DPYTHON_EXECUTABLE="%{__python3}" \ - -DPYBIND11_PYTHON_VERSION="%{python3_version}" \ - -DCAFFE2_LINK_LOCAL_PROTOBUF=OFF \ - -DONNX_ML=OFF \ - -DUSE_GLOG=ON \ - -DUSE_GFLAGS=ON \ - -DUSE_OPENMP=ON \ - -DUSE_KINETO=ON \ - -DUSE_BREAKPAD=OFF \ -%if %{ext_onnx} - -DUSE_SYSTEM_ONNX=ON \ -%else - -DUSE_SYSTEM_ONNX=OFF \ -%endif - -DUSE_SYSTEM_GLOO=ON \ - -DUSE_SYSTEM_PYBIND11=ON \ - -DUSE_SYSTEM_EIGEN_INSTALL=ON \ -%if %{have_cuda} - -DUSE_CUDA=ON \ - -DUSE_CUDNN=ON \ - -DUSE_NVRTC=OFF \ - -DUSE_CUPTI_SO=ON \ - -DUSE_FAST_NVCC=ON \ - -DUSE_SYSTEM_NCCL=ON \ - -DCMAKE_CUDA_FLAGS="-fPIC" \ - -DCUDA_PROPAGATE_HOST_FLAGS=OFF \ - -DTORCH_CUDA_ARCH_LIST=%{gpu_target_arch} \ - -DCUDA_TOOLKIT_ROOT_DIR="/usr/local/cuda-%{vcu_maj}.%{vcu_min}" \ - -DCMAKE_CUDA_COMPILER="/usr/local/cuda-%{vcu_maj}.%{vcu_min}/bin/nvcc" \ - -DCUDA_NVCC_FLAGS="--compiler-options;-fPIC;-Wno-deprecated-gpu-targets;-allow-unsupported-compiler;--fatbin-options;-compress-all" \ - -DCMAKE_CUDA_FLAGS="--compiler-options -fPIC -Wno-deprecated-gpu-targets -allow-unsupported-compiler --fatbin-options -compress-all" \ - -DNCCL_INCLUDE_DIR="%{_includedir}/nccl" \ -%if %{use_magma} - -DUSE_MAGMA=ON \ -%else - -DUSE_MAGMA=OFF \ -%endif - -DBUILD_SPLIT_CUDA=ON \ -%if %{have_tensorrt} - -DUSE_TENSORRT=ON \ -%else - -DUSE_TENSORRT=OFF \ -%endif -%endif - -DBLAS="OpenBLAS" \ - -DUSE_MPI=OFF \ - -DUSE_OBSERVERS=OFF \ - -DUSE_ASAN=OFF \ - -DUSE_ROCM=OFF \ -%if %{use_dnnl} - -DUSE_MKLDNN=ON \ -%else - 
-DUSE_MKLDNN=OFF \ -%endif -%ifarch x86_64 - -DUSE_FBGEMM=ON \ -%else - -DUSE_FBGEMM=OFF \ -%endif - -DUSE_NNPACK=ON \ - -DUSE_QNNPACK=ON \ - -DUSE_PYTORCH_QNNPACK=ON \ - -DUSE_SYSTEM_FP16=ON \ - -DUSE_SYSTEM_PSIMD=ON \ - -DUSE_SYSTEM_SLEEF=ON \ - -DUSE_SYSTEM_FXDIV=ON \ - -DUSE_SYSTEM_XNNPACK=ON \ - -DUSE_SYSTEM_CPUINFO=ON \ - -DUSE_SYSTEM_PTHREADPOOL=ON \ - -DUSE_TENSORPIPE=ON \ - -DUSE_FAKELOWP=OFF \ - -DUSE_OPENCL=OFF \ - -DUSE_GLOO=ON \ - -DUSE_ZMQ=ON \ - -DUSE_ZSTD=ON \ - -DUSE_LMDB=ON \ - -DUSE_REDIS=ON \ - -DUSE_LEVELDB=ON \ - -DUSE_ROCKSDB=OFF \ - -DUSE_FFMPEG=OFF \ - -DUSE_OPENCV=ON \ - -DUSE_METAL=OFF \ - -DUSE_TBB=OFF \ - -DUSE_LLVM=OFF \ - -DATEN_NO_TEST=ON - -make %{?_smp_mflags} -popd +export BUILD_TEST=False +export PYTORCH_BUILD_VERSION=%{version} +export PYTORCH_BUILD_NUMBER=1 +export CUDAARCHS="all" +export CMAKE_CUDA_COMPILER=/usr/local/cuda-%{vcu_maj}.%{vcu_min}/bin/nvcc +export TORCH_CUDA_ARCH_LIST="6.0;6.1;7.0;7.5;8.0;8.6" +export CFLAGS="${CFLAGS} -Wno-maybe-uninitialized -Wno-uninitialized -Wno-free-nonheap-object -Wno-restrict" +export CXXFLAGS=$CFLAGS +python3 setup.py build %install - -# -# install libraries -# - -pushd build -export PYTHON_EXECUTABLE="%{__python3}" -make install DESTDIR=%{buildroot} - -mkdir -p %{buildroot}%{_libdir} -find %{buildroot}/ -name "*.a" -type f -prune -exec rm -rf '{}' '+' -rm -rf %{buildroot}/usr/lib/python* -mv -f %{buildroot}/usr/lib/* %{buildroot}%{_libdir}/ -popd -install -D -pm 755 build/lib/libnnapi_backend.so %{buildroot}/%{_libdir}/ - -mkdir -p %{buildroot}/%{python3_sitearch}/torch/bin -install -D -pm 644 build/lib/_C.so %{buildroot}/%{python3_sitearch}/torch/ -#install -D -pm 644 build/lib/_dl.so %{buildroot}/%{python3_sitearch}/torch/ -mkdir -p %{buildroot}/%{_includedir}/THC/ -install -D -pm 644 aten/src/THC/THCDeviceUtils.cuh %{buildroot}/%{_includedir}/THC/ - -# symlinks -ln -sf %{_includedir} %{buildroot}/%{python3_sitearch}/torch/include -ln -sf %{_libdir} 
%{buildroot}/%{python3_sitearch}/torch/lib -ln -sf %{_bindir}/torch_shm_manager %{buildroot}/%{python3_sitearch}/torch/bin/torch_shm_manager - -# -# install python bits -# - -# caffe2 -pushd build -for f in `find . -name '*.py' | grep -v experiments | grep -v third_party | grep -v _test.py | grep -v docs | grep -v examples`; -do - install -D -pm 644 $f %{buildroot}/%{python3_sitearch}/$f -done -popd - -# torch -for f in `find ./torch/ -name '*.py'`; -do - install -D -pm 644 $f %{buildroot}/%{python3_sitearch}/$f -done -# torchgen -for f in `find ./torchgen/ -name '*.py'`; -do - install -D -pm 644 $f %{buildroot}/%{python3_sitearch}/$f -done - - -# version.py -cuver=$(/usr/local/cuda/bin/nvcc --version | grep release | cut -d',' -f2 | awk '{print $2}') -echo '__version__ = "%{version}"' > %{buildroot}/%{python3_sitearch}/torch/version.py -echo 'debug = False' >> %{buildroot}/%{python3_sitearch}/torch/version.py -echo "cuda = \"$cuver\"" >> %{buildroot}/%{python3_sitearch}/torch/version.py -echo 'hip = None' >> %{buildroot}/%{python3_sitearch}/torch/version.py - -# install path -mv -f %{buildroot}/%{_builddir}/pytorch-v%{version}/nvfuser/nvfuser.so \ - %{buildroot}/%{_libdir}/ -mv -f %{buildroot}/%{_builddir}/pytorch-v%{version}/torch/lib/libnvfuser_codegen.so \ - %{buildroot}/%{_libdir}/ - -# remove junk -rm -rf %{buildroot}/%{_includedir}/clog.h || true -rm -rf %{buildroot}/%{_builddir}/pytorch/test || true -rm -rf %{buildroot}/%{_builddir}/pytorch/nvfuser || true - -# egg info -%{python3} setup.py egg_info -cp -r torch.egg-info %{buildroot}%{python3_sitearch}/ -sed -i 's|[<=>].*||g' %{buildroot}%{python3_sitearch}/*.egg-info/requires.txt -sed -i '/triton/d' %{buildroot}%{python3_sitearch}/*.egg-info/requires.txt -# strip elf -set +x -find %{buildroot} -type f -print | LC_ALL=C sort | - file -N -f - | sed -n -e 's/^\(.*\):[ \t]*.*ELF.*, not stripped.*/\1/p' | - xargs --no-run-if-empty stat -c '%h %D_%i %n' | - while read nlinks inum f; do - echo "Stripping: $f" - 
strip -s $f - done -set -x - +# Same environment as the build section, re-exported for the install step +export BUILD_TEST=False +export PYTORCH_BUILD_VERSION=%{version} +export PYTORCH_BUILD_NUMBER=1 +export CUDAARCHS="all" +export CMAKE_CUDA_COMPILER=/usr/local/cuda-%{vcu_maj}.%{vcu_min}/bin/nvcc +export TORCH_CUDA_ARCH_LIST="6.0;6.1;7.0;7.5;8.0;8.6" +export CFLAGS="${CFLAGS} -Wno-maybe-uninitialized -Wno-uninitialized -Wno-free-nonheap-object -Wno-restrict" +# Quoted so the multi-word flag string stays one value in any POSIX shell +export CXXFLAGS="${CFLAGS}" + +# -p: do not fail when the directory already exists +mkdir -p %{buildroot}/usr +%{__python3} setup.py install --prefix %{buildroot}/usr + +# Delete the RPATH entries from the installed binaries +chrpath -d %{buildroot}/%{python3_sitearch}/torch/lib/* +chrpath -d %{buildroot}/%{python3_sitearch}/torch/bin/* +chrpath -d %{buildroot}/%{python3_sitearch}/nvfuser/*.so +chrpath -d %{buildroot}/%{python3_sitearch}/functorch/*.so + +# Add torch/lib to the dynamic linker search path +mkdir -p %{buildroot}/etc/ld.so.conf.d +echo "%{python3_sitearch}/torch/lib" > %{buildroot}/etc/ld.so.conf.d/torch.conf + +%ldconfig_scriptlets + +%pretrans -p <lua> +-- Earlier packages installed torch/lib as a symlink; remove it so the real directory can be unpacked +path = "%{python3_sitearch}/torch/lib" +st = posix.stat(path) +if st and st.type == "link" then + os.remove(path) +end + +%pretrans devel -p <lua> +-- Earlier packages installed torch/include as a symlink; remove it so the real directory can be unpacked +path = "%{python3_sitearch}/torch/include" +st = posix.stat(path) +if st and st.type == "link" then + os.remove(path) +end %files %doc README.md %doc CONTRIBUTING.md %license LICENSE %{_bindir}/* -%{_libdir}/libshm.so.* -%{_libdir}/libc10.so.* -%{_libdir}/libc10_cuda.so -%{_libdir}/libtorch.so.* -%{_libdir}/libtorch_cpu.so.* -%{_libdir}/libtorch_cuda.so -%{_libdir}/libtorch_global_deps.so.* -%{_libdir}/libcaffe2_observers.so.* -%{_libdir}/libcaffe2_detectron_ops_gpu.so* -%{_libdir}/libcaffe2_nvrtc.so -%{_libdir}/libnnapi_backend.so -%{_libdir}/libshm.so -%{_libdir}/libc10.so -%{_libdir}/libtorch.so -%{_libdir}/libtorch_cpu.so -%{_libdir}/libtorch_global_deps.so -%{_libdir}/libcaffe2_observers.so -%{_libdir}/libtorch_cuda_linalg.so -%{_libdir}/nvfuser.so -%{_libdir}/libnvfuser_codegen.so - - -%files devel -%{_includedir}/* -%{_datadir}/* - -%files python3 %{python3_sitearch}/* -%{_libdir}/libtorch_python.so* +%exclude
%{python3_sitearch}/torch/include +%exclude %{python3_sitearch}/torch/share +/etc/ld.so.conf.d/torch.conf +%files devel +%license LICENSE +%{python3_sitearch}/torch/include +%{python3_sitearch}/torch/share %changelog +* Tue Aug 29 2023 Chunmei Xu - 2.0.1-3 +- refactor spec file + * Tue Jul 18 2023 Chunmei Xu - 2.0.1-2 - build with cuda support