[Inference] Support using some passes in the converter #70529

Open · wants to merge 2 commits into base: develop
8 changes: 8 additions & 0 deletions paddle/fluid/pir/dialect/operator/utils/utils.cc
@@ -86,6 +86,8 @@ enum class AttrType {

STRING,

TENSOR_NAME,

NUM_ATTR_TYPES,
};

@@ -110,6 +112,8 @@ static inline AttrType GetAttributeType(const pir::Attribute& attr) {
return AttrType::DATA_TYPE;
} else if (attr.isa<paddle::dialect::PlaceAttribute>()) {
return AttrType::PLACE;
} else if (attr.isa<pir::TensorNameAttribute>()) {
return AttrType::TENSOR_NAME;
} else {
PADDLE_THROW(common::errors::Unimplemented(
"Unsupported ir Attribute type when casting it into "
@@ -161,6 +165,10 @@ static std::function<T(const pir::Attribute& attr)> GetAttrCast(
[](const pir::Attribute& attr) {
return T{attr.dyn_cast<paddle::dialect::PlaceAttribute>().data()};
}},
{AttrType::TENSOR_NAME,
[](const pir::Attribute& attr) {
return T{attr.dyn_cast<pir::TensorNameAttribute>().data()};
}},
{AttrType::ARRAY,
[](const pir::Attribute& attr) {
auto attr_vec = attr.dyn_cast<pir::ArrayAttribute>().AsVector();
4 changes: 3 additions & 1 deletion paddle/fluid/pir/transforms/general/constant_folding_pass.cc
@@ -47,6 +47,7 @@
#include "paddle/pir/include/core/region.h"
#include "paddle/pir/include/core/value.h"
#include "paddle/pir/include/pass/pass.h"
#include "paddle/pir/include/pass/pass_registry.h"
#include "paddle/pir/include/pattern_rewrite/frozen_rewrite_pattern_set.h"
#include "paddle/pir/include/pattern_rewrite/pattern_match.h"
#include "paddle/pir/include/pattern_rewrite/pattern_rewrite_driver.h"
@@ -300,7 +301,6 @@ class ConstantFoldingPattern : public pir::RewritePattern {
}
paddle::framework::InterpreterCore core(
place_, {}, kernel_program->block(), scope_, *exe_config_);

core.Run({});
return output_var_names;
}
@@ -557,3 +557,5 @@ std::unique_ptr<Pass> CreateConstantFoldingPass() {
}

} // namespace pir

REGISTER_IR_PASS(constant_folding_pass, ConstantFoldingPass);
6 changes: 6 additions & 0 deletions paddle/fluid/pybind/pir.cc
@@ -2657,6 +2657,12 @@ void BindPassManager(pybind11::module *m) {
pass->Set(attr.first, new int(attr.second.cast<int>()));
} else if (py::isinstance<py::float_>(attr.second)) {
pass->Set(attr.first, new float(attr.second.cast<float>()));
} else if (py::isinstance<framework::Scope>(attr.second)) {
pass->SetNotOwned(attr.first,
attr.second.cast<framework::Scope *>());
} else if (py::isinstance<phi::GPUPlace>(attr.second)) {
pass->Set(attr.first,
new phi::Place(attr.second.cast<phi::GPUPlace>()));
} else {
PADDLE_THROW(common::errors::InvalidArgument(
"The pass attr is not supported this type."));
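Note: together with the `REGISTER_IR_PASS(constant_folding_pass, ConstantFoldingPass)` line above, these new branches are what let Python code hand a `Scope` and a GPU place to a PIR pass. A minimal sketch of the intended usage; `PassManager` is assumed to be reachable as shown, and the attribute keys are hypothetical placeholders rather than names taken from this diff:

```python
import paddle

# Hedged sketch: construct the freshly registered constant_folding_pass from
# Python and attach a Scope (handled via SetNotOwned) and a GPUPlace (stored as
# a phi::Place). PassManager is assumed to be exposed as paddle.pir.PassManager;
# on some versions it lives at paddle.base.libpaddle.pir.PassManager instead.
scope = paddle.static.global_scope()   # matched by the framework::Scope branch
place = paddle.CUDAPlace(0)            # matched by the phi::GPUPlace branch

pm = paddle.pir.PassManager()
pm.add_pass(
    "constant_folding_pass",
    {"__param_scope__": scope, "__place__": place},  # hypothetical attribute keys
)
# pm.run(pir_program) would then fold constant subgraphs using weights in `scope`.
```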
182 changes: 107 additions & 75 deletions python/paddle/tensorrt/converter.py
@@ -27,6 +27,7 @@
from paddle import pir
from paddle.base.core import clear_shape_info, get_value_shape_range_info
from paddle.base.log_helper import get_logger
from paddle.pir.core import _PADDLE_PIR_DTYPE_2_NUMPY_DTYPE

from .impls.activation import * # noqa: F403
from .impls.attribute import * # noqa: F403
@@ -75,8 +76,10 @@ def __init__(self, paddle_program, scope, trt_config=None):
# save parameters
for v in params:
name = v.get_defining_op().attrs()["parameter_name"]
weight_array = np.array(self.scope.var(name).get_tensor())
# weights = trt.Weights(weight_array)
if self.scope.find_var(name) is None:
weight_array = None
else:
weight_array = np.array(self.scope.var(name).get_tensor())
param_dict.update({name: weight_array})
self.param_dict = param_dict

@@ -147,6 +150,7 @@ def convert_subgraph_to_trt(self, program, group_op):
opt_value_map = {}
max_value_map = {}
input_names = []
new_input_values = []

# Because one of the inputs to pd_op.concat is builtin.combine,
# during the conversion process using the converter,
@@ -169,7 +173,24 @@
param_name = defining_op.attrs()["parameter_name"]
weight = trt.Weights(self.param_dict[param_name])
value_to_trt_tensor[value.id] = weight
input_names.append("")
elif defining_op.name() == "builtin.constant":
constant_value_name = defining_op.attrs()["value"]
constant_tensor = self.scope.var(
constant_value_name
).get_tensor()
out_dtype = np.dtype(
_PADDLE_PIR_DTYPE_2_NUMPY_DTYPE[value.dtype]
)
if out_dtype == np.dtype("float64"):
out_dtype = np.dtype("float32")
if out_dtype == np.dtype("int64"):
out_dtype = np.dtype("int32")
constant_data = np.array(constant_tensor, dtype=out_dtype)
if len(constant_data) == 0:
value_to_trt_tensor[value.id] = None
else:
constant_tensor = trt.Weights(constant_data)
value_to_trt_tensor[value.id] = constant_tensor
else:
shape = value.shape
dtype = map_dtype(value.dtype.name)
@@ -181,6 +202,7 @@
name=input_name, dtype=dtype, shape=shape
)
input_names.append(input_name)
new_input_values.append(value)
value_to_trt_tensor[value.id] = input_tensor

for op in operations:
@@ -193,6 +215,9 @@
if not source.initialized():
operands.append(None)
continue
vec_type = source.type().as_vec_type()
if vec_type is not None and len(vec_type.as_list()) == 0:
continue
define_op_name = source.get_defining_op().name()
if define_op_name == "builtin.combine":
operand_list = []
@@ -239,13 +264,18 @@

for idx, result in enumerate(op.results()):
if result.is_combine():
# empty vec value condition
if len(result.type().as_vec_type().as_list()) == 0:
results.append(result)
continue
used_ops = result.all_used_ops()
for use_op in used_ops:
if use_op.name() == "builtin.split":
split_outputs = use_op.results()
results.extend(split_outputs)
else:
results.append(result)

for idx, result in enumerate(results):
if idx < len(trt_outs):
value_to_trt_tensor[result.id] = trt_outs[idx]
@@ -255,84 +285,86 @@
# Set TRT min/opt/max input shape and the value of shape tensor
for i, value in enumerate(origin_input_value):
trt_input = value_to_trt_tensor[value.id]
if isinstance(trt_input, trt.Weights):
defining_op_name = value.get_defining_op().name()
if (
defining_op_name == "builtin.parameter"
or defining_op_name == "builtin.constant"
):
# constant/parameter condition, needn't get min/opt/max shape
continue
input_name = trt_input.name
if input_name != "":
_logger.info(
f"set shape of {value}, op is: {value.get_defining_op()}"

_logger.info(
f"set shape of {value}, op is: {value.get_defining_op()}"
)
min_shape = []
opt_shape = []
max_shape = []
min_value = []
opt_value = []
max_value = []

value_define_op = value.get_defining_op()
# if the input value is generated by another trt_engine_op, the shape is looked up via the original value
if (
value_define_op.name() == "builtin.split"
and value_define_op.operand_source(0).get_defining_op().name()
== "pd_op.tensorrt_engine"
):
min_shape = self.input_info[value.id]["min_shape"]
opt_shape = self.input_info[value.id]["opt_shape"]
max_shape = self.input_info[value.id]["max_shape"]
if trt_input.is_shape_tensor:
min_value = self.input_info[value.id]["min_value"]
opt_value = self.input_info[value.id]["opt_value"]
max_value = self.input_info[value.id]["max_value"]
else:
min_shape = get_value_shape_range_info(
value, False, paddle.base.core.ShapeMode.kMIN
)
opt_shape = get_value_shape_range_info(
value, False, paddle.base.core.ShapeMode.kOPT
)
max_shape = get_value_shape_range_info(
value, False, paddle.base.core.ShapeMode.kMAX
)
min_shape = []
opt_shape = []
max_shape = []
min_value = []
opt_value = []
max_value = []

value_define_op = value.get_defining_op()
# if the input value is generated by another trt_engine_op, the shape is looked up via the original value
if (
value_define_op.name() == "builtin.split"
and value_define_op.operand_source(0)
.get_defining_op()
.name()
== "pd_op.tensorrt_engine"
):
min_shape = self.input_info[value.id]["min_shape"]
opt_shape = self.input_info[value.id]["opt_shape"]
max_shape = self.input_info[value.id]["max_shape"]
if trt_input.is_shape_tensor:
min_value = self.input_info[value.id]["min_value"]
opt_value = self.input_info[value.id]["opt_value"]
max_value = self.input_info[value.id]["max_value"]
else:
min_shape = get_value_shape_range_info(
value, False, paddle.base.core.ShapeMode.kMIN
)
opt_shape = get_value_shape_range_info(
value, False, paddle.base.core.ShapeMode.kOPT
)
max_shape = get_value_shape_range_info(
value, False, paddle.base.core.ShapeMode.kMAX
)

if trt_input.is_shape_tensor:
min_value = get_value_shape_range_info(
value, True, paddle.base.core.ShapeMode.kMIN
)
opt_value = get_value_shape_range_info(
value, True, paddle.base.core.ShapeMode.kOPT
)
max_value = get_value_shape_range_info(
value, True, paddle.base.core.ShapeMode.kMAX
)
if not trt_input.is_shape_tensor:
_logger.info(f"set min_shape of {value} as {min_shape}")
_logger.info(f"set opt_shape of {value} as {opt_shape}")
_logger.info(f"set max_shape of {value} as {max_shape}")
profile.set_shape(
input_name, min=min_shape, opt=opt_shape, max=max_shape
)
else:
_logger.info(
f"set min_value of shape input: {value} as {min_value}"
)
_logger.info(
f"set max_value of shape input: {value} as {opt_value}"
if trt_input.is_shape_tensor:
min_value = get_value_shape_range_info(
value, True, paddle.base.core.ShapeMode.kMIN
)
_logger.info(
f"set opt_value of shape input: {value} as {max_value}"
opt_value = get_value_shape_range_info(
value, True, paddle.base.core.ShapeMode.kOPT
)
profile.set_shape_input(
input_name, min=min_value, opt=opt_value, max=max_value
max_value = get_value_shape_range_info(
value, True, paddle.base.core.ShapeMode.kMAX
)
if not trt_input.is_shape_tensor:
_logger.info(f"set min_shape of {value} as {min_shape}")
_logger.info(f"set opt_shape of {value} as {opt_shape}")
_logger.info(f"set max_shape of {value} as {max_shape}")
profile.set_shape(
input_name, min=min_shape, opt=opt_shape, max=max_shape
)
else:
_logger.info(
f"set min_value of shape input: {value} as {min_value}"
)
_logger.info(
f"set max_value of shape input: {value} as {opt_value}"
)
_logger.info(
f"set opt_value of shape input: {value} as {max_value}"
)
profile.set_shape_input(
input_name, min=min_value, opt=opt_value, max=max_value
)

min_shape_map[input_name] = min_shape
opt_shape_map[input_name] = opt_shape
max_shape_map[input_name] = max_shape
min_value_map[input_name] = min_value
opt_value_map[input_name] = opt_value
max_value_map[input_name] = max_value
min_shape_map[input_name] = min_shape
opt_shape_map[input_name] = opt_shape
max_shape_map[input_name] = max_shape
min_value_map[input_name] = min_value
opt_value_map[input_name] = opt_value
max_value_map[input_name] = max_value

out_shapes = []
out_names = []
@@ -471,7 +503,7 @@ def convert_subgraph_to_trt(self, program, group_op):
with paddle.pir_utils.IrGuard(), paddle.pir.core.program_guard(program):
pir.set_insertion_point(group_op)
out = paddle._C_ops.tensorrt_engine(
origin_input_value,
new_input_values,
trt_params,
input_names,
out_names,
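Aside on the `builtin.constant` branch added above: the constant tensor's dtype is narrowed before it is wrapped in `trt.Weights`, presumably because the weight path here does not handle 64-bit types, and an empty constant is mapped to `None`. A standalone sketch of that narrowing; the helper name is ours, not part of the PR:

```python
import numpy as np


def narrow_constant_dtype(np_dtype: np.dtype) -> np.dtype:
    """Map 64-bit dtypes to the 32-bit dtypes used for trt.Weights, mirroring the
    float64 -> float32 / int64 -> int32 narrowing in the branch above."""
    if np_dtype == np.dtype("float64"):
        return np.dtype("float32")
    if np_dtype == np.dtype("int64"):
        return np.dtype("int32")
    return np_dtype


# Example: an int64 constant read from the scope becomes an int32 weight array.
constant = np.arange(4, dtype="int64")
weight_array = constant.astype(narrow_constant_dtype(constant.dtype))
assert weight_array.dtype == np.dtype("int32")
```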
7 changes: 5 additions & 2 deletions python/paddle/tensorrt/converter_utils.py
@@ -453,6 +453,7 @@ def trt_reduce_to_scalar(network, tensor, dtype=trt.int32):
def convert_conv2d(network, paddle_op, inputs):
from paddle.tensorrt.util import support_fp32_mix_precision

bias = None
if (
paddle_op.name() == "pd_op.conv2d"
or paddle_op.name() == "pd_op.depthwise_conv2d"
@@ -469,7 +470,8 @@ def convert_conv2d(network, paddle_op, inputs):
output_size = None
else:
raise ValueError("Invalid number of inputs for conv2d_transpose")

if paddle_op.name() == "pd_op.fused_conv2d_add_act":
input_tensor, filter, bias, _ = inputs
input_shape = paddle_op.operands()[0].source().shape
filter_shape = paddle_op.operands()[1].source().shape

Expand Down Expand Up @@ -521,13 +523,14 @@ def convert_conv2d(network, paddle_op, inputs):
if (
paddle_op.name() == "pd_op.conv2d"
or paddle_op.name() == "pd_op.depthwise_conv2d"
or paddle_op.name() == "pd_op.fused_conv2d_add_act"
):
layer = network.add_convolution_nd(
input=input_tensor,
num_output_maps=n_output,
kernel_shape=nv_ksize,
kernel=filter,
bias=None,
bias=bias,
)
elif (
paddle_op.name() == "pd_op.conv2d_transpose"
8 changes: 4 additions & 4 deletions python/paddle/tensorrt/export.py
@@ -255,17 +255,17 @@ def convert_to_trt(program, trt_config, scope):
min_shape_feed[feed_name[i]] = min_data
max_shape_feed[feed_name[i]] = max_data

# run warmup for collecting shape
# run pir pass (including trt_op_marker_pass)
program_with_pir = run_pir_pass(program, partition_mode=False)

# run warmup for collecting shape
program = warmup_shape_infer(
program,
min_shape_feed=min_shape_feed,
max_shape_feed=max_shape_feed,
scope=scope,
)

# run pir pass (including trt_op_marker_pass)
program_with_pir = run_pir_pass(program, partition_mode=False)

# specify certain operators to be excluded from entering TensorRT
if trt_config.disable_ops:
forbid_op_lower_trt(program, trt_config.disable_ops)
3 changes: 3 additions & 0 deletions python/paddle/tensorrt/impls/conv.py
@@ -19,6 +19,9 @@

@converter_registry.register("pd_op.depthwise_conv2d", trt_version="8.x")
@converter_registry.register("pd_op.conv2d", trt_version="trt_version_ge=8.0")
@converter_registry.register(
"pd_op.fused_conv2d_add_act", trt_version="trt_version_ge=8.0"
)
@converter_registry.register("pd_op.conv2d_transpose", trt_version="8.x")
@converter_registry.register(
"pd_op.depthwise_conv2d_transpose", trt_version="8.x"
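For reference, registering `pd_op.fused_conv2d_add_act` here is what routes the fused op through `convert_conv2d` in converter_utils.py. A hypothetical sketch of the same registration pattern for an unrelated op; the op name, import path, and body are illustrative only and not part of this PR:

```python
import tensorrt as trt

# Assumed import: the registry object the impls modules decorate with.
from paddle.tensorrt.register import converter_registry


@converter_registry.register("pd_op.my_custom_relu", trt_version="trt_version_ge=8.0")
def my_custom_relu_converter(network, paddle_op, inputs):
    # Converters receive the TRT network being built, the PIR op, and the
    # already-converted input tensors/weights; they return the output tensor(s).
    (x,) = inputs
    layer = network.add_activation(x, trt.ActivationType.RELU)
    return layer.get_output(0)
```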