
Commit 96d2ddb

Author: Thiago Crepaldi
Store user model to simplify ONNXProgram.{adapt_torch_*,__call__} APIs (#115281) (#115583)
Currently (after #114407), the user must pass the original ``model`` to APIs such as ``ONNXProgram.__call__``, ``ONNXProgram.adapt_torch_inputs_to_onnx`` and ``ONNXProgram.adapt_torch_outputs_to_onnx``. This was needed because when the model is fakefied, a non-fakefied version of the model is required so that the initializers, buffers and constants can be extracted from a real model (and used as input to the ONNX model). That approach places an unnecessary usability burden on the user when the model is not fakefied, because the model already passed to ``torch.onnx.dynamo_export`` could be used to extract the ``state_dict``.

This PR adds an ``ONNXProgram._model_torch`` attribute to store the user model and demotes the ``model`` argument of the aforementioned APIs (renamed to ``model_with_state_dict``) from required to optional. As a result, in the fakefied-model scenario the user still needs to pass the model, but for non-fakefied models the persisted model is implicitly used to extract the model ``state_dict``, making the APIs easier to use.

Pull Request resolved: #115281
Approved by: https://github.com/BowenBao
ghstack dependencies: #114407
1 parent 738b4a5 commit 96d2ddb
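
To illustrate the change, here is a minimal before/after sketch for the common (non-fakefied) case. This is not part of the commit; the module and tensor names are made up for illustration:

import torch

class Add(torch.nn.Module):
    def forward(self, x, y):
        return x + y

model = Add()
x, y = torch.randn(2), torch.randn(2)
onnx_program = torch.onnx.dynamo_export(model, x, y)

# Before this PR, the model had to be passed back in explicitly:
#   ort_outputs = onnx_program(x, y, model=model)
# After this PR, the model stored at export time is used implicitly:
ort_outputs = onnx_program(x, y)
onnx_inputs = onnx_program.adapt_torch_inputs_to_onnx(x, y)
ref_outputs = onnx_program.adapt_torch_outputs_to_onnx(model(x, y))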

7 files changed (+134, -72 lines)

test/onnx/onnx_test_common.py

Lines changed: 4 additions & 3 deletions
@@ -436,12 +436,13 @@ def _compare_pytorch_onnx_with_ort(
     ref_input_args = input_args
     ref_input_kwargs = input_kwargs

-    # ONNXProgram holds a reference (not copy) to the original ref_model, including its state_dict.
+    # NOTE: ONNXProgram holds a reference (not copy) to the original ref_model, including its state_dict.
     # Thus, ONNXProgram() must run before ref_model() to prevent ref_model.forward() from changing the state_dict.
     # Otherwise, the ref_model can change buffers on state_dict which would be used by ONNXProgram.__call__()
-    ort_outputs = onnx_program(*input_args, model=ref_model, **input_kwargs)
+    # NOTE: `model_with_state_dict=ref_model` is specified to cover runs with FakeTensor support
+    ort_outputs = onnx_program(*input_args, model_with_state_dict=ref_model, **input_kwargs)
     ref_outputs = ref_model(*ref_input_args, **ref_input_kwargs)
-    ref_outputs = onnx_program.adapt_torch_outputs_to_onnx(ref_model, ref_outputs)
+    ref_outputs = onnx_program.adapt_torch_outputs_to_onnx(ref_outputs)

     if len(ref_outputs) != len(ort_outputs):
         raise AssertionError(
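
The NOTE above matters because a module's forward() can mutate registered buffers in place. A hypothetical module (not from this commit) illustrating why ONNXProgram() must run before ref_model():

import torch

class Counter(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.register_buffer("calls", torch.zeros(1))

    def forward(self, x):
        self.calls += 1  # mutates the buffer, and therefore the state_dict
        return x + self.calls

# ONNXProgram reads the live state_dict (a reference, not a copy), so
# running ref_model first would hand ONNX Runtime the mutated buffer value.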

test/onnx/test_fx_to_onnx_with_onnxruntime.py

Lines changed: 10 additions & 16 deletions
@@ -198,23 +198,15 @@ def func(x, b=1.0):
            ),
        )
        onnx_test_common.assert_dynamic_shapes(onnx_program, self.dynamic_shapes)
-        onnx_format_args = onnx_program.adapt_torch_inputs_to_onnx(
-            tensor_x, model=func, b=8.0
-        )
-        ref_outputs = onnx_program.adapt_torch_outputs_to_onnx(
-            func, func(tensor_x, 8.0)
-        )
+        onnx_format_args = onnx_program.adapt_torch_inputs_to_onnx(tensor_x, b=8.0)
+        ref_outputs = onnx_program.adapt_torch_outputs_to_onnx(func(tensor_x, 8.0))
        ort_outputs = onnx_test_common.run_ort(onnx_program, onnx_format_args)
        for ref_output, ort_output in zip(ref_outputs, ort_outputs):
            torch.testing.assert_close(ref_output, torch.tensor(ort_output))

        # test on different non-tensor input - xfail
-        onnx_format_args = onnx_program.adapt_torch_inputs_to_onnx(
-            tensor_x, model=func, b=9.0
-        )
-        ref_outputs = onnx_program.adapt_torch_outputs_to_onnx(
-            func, func(tensor_x, 9.0)
-        )
+        onnx_format_args = onnx_program.adapt_torch_inputs_to_onnx(tensor_x, b=9.0)
+        ref_outputs = onnx_program.adapt_torch_outputs_to_onnx(func(tensor_x, 9.0))
        _ = onnx_test_common.run_ort(onnx_program, onnx_format_args)
        for ref_output, ort_output in zip(ref_outputs, ort_outputs):
            torch.testing.assert_close(ref_output, torch.tensor(ort_output))
@@ -839,10 +831,10 @@ def _test_fx_symbolic_tracer_large_scale_exporter(
        kwargs = create_pytorch_only_kwargs()
        # Original outputs.
        ref_outputs = onnx_program.adapt_torch_outputs_to_onnx(
-            model, model(*args, **kwargs)
+            model(*args, **kwargs)
        )
        # ORT outputs.
-        args_not_none = onnx_program.adapt_torch_inputs_to_onnx(*args, model=model)
+        args_not_none = onnx_program.adapt_torch_inputs_to_onnx(*args)

        # Drop Parameters and buffers added by fx_serialization.save_model_with_external_data
        args_not_none = args_not_none[: len(args) - len(kwargs)]
@@ -1077,12 +1069,14 @@ def _test_fake_tensor_mode_exporter(
        args = create_args()
        kwargs = create_kwargs()
        # Original outputs.
+        # model_with_state_dict=real_model is used to create non-fake weights
        ref_outputs = onnx_program.adapt_torch_outputs_to_onnx(
-            fake_model, real_model(*args, **kwargs)
+            real_model(*args, **kwargs), model_with_state_dict=real_model
        )
        # ORT outputs.
+        # model_with_state_dict=real_model is used to create non-fake weights
        args_not_none = onnx_program.adapt_torch_inputs_to_onnx(
-            *args, model=real_model, **kwargs
+            *args, model_with_state_dict=real_model, **kwargs
        )

        ort_outputs = onnx_test_common.run_ort(
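
For the fake-tensor path exercised by _test_fake_tensor_mode_exporter, the calling convention looks roughly like the sketch below. MyModel and the tensor shapes are illustrative stand-ins, not names from this commit; see the torch.onnx.enable_fake_mode documentation for the full workflow:

import torch

with torch.onnx.enable_fake_mode() as fake_context:
    fake_model = MyModel()  # MyModel is a stand-in nn.Module; weights are FakeTensors here
    fake_args = torch.randn(2, 4)
    export_options = torch.onnx.ExportOptions(fake_context=fake_context)
    onnx_program = torch.onnx.dynamo_export(
        fake_model, fake_args, export_options=export_options
    )

real_model = MyModel()  # real weights, created outside fake mode
real_args = torch.randn(2, 4)

# The fake export holds no real initializers, so a real model must be
# supplied explicitly to materialize them:
onnx_inputs = onnx_program.adapt_torch_inputs_to_onnx(
    real_args, model_with_state_dict=real_model
)
ref_outputs = onnx_program.adapt_torch_outputs_to_onnx(
    real_model(real_args), model_with_state_dict=real_model
)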

test/onnx/torch_export/test_torch_export_with_onnxruntime.py

Lines changed: 2 additions & 4 deletions
@@ -31,12 +31,10 @@ def _compare_onnx_and_torch_exported_program(
    # NOTE: ONNXProgram holds a reference (not copy) to the original ref_model, including its state_dict.
    # Thus, ONNXProgram() must run before ref_model() to prevent ref_model.forward() from changing the state_dict.
    # Otherwise, the ref_model can change buffers on state_dict which would be used by ONNXProgram.__call__()
-    onnx_outputs = onnx_exported_program(
-        *input_args, model=torch_exported_program, **input_kwargs
-    )
+    onnx_outputs = onnx_exported_program(*input_args, **input_kwargs)
    torch_outputs = torch_exported_program(*input_args, **input_kwargs)
    torch_outputs_onnx_format = onnx_exported_program.adapt_torch_outputs_to_onnx(
-        torch_exported_program, torch_outputs
+        torch_outputs
    )
    if len(torch_outputs_onnx_format) != len(onnx_outputs):
        raise AssertionError(

torch/onnx/_internal/exporter.py

Lines changed: 48 additions & 12 deletions
@@ -659,6 +659,9 @@ class ONNXProgram:
    _fake_context: Final[Optional[ONNXFakeContext]]
    _export_exception: Final[Optional[Exception]]
    _model_signature: Final[Optional[torch.export.ExportGraphSignature]]
+    _model_torch: Final[
+        Optional[Union[torch.nn.Module, Callable, torch_export.ExportedProgram]]
+    ]

    @_beartype.beartype
    def __init__(
@@ -671,9 +674,13 @@ def __init__(
        fake_context: Optional[ONNXFakeContext] = None,
        export_exception: Optional[Exception] = None,
        model_signature: Optional[torch.export.ExportGraphSignature] = None,
+        model_torch: Optional[
+            Union[torch.nn.Module, Callable, torch_export.ExportedProgram]
+        ] = None,
    ):
        self._model_proto = model_proto
        self._model_signature = model_signature
+        self._model_torch = model_torch
        self._input_adapter = input_adapter
        self._output_adapter = output_adapter
        self._diagnostic_context = diagnostic_context
@@ -683,7 +690,9 @@ def __init__(
    def __call__(
        self,
        *args: Any,
-        model: Union[torch.nn.Module, Callable, torch_export.ExportedProgram],
+        model_with_state_dict: Optional[
+            Union[torch.nn.Module, Callable, torch_export.ExportedProgram]
+        ] = None,
        options: Optional[ONNXRuntimeOptions] = None,
        **kwargs: Any,
    ) -> Any:
@@ -692,15 +701,21 @@ def __call__(
        Args:
            args: The positional inputs to the model.
            kwargs: The keyword inputs to the model.
-            model: The PyTorch model to fetch state from.
+            model_with_state_dict: The PyTorch model to fetch state from.
+                Required when :func:`enable_fake_mode` is used to extract real initializers as needed by the ONNX graph.
            options: The options to use for running the model with ONNX Runtime.

        Returns:
            The model output as computed by ONNX Runtime
        """
        import onnxruntime  # type: ignore[import]

-        onnx_input = self.adapt_torch_inputs_to_onnx(*args, model=model, **kwargs)
+        # The model specified by the user takes precedence, when specified
+        model_with_state_dict = model_with_state_dict or self._model_torch
+
+        onnx_input = self.adapt_torch_inputs_to_onnx(
+            *args, model_with_state_dict=model_with_state_dict, **kwargs
+        )
        options = options or ONNXRuntimeOptions()
        providers = options.execution_providers or onnxruntime.get_available_providers()
        onnx_model = self.model_proto.SerializeToString()
@@ -809,7 +824,7 @@ def fake_context(self) -> Optional[ONNXFakeContext]:
    def adapt_torch_inputs_to_onnx(
        self,
        *model_args,
-        model: Optional[
+        model_with_state_dict: Optional[
            Union[torch.nn.Module, Callable, torch_export.ExportedProgram]
        ] = None,
        **model_kwargs,
@@ -828,8 +843,10 @@ def adapt_torch_inputs_to_onnx(
        This method replays the adapting steps recorded during export.

        Args:
-            model: The PyTorch model to get extra state from. If not specified, the model used during export is used.
            model_args: The PyTorch model inputs.
+            model_with_state_dict: The PyTorch model to get extra state from.
+                If not specified, the model used during export is used.
+                Required when :func:`enable_fake_mode` is used to extract real initializers as needed by the ONNX graph.
            model_kwargs: The PyTorch model keyword inputs.

        Returns:
@@ -841,7 +858,7 @@ def adapt_torch_inputs_to_onnx(
            >>> import torch
            >>> import torch.onnx
            >>> from typing import Dict, Tuple
-            >>> def func_with_nested_input_structure(
+            >>> def func_nested_input(
            ...     x_dict: Dict[str, torch.Tensor],
            ...     y_tuple: Tuple[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]
            ... ):
@@ -857,23 +874,32 @@ def adapt_torch_inputs_to_onnx(
            ...     return x + y1 + y2 + y3
            >>> x_dict = {"a": torch.tensor(1.)}
            >>> y_tuple = (torch.tensor(2.), (torch.tensor(3.), torch.tensor(4.)))
-            >>> onnx_program = torch.onnx.dynamo_export(func_with_nested_input_structure, x_dict, y_tuple)
+            >>> onnx_program = torch.onnx.dynamo_export(func_nested_input, x_dict, y_tuple)
            >>> print(x_dict, y_tuple)
            {'a': tensor(1.)} (tensor(2.), (tensor(3.), tensor(4.)))
-            >>> print(onnx_program.adapt_torch_inputs_to_onnx(x_dict, y_tuple, model=func_with_nested_input_structure))
+            >>> print(onnx_program.adapt_torch_inputs_to_onnx(x_dict, y_tuple, model_with_state_dict=func_nested_input))
            (tensor(1.), tensor(2.), tensor(3.), tensor(4.))

        .. warning::
            This API is experimental and is *NOT* backward-compatible.

        """
-        return self._input_adapter.apply(*model_args, model=model, **model_kwargs)
+        # The model specified by the user takes precedence, when specified
+        model_with_state_dict = model_with_state_dict or self._model_torch
+        assert (
+            model_with_state_dict is not None
+        ), "model_with_state_dict must be specified."
+        return self._input_adapter.apply(
+            *model_args, model=model_with_state_dict, **model_kwargs
+        )

    @_beartype.beartype
    def adapt_torch_outputs_to_onnx(
        self,
-        model: Union[torch.nn.Module, Callable, torch_export.ExportedProgram],
        model_outputs: Any,
+        model_with_state_dict: Optional[
+            Union[torch.nn.Module, Callable, torch_export.ExportedProgram]
+        ] = None,
    ) -> Sequence[Union[torch.Tensor, int, float, bool]]:
        """Converts the PyTorch model outputs to exported ONNX model outputs format.

@@ -891,6 +917,9 @@ def adapt_torch_outputs_to_onnx(
        Args:
            model: The PyTorch model to get extra state from.
            model_outputs: The PyTorch model outputs.
+            model_with_state_dict: The PyTorch model to get extra state from.
+                If not specified, the model used during export is used.
+                Required when :func:`enable_fake_mode` is used to extract real initializers as needed by the ONNX graph.

        Returns:
            PyTorch model outputs in exported ONNX model outputs format.
@@ -912,14 +941,19 @@ def adapt_torch_outputs_to_onnx(
            >>> pt_output = func_returning_tuples(x, y, z)
            >>> print(pt_output)
            (tensor(3.), (tensor(5.), tensor(8.)))
-            >>> print(onnx_program.adapt_torch_outputs_to_onnx(func_returning_tuples, pt_output))
+            >>> print(onnx_program.adapt_torch_outputs_to_onnx(pt_output, model_with_state_dict=func_returning_tuples))
            [tensor(3.), tensor(5.), tensor(8.)]

        .. warning::
            This API is experimental and is *NOT* backward-compatible.

        """
-        return self._output_adapter.apply(model, model_outputs)
+        # The model specified by the user takes precedence, when specified
+        model_with_state_dict = model_with_state_dict or self._model_torch
+        assert (
+            model_with_state_dict is not None
+        ), "model_with_state_dict must be specified."
+        return self._output_adapter.apply(model_outputs, model=model_with_state_dict)

    @_beartype.beartype
    def save(
@@ -1053,6 +1087,7 @@ def _from_failure(
        # https://github.com/pytorch/pytorch/issues/103764
        import onnx

+        # TODO: Should we populate ONNXProgram with more info, such as _model_torch, for easier debugging?
        return ONNXProgram(
            onnx.ModelProto(),  # type: ignore[attr-defined]
            io_adapter.InputAdapter(),
@@ -1182,6 +1217,7 @@ def export(self) -> ONNXProgram:
            model_signature=getattr(
                self.model, "graph_signature", None
            ),  # Available for isinstance(self.model, ExportedProgram) only
+            model_torch=self.model,
        )

    def _assert_fake_tensor_mode(self):
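
The net effect of the precedence logic above, as a usage sketch (onnx_program, x and real_model are illustrative names, assuming an export as in the docstring examples):

# Common case: the model captured at export time is used implicitly.
ort_outputs = onnx_program(x)

# An explicitly passed model takes precedence over the stored one; this is
# required after enable_fake_mode(), where the stored model holds fake weights.
ort_outputs = onnx_program(x, model_with_state_dict=real_model)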

torch/onnx/_internal/fx/dynamo_graph_extractor.py

Lines changed: 5 additions & 3 deletions
@@ -132,12 +132,14 @@ def __init__(

    def apply(
        self,
-        model: Union[torch.nn.Module, Callable, torch_export.ExportedProgram],
        model_outputs: Any,
+        model: Optional[
+            Union[torch.nn.Module, Callable, torch_export.ExportedProgram]
+        ] = None,
    ) -> Sequence[Any]:
        """Flatten the model outputs, under the context of pytree extension."""
        with self._pytree_extension_context:
-            return super().apply(model, model_outputs)
+            return super().apply(model_outputs, model=model)


def _wrap_model_with_output_adapter(
@@ -163,7 +165,7 @@ def _wrap_model_with_output_adapter(
    # Preserve original function signature.
    @functools.wraps(model_func)
    def wrapped(*args, **kwargs):
-        return output_adapter.apply(model, model_func(*args, **kwargs))
+        return output_adapter.apply(model_func(*args, **kwargs), model=model)

    return wrapped
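
_wrap_model_with_output_adapter relies on a standard decorator pattern: wrap a callable, post-process its outputs, and keep the original signature visible to inspection tools. A generic, self-contained sketch of that pattern (not the commit's code):

import functools

def wrap_with_postprocess(fn, postprocess):
    @functools.wraps(fn)  # preserves __name__, __doc__ and the visible signature
    def wrapped(*args, **kwargs):
        return postprocess(fn(*args, **kwargs))
    return wrapped

double = wrap_with_postprocess(lambda x: x + 1, lambda out: out * 2)
assert double(3) == 8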

torch/onnx/_internal/fx/fx_symbolic_graph_extractor.py

Lines changed: 1 addition & 1 deletion
@@ -169,7 +169,7 @@ def _trace_into_fx_graph_via_fx_symbolic_trace(
            torch.onnx.utils.model_signature(model)
        )
        self.input_adapter.append_step(bind_input_step)
-        _, named_args = bind_input_step.apply(model, model_args, model_kwargs)
+        _, named_args = bind_input_step.apply(model_args, model_kwargs, model=model)

        # Create inputs to call symbolic trace (torch.fx.symbolic_trace)
        # Example content of concrete_args:
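
bind_input_step.apply binds positional and keyword inputs to the model's named parameters. A rough sketch of what such a binding step does, using only the standard library (the actual BindInputStep lives elsewhere in io_adapter):

import inspect

def bind_named_args(fn, args, kwargs):
    # Normalize positional and keyword inputs into an ordered mapping
    # keyed by the callable's parameter names, filling in defaults.
    bound = inspect.signature(fn).bind(*args, **kwargs)
    bound.apply_defaults()
    return bound.arguments

def f(x, b=1.0):
    return x + b

assert bind_named_args(f, (2.0,), {}) == {"x": 2.0, "b": 1.0}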
