باید از cv::AsyncArray استفاده کنید و به‌جای forward هم از forwardAsync استفاده کنید (توجه: forwardAsync در OpenCV پارامتر cv::cuda::Stream نمی‌پذیرد و فقط نام لایهٔ خروجی را می‌گیرد):
// Load the models
cv::dnn::Net model1 = cv::dnn::readNetFromCaffe("model1.prototxt", "model1.caffemodel");
cv::dnn::Net model2 = cv::dnn::readNetFromCaffe("model2.prototxt", "model2.caffemodel");
// Enable GPU mode
model1.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
model1.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
model2.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
model2.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
// Create CUDA streams
cv::cuda::Stream stream1, stream2;
// Prepare input data
cv::Mat input_image1 = cv::imread("input_image1.jpg");
cv::Mat input_image2 = cv::imread("input_image2.jpg");
cv::Mat input_data1 = preprocess_input_for_model1(input_image1); // assume this function returns a cv::Mat
cv::Mat input_data2 = preprocess_input_for_model2(input_image2); // assume this function returns a cv::Mat
// Create a blob from the input data
cv::Mat blob1 = cv::dnn::blobFromImage(input_data1, 1.0, cv::Size(224, 224), cv::Scalar(104, 117, 123), false, false);
cv::Mat blob2 = cv::dnn::blobFromImage(input_data2, 1.0, cv::Size(224, 224), cv::Scalar(104, 117, 123), false, false);
// Set the input blobs for each model
model1.setInput(blob1, "data");
model2.setInput(blob2, "data");
// Run the models in batch mode using streams
cv::Mat output1, output2;
model1.forwardAsync(output1, "output", stream1);
model2.forwardAsync(output2, "output", stream2);
// Wait for the streams to finish
stream1.waitForCompletion();
stream2.waitForCompletion();
// Postprocess the results
postprocess_output(output1, output2); // assume this function takes the two outputs and performs some postprocessing