triton-inference-server · mc-nv · Apr 17, 2026 · Apr 17, 2026
diff --git a/qa/L0_backend_python/test.sh b/qa/L0_backend_python/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2020-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2020-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -65,9 +65,6 @@ export PYTHON_ENV_VERSION=${PYTHON_ENV_VERSION:="12"}
 export PYTHON_BACKEND_REPO_TAG=$PYTHON_BACKEND_REPO_TAG
 
 BASE_SERVER_ARGS="--model-repository=${MODELDIR}/models --backend-directory=${BACKEND_DIR} --log-verbose=1"
-# Set the default byte size to 5MBs to avoid going out of shared memory. The
-# environment that this job runs on has only 1GB of shared-memory available.
-SERVER_ARGS="$BASE_SERVER_ARGS --backend-config=python,shm-default-byte-size=5242880"
 
 CLIENT_PY=./python_test.py
 CLIENT_LOG="./client.log"
@@ -177,6 +174,9 @@ fi
 
 pip3 install pytest requests virtualenv
 
+# Set the default byte size to 5 MB to avoid running out of shared memory. The
+# environment that this job runs on has only 1 GB of shared memory available.
+SERVER_ARGS="$BASE_SERVER_ARGS --allow-client-shm=true --backend-config=python,shm-default-byte-size=5242880"
 prev_num_pages=`get_shm_pages`
 run_server
 if [ "$SERVER_PID" == "0" ]; then
@@ -205,6 +205,7 @@ and shared memory pages after starting triton equals to $current_num_pages \n***
     RET=1
 fi
 
+SERVER_ARGS="$BASE_SERVER_ARGS --backend-config=python,shm-default-byte-size=5242880"
 prev_num_pages=`get_shm_pages`
 # Triton non-graceful exit
 run_server

diff --git a/qa/L0_buffer_attributes/test.sh b/qa/L0_buffer_attributes/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2022-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -52,7 +52,7 @@ export CUDA_VISIBLE_DEVICES=0
 rm -fr *.log
 
 SERVER=/opt/tritonserver/bin/tritonserver
-SERVER_ARGS="--model-repository=`pwd`/models"
+SERVER_ARGS="--model-repository=`pwd`/models --allow-client-shm=true"
 SERVER_LOG="./inference_server.log"
 run_server
 if [ "$SERVER_PID" == "0" ]; then

diff --git a/qa/L0_client_timeout/test.sh b/qa/L0_client_timeout/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2020-2026, NVIDIA CORPORATION. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -241,7 +241,7 @@ wait $SERVER_PID
 
 # Test all APIs other than infer
 export TRITONSERVER_SERVER_DELAY_GRPC_RESPONSE_SEC=2
-SERVER_ARGS="${SERVER_ARGS} --model-control-mode=explicit --load-model=custom_identity_int32 --log-verbose 2"
+SERVER_ARGS="${SERVER_ARGS} --model-control-mode=explicit --allow-client-shm=true --load-model=custom_identity_int32 --log-verbose 2"
 run_server
 if [ "$SERVER_PID" == "0" ]; then
     echo -e "\n***\n*** Failed to start $SERVER\n***"

diff --git a/qa/L0_grpc/test.sh b/qa/L0_grpc/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2019-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -158,7 +158,7 @@ rm -f *.log.*
 set -e
 
 CLIENT_LOG=`pwd`/client.log
-SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=${MODELDIR}"
+SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=${MODELDIR} --allow-client-shm=true"
 source ../common/util.sh
 
 run_server
@@ -569,7 +569,7 @@ done
 # Run python grpc aio unit test
 PYTHON_GRPC_AIO_TEST=python_grpc_aio_test.py
 CLIENT_LOG=`pwd`/python_grpc_aio_test.log
-SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=${MODELDIR}"
+SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=${MODELDIR} --allow-client-shm=true"
 run_server
 if [ "$SERVER_PID" == "0" ]; then
     echo -e "\n***\n*** Failed to start $SERVER\n***"

diff --git a/qa/L0_http/test.sh b/qa/L0_http/test.sh
@@ -129,7 +129,7 @@ rm -f *.log.*
 set -e
 
 CLIENT_LOG=`pwd`/client.log
-SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=${MODELDIR}"
+SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=${MODELDIR} --allow-client-shm=true"
 
 run_server
 if [ "$SERVER_PID" == "0" ]; then
@@ -612,7 +612,7 @@ done
 # Run python http aio unit test
 PYTHON_HTTP_AIO_TEST=python_http_aio_test.py
 CLIENT_LOG=`pwd`/python_http_aio_test.log
-SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=${MODELDIR}"
+SERVER_ARGS="--backend-directory=${BACKEND_DIR} --model-repository=${MODELDIR} --allow-client-shm=true"
 run_server
 if [ "$SERVER_PID" == "0" ]; then
     echo -e "\n***\n*** Failed to start $SERVER\n***"
@@ -931,7 +931,7 @@ fi
 MODELDIR="`pwd`/models"
 REQUEST_MANY_CHUNKS_PY="http_request_many_chunks.py"
 CLIENT_LOG="./client.http_request_many_chunks.log"
-SERVER_ARGS="--model-repository=${MODELDIR} --log-verbose=1 --model-control-mode=explicit --load-model=simple"
+SERVER_ARGS="--model-repository=${MODELDIR} --allow-client-shm=true --log-verbose=1 --model-control-mode=explicit --load-model=simple"
 SERVER_LOG="./inference_server_request_many_chunks.log"
 
 run_server

diff --git a/qa/L0_input_validation/test.sh b/qa/L0_input_validation/test.sh
@@ -126,7 +126,7 @@ cp -r $DATADIR/qa_model_repository/onnx_object_int32_int32 models/.
 cp -r $DATADIR/qa_shapetensor_model_repository/plan_nobatch_zero_1_float32_int32 models/.
 cp -r $DATADIR/qa_shapetensor_model_repository/plan_zero_1_float32_int32 models/.
 
-SERVER_ARGS="--model-repository=`pwd`/models"
+SERVER_ARGS="--model-repository=`pwd`/models --allow-client-shm=true"
 run_server
 if [ "$SERVER_PID" == "0" ]; then
     echo -e "\n***\n*** Failed to start $SERVER\n***"

diff --git a/qa/L0_perf_nomodel/run_test.sh b/qa/L0_perf_nomodel/run_test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2019-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2019-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -59,6 +59,10 @@ if [ "$ARCH" != "aarch64" ]; then
     DATADIR="/data/inferenceserver/${REPO_VERSION}"
 fi
 
+if [ "$SHARED_MEMORY" != "none" ]; then
+    SERVER_ARGS="${SERVER_ARGS} --allow-client-shm=true"
+fi
+
 # Select the single GPU that will be available to the inference server
 export CUDA_VISIBLE_DEVICES=0
 

diff --git a/qa/L0_query/test.sh b/qa/L0_query/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2021-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -69,7 +69,7 @@ export TEST_FAIL_WITH_QUERY_RESULT=1
 export TEST_BYTE_SIZE=4
 
 SERVER=/opt/tritonserver/bin/tritonserver
-SERVER_ARGS="--model-repository=`pwd`/models"
+SERVER_ARGS="--model-repository=`pwd`/models --allow-client-shm=true"
 SERVER_LOG="./inference_server.log"
 run_server
 if [ "$SERVER_PID" == "0" ]; then

diff --git a/qa/L0_trt_reformat_free/test.sh b/qa/L0_trt_reformat_free/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2019-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2019-2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -50,7 +50,7 @@ rm -rf ${DATADIR}
 cp -r /data/inferenceserver/${REPO_VERSION}/qa_trt_format_model_repository/ ${DATADIR}
 
 SERVER=/opt/tritonserver/bin/tritonserver
-SERVER_ARGS="--model-repository=$DATADIR"
+SERVER_ARGS="--model-repository=$DATADIR --allow-client-shm=true"
 source ../common/util.sh
 
 rm -f *.log*

diff --git a/qa/L0_trt_shape_tensors/test.sh b/qa/L0_trt_shape_tensors/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright (c) 2019-2024, NVIDIA CORPORATION. All rights reserved.
+# Copyright (c) 2019-2026, NVIDIA CORPORATION. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -45,10 +45,18 @@ CLIENT_LOG="./client.log"
 SHAPE_TENSOR_TEST=trt_shape_tensor_test.py
 
 SERVER=/opt/tritonserver/bin/tritonserver
-SERVER_ARGS="--model-repository=`pwd`/models --log-verbose=1"
+SERVER_ARGS="--model-repository=`pwd`/models --log-verbose=1 --allow-client-shm=true"
 SERVER_LOG="./inference_server.log"
 source ../common/util.sh
 
+if [ -z "$TEST_SYSTEM_SHARED_MEMORY" ]; then
+    TEST_SYSTEM_SHARED_MEMORY="0"
+fi
+
+if [ -z "$TEST_CUDA_SHARED_MEMORY" ]; then
+    TEST_CUDA_SHARED_MEMORY="0"
+fi
+
 rm -fr  *.log
 rm -fr models && mkdir models
 cp -r /data/inferenceserver/${REPO_VERSION}/qa_shapetensor_model_repository/* models/.
@@ -220,6 +228,9 @@ for i in \
     test_dynaseq_different_shape_values_parallel \
     ;do
     SERVER_ARGS="--model-repository=`pwd`/models"
+    if [ "$TEST_SYSTEM_SHARED_MEMORY" -eq 1 ] || [ "$TEST_CUDA_SHARED_MEMORY" -eq 1 ]; then
+        SERVER_ARGS="${SERVER_ARGS} --allow-client-shm=true"
+    fi
     SERVER_LOG="./$i.server.log"
     run_server
     if [ "$SERVER_PID" == "0" ]; then