apiVersion: v1
data:
_example: "################################\n# #\n\
# EXAMPLE CONFIGURATION #\n# #\n################################\n\
\n# This block is not actually functional configuration,\n# but serves to illustrate\
\ the available configuration\n# options and document them in a way that is accessible\n\
# to users that `kubectl edit` this config map.\n#\n# These sample configuration\
\ options may be copied out of\n# this example block and unindented to be in the\
\ data block\n# to actually change the configuration.\n\n# The Revision ContainerConcurrency\
\ field specifies the maximum number\n# of requests the Container can handle at\
\ once. Container concurrency\n# target percentage is how much of that maximum\
\ to use in a stable\n# state. E.g. if a Revision specifies ContainerConcurrency\
\ of 10, then\n# the Autoscaler will try to maintain 7 concurrent connections\
\ per pod\n# on average.\n# Note: this limit will be applied to container concurrency\
\ set at every\n# level (ConfigMap, Revision Spec or Annotation).\n# For legacy\
\ and backwards compatibility reasons, this value also accepts\n# fractional values\
\ in (0, 1] interval (i.e. 0.7 \u21D2 70%).\n# Thus minimal percentage value must\
\ be greater than 1.0, or it will be\n# treated as a fraction.\n# NOTE: that this\
\ value does not affect actual number of concurrent requests\n# the user\
\ container may receive, but only the average number of requests\n# that\
\ the revision pods will receive.\ncontainer-concurrency-target-percentage: \"\
70\"\n\n# The container concurrency target default is what the Autoscaler will\n\
# try to maintain when concurrency is used as the scaling metric for the\n# Revision\
\ and the Revision specifies unlimited concurrency.\n# When revision explicitly\
\ specifies container concurrency, that value\n# will be used as a scaling target\
\ for autoscaler.\n# When specifying unlimited concurrency, the autoscaler will\n\
# horizontally scale the application based on this target concurrency.\n# This\
\ is what we call \"soft limit\" in the documentation, i.e. it only\n# affects\
\ number of pods and does not affect the number of requests\n# individual pod\
\ processes.\n# The value must be a positive number such that the value multiplied\n\
# by container-concurrency-target-percentage is greater than 0.01.\n# NOTE: that\
\ this value will be adjusted by application of\n# container-concurrency-target-percentage,\
\ i.e. by default\n# the system will target on average 70 concurrent requests\n\
# per revision pod.\n# NOTE: Only one metric can be used for autoscaling\
\ a Revision.\ncontainer-concurrency-target-default: \"100\"\n\n# The requests\
\ per second (RPS) target default is what the Autoscaler will\n# try to maintain\
\ when RPS is used as the scaling metric for a Revision and\n# the Revision specifies\
\ unlimited RPS. Even when specifying unlimited RPS,\n# the autoscaler will horizontally\
\ scale the application based on this\n# target RPS.\n# Must be greater than 1.0.\n\
# NOTE: Only one metric can be used for autoscaling a Revision.\nrequests-per-second-target-default:\
\ \"200\"\n\n# The target burst capacity specifies the size of burst in concurrent\n\
# requests that the system operator expects the system will receive.\n# Autoscaler\
\ will try to protect the system from queueing by introducing\n# Activator in\
\ the request path if the current spare capacity of the\n# service is less than\
\ this setting.\n# If this setting is 0, then Activator will be in the request\
\ path only\n# when the revision is scaled to 0.\n# If this setting is > 0 and\
\ container-concurrency-target-percentage is\n# 100% or 1.0, then activator will\
\ always be in the request path.\n# -1 denotes unlimited target-burst-capacity\
\ and activator will always\n# be in the request path.\n# Other negative values\
\ are invalid.\ntarget-burst-capacity: \"200\"\n\n# When operating in a stable\
\ mode, the autoscaler operates on the\n# average concurrency over the stable\
\ window.\n# Stable window must be in whole seconds.\nstable-window: \"60s\"\n\
\n# When observed average concurrency during the panic window reaches\n# panic-threshold-percentage\
\ the target concurrency, the autoscaler\n# enters panic mode. When operating\
\ in panic mode, the autoscaler\n# scales on the average concurrency over the\
\ panic window which is\n# panic-window-percentage of the stable-window.\n# Must\
\ be in the [1, 100] range.\n# When computing the panic window it will be rounded\
\ to the closest\n# whole second, at least 1s.\npanic-window-percentage: \"10.0\"\
\n\n# The percentage of the container concurrency target at which to\n# enter\
\ panic mode when reached within the panic window.\npanic-threshold-percentage:\
\ \"200.0\"\n\n# Max scale up rate limits the rate at which the autoscaler will\n\
# increase pod count. It is the maximum ratio of desired pods versus\n# observed\
\ pods.\n# Cannot be less or equal to 1.\n# I.e with value of 2.0 the number of\
\ pods can at most go N to 2N\n# over single Autoscaler period (2s), but at least\
\ N to\n# N+1, if Autoscaler needs to scale up.\nmax-scale-up-rate: \"1000.0\"\
\n\n# Max scale down rate limits the rate at which the autoscaler will\n# decrease\
\ pod count. It is the maximum ratio of observed pods versus\n# desired pods.\n\
# Cannot be less or equal to 1.\n# I.e. with value of 2.0 the number of pods can\
\ at most go N to N/2\n# over single Autoscaler evaluation period (2s), but at\n\
# least N to N-1, if Autoscaler needs to scale down.\nmax-scale-down-rate: \"\
2.0\"\n\n# Scale to zero feature flag.\nenable-scale-to-zero: \"true\"\n\n# Scale\
\ to zero grace period is the time an inactive revision is left\n# running before\
\ it is scaled to zero (must be positive, but recommended\n# at least a few seconds\
\ if running with mesh networking).\n# This is the upper limit and is provided\
\ not to enforce timeout after\n# the revision stopped receiving requests for\
\ stable window, but to\n# ensure network reprogramming to put activator in the\
\ path has completed.\n# If the system determines that a shorter period is satisfactory,\n\
# then the system will only wait that amount of time before scaling to 0.\n# NOTE:\
\ this period might actually be 0, if activator has been\n# in the request path\
\ sufficiently long.\n# If there is necessity for the last pod to linger longer\
\ use\n# scale-to-zero-pod-retention-period flag.\nscale-to-zero-grace-period:\
\ \"30s\"\n\n# Scale to zero pod retention period defines the minimum amount\n\
# of time the last pod will remain after Autoscaler has decided to\n# scale to\
\ zero.\n# This flag is for the situations where the pod startup is very expensive\n\
# and the traffic is bursty (requiring smaller windows for fast action),\n# but\
\ patchy.\n# The larger of this flag and `scale-to-zero-grace-period` will effectively\n\
# determine how the last pod will hang around.\nscale-to-zero-pod-retention-period:\
\ \"0s\"\n\n# pod-autoscaler-class specifies the default pod autoscaler class\n\
# that should be used if none is specified. If omitted, the Knative\n# Horizontal\
\ Pod Autoscaler (KPA) is used by default.\npod-autoscaler-class: \"kpa.autoscaling.knative.dev\"\
\n\n# The capacity of a single activator task.\n# The `unit` is one concurrent\
\ request proxied by the activator.\n# activator-capacity must be at least 1.\n\
# This value is used for computation of the Activator subset size.\n# See the\
\ algorithm here: http://bit.ly/38XiCZ3.\n# TODO(vagababov): tune after actual\
\ benchmarking.\nactivator-capacity: \"100.0\"\n\n# initial-scale is the cluster-wide\
\ default value for the initial target\n# scale of a revision after creation,\
\ unless overridden by the\n# \"autoscaling.knative.dev/initialScale\" annotation.\n\
# This value must be greater than 0 unless allow-zero-initial-scale is true.\n\
initial-scale: \"1\"\n\n# allow-zero-initial-scale controls whether either the\
\ cluster-wide initial-scale flag,\n# or the \"autoscaling.knative.dev/initialScale\"\
\ annotation, can be set to 0.\nallow-zero-initial-scale: \"false\"\n\n# max-scale\
\ is the cluster-wide default value for the max scale of a revision,\n# unless\
\ overridden by the \"autoscaling.knative.dev/maxScale\" annotation.\n# If set\
\ to 0, the revision has no maximum scale.\nmax-scale: \"0\"\n\n# scale-down-delay\
\ is the amount of time that must pass at reduced\n# concurrency before a scale\
\ down decision is applied. This can be useful,\n# for example, to maintain replica\
\ count and avoid a cold start penalty if\n# more requests come in within the\
\ scale down delay period.\n# The default, 0s, imposes no delay at all.\nscale-down-delay:\
\ \"0s\"\n\n# max-scale-limit sets the maximum permitted value for the max scale\
\ of a revision.\n# When this is set to a positive value, a revision with a maxScale\
\ above that value\n# (including a maxScale of \"0\" = unlimited) is disallowed.\n\
# A value of zero (the default) allows any limit, including unlimited.\nmax-scale-limit:\
\ \"0\"\n"
container-concurrency-target-default: '75'
enable-scale-to-zero: 'true'
kind: ConfigMap
metadata:
annotations:
knative.dev/example-checksum: 604cb513
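    # knative.dev/example-checksum is used by Knative to detect local edits to
    # the `_example` block above; it is not a tunable setting.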
creationTimestamp: '2021-08-30T18:59:53Z'
labels:
serving.knative.dev/release: v0.24.0
name: config-autoscaler
namespace: knative-serving
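  # The ownerReference below marks this ConfigMap as managed by the Knative
  # Operator's KnativeServing resource, so direct `kubectl edit` changes may be
  # reverted on the operator's next reconcile; see the illustrative
  # KnativeServing override sketched at the end of this file.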
ownerReferences:
- apiVersion: operator.knative.dev/v1alpha1
blockOwnerDeletion: true
controller: true
kind: KnativeServing
name: knative-serving
uid: 5162f181-a47b-4465-a388-ee8a99ad290e
resourceVersion: '6534'
uid: ecb8256a-13b3-4cf5-88cb-b79309a13d97
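# ---------------------------------------------------------------------------
# Illustrative only: because this ConfigMap is owned by the Knative Operator
# (see the ownerReference above), autoscaler overrides such as
# container-concurrency-target-default: "75" are normally set through the
# KnativeServing custom resource rather than by editing this object directly.
# The sketch below follows the operator's spec.config.<configmap> convention;
# the resource name and namespace are taken from the ownerReference, and the
# values simply mirror the two active keys in the data block above.
---
apiVersion: operator.knative.dev/v1alpha1
kind: KnativeServing
metadata:
  name: knative-serving
  namespace: knative-serving
spec:
  config:
    autoscaler:
      # Copied from the data block of config-autoscaler above.
      container-concurrency-target-default: "75"
      enable-scale-to-zero: "true"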