rfc9681xml2.original.xml | rfc9681.xml | |||
---|---|---|---|---|
<?xml version="1.0" encoding="US-ASCII"?> | <?xml version='1.0' encoding='UTF-8'?> | |||
<!DOCTYPE rfc SYSTEM "rfc2629.dtd"> | ||||
<?rfc toc="yes"?> | ||||
<?rfc tocompact="yes"?> | ||||
<?rfc tocdepth="2"?> | ||||
<?rfc tocindent="yes"?> | ||||
<?rfc symrefs="yes"?> | ||||
<?rfc sortrefs="yes"?> | ||||
<?rfc comments="yes"?> | ||||
<?rfc inline="yes"?> | ||||
<?rfc compact="yes"?> | ||||
<?rfc subcompact="no"?> | ||||
<rfc category="exp" docName="draft-ietf-lsr-isis-fast-flooding-11" ipr="trust200 | ||||
902"> | ||||
<front> | ||||
<title abbrev="IS-IS Fast Flooding">IS-IS Fast Flooding</title> | ||||
<author fullname="Bruno Decraene" initials="B." surname="Decraene | ||||
"> | ||||
<organization>Orange</organization> | ||||
<address> | ||||
<email>bruno.decraene@orange.com</email> | ||||
</address> | ||||
</author> | ||||
<author fullname="Les Ginsberg" initials="L" surname="Ginsberg"> | ||||
<organization>Cisco Systems</organization> | ||||
<address> | ||||
<postal> | ||||
<street>821 Alder Drive</street> | ||||
<city>Milpitas</city> | ||||
<code>95035</code> | ||||
<region>CA</region> | ||||
<country>USA</country> | ||||
</postal> | ||||
<email>ginsberg@cisco.com</email> | ||||
</address> | ||||
</author> | ||||
<author fullname="Tony Li" initials="T." surname="Li"> | ||||
<organization>Juniper Networks, Inc.</organization> | ||||
<address> | ||||
<phone/> | ||||
<email>tony.li@tony.li</email> | ||||
</address> | ||||
</author> | ||||
<author fullname="Guillaume Solignac" initials="G." surname="Soli | ||||
gnac"> | ||||
<address> | ||||
<email>gsoligna@protonmail.com</email> | ||||
</address> | ||||
</author> | ||||
<author fullname="Marek Karasek" initials="M" surname="Karasek"> | ||||
<organization>Cisco Systems</organization> | ||||
<address> | ||||
<postal> | ||||
<street>Pujmanove 1753/10a, Prague 4 - Nu | ||||
sle</street> | ||||
<city>Prague</city> | ||||
<region/> | ||||
<code>10 14000</code> | ||||
<country>Czech Republic</country> | ||||
</postal> | ||||
<phone/> | ||||
<facsimile/> | ||||
<email>mkarasek@cisco.com</email> | ||||
<uri/> | ||||
</address> | ||||
</author> | ||||
<author initials="G." surname="Van de Velde" fullname="Gunter Van | ||||
de Velde"> | ||||
<organization>Nokia</organization> | ||||
<address> | ||||
<postal> | ||||
<street>Copernicuslaan 50</street> | ||||
<city>Antwerp</city> | ||||
<code>2018</code> | ||||
<country>Belgium</country> | ||||
</postal> | ||||
<email>gunter.van_de_velde@nokia.com</email> | ||||
</address> | ||||
</author> | ||||
<author fullname="Tony Przygienda" initials="T" surname="Przygien | ||||
da"> | ||||
<organization>Juniper</organization> | ||||
<address> | ||||
<postal> | ||||
<street>1137 Innovation Way</street> | ||||
<city>Sunnyvale</city> | ||||
<region>Ca</region> | ||||
<code/> | ||||
<country>USA</country> | ||||
</postal> | ||||
<phone/> | ||||
<facsimile/> | <!DOCTYPE rfc [ | |||
<!ENTITY nbsp " "> | ||||
<!ENTITY zwsp "​"> | ||||
<!ENTITY nbhy "‑"> | ||||
<!ENTITY wj "⁠"> | ||||
]> | ||||
<email>prz@juniper.net</email> | <rfc xmlns:xi="http://www.w3.org/2001/XInclude" category="exp" docName="draft-ie tf-lsr-isis-fast-flooding-11" number="9681" ipr="trust200902" obsoletes="" updat es="" submissionType="IETF" xml:lang="en" tocInclude="true" tocDepth="2" consens us="true" symRefs="true" sortRefs="true" version="3"> | |||
<uri/> | <front> | |||
</address> | <title abbrev="IS-IS Fast Flooding">IS-IS Fast Flooding</title> | |||
</author> | <seriesInfo name="RFC" value="9681"/> | |||
<author fullname="Bruno Decraene" initials="B." surname="Decraene"> | ||||
<organization>Orange</organization> | ||||
<address> | ||||
<email>bruno.decraene@orange.com</email> | ||||
</address> | ||||
</author> | ||||
<author fullname="Les Ginsberg" initials="L" surname="Ginsberg"> | ||||
<organization>Cisco Systems</organization> | ||||
<address> | ||||
<postal> | ||||
<street>821 Alder Drive</street> | ||||
<city>Milpitas</city> | ||||
<code>95035</code> | ||||
<region>CA</region> | ||||
<country>United States of America</country> | ||||
</postal> | ||||
<email>ginsberg@cisco.com</email> | ||||
</address> | ||||
</author> | ||||
<author fullname="Tony Li" initials="T." surname="Li"> | ||||
<organization>Juniper Networks, Inc.</organization> | ||||
<address> | ||||
<email>tony.li@tony.li</email> | ||||
</address> | ||||
</author> | ||||
<author fullname="Guillaume Solignac" initials="G." surname="Solignac"> | ||||
<address> | ||||
<email>gsoligna@protonmail.com</email> | ||||
</address> | ||||
</author> | ||||
<author fullname="Marek Karasek" initials="M" surname="Karasek"> | ||||
<organization>Cisco Systems</organization> | ||||
<address> | ||||
<postal> | ||||
<street>Pujmanove 1753/10a, Prague 4 - Nusle</street> | ||||
<city>Prague</city> | ||||
<code>10 14000</code> | ||||
<country>Czech Republic</country> | ||||
</postal> | ||||
<email>mkarasek@cisco.com</email> | ||||
</address> | ||||
</author> | ||||
<author initials="G." surname="Van de Velde" fullname="Gunter Van de Velde"> | ||||
<organization>Nokia</organization> | ||||
<address> | ||||
<postal> | ||||
<street>Copernicuslaan 50</street> | ||||
<city>Antwerp</city> | ||||
<code>2018</code> | ||||
<country>Belgium</country> | ||||
</postal> | ||||
<email>gunter.van_de_velde@nokia.com</email> | ||||
</address> | ||||
</author> | ||||
<date year="2024"/> | <author fullname="Tony Przygienda" initials="T" surname="Przygienda"> | |||
<abstract> | <organization>Juniper</organization> | |||
<t> | <address> | |||
Current Link State Protocol Data Unit (PDU) | <postal> | |||
flooding rates are much slower than what modern | <street>1133 Innovation Way</street> | |||
networks can support. The use of IS-IS at larger | <city>Sunnyvale</city> | |||
scale requires faster flooding rates to achieve | <region>CA</region><code>94089</code> | |||
desired convergence goals. This document | <country>United States of America</country> | |||
discusses the need for faster flooding, the issues | </postal> | |||
around faster flooding, and some example | <email>prz@juniper.net</email> | |||
approaches to achieve faster flooding. It also | </address> | |||
defines protocol extensions relevant to faster | </author> | |||
flooding. | <date month="November" year="2024"/> | |||
</t> | <area>RTG</area> | |||
</abstract> | <workgroup>lsr</workgroup> | |||
</front> | <keyword>LSP</keyword> | |||
<middle> | <keyword>congestion</keyword> | |||
<keyword>flow control</keyword> | ||||
<keyword>scale</keyword> | ||||
<keyword>performance</keyword> | ||||
<keyword>IS-IS</keyword> | ||||
<keyword>flooding</keyword> | ||||
<section title="Introduction"> | <abstract> | |||
<t>Link state IGPs such as Intermediate-System-to-Interme | <t>Current Link State PDU flooding rates are much | |||
diate-System | slower than what modern networks can support. The use of IS-IS at | |||
(IS-IS) depend upon having consistent Link State Databases (LSDB) on all | larger scale requires faster flooding rates to achieve desired | |||
convergence goals. This document discusses the need for faster | ||||
flooding, the issues around faster flooding, and some example approaches | ||||
to achieve faster flooding. It also defines protocol extensions relevant | ||||
to faster flooding. | ||||
</t> | ||||
</abstract> | ||||
</front> | ||||
<middle> | ||||
<section numbered="true" toc="default"> | ||||
<name>Introduction</name> | ||||
<t>Link state IGPs such as Intermediate System to Intermediate System | ||||
(IS-IS) depend upon having consistent Link State Databases (LSDBs) on all | ||||
Intermediate Systems (ISs) in the network in order to provide correct | Intermediate Systems (ISs) in the network in order to provide correct | |||
forwarding of data packets. When topology changes occur, new/updated | forwarding of data packets. When topology changes occur, new/updated | |||
Link State PDUs (LSPs) are propagated network-wide. The speed of | Link State PDUs (LSPs) are propagated network-wide. The speed of | |||
propagation is a key contributor to convergence time.</t> | propagation is a key contributor to convergence time.</t> | |||
<t>IS-IS base specification <xref target="ISO10589" format="default"/> | ||||
does not use flow or congestion control but static flooding rates. | ||||
Historically, flooding rates have been conservative -- on the order of | ||||
tens of LSPs per second. This is the result of guidance in the base | ||||
specification and early deployments when the CPU and interface speeds | ||||
were much slower and the area scale was much smaller than they are | ||||
today.</t> | ||||
<t>As IS-IS is deployed in greater scale both in the number of nodes in | ||||
an area and in the number of neighbors per node, the impact of the | ||||
historic flooding rates becomes more significant. Consider the bring-up | ||||
or failure of a node with 1000 neighbors. This will result in a minimum | ||||
of 1000 LSP updates. At typical LSP flooding rates used today (33 | ||||
LSPs per second), it would take more than 30 seconds simply to send the | ||||
updated LSPs to a given neighbor. Depending on the diameter of the | ||||
network, achieving a consistent LSDB on all nodes in the network could | ||||
easily take a minute or more.</t> | ||||
<t>Therefore, increasing the LSP flooding rate becomes an essential | ||||
element of supporting greater network scale.</t> | ||||
<t> Improving the LSP flooding rate is complementary to protocol | ||||
extensions that reduce LSP flooding traffic by reducing the flooding | ||||
topology such as Mesh Groups <xref target="RFC2973" format="default"/> | ||||
or Dynamic Flooding <xref target="RFC9667" | ||||
format="default"/>. Reduction of the flooding topology does not alter | ||||
the number of LSPs required to be exchanged between two nodes, so | ||||
increasing the overall flooding speed is still beneficial when such | ||||
extensions are in use. It is also possible that the flooding topology | ||||
can be reduced in ways that prefer the use of neighbors that support | ||||
improved flooding performance.</t> | ||||
<t>With the goal of supporting faster flooding, this document introduces t | ||||
he signaling | ||||
of additional flooding related parameters (<xref target="FloodingTLV" for | ||||
mat="default"/>), specifies some | ||||
performance improvements on the receiver (<xref target="Receiver" format= | ||||
"default"/>) | ||||
and introduces the use of flow and/or congestion control (<xref target="C | ||||
ontrol" format="default"/>).</t> | ||||
</section> | ||||
<section anchor="Language" numbered="true" toc="default"> | ||||
<name>Requirements Language</name> | ||||
<t> | ||||
The key words "<bcp14>MUST</bcp14>", "<bcp14>MUST NOT</bcp14>", "<bcp14>REQU | ||||
IRED</bcp14>", "<bcp14>SHALL</bcp14>", "<bcp14>SHALL | ||||
NOT</bcp14>", "<bcp14>SHOULD</bcp14>", "<bcp14>SHOULD NOT</bcp14>", "<bcp14> | ||||
RECOMMENDED</bcp14>", "<bcp14>NOT RECOMMENDED</bcp14>", | ||||
"<bcp14>MAY</bcp14>", and "<bcp14>OPTIONAL</bcp14>" in this document are to | ||||
be interpreted as | ||||
described in BCP 14 <xref target="RFC2119"/> <xref target="RFC8174"/> | ||||
when, and only when, they appear in all capitals, as shown here. | ||||
</t> | ||||
</section> | ||||
<section anchor="HISTORY" numbered="true" toc="default"> | ||||
<name>Historical Behavior</name> | ||||
<t>The base specification for IS-IS <xref target="ISO10589" | ||||
format="default"/> was first published in 1992 and updated in 2002. The | ||||
update made no changes with regard to suggested timer values. Convergence | ||||
targets at the time were on the order of seconds, and the specified timer | ||||
values reflect that. Here are some examples:</t> | ||||
<t>IS-IS base specification <xref target="ISO10589"/> doe | <blockquote> | |||
s not use flow | <dl spacing="normal" newline="false"> | |||
or congestion control but static flooding rates. | <dt>minimumLSPGenerationInterval</dt> <dd><t>- This is the minimum time | |||
Historically, flooding rates have been conservative - on | interval between generation of Link State PDUs. A source Intermediate | |||
the order of | system shall wait at least this long before regenerating one of its | |||
10s of LSPs/second. This is the result of guidance in the base specificati | own Link State PDUs. [...]</t> | |||
on | <t>A reasonable value is 30 s.</t></dd> | |||
and early deployments when the CPU and | ||||
interface speeds were much slower and the area scale | ||||
much smaller than they are today.</t> | ||||
<t>As IS-IS is deployed in greater scale both in the numb | ||||
er of nodes in an | ||||
area and in the number of neighbors per node, the impact of the historic | ||||
flooding rates becomes more significant. Consider the bringup or failure | ||||
of a node with 1000 neighbors. This will result in a minimum of 1000 LSP | ||||
updates. At typical LSP flooding rates used today | ||||
(33 LSPs/second), it would take more than 30 seconds simply to send the up | ||||
dated | ||||
LSPs to a given neighbor. Depending on the diameter of the network, | ||||
achieving a consistent LSDB on all nodes in the network could easily | ||||
take a minute or more.</t> | ||||
<t>Increasing the LSP flooding rate therefore becomes an | ||||
essential element | ||||
of supporting greater network scale.</t> | ||||
<t> Improving the LSP flooding rate is complementary | ||||
to protocol | ||||
extensions that reduce LSP flooding traffic by reducing the | ||||
flooding topology such as Mesh Groups <xref target="RFC2973"/> | ||||
or Dynamic Flooding <xref target="I-D.ietf-lsr-dynamic-flooding"/> | ||||
. Reduction of the | ||||
flooding topology does not alter the number of LSPs required | ||||
to be exchanged between two nodes, so increasing the overall | ||||
flooding speed is still beneficial when such extensions are in | ||||
use. It is also possible that the flooding topology can be | ||||
reduced in ways that prefer the use of neighbors that support | ||||
improved flooding performance.</t> | ||||
<t>With the goal of supporting faster flooding, this document introduces the | ||||
signaling | ||||
of additional flooding related parameters <xref target="FloodingTLV"/>, s | ||||
pecifies some | ||||
performance improvements on the receiver <xref target="Receiver"/> | ||||
and introduces the use of flow and/or congestion control <xref target="Co | ||||
ntrol"/>.</t> | ||||
</section> | ||||
<section anchor="Language" title="Requirements Language"> | <dt>minimumLSPTransmissionInterval</dt> <dd><t>- This is the amount of | |||
<t>The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL | time an Intermediate system shall wait before further propagating | |||
NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "NOT RECOMMENDED", | another Link State PDU from the same source system. [...]</t> | |||
"MAY", and "OPTIONAL" in this document are to be interpreted as | <t>A reasonable value is 5 s.</t></dd> | |||
described in BCP 14 <xref target="RFC2119"/> <xref | ||||
target="RFC8174"/> when, and only when, they appear in all capitals, | ||||
as shown here.</t> | ||||
</section> | ||||
<section anchor="HISTORY" title="Historical Behavior"> | <dt>partialSNPInterval</dt> <dd><t>- This is the amount of time between p | |||
<t>The base specification for IS-IS <xref target="ISO10589"/> | eriodic action for | |||
was first | transmission of Partial Sequence Number PDUs. It shall be less than | |||
published in 1992 and updated in 2002. The update made no changes in | minimumLSPTransmissionInterval. [...]</t> | |||
regards to suggested timer values. Convergence targets at the time were | <t>A reasonable value is 2 s.</t></dd> | |||
on the order of seconds and the specified timer values reflect that. | </dl> | |||
Here are some examples:</t> | </blockquote> | |||
<t> | <t>Most relevant to a discussion of the LSP flooding rate is the | |||
<figure> | recommended interval between the transmission of two different LSPs on | |||
<artwork><![CDATA[minimumLSPGenerationInterval - | a given interface.</t> | |||
This is the minimum time interval | ||||
between generation of Link State PDUs. A source Intermediate | ||||
system shall wait at least this long before re-generating one | ||||
of its own Link State PDUs.]]></artwork> | ||||
</figure> | ||||
</t> | ||||
<t> | ||||
The recommended value is 30 seconds. | ||||
</t> | ||||
<t> | ||||
<figure> | ||||
<artwork><![CDATA[minimumLSPTransmissionInterval | ||||
- This is the amount of time an | ||||
Intermediate system shall wait before further propagating | ||||
another Link State PDU from the same source system.]]></artwork> | ||||
</figure> | ||||
</t> | ||||
<t> | ||||
The recommended value is 5 seconds. | ||||
</t> | ||||
<t> | ||||
<figure> | ||||
<artwork><![CDATA[partialSNPInterval - This is th | ||||
e amount of time between periodic | ||||
action for transmission of Partial Sequence Number PDUs. | ||||
It shall be less than minimumLSPTransmissionInterval.]]></artwork> | ||||
</figure> | ||||
</t> | ||||
<t> | ||||
The recommended value is 2 seconds. | ||||
</t> | ||||
<t>Most relevant to a discussion of the LSP flooding rate is the | ||||
recommended | ||||
interval between the transmission of two different LSPs on a given | ||||
interface.</t> | ||||
<t>For broadcast interfaces, <xref target="ISO10589"/> | <t>For broadcast interfaces, <xref target="ISO10589" | |||
defined:</t> | format="default"/> states:</t> | |||
<t> | <blockquote> | |||
<figure> | <t> | |||
<artwork><![CDATA[ minimumBroadcastLSPTransmissi | minimumBroadcastLSPTransmissionInterval indicates the minimum | |||
onInterval - the minimum interval | interval between PDU arrivals which can be processed by the slowest | |||
between PDU arrivals which can be processed by the slowest | Intermediate System on the LAN. | |||
Intermediate System on the LAN.]]></artwork> | </t> | |||
</figure> | </blockquote> | |||
</t> | ||||
<t> | <t> | |||
The default value was defined as 33 milliseconds. | The default value was defined as 33 milliseconds. | |||
It is permitted to send multiple LSPs "back-to-back" | It is permitted to send multiple LSPs back to back | |||
as a burst, but this was limited to 10 LSPs in a one second | as a burst, but this was limited to 10 LSPs in a one-second | |||
period. | period. | |||
</t> | </t> | |||
<t> | ||||
Although this value was specific to LAN interfaces, this has commonly | ||||
been applied by implementations to all interfaces though that was not | ||||
the original intent of the base specification. In fact Section | ||||
12.1.2.4.3 states:</t> | ||||
<t> | <t> | |||
<figure> | Although this value was specific to LAN interfaces, this has | |||
<artwork><![CDATA[ On point-to-point links the p | commonly been applied by implementations to all interfaces though | |||
eak rate of arrival is limited only | that was not the original intent of the base specification. In | |||
by the speed of the data link and the other traffic flowing on | fact, Section 12.1.2.4.3 of <xref target="ISO10589"/> states:</t> | |||
that link.]]></artwork> | ||||
</figure> | ||||
</t> | ||||
<t>Although modern implementations have not strictly adhered to t | <blockquote><t>On point-to-point links the peak rate of arrival is | |||
he 33 | limited only by the speed of the data link and the other traffic flowing | |||
millisecond interval, it is commonplace for implementations to limit | on that link.</t></blockquote> | |||
the flooding rate to the same order of magnitude: tens of milliseconds, | ||||
and not the single digits or fractions of milliseconds that are needed | ||||
today.</t> | ||||
<t>In the past 20 years, significant work on achieving faster | <t>Although modern implementations have not strictly adhered to the | |||
33-millisecond interval, it is commonplace for implementations to limit | ||||
the flooding rate to the same order of magnitude: tens of milliseconds, | ||||
and not the single digits or fractions of milliseconds that are needed | ||||
today.</t> | ||||
<t>In the past 20 years, significant work on achieving faster | ||||
convergence, more specifically sub-second convergence, has resulted in | convergence, more specifically sub-second convergence, has resulted in | |||
implementations modifying a number of the above timers in order to | implementations modifying a number of the above timers in order to | |||
support faster signaling of topology changes. For example, | support faster signaling of topology changes. For example, | |||
minimumLSPGenerationInterval has been modified to support millisecond | minimumLSPGenerationInterval has been modified to support millisecond | |||
intervals, often with a backoff algorithm applied to prevent LSP | intervals, often with a backoff algorithm applied to prevent LSP | |||
generation storms in the event of rapid successive oscillations.</t> | generation storms in the event of rapid successive oscillations.</t> | |||
<t>However, the flooding rate has not been fundamentally altered.</t> | ||||
</section> | ||||
<section anchor="FloodingTLV" numbered="true" toc="default"> | ||||
<name>Flooding Parameters TLV</name> | ||||
<t>This document defines a new Type-Length-Value (TLV) tuple called the | ||||
"Flooding Parameters TLV" that may be included in IS-IS Hellos (IIHs) | ||||
or Partial Sequence Number PDUs (PSNPs). It allows IS-IS implementations | ||||
to advertise flooding-related parameters and capabilities that may be | ||||
used by the peer to support faster flooding.</t> | ||||
<t>However, the flooding rate has not been fundamentally altered. | <dl newline="false" spacing="compact" indent="9"> | |||
</t> | <dt>Type:</dt> <dd>21</dd> | |||
</section> | <dt>Length:</dt> <dd>variable; the size in octets of the Value field</dd> | |||
<dt>Value:</dt> <dd>one or more sub-TLVs</dd> | ||||
<section anchor="FloodingTLV" title="Flooding Parameters TLV"> | </dl> | |||
<t> | <t>Several sub-TLVs are defined in this document. The support of any sub-T | |||
This document defines a new Type-Length-Value | LV is <bcp14>OPTIONAL</bcp14>.</t> | |||
tuple (TLV) called the "Flooding Parameters TLV" | <t> For a given IS-IS adjacency, the Flooding Parameters TLV does not | |||
that may be included in IS to IS Hellos (IIH) or | need to be advertised in each IIH or PSNP. An IS uses the latest | |||
Partial Sequence Number PDUs (PSNPs). It allows | received value for each parameter until a new value is advertised by the | |||
IS-IS implementations to advertise flooding-related | peer. However, as IIHs and PSNPs are not reliably exchanged and may | |||
parameters and capabilities which may be | never be received, parameters <bcp14>SHOULD</bcp14> be sent even if | |||
used by the peer to support faster flooding. | there is no change in value since the last transmission. For a | |||
</t> | parameter that has never been advertised, an IS uses its local default | |||
<t>Type: 21</t> | value. That value <bcp14>SHOULD</bcp14> be configurable on a per-node | |||
<t>Length: variable, the size in octets of the Value field</t> | basis and <bcp14>MAY</bcp14> be configurable on a per-interface basis. | |||
</t> | ||||
<t>Value: One or more sub-TLVs</t> | <section anchor="LSPBurstSize" numbered="true" toc="default"> | |||
<t>Several sub-TLVs are defined in this document. The support of | <name>LSP Burst Size Sub-TLV</name> | |||
any sub-TLV is OPTIONAL.</t> | <t>The LSP Burst Size sub-TLV advertises the maximum number of LSPs that | |||
the node can receive without an intervening delay between LSP transmissions.</t | ||||
<t> | > | |||
For a given IS-IS adjacency, the Flooding | <dl newline="false" spacing="compact" indent="9"> | |||
Parameters TLV does not need to be advertised | <dt>Type:</dt> <dd>1</dd> | |||
in each IIH or PSNP. An IS uses the latest | <dt>Length:</dt> <dd>4 octets</dd> | |||
received value for each parameter until a new | <dt>Value:</dt> <dd>number of LSPs that can be received back to back</ | |||
value is advertised by the peer. However, as | dd> | |||
IIHs and PSNPs are not reliably exchanged, and | </dl> | |||
may never be received, parameters SHOULD be | </section> | |||
sent even if there is no change in value since | <section anchor="InterfaceLSPTransmissionInterval" numbered="true" toc="de | |||
the last transmission. For a parameter that | fault"> | |||
has never been advertised, an IS uses | <name>LSP Transmission Interval Sub-TLV</name> | |||
its local default value. That value SHOULD be | <t>The LSP Transmission Interval sub-TLV advertises the minimum interval | |||
configurable on a per-node basis and MAY be | , in microseconds, between LSP arrivals that can be sustained on this receiving |
configurable on a per-interface basis. | interface.</t> | |||
</t> | <dl newline="false" spacing="compact" indent="9"> | |||
<section anchor="LSPBurstSize" title="LSP Burst Size sub-TLV"> | <dt>Type:</dt> <dd>2</dd> | |||
<t>The LSP Burst Size sub-TLV advertises the maximum numb | <dt>Length:</dt> <dd>4 octets</dd> | |||
er of LSPs that the node can receive without an intervening delay between LSP tr | <dt>Value:</dt> <dd>minimum interval, in microseconds, between two | |||
ansmissions.</t> | consecutive LSPs received after LSP Burst Size LSPs have been | |||
<t>Type: 1</t> | received</dd> | |||
<t>Length: 4 octets</t> | </dl> | |||
<t>Value: number of LSPs that can be received back-to-bac | <t>The LSP Transmission Interval is an advertisement of the receiver's s | |||
k.</t> | ustainable LSP reception rate. This rate may be safely used by a sender that doe | |||
</section> | s not support the flow control or congestion algorithm. It may also be used as t | |||
<section anchor="InterfaceLSPTransmissionInterval" title="LSP Tra | he minimal safe rate by flow control or congestion algorithms in unexpected case | |||
nsmission Interval sub-TLV"> | s, e.g., when the receiver is not acknowledging LSPs anymore. </t> | |||
<t>The LSP Transmission Interval sub-TLV advertises the m | </section> | |||
inimum interval, in micro-seconds, between LSPs arrivals which can be sustained | <section anchor="LPP" numbered="true" toc="default"> | |||
on this receiving interface.</t> | <name>LSPs per PSNP Sub-TLV</name> | |||
<t>Type: 2</t> | <t>The LSPs per PSNP (LPP) sub-TLV advertises the number of received LSPs |
<t>Length: 4 octets</t> | that triggers the immediate sending of a PSNP to acknowledge them.</t> | |||
<t>Value: minimum interval, in micro-seconds, between two | <dl newline="false" spacing="compact" indent="9"> | |||
consecutive LSPs received after LSP Burst Size LSPs have been received</t> | <dt>Type:</dt> <dd>3</dd> | |||
<t>The LSP Transmission Interval is an advertisement of t | <dt>Length:</dt> <dd>2 octets</dd> | |||
he receiver's sustainable LSP reception rate. This rate may be safely used by a | <dt>Value:</dt> <dd>number of LSPs acknowledged per PSNP</dd> | |||
sender which do not support the flow control or congestion algorithm. It may als | </dl> | |||
o be used as the minimal safe rate by flow control or congestion algorithms in u | <t>A node advertising this sub-TLV with a value for LPP <bcp14>MUST</bcp | |||
nexpected cases, e.g., when the receiver is not acknowledging LSPs anymore. </t> | 14> send a PSNP once LPP LSPs have been received and need to be acknowledged.</t | |||
> | ||||
</section> | ||||
<section anchor="Flags" numbered="true" toc="default"> | ||||
<name>Flags Sub-TLV</name> | ||||
<t>The Flags sub-TLV advertises a set of flags.</t> | ||||
<dl newline="false" spacing="compact" indent="9"> | ||||
<dt>Type:</dt> <dd>4</dd> | ||||
<dt>Length:</dt> <dd>Indicates the length in octets (1-8) of the Value | ||||
field. The length <bcp14>SHOULD</bcp14> be the minimum required to send all bit | ||||
s that are set.</dd> | ||||
<dt>Value:</dt> <dd><t>list of flags</t> | ||||
<artwork align="left" name="" type="" alt=""><![CDATA[ | ||||
0 1 2 3 4 5 6 7 ... | ||||
+-+-+-+-+-+-+-+-+... | ||||
|O| ... | ||||
+-+-+-+-+-+-+-+-+...]]></artwork> | ||||
</dd></dl> | ||||
</section> | <t>An LSP receiver sets the O-flag (Ordered | |||
<section anchor="LPP" title="LSPs Per PSNP sub-TLV"> | acknowledgment) to indicate to the LSP sender that | |||
<t>The LSP per PSNP (LPP) sub-TLV advertises the number o | it will acknowledge the LSPs in the order received. A PSNP |
f received LSPs that triggers the immediate sending of a PSNP to acknowledge the | acknowledging N LSPs is acknowledging the N oldest LSPs received. The | |||
m.</t> | order inside the PSNP is meaningless. If the sender keeps track of the | |||
<t>Type: 3</t> | order of LSPs sent, this indication allows for fast detection of the | |||
<t>Length: 2 octets</t> | loss of an LSP. This <bcp14>MUST NOT</bcp14> be used to alter the | |||
<t>Value: number of LSPs acknowledged per PSNP</t> | retransmission timer for any LSP. This <bcp14>MAY</bcp14> be used to | |||
<t>A node advertising this sub-TLV with a value for LPP M | trigger a congestion signal.</t> | |||
UST send a PSNP once LPP LSPs have been received and need to be acknowledged.</t | </section> | |||
> | <section anchor="partialSNPI" numbered="true" toc="default"> | |||
</section> | <name>PSNP Interval Sub-TLV</name> | |||
<section anchor="Flags" title="Flags sub-TLV"> | ||||
<t>The sub-TLV Flags advertises a set of flags.</t> | ||||
<t>Type: 4</t> | ||||
<t>Length: Indicates the length in octets (1-8) of the Va | ||||
lue field. The length SHOULD be the minimum required to send all bits that are s | ||||
et.</t> | ||||
<t>Value: List of flags.</t> | ||||
<t> | ||||
<figure> | ||||
<artwork align="left"> | ||||
0 1 2 3 4 5 6 7 ... | ||||
+-+-+-+-+-+-+-+-+... | ||||
|O| ... | ||||
+-+-+-+-+-+-+-+-+...</artwork> | ||||
</figure> | ||||
</t> | ||||
<t>An LSP receiver sets the O-flag to indicate to the LSP | ||||
sender that | ||||
it will acknowledge the LSPs in the order as received. A | ||||
PSNP acknowledging N LSPs is acknowledging the | ||||
N oldest LSPs received. The order inside the | ||||
PSNP is meaningless. If the sender keeps track | ||||
of the order of LSPs sent, this indication | ||||
allows a fast detection of the loss of an | ||||
LSP. This MUST NOT be used to alter the | ||||
retransmission timer for any LSP. This MAY be used to | ||||
trigger a congestion signal.</t> | ||||
</section> | ||||
<section anchor="partialSNPI" title="Partial SNP Interval sub-TLV"> | <t>The PSNP Interval sub-TLV advertises the amount of | |||
<t>The Partial SNP Interval sub-TLV advertises the amount of | time in milliseconds between periodic action for transmission of PSNPs. T | |||
time in milliseconds between periodic action for transmission of Partial | his time will trigger the sending of a PSNP | |||
Sequence Number PDUs. This time will trigger the sending of a PSNP | ||||
even if the number of unacknowledged LSPs received on a given | even if the number of unacknowledged LSPs received on a given | |||
interface does not exceed LPP (<xref target="LPP"/>). The time is | interface does not exceed LPP (<xref target="LPP" format="default"/>). T he time is | |||
measured from the reception of the first unacknowledged LSP.</t> | measured from the reception of the first unacknowledged LSP.</t> | |||
<dl newline="false" spacing="compact" indent="9"> | ||||
<t>Type: 5</t> | <dt>Type:</dt> <dd>5</dd> | |||
<dt>Length:</dt> <dd>2 octets</dd> | ||||
<t>Length: 2 octets</t> | <dt>Value:</dt> <dd>partialSNPInterval in milliseconds</dd> | |||
</dl> | ||||
<t>Value: partialSNPInterval in milliseconds</t> | <t>A node advertising this sub-TLV <bcp14>SHOULD</bcp14> send a PSNP at | |||
least once | ||||
<t>A node advertising this sub-TLV SHOULD send a PSNP at least once | per PSNP Interval if one or more unacknowledged LSPs have been | |||
per Partial SNP Interval if one or more unacknowledged LSPs have been | ||||
received on a given interface.</t> | received on a given interface.</t> | |||
</section> | </section> | |||
<section anchor="RWIN" numbered="true" toc="default"> | ||||
<name>Receive Window Sub-TLV</name> | ||||
<t>The Receive Window (RWIN) sub-TLV advertises the maximum number of un | ||||
acknowledged LSPs that the node can receive for a given adjacency.</t> | ||||
<dl newline="false" spacing="compact" indent="9"> | ||||
<dt>Type:</dt> <dd>6</dd> | ||||
<dt>Length:</dt> <dd>2 octets</dd> | ||||
<dt>Value:</dt> <dd>maximum number of unacknowledged LSPs</dd> | ||||
</dl> | ||||
</section> | ||||
<section anchor="TLVoperationLAN" numbered="true" toc="default"> | ||||
<name>Operation on a LAN Interface</name> | ||||
<t>On a LAN interface, all LSPs are link-level multicasts. Each LSP sent | ||||
will be received by all ISs on the LAN, and each IS will receive LSPs from all | ||||
transmitters. In this section, we clarify how the flooding parameters should be | ||||
interpreted in the context of a LAN.</t> | ||||
<t>An LSP receiver on a LAN will communicate its desired flooding parame | ||||
ters using a single Flooding Parameters TLV, which will be received by all LSP t | ||||
ransmitters. The flooding parameters sent by the LSP receiver <bcp14>MUST</bcp14 | ||||
> be understood as instructions from the LSP receiver to each LSP transmitter ab | ||||
out the desired maximum transmit characteristics of each transmitter. The receiv | ||||
er is aware that there are multiple transmitters that can send LSPs to the recei | ||||
ver LAN interface. The receiver might want to take that into account by advertis | ||||
ing more conservative values, e.g., a higher LSP Transmission Interval. When the | ||||
transmitters receive the LSP Transmission Interval value advertised by an LSP r | ||||
eceiver, the transmitters should rate-limit LSPs according to the advertised flo | ||||
oding parameters. They should not apply any further interpretation to the floodi | ||||
ng parameters advertised by the receiver.</t> | ||||
<t>A given LSP transmitter will receive multiple flooding parameter adve | ||||
rtisements from different receivers that may include different flooding paramete | ||||
r values. A given transmitter <bcp14>SHOULD</bcp14> use the most conservative va | ||||
lue on a per-parameter basis. For example, if the transmitter receives multiple | ||||
LSP Burst Size values, it should use the smallest value.</t> | ||||
<t>The Designated Intermediate System (DIS) plays a special role in the | ||||
operation of flooding on the LAN as it is responsible for responding to PSNPs se | ||||
nt on the LAN circuit that are used to request LSPs that the sender of the PSNP | ||||
does not have. If the DIS does not support faster flooding, this will impact the | ||||
maximum flooding speed that could occur on a LAN. Use of LAN priority to prefer | ||||
a node that supports faster flooding in the DIS election may be useful.</t> | ||||
<section anchor="RWIN" title="Receive Window sub-TLV"> | <t>Note: The focus of work used to develop the example algorithms discus | |||
<t>The Receive Window (RWIN) sub-TLV advertises the maxim | sed later in this document focused on operation over point-to-point interfaces. | |||
um number of unacknowledged LSPs that the node can receive for a given adjacency | A full discussion of how best to do faster flooding on a LAN interface is theref | |||
.</t> | ore out of scope for this document.</t> | |||
<t>Type: 6</t> | </section> | |||
<t>Length: 2 octets</t> | </section> | |||
<t>Value: maximum number of unacknowledged LSPs</t> | <section anchor="Receiver" numbered="true" toc="default"> | |||
</section> | <name>Performance Improvement on the Receiver</name> | |||
<t>This section defines two behaviors that <bcp14>SHOULD</bcp14> be implem | ||||
<section anchor="TLVoperationLAN" title="Operation on a LAN inter | ented on the receiver.</t> | |||
face"> | <section anchor="LSPACKRate" numbered="true" toc="default"> | |||
<t>On a LAN interface, all LSPs are link-level multicasts | <name>Rate of LSP Acknowledgments</name> | |||
. Each LSP sent will be received by all ISs on the LAN and each IS will receive | <t>On point-to-point networks, PSNPs provide acknowledgments for | |||
LSPs from all transmitters. In this section, we clarify how the flooding paramet | received LSPs. <xref target="ISO10589" format="default"/> suggests | |||
ers should be interpreted in the context of a LAN.</t> | using some delay when sending PSNPs. This provides some optimization | |||
<t>An LSP receiver on a LAN will communicate its desired | as multiple LSPs can be acknowledged by a single PSNP.</t> | |||
flooding parameters using a single Flooding Parameters TLV, which will be receiv | <t>Faster LSP flooding benefits from a faster feedback loop. This | |||
ed by all LSP transmitters. The flooding parameters sent by the LSP receiver MUS | requires a reduction in the delay in sending PSNPs. | |||
T be understood as instructions from the LSP receiver to each LSP transmitter ab | </t> | |||
out the desired maximum transmit characteristics of each transmitter. The receiv | <t>For the generation of PSNPs, the receiver <bcp14>SHOULD</bcp14> use | |||
er is aware that there are multiple transmitters that can send LSPs to the recei | a partialSNPInterval smaller than the one defined in <xref | |||
ver LAN interface. The receiver might want to take that into account by advertis | target="ISO10589" format="default"/>. The choice of this lower value | |||
ing more conservative values, e.g., a higher LSP Transmission Interval. When the | is a local choice. It may depend on the available processing power of | |||
transmitters receive the LSP Transmission Interval value advertised by an LSP r | the node, the number of adjacencies, and the requirement to | |||
eceiver, the transmitters should rate-limit LSPs according to the advertised flo | synchronize the LSDB more quickly. 200 ms seems to be a reasonable | |||
oding parameters. They should not apply any further interpretation to the floodi | value.</t> | |||
ng parameters advertised by the receiver.</t> | <t>In addition to the timer-based partialSNPInterval, the receiver | |||
<t>A given LSP transmitter will receive multiple flooding | <bcp14>SHOULD</bcp14> keep track of the number of unacknowledged LSPs | |||
parameter advertisements from different receivers that may include different fl | per circuit and level. When this number exceeds a preset threshold of | |||
ooding parameter values. A given transmitter SHOULD use the most conservative va | LSPs per PSNP (LPP), the receiver <bcp14>SHOULD</bcp14> immediately | |||
lue on a per-parameter basis. For example, if the transmitter receives multiple | send a PSNP without waiting for the PSNP timer to expire. In the case | |||
LSP Burst Size values, it should use the smallest value.</t> | of a burst of LSPs, this allows more frequent PSNPs, giving faster | |||
<t>The Designated Intermediate System (DIS) plays a speci | feedback to the sender. Outside of the burst case, the usual | |||
al role in the operation of flooding on the LAN as it is responsible for respond | timer-based PSNP approach comes into effect.</t> | |||
ing to PSNPs sent on the LAN circuit which are used to request LSPs that the sen | <t>The smaller the LPP is, the faster the feedback to the sender and | |||
der of the PSNP does not have. If the DIS does not support faster flooding, this | possibly the higher the rate if the rate is limited by the end-to-end | |||
will impact the maximum flooding speed which could occur on a LAN. Use of LAN p | RTT (link RTT + time to acknowledge). This may result in an increase | |||
riority to prefer a node which supports faster flooding in the DIS election may | in the number of PSNPs sent, which may increase CPU and IO load on both | |||
be useful.</t> | the sender and receiver. The LPP should be less than or equal to 90 | |||
<t>NOTE: The focus of work used to develop the example al | as this is the maximum number of LSPs that can be acknowledged in a | |||
gorithms discussed later in this document focused on operation over point-to-poi | PSNP at common MTU sizes; hence, waiting longer would not reduce the | |||
nt interfaces. A full discussion of how best to do faster flooding on a LAN inte | number of PSNPs sent but would delay the acknowledgments. LPP should | |||
rface is therefore out of scope for this document.</t> | not be chosen too high as the congestion control starts with a | |||
</section> | congestion window of LPP + 1. Based on experimental evidence, 15 | |||
unacknowledged LSPs is a good value, assuming that the Receive Window | ||||
</section> | is at least 30. More frequent PSNPs give the transmitter more | |||
feedback on receiver progress, allowing the transmitter to continue | ||||
<section anchor="Receiver" title="Performance improvement on the receiver | transmitting while not burdening the receiver with undue overhead. | |||
"> | </t> | |||
<t>By deploying both the timer-based and the threshold-based PSNP approa | ||||
<t>This section defines two behaviors that SHOULD be implemented | ches, the receiver can be adaptive to both LSP bursts and infrequent LSP updates | |||
on the receiver.</t> | . </t> | |||
<t>As PSNPs also consume link bandwidth, packet-queue space, and | ||||
<section anchor="LSPACKRate" title="Rate of LSP Acknowledgments"> | ||||
<t>On point-to-point networks, PSNPs provide acknowledgme | ||||
nts for | ||||
received LSPs. <xref target="ISO10589"/> | ||||
suggests that some delay be | ||||
used when sending PSNPs. This provides some optimization as multiple | ||||
LSPs can be acknowledged by a single PSNP.</t> | ||||
<t> | ||||
Faster LSP flooding benefits from a faster feedback | ||||
loop. This requires a reduction in the delay in sending | ||||
PSNPs. | ||||
</t> | ||||
<t>For the generation of PSNPs, the receiver SHOULD use a | ||||
partialSNPInterval smaller than the one defined in [ISO10589]. The choice of th | ||||
is lower value is a local choice. It may depend on the available processing powe | ||||
r of the node, the number of adjacencies, and the requirement to synchronize the | ||||
LSDB more quickly. 200 ms seems to be a reasonable value.</t> | ||||
<t> | ||||
In addition to the timer-based | ||||
partialSNPInterval, the receiver SHOULD keep | ||||
track of the number of unacknowledged LSPs | ||||
per circuit and level. When this number | ||||
exceeds a preset threshold of LSPs Per PSNP | ||||
(LPP), the receiver SHOULD immediately send | ||||
a PSNP without waiting for the PSNP timer to | ||||
expire. In the case of a burst of LSPs, this | ||||
allows for more frequent PSNPs, giving | ||||
faster feedback to the sender. Outside of | ||||
the burst case, the usual timer-based PSNP | ||||
approach comes into effect.</t> | ||||
<t> The smaller the LPP, the faster the feedback to the | ||||
sender | ||||
and possibly the higher the rate if the rate is limited | ||||
by the | ||||
end to end RTT (link RTT + time to acknowledge). This | ||||
may result | ||||
in an increase in the number of PSNPs sent which may i | ||||
ncrease CPU | ||||
and IO load on both the sender and receiver. | ||||
The LPP should | ||||
be less than or equal to 90 as this is | ||||
the maximum number of LSPs that can be | ||||
acknowledged in a PSNP at common MTU sizes, | ||||
hence waiting longer would not reduce the | ||||
number of PSNPs sent but would delay the | ||||
acknowledgements. LPP should not be chosen too high as | ||||
the congestion control starts with a congestion window | ||||
of LPP+1. | ||||
Based on experimental | ||||
evidence, 15 unacknowledged LSPs is a good | ||||
value assuming that the Receive Window is | ||||
at least 30. More | ||||
frequent PSNPs give the transmitter more | ||||
feedback on receiver progress, allowing the | ||||
transmitter to continue transmitting while | ||||
not burdening the receiver with undue | ||||
overhead. | ||||
</t> | ||||
<t>By deploying both the timer-based and the threshold-bas | ||||
ed PSNP approaches, the receiver can be adaptive to both LSP bursts and infreque | ||||
nt LSP updates. </t> | ||||
<t>As PSNPs also consume link bandwidth, packet-queue spa | ||||
ce, and | ||||
protocol-processing time on receipt, the increased sending of PSNPs | protocol-processing time on receipt, the increased sending of PSNPs | |||
should be taken into account when considering the rate at which LSPs | should be taken into account when considering the rate at which LSPs | |||
can be sent on an interface.</t> | can be sent on an interface.</t> | |||
</section> | </section> | |||
<section anchor="PKTPRI" numbered="true" toc="default"> | ||||
<section anchor="PKTPRI" title="Packet Prioritization on Receive" | <name>Packet Prioritization on Receive</name> | |||
> | <t>There are three classes of PDUs sent by IS-IS:</t> | |||
<t>There are three classes of PDUs sent by IS-IS:</t> | <ul spacing="normal"> | |||
<li> | ||||
<t> | <t>Hellos</t> | |||
<list style="symbols"> | </li> | |||
<t>Hellos</t> | <li> | |||
<t>LSPs</t> | ||||
<t>LSPs</t> | </li> | |||
<li> | ||||
<t>Complete Sequence Number PDUs (CSNPs) | <t>SNPs (Complete Sequence Number PDUs (CSNPs) and PSNPs)</t> | |||
and PSNPs</t> | </li> | |||
</list>Implementations today may prioritize the r | </ul> | |||
eception of Hellos | <t>Implementations today may prioritize the reception of Hellos | |||
over LSPs and Sequence Number PDUs (SNPs) in order to prevent a burst of LSP updates from | over LSPs and Sequence Number PDUs (SNPs) in order to prevent a burst of LSP updates from | |||
triggering an adjacency timeout which in turn would require additional | triggering an adjacency timeout, which in turn would require additional | |||
LSPs to be updated.</t> | LSPs to be updated.</t> | |||
<t>CSNPs and PSNPs serve to trigger or acknowledge the transmission of s | ||||
<t>CSNPs and PSNPs serve to trigger or acknowledge the tr | pecified | |||
ansmission of specified | ||||
LSPs. On a point-to-point link, PSNPs acknowledge the receipt of one | LSPs. On a point-to-point link, PSNPs acknowledge the receipt of one | |||
or more LSPs. | or more LSPs. | |||
For this reason, <xref target="ISO10589"/> | For this reason, <xref target="ISO10589" format="default"/> | |||
specifies a delay | specifies a delay | |||
(partialSNPInterval) before sending a PSNP so that the number of PSNPs | (partialSNPInterval) before sending a PSNP so that the number of PSNPs | |||
required to be sent is reduced. On receipt of a PSNP, the set of LSPs | required to be sent is reduced. On receipt of a PSNP, the set of LSPs | |||
acknowledged by that PSNP can be marked so that they do not need to be | acknowledged by that PSNP can be marked so that they do not need to be | |||
retransmitted.</t> | retransmitted.</t> | |||
<t>If a PSNP is dropped on reception, the set of LSPs advertised in | ||||
the PSNP cannot be marked as acknowledged, and this results in | ||||
needless retransmissions that further delay transmission of | ||||
other LSPs that are yet to be transmitted. It may also make it more | ||||
likely that a receiver becomes overwhelmed by LSP transmissions.</t> | ||||
<t>Therefore, implementations <bcp14>SHOULD</bcp14> prioritize IS-IS | ||||
PDUs on the way from the incoming interface to the IS-IS process. The | ||||
relative priority of packets in decreasing order <bcp14>SHOULD</bcp14> | ||||
be: Hellos, SNPs, and LSPs. Implementations <bcp14>MAY</bcp14> also | ||||
prioritize IS-IS packets over other protocols, which are less critical | ||||
for the router or network, less sensitive to delay, or more bursty | ||||
(e.g., BGP).</t> | ||||
</section> | ||||
</section> | ||||
<section anchor="Control" numbered="true" toc="default"> | ||||
<name>Congestion and Flow Control</name> | ||||
<section anchor="Overview" numbered="true" toc="default"> | ||||
<name>Overview</name> | ||||
<t>Ensuring the goodput between two entities is a Layer 4 | ||||
responsibility as per the OSI model. A typical example is the TCP | ||||
protocol defined in <xref target="RFC9293" format="default"/> that | ||||
provides flow control, congestion control, and reliability. | ||||
</t> | ||||
<t>Flow control creates a control loop between a transmitter and a recei | ||||
ver so that the transmitter does not overwhelm the receiver. TCP provides a mean | ||||
s for the receiver to govern the amount of data sent by the sender through the u | ||||
se of a sliding window.</t> | ||||
<t> Congestion control prevents the set of transmitters from overwhelmin | ||||
g the path of the packets between two IS-IS implementations. This path typically | ||||
includes a point-to-point link between two IS-IS neighbors, which is usually ov | ||||
ersized compared to the capability of the IS-IS speakers, but potentially also i | ||||
ncludes some internal elements inside each neighbor such as switching fabric, li | ||||
ne card CPU, and forwarding plane buffers that may experience congestion. These | ||||
resources may be shared across multiple IS-IS adjacencies for the system, and it | ||||
is the responsibility of congestion control to ensure that these are shared rea | ||||
sonably.</t> | ||||
<t>Reliability provides loss detection and recovery. IS-IS already has m | ||||
echanisms to ensure the reliable transmission of LSPs. This is not changed by th | ||||
is document.</t> | ||||
<t>If a PSNP is dropped on reception, | <t>Sections <xref target="RWIN-Algo" format="counter"/> and <xref target | |||
the set of LSPs advertised in the PSNP cannot be marked as | ="TxSide" format="counter"/> provide two flow and/or congestion control algorith | |||
acknowledged and this results in needless retransmissions that will | ms that may be implemented by taking advantage of the extensions defined in this | |||
further delay transmission of other LSPs that are yet to be | document. The signal that these IS-IS extensions (defined in Sections <xref tar | |||
transmitted. It may also make it more likely that a receiver becomes | get="FloodingTLV" format="counter"/> and <xref target="Receiver" format="counte | |||
overwhelmed by LSP transmissions.</t> | r"/>) provide is generic and is designed to support different sender-side algori | |||
thms. A sender can unilaterally choose a different algorithm to use.</t> | ||||
<t>Therefore implementations SHOULD prioritize IS-IS PDUs | </section> | |||
on the way from the incoming interface to the IS-IS process. The relative prior | <section anchor="RWIN-Algo" numbered="true" toc="default"> | |||
ity of packets in decreasing order SHOULD be: Hellos, SNPs, LSPs. Implementation | <name>Congestion and Flow Control Algorithm</name> | |||
s MAY also prioritize IS-IS packets over other protocols which are less critical | <section anchor="FlowControl" numbered="true" toc="default"> | |||
for the router or network, less sensitive to delay or more bursty (e.g., BGP).< | <name>Flow Control</name> | |||
/t> | ||||
</section> | ||||
</section> | ||||
<section anchor="Control" title="Congestion and Flow Control"> | ||||
<section anchor="Overview" title="Overview"> | ||||
<t>Ensuring the goodput between two entities is a layer-4 | ||||
responsibility as per the OSI model. A typical example is the TCP protocol defi | ||||
ned in | ||||
<xref target="RFC9293"></xref> that provides flow | ||||
control, congestion control, and reliability. | ||||
</t> | ||||
<t>Flow control creates a control loop between a transmit | ||||
ter and a receiver so that the transmitter does not overwhelm the receiver. TCP | ||||
provides a means for the receiver to govern the amount of data sent by the sende | ||||
r through the use of a sliding window.</t> | ||||
<t> Congestion control prevents the set of transmitters f | ||||
rom overwhelming the path of the packets between two IS-IS implementations. This | ||||
path typically includes a point-to-point link between two IS-IS neighbors which | ||||
is usually over-sized compared to the capability of the IS-IS speakers, but pot | ||||
entially some internal elements inside each neighbor such as switching fabric, l | ||||
ine card CPU, and forwarding plane buffers that may experience congestion. These | ||||
resources may be shared across multiple IS-IS adjacencies for the system and it | ||||
is the responsibility of congestion control to ensure that these are shared rea | ||||
sonably.</t> | ||||
<t>Reliability provides loss detection and recovery. IS-I | ||||
S already has mechanisms to ensure the reliable transmission of LSPs. This is no | ||||
t changed by this document.</t> | ||||
<t>The following two sections provide two Flow and/or Con | ||||
gestion control algorithms that may be implemented by taking advantage of the ex | ||||
tensions defined in this document. The signal that these IS-IS extensions define | ||||
d in <xref target="FloodingTLV"/> and <xref target="Receiver"/> provide is ge | ||||
neric and is designed to support different sender-side algorithms. A sender can | ||||
unilaterally choose a different algorithm to use.</t> | ||||
</section> | ||||
<section anchor="RWIN-Algo" title="Congestion and Flow Control al | <t> A flow control mechanism creates a control loop between a single | |||
gorithm"> | transmitter and a single receiver. This section uses a | |||
mechanism similar to the TCP receive window to allow the receiver to | ||||
govern the amount of data sent by the sender. This receive window | ||||
(RWIN) indicates an allowed number of LSPs that the sender may | ||||
transmit before waiting for an acknowledgment. The size of the | ||||
receive window, in units of LSPs, is initialized with the value | ||||
advertised by the receiver in the Receive Window sub-TLV. | ||||
<section anchor="FlowControl" title="Flow control"> | If no | |||
<t> | value is advertised, the transmitter should initialize RWIN with its | |||
A flow control mechanism creates a control loop | locally configured value for this receiver. | |||
between a single instance of a transmitter and a | </t> | |||
single receiver. This section uses a mechanism | <t> | |||
similar to the TCP receive window to allow the | ||||
receiver to govern the amount of data sent by the | ||||
sender. This receive window ('rwin') indicates an | ||||
allowed number of LSPs that the sender may | ||||
transmit before waiting for an acknowledgment. The | ||||
size of the receive window, in units of LSPs, is | ||||
initialized with the value advertised by the | ||||
receiver in the Receive Window sub-TLV. If no | ||||
value is advertised, the transmitter should | ||||
initialize rwin with its locally configured value for thi | ||||
s neighbor. | ||||
</t> | ||||
<t> | ||||
When the transmitter sends a set of LSPs to the | When the transmitter sends a set of LSPs to the | |||
receiver, it subtracts the number of LSPs sent | receiver, it subtracts the number of LSPs sent | |||
from rwin. If the transmitter receives a PSNP, | from RWIN. If the transmitter receives a PSNP, | |||
then rwin is incremented for each acknowledged | then RWIN is incremented for each acknowledged | |||
LSP. The transmitter must ensure that the value of | LSP. The transmitter must ensure that the value of | |||
rwin never goes negative. | RWIN never goes negative. | |||
</t> | </t> | |||
<t>The RWIN value is of importance when the RTT is the limiting factor | ||||
<t>The RWIN value is of importance when the RTT is the li | for the throughput. In this case, the optimal size is the desired LSP rate mult | |||
miting factor for the throughput. In this case the optimal size is the desired L | iplied by the RTT. The RTT is the addition of the link RTT plus the time taken b | |||
SP rate multiplied by the RTT. The RTT being the addition of the link RTT plus t | y the receiver to acknowledge the first received LSP in its PSNP. The values 50 | |||
he time taken by the receiver to acknowledge the first received LSP in its PSNP. | or 100 may be reasonable default numbers for RWIN. | |||
50 or 100 may be reasonable default numbers. As an example, a RWIN of 100 requi | As an example, an RWIN of 100 requires a control plane input buffer of 150 kbyte | |||
res a control plane input buffer of 150 kbytes per neighbor assuming an IS-IS MT | s per neighbor (assuming an IS-IS MTU of 1500 octets) and limits the throughput | |||
U of 1500 octets and limits the throughput to 10000 LSPs per second and per neig | to 10000 LSPs per second and per neighbor for a link RTT of 10 ms. With the same | |||
hbor for a link RTT of 10 ms. With the same RWIN, the throughput limitation is 2 | RWIN, the throughput limitation is 2000 LSPs per second when the RTT is 50 ms. | |||
000 LSP per second when the RTT is 50ms. That's the maximum throughput assuming | That's the maximum throughput assuming no other limitations such as CPU limitati | |||
no other limitations such as CPU limitations.</t> | ons.</t> | |||
<t>Equally, RTT is of importance for the performance. That is why the | ||||
<t>Equally RTT is of importance for the performance. That | performance improvements on the receiver specified in <xref | |||
is why the | target="Receiver" format="default"/> are important to achieve good | |||
performance improvements on the receiver specified in sec | throughput. If the receiver does not support those performance | |||
tion <xref target="Receiver"/> are | improvements, in the worst case (small RWIN and high RTT) the | |||
important to achieve good throughput. If the receiver doe | throughput will be limited by the LSP Transmission Interval as | |||
s not support | defined in <xref target="InterfaceLSPTransmissionInterval" | |||
those performance improvements, in the worst case (small | format="default"/>.</t> | |||
RWIN and high | <section anchor="TLVoperationP2P" numbered="true" toc="default"> | |||
RTT) the throughput will be limited by the LSP Transmissi | <name>Operation on a Point-to-Point Interface</name> | |||
on Interval | <t>By sending the Receive Window sub-TLV, a node advertises to its n | |||
as defined in section <xref target="InterfaceLSPTransmiss | eighbor its ability to receive that many unacknowledged LSPs from the neighbor. | |||
ionInterval"/>.</t> | This is akin to a receive window or sliding window in flow control. In some impl | |||
ementations, this value should reflect the IS-IS socket buffer size. Special car | ||||
<section anchor="TLVoperationP2P" title="Operatio | e must be taken to leave space for CSNPs, PSNPs, and IIHs if they share the same | |||
n on a point to point interface"> | input queue. In this case, this document suggests advertising an LSP Receive Wi | |||
ndow corresponding to half the size of the IS-IS input queue. </t> | ||||
<t>By sending the Receive Window sub-TLV, | <t>By advertising an LSP Transmission Interval sub-TLV, a node adver | |||
a node advertises to its neighbor its ability to receive that many un-acknowled | tises its ability to receive LSPs separated by at least the advertised value, ou | |||
ged LSPs from the neighbor. This is akin to a receive window or sliding window i | tside of LSP bursts.</t> | |||
n flow control. In some implementations, this value should reflect the IS-IS soc | <t>By advertising an LSP Burst Size sub-TLV, a node advertises its a | |||
ket buffer size. Special care must be taken to leave space for CSNPs and PSNPs a | bility to receive that number of LSPs back to back.</t> | |||
nd IIHs if they share the same input queue. In this case, this document suggests | <t>The LSP transmitter <bcp14>MUST NOT</bcp14> exceed these paramete | |||
advertising an LSP Receive Window corresponding to half the size of the IS-IS i | rs. After having sent a full burst of LSPs, it <bcp14>MUST</bcp14> send the subs | |||
nput queue. </t> | equent LSPs with a minimum of LSP Transmission Interval between LSP transmission | |||
s. For CPU scheduling reasons, this rate <bcp14>MAY</bcp14> be averaged over a s | ||||
<t>By advertising an LSP Transmission Int | mall period, e.g., 10-30 ms.</t> | |||
erval sub-TLV, a node advertises its ability to receive LSPs separated by at lea | <t>If either the LSP transmitter or receiver does not adhere to thes | |||
st the advertised value, outside of LSP bursts.</t> | e parameters, for example, because of transient conditions, this doesn't result | |||
in a fatal condition for IS-IS operation. In the worst case, an LSP is lost at t | ||||
<t>By advertising an LSP Burst Size sub-T | he receiver, and this situation is already remedied by mechanisms in <xref targe | |||
LV, a node advertises its ability to receive that number of LSPs back-to-back.</ | t="ISO10589" format="default"/>. | |||
t> | After a few seconds, neighbors will excha | |||
nge PSNPs (for point-to-point interfaces) or CSNPs (for broadcast interfaces) an | ||||
<t>The LSP transmitter MUST NOT exceed th | d recover from the lost LSPs. This worst case should be avoided as those additio | |||
ese parameters. After having sent a full burst of LSPs, it MUST send the subsequ | nal seconds impact convergence time since the LSDB is not fully synchronized. He | |||
ent LSPs with a minimum of LSP Transmission Interval between LSP transmissions. | nce, it is better to err on the conservative side and to under-run the receiver | |||
For CPU scheduling reasons, this rate MAY be averaged over a small period, e.g., | rather than over-run it.</t> | |||
10-30ms.</t> | </section> | |||
<section numbered="true" toc="default"> | ||||
<t>If either the LSP transmitter or recei | <name>Operation on a Broadcast LAN Interface</name> | |||
ver does not adhere to these parameters, for example because of transient condit | <t>Flow and congestion control on a LAN interface is out of scope fo | |||
ions, this doesn't result in a fatal condition for IS-IS operation. In the worst | r this document.</t> | |||
case, an LSP is lost at the receiver and this situation is already remedied by | </section> | |||
mechanisms in <xref target="ISO10589"/>. | </section> | |||
After a few seconds, neighbors will excha | <section anchor="CongestionControl" numbered="true" toc="default"> | |||
nge PSNPs (for point-to-point interfaces) or CSNPs (for broadcast interfaces) an | <name>Congestion Control</name> | |||
d recover from the lost LSPs. This worst case should be avoided as those additio | <t>Whereas flow control prevents the sender from overwhelming the | |||
nal seconds impact convergence time since the LSDB is not fully synchronized. He | receiver, congestion control prevents senders from overwhelming the | |||
nce it is better to err on the conservative side and to under-run the receiver r | network. For an IS-IS adjacency, the network between two IS-IS | |||
ather than over-run it.</t> | neighbors is relatively limited in scope and includes a single link | |||
that is typically oversized compared to the capability of the IS-IS | ||||
</section> | speakers. In situations where the probability of LSP drop is low, | |||
<section title="Operation on a | flow control (<xref target="FlowControl" format="default"/>) is | |||
broadcast LAN | expected to give good results, without the need to implement | |||
interface"> | congestion control. Otherwise, adding congestion control will help | |||
<t>Flow and congestion control on a LAN interfa | handle congestion of LSPs in the receiver.</t> | |||
ce is out of scope for this document.</t> | <t>This section describes one sender-side congestion control algorithm | |||
</section> | largely inspired by the TCP congestion control algorithm <xref target="RFC5681" | |||
format="default"/>.</t> | ||||
</section> | <t>The proposed algorithm uses a variable congestion window 'cwin'. It | |||
<section anchor="CongestionControl" title="Congestion Control"> | plays a role similar to the receive window described above. The main difference | |||
<t>Whereas flow control prevents the sender from overwhelming t | is that cwin is adjusted dynamically according to various events described belo | |||
he receiver, congestion control prevents senders from overwhelming the network. | w.</t> | |||
For an IS-IS adjacency, the network between two IS-IS neighbors is relatively li | <section anchor="CC1Core" numbered="true" toc="default"> | |||
mited in scope and includes a single link which is typically over-sized compared | <name>Core Algorithm</name> | |||
to the capability of the IS-IS speakers. | <t>In its simplest form, the congestion control algorithm looks like | |||
In situations where the probability of LSP drop is low, flow co | the following:</t> | |||
ntrol <xref target="FlowControl"/> is expected to give good results, without the | <figure anchor="cc1_core_algo"> | |||
need to implement congestion control. Otherwise, adding congestion control will | <artwork name="" type="" align="left" alt=""><![CDATA[ | |||
help handle | +---------------+ | |||
congestion of LSPs in the receiver.</t> | | | | |||
<t>This section describes one sender-side congestion cont | | v | |||
rol algorithm largely inspired by the TCP congestion control algorithm <xref tar | | +----------------------+ | |||
get="RFC5681"></xref>.</t> | | | Congestion avoidance | | |||
<t>The proposed algorithm uses a variable congestion wind | | +----------------------+ | |||
ow 'cwin'. It plays a role similar to the receive window described above. The ma | | | | |||
in difference is that cwin is adjusted dynamically according to various events d | | | Congestion signal | |||
escribed below.</t> | ----------------+]]></artwork> | |||
</figure> | ||||
<section anchor="CC1Core" title="Core algorithm"> | ||||
<t>In its simplest form, the congestion control a | ||||
lgorithm looks like the following:</t> | ||||
<figure anchor="cc1_core_algo"> | ||||
<artwork> | ||||
+---------------+ | ||||
| | | ||||
| v | ||||
| +----------------------+ | ||||
| | Congestion avoidance | | ||||
| +----------------------+ | ||||
| | | ||||
| | Congestion signal | ||||
----------------+ | ||||
</artwork> | ||||
</figure> | ||||
<t>The algorithm starts with cwin = cwin0 = LPP + | ||||
1. In the congestion avoidance phase, cwin increases as LSPs are acked: for eve | ||||
ry acked LSP, cwin += 1 / cwin without exceeding RWIN. When LSPs are exchanged, | ||||
cwin LSPs will be acknowledged in 1 RTT, meaning cwin(t) = t/RTT + cwin0. Since | ||||
the RTT is low in many IS-IS deployments, the sending rate can reach fast rates | ||||
in short periods of time.</t> | ||||
<t>When updating cwin, it must not become higher | ||||
than the number of LSPs waiting to be sent, otherwise the sending will not be pa | ||||
ced by the receiving of acks. Said differently, tx pressure is needed to maintai | ||||
n and increase cwin.</t> | ||||
<t>When the congestion signal is triggered, cwin | ||||
is set back to its initial value and the congestion avoidance phase starts again | ||||
.</t> | ||||
</section> | ||||
<section anchor="CC1CongestionSignals" title="Congestion | ||||
signals"> | ||||
<t>The congestion signal can take various forms. | ||||
The more reactive the congestion signals, the fewer LSPs will be lost due to con | ||||
gestion. However, overly aggressive congestion signals will cause a sender to ke | ||||
ep a very low sending rate even without actual congestion on the path.</t> | ||||
<t>Two practical signals are given below.</t> | ||||
<t>Delay: When receiving acknowledgements, a send | ||||
er estimates the acknowledgement time of the receiver. Based on this estimation, | ||||
it can infer that a packet was lost, and infer congestion on the path.</t> | ||||
<t>There can be a timer per LSP, but this can bec | ||||
ome costly for implementations. It is possible to use only a single timer t1 for | ||||
all LSPs: during t1, sent LSPs are recorded in a list list_1. Once the RTT is o | ||||
ver, list_1 is kept and another list list_2 is used to store the next LSPs. LSPs | ||||
are removed from the lists when acked. At the end of the second t1 period, ever | ||||
y LSP in list_1 should have been acked, so list_1 is checked to be empty. list_1 | ||||
can then be reused for the next RTT.</t> | ||||
<t>There are multiple strategies to set the timeo | ||||
ut value t1. It should be based on measurements of the maximum acknowledgement t | ||||
ime (MAT) of each PSNP. The simplest one is to use three times the RTT. Alternat | ||||
ively an exponential moving average of the MATs, like <xref target="RFC6298"/>. | ||||
A more elaborate one is to take a running maximum of the MATs over a period of a | ||||
few seconds. This value should include a margin of error to avoid false positiv | ||||
es (e.g., estimated MAT measure variance) which would have a significant impact | ||||
on performance.</t> | ||||
<t> Loss: if the receiver has signaled the O-flag | ||||
(Ordered acknowledgement) <xref target="Flags"/>, a sender MAY record its sendi | ||||
ng order and check that acknowledgements arrive in the same order. If not, some | ||||
LSPs are missing and this MAY be used to trigger a congestion signal.</t> | ||||
</section> | ||||
<section anchor="CC1Refinement" title="Refinement"> | ||||
<t>With the algorithm presented above, if congest | ||||
ion is detected, cwin goes back to its initial value, and does not use the infor | ||||
mation gathered in previous congestion avoidance phases.</t> | ||||
<t>It is possible to use a fast recovery phase on | ||||
ce congestion is detected, to avoid going through this linear rate of growth fro | ||||
m scratch. When congestion is detected, a fast recovery threshold frthresh is se | ||||
t to frthresh = cwin / 2. In this fast recovery phase, for every acked LSP, cwin | ||||
+= 1. Once cwin reaches frthresh, the algorithm goes back to the congestion avo | ||||
idance phase.</t> | ||||
<figure anchor="cc1_algo_refinement_1"> | ||||
<artwork> | ||||
+---------------+ | ||||
| | | ||||
| v | ||||
| +----------------------+ | ||||
| | Congestion avoidance | | ||||
| +----------------------+ | ||||
| | | ||||
| | Congestion signal | ||||
| | | ||||
| +----------------------+ | ||||
| | Fast recovery | | ||||
| +----------------------+ | ||||
| | | ||||
| | frthresh reached | ||||
----------------+ | ||||
</artwork> | ||||
</figure> | ||||
</section> | ||||
<section anchor="cc_remarks" title="Remarks"> | <t>The algorithm starts with cwin = cwin0 = LPP + 1. In the congesti | |||
<t> | on avoidance phase, cwin increases as LSPs are acked: for every acked LSP, cwin | |||
This algorithm's performance is dependent | += 1 / cwin without exceeding RWIN. When LSPs are exchanged, cwin LSPs will be a | |||
on the LPP value. Indeed, the smaller LPP | cknowledged in 1 RTT, meaning cwin(t) = t/RTT + cwin0. Since the RTT is low in m | |||
is, the more information is available for | any IS-IS deployments, the sending rate can reach fast rates in short periods of | |||
the congestion control algorithm to | time.</t> | |||
perform well. However, it also increases | <t>When updating cwin, it must not become higher than the number of | |||
the resources spent on sending PSNPs, so a | LSPs waiting to be sent, otherwise the sending will not be paced by the receivin | |||
trade-off must be made. This document | g of acks. Said differently, transmission pressure is needed to maintain and inc | |||
recommends to use an LPP of 15 or less. If | rease cwin.</t> | |||
a Receive Window is advertised, LPP | <t>When the congestion signal is triggered, cwin is set back to its | |||
SHOULD be lower and the best performance | initial value, and the congestion avoidance phase starts again.</t> | |||
is achieved when LPP is an integer | </section> | |||
fraction of the Receive Window. | <section anchor="CC1CongestionSignals" numbered="true" toc="default"> | |||
</t> | <name>Congestion Signals</name> | |||
<t>The congestion signal can take various forms. The more reactive t | ||||
he congestion signals, the fewer LSPs will be lost due to congestion. However, o | ||||
verly aggressive congestion signals will cause a sender to keep a very low sendi | ||||
ng rate even without actual congestion on the path.</t> | ||||
<t>Two practical signals are given below.</t> | ||||
<ol spacing="normal" type="1"> | ||||
<li><t>Delay: When receiving acknowledgments, a sender | ||||
estimates the acknowledgment time of the receiver. Based on | ||||
this estimation, it can infer that a packet was lost and | ||||
that the path is congested.</t> | ||||
<t>There can be a timer per LSP, but this can become costly for | ||||
implementations. It is possible to use only a single timer t1 | ||||
for all LSPs: during t1, sent LSPs are recorded in a list | ||||
list_1. Once the RTT is over, list_1 is kept and another list, | ||||
list_2, is used to store the next LSPs. LSPs are removed from the | ||||
lists when acked. At the end of the second t1 period, every LSP | ||||
in list_1 should have been acked, so list_1 is checked to be | ||||
empty. list_1 can then be reused for the next RTT.</t> | ||||
<t>Note that this congestion control algorithm be | <t>There are multiple strategies to set the timeout value t1. It | |||
nefits from the extensions proposed in this document. The advertisement of a rec | should be based on measurements of the maximum acknowledgment | |||
eive window from the receiver (<xref target="FlowControl"/>) avoids the use of a | time (MAT) of each PSNP. Using three times the RTT is the simplest | |||
n arbitrary maximum value by the sender. The faster acknowledgment of LSPs (<xre | strategy; | |||
f target="LSPACKRate"/>) allows for a faster control loop and hence a faster inc | alternatively, an exponential moving average of the MATs, | |||
rease of the congestion window in the absence of congestion. | as described in <xref target="RFC6298" format="default"/>, can be | |||
</t> | used. A more | |||
</section> | elaborate one is to take a running maximum of the MATs over a | |||
</section> | period of a few seconds. This value should include a margin of | |||
error to avoid false positives (e.g., estimated MAT measure | ||||
variance), which would have a significant impact on | ||||
performance.</t></li> | ||||
<li><t>Loss: if the receiver has signaled the O-flag (see <xref ta | ||||
rget="Flags" format="default"/>), a | ||||
sender <bcp14>MAY</bcp14> record its sending order and check | ||||
that acknowledgments arrive in the same order. If not, some | ||||
LSPs are missing, and this <bcp14>MAY</bcp14> be used to trigger | ||||
a congestion signal.</t></li> | ||||
</ol> | ||||
</section> | ||||
<section anchor="CC1Refinement" numbered="true" toc="default"> | ||||
<name>Refinement</name> | ||||
<t>With the algorithm presented above, if congestion is detected, cw | ||||
in goes back to its initial value and does not use the information gathered in p | ||||
revious congestion avoidance phases.</t> | ||||
<t>It is possible to use a fast recovery phase once congestion is de | ||||
tected and to avoid going through this linear rate of growth from scratch. When | ||||
congestion is detected, a fast recovery threshold frthresh is set to frthresh = | ||||
cwin / 2. In this fast recovery phase, for every acked LSP, cwin += 1. Once cwin | ||||
reaches frthresh, the algorithm goes back to the congestion avoidance phase.</t | ||||
> | ||||
<figure anchor="cc1_algo_refinement_1"> | ||||
<artwork name="" type="" align="left" alt=""><![CDATA[ | ||||
+---------------+ | ||||
| | | ||||
| v | ||||
| +----------------------+ | ||||
| | Congestion avoidance | | ||||
| +----------------------+ | |||
| | | ||||
| | Congestion signal | ||||
| | | ||||
| +----------------------+ | ||||
| | Fast recovery | | ||||
| +----------------------+ | ||||
| | | ||||
| | frthresh reached | ||||
----------------+]]></artwork> | ||||
</figure> | ||||
</section> | ||||
<section anchor="cc_remarks" numbered="true" toc="default"> | ||||
<name>Remarks</name> | ||||
<t> This algorithm's performance is dependent on the LPP | ||||
value. Indeed, the smaller the LPP is, the more information is | ||||
available for the congestion control algorithm to perform | ||||
well. However, it also increases the resources spent on sending | ||||
PSNPs, so a trade-off must be made. This document recommends | ||||
using an LPP of 15 or less. If a Receive Window is advertised, LPP | ||||
<bcp14>SHOULD</bcp14> be lower, and the best performance is | ||||
achieved when LPP is an integer fraction of the Receive Window. | ||||
</t> | ||||
<t>Note that this congestion control algorithm benefits from the | ||||
extensions proposed in this document. The advertisement of a | ||||
receive window from the receiver (<xref target="FlowControl" | ||||
format="default"/>) avoids the use of an arbitrary maximum value | ||||
by the sender. The faster acknowledgment of LSPs (<xref | ||||
target="LSPACKRate" format="default"/>) allows for a faster | ||||
control loop and hence a faster increase of the congestion | ||||
window in the absence of congestion. | ||||
</t> | ||||
</section> | ||||
</section> | ||||
<section anchor="Pacing" numbered="true" toc="default"> | ||||
<name>Pacing</name> | ||||
<t>As discussed in <xref target="RFC9002" sectionFormat="comma" | ||||
section="7.7" format="default"/>, a sender <bcp14>SHOULD</bcp14> | ||||
pace sending of all in-flight LSPs based on input from the | ||||
congestion controller.</t> | ||||
<t>Sending multiple packets without any delay between them creates a p | ||||
acket burst that might cause short-term congestion and losses. Senders <bcp14>MU | ||||
ST</bcp14> either use pacing or limit such bursts. Senders <bcp14>SHOULD</bcp14> | ||||
limit bursts to LSP Burst Size.</t> | ||||
<t>Senders can implement pacing as they choose. A perfectly paced send | ||||
er spreads packets evenly over time. For a window-based congestion controller, s | ||||
uch as the one in this section, that rate can be computed by averaging the conge | ||||
stion window over the RTT. Expressed as an inter-packet interval in units of tim | ||||
e:</t><t indent="3">interval = (SRTT / cwin) / N</t> | ||||
<t>SRTT is the Smoothed Round-Trip Time <xref target="RFC6298" format= | ||||
"default"/>.</t> | ||||
<t>Using a value for N that is small, but at least 1 (for example, 1.2 | ||||
5), ensures that variations in RTT do not result in underutilization of the cong | ||||
estion window.</t> | ||||
<t>Practical considerations, such as scheduling delays and computation | ||||
al efficiency, can cause a sender to deviate from this rate over time periods th | ||||
at are much shorter than an RTT.</t> | ||||
<t>One possible implementation strategy for pacing uses a leaky bucket | ||||
algorithm, where the capacity of the "bucket" is limited to the maximum burst s | ||||
ize, and the rate that the "bucket" fills is determined by the above function.</ | ||||
t> | ||||
</section> | ||||
<section anchor="sec_determining_values" numbered="true" toc="default"> | ||||
<name>Determining Values to be Advertised in the Flooding Parameters T | ||||
LV</name> | ||||
<t>The values that a receiver advertises do not need to be perfect. If | ||||
the values are too low, then the transmitter will not use the full bandwidth or | ||||
available CPU resources. If the values are too high, then the receiver may drop | ||||
some LSPs during the first RTT, and this loss will reduce the usable receive wi | ||||
ndow, and the protocol mechanisms will allow the adjacency to recover. Flooding | ||||
slower than both nodes can support will hurt performance, as will consistently ov | |||
erloading the receiver.</t> | ||||
<section anchor="sec_determining_values_static" numbered="true" toc="d | ||||
efault"> | ||||
<name>Static Values</name> | ||||
<t>The values advertised need not be dynamic, as feedback is | ||||
provided by the acknowledgment of LSPs in SNP | ||||
messages. Acknowledgments provide a feedback loop on how fast the | ||||
LSPs are processed by the receiver. They also signal that the LSPs | ||||
can be removed from the receive window, explicitly signaling to the | ||||
sender that more LSPs may be sent. By advertising relatively | ||||
static parameters, we expect to produce overall flooding behavior | ||||
similar to what might be achieved by manually configuring | ||||
per-interface LSP rate-limiting on all interfaces in the | ||||
network. The advertised values could be based, for example, on | ||||
offline tests of the overall LSP-processing speed for a particular | ||||
set of hardware and the number of interfaces configured for | ||||
IS-IS. With such a formula, the values advertised in the Flooding | ||||
Parameters TLV would only change when additional IS-IS interfaces | ||||
are configured.</t> | ||||
<t>Static values are dependent on the CPU generation, class of | ||||
router, and network scaling, typically the number of adjacent | ||||
neighbors. Examples at the time of publication are provided | ||||
below. | ||||
<section anchor="Pacing" title="Pacing"> | The LSP Burst Size could be in the range 5 to 20. From a router | |||
<t>As discussed in <xref target="RFC9002" sectionFormat=" | perspective, this value typically depends on the queue(s) size(s) | |||
comma" section="7.7" /> a sender SHOULD pace sending of all in-flight LSPs based | on the I/O path from the packet forwarding engine to the control | |||
on input from the congestion controller.</t> | plane, which is very platform-dependent. It also depends upon how | |||
<t>Sending multiple packets without any delay between the | many IS-IS neighbors share this I/O path, as typically all | |||
m creates a packet burst that might cause short-term congestion and losses. Send | neighbors will send the same LSPs at the same time. It may also | |||
ers MUST either use pacing or limit such bursts. Senders SHOULD limit bursts to | depend on other incoming control plane traffic that is sharing that | |||
LSP Burst Size.</t> | I/O | |||
<t>Senders can implement pacing as they choose. A perfect | path, how bursty they are, and how many incoming IS-IS packets are | |||
ly paced sender spreads packets evenly over time. For a window-based congestion | prioritized over other incoming control plane traffic. As | |||
controller, such as the one in this section, that rate can be computed by averag | indicated in <xref target="HISTORY" format="default"/>, the | |||
ing the congestion window over the RTT. Expressed as an inter-packet interval in | historical behavior from <xref target="ISO10589" | |||
units of time:</t> | format="default"/> allows a value of 10; hence, 10 seems | |||
<t>interval = (SRTT / cwin) / N</t> | conservative. From a network operation perspective, it would be | |||
<t>SRTT is the smoothed round-tri | beneficial for the burst size to be equal to or higher than the | |||
p time [RFC6298]</t> | number of LSPs that may be originated by a single failure. For a | |||
<t>Using a value for N that is sm | node failure, this is equal to the number of IS-IS neighbors of | |||
all, but at least 1 (for example, 1.25) ensures that variations in RTT do not re | the failed node. | |||
sult in underutilization of the congestion window.</t> | ||||
<t>Practical considerations, such as scheduling delays an | ||||
d computational efficiency, can cause a sender to deviate from this rate over ti | ||||
me periods that are much shorter than an RTT.</t> | ||||
<t>One possible implementation strategy for pacing uses a | ||||
leaky bucket algorithm, where the capacity of the "bucket" is limited to the ma | ||||
ximum burst size and the rate that the "bucket" fills is determined by the above | ||||
function.</t> | ||||
</section> | ||||
<section anchor="sec_determining_values" title="Determining value | The LSP Transmission Interval could be in the range | |||
s to be advertised in the Flooding Parameters TLV"> | of 1 ms to 33 ms. As indicated in <xref target="HISTORY" | |||
<t>The values that a receiver advertises do not need to b | format="default"/>, the historical behavior from <xref | |||
e perfect. If the values are too low then the transmitter will not use the full | target="ISO10589" format="default"/> is 33 ms; hence, 33 ms is | |||
bandwidth or available CPU resources. If the values are too high then the receiv | conservative. The LSP Transmission Interval is an advertisement of | |||
er may drop some LSPs during the first RTT and this loss will reduce the usable | the receiver's sustainable LSP reception rate taking into account | |||
receive window and the protocol mechanisms will allow the adjacency to recover. | all aspects and particularly the control plane CPU and the I/O | |||
Flooding slower than both nodes can support will hurt performance, as will consi | bandwidth. It's expected to improve (hence, decrease) as hardware | |||
stently overloading the receiver.</t> | and software naturally improve over time. It should be chosen | |||
conservatively, as this rate may be used by the sender in all | ||||
conditions -- including the worst conditions. It's also not a | ||||
bottleneck as the flow control algorithm may use a higher rate in | ||||
good conditions, particularly when the receiver acknowledges | ||||
quickly, and the receive window is large enough compared to the | ||||
RTT. | ||||
<section anchor="sec_determining_values_static" title="St | LPP could be in the range of 5 to 90 with a proposed 15. A | |||
atic values"> | smaller value provides faster feedback at the cost of the small | |||
<t>The values advertised need not be dynamic as feedback | overhead of more PSNP messages. | |||
is provided by the acknowledgment of LSPs in SNP messages. Acknowledgments provi | ||||
de a feedback loop on how fast the LSPs are processed by the receiver. They also | ||||
signal that the LSPs can be removed from receive window, explicitly signaling t | ||||
o the sender that more LSPs may be sent. By advertising relatively static parame | ||||
ters, we expect to produce overall flooding behavior similar to what might be ac | ||||
hieved by manually configuring per-interface LSP rate-limiting on all interfaces | ||||
in the network. The advertised values could be based, for example, on offline t | ||||
ests of the overall LSP-processing speed for a particular set of hardware and th | ||||
e number of interfaces configured for IS-IS. With such a formula, the values adv | ||||
ertised in the Flooding Parameters TLV would only change when additional IS-IS i | ||||
nterfaces are configured.</t> | ||||
<t>Static values are dependent on the CPU generation, cla | PartialSNPInterval could be in | |||
ss of router and network scaling, typically the number of adjacent neighbors. | the range 50 to 500 ms with a proposed value of 200 ms. One may | |||
Examples at the time of publication are provided below. L | distinguish the value used locally from the value signaled to the | |||
SP Burst Size could be in the range 5 to 20. From a router perspective, this val | sender. The value used locally benefits from being small but is | |||
ue | not expected to be the main parameter to improve performance. It | |||
typically depends on the queue(s) size(s) on the I/O path | depends on how fast the IS-IS flooding process may be scheduled by | |||
from the packet forwarding engine to the control plane which is very platform d | the CPU. Even when the receiver CPU is busy, it's safe because it wi | |||
ependent. | ll | |||
It also depends upon how many IS-IS neighbors share this | naturally delay its acknowledgments, which provides a negative | |||
I/O path as typically all neighbors will send the same LSPs at the same time. | feedback loop. The value advertised to the sender should be | |||
It may also depend on other incoming control plane traffi | conservative (high enough) as this value could be used by the | |||
c sharing that I/O path, how bursty they are, and how many incoming IS-IS packet | sender to send some LSPs rather than keep waiting for | |||
s | acknowledgments. | |||
are prioritized over other incoming control plane traffic | ||||
. As indicated in <xref target="HISTORY"/>, the historical behavior from <xref | ||||
target="ISO10589"/> allows a value | ||||
of 10 hence 10 seems conservative. From a network operati | ||||
on perspective, it would be beneficial for the burst size to be equal to or high | ||||
er than the | ||||
number of LSPs which may be originated by a single failur | ||||
e. For a node failure, this is equal to the number of IS-IS neighbors of the fai | ||||
led node. | ||||
LSP Transmission Interval could be in the range of 1 ms t | ||||
o 33 ms. As indicated in <xref target="HISTORY"/>, the historical behavior from | ||||
<xref target="ISO10589"/> is 33ms hence | ||||
is conservative. The LSP Transmission Interval is an adve | ||||
rtisement of the receiver's sustainable LSP reception rate taking into account a | ||||
ll aspects | ||||
and in particular the control plane CPU and the I/O bandw | ||||
idth. It's expected to improve (hence decrease) as hardware and software natural | ||||
ly improve | ||||
over time. It should be chosen conservatively as this rat | ||||
e may be used by the sender in all conditions including the worst conditions. | ||||
It's also not a bottleneck as the flow control algorithm | ||||
may use a higher rate in good conditions, in particular when the receiver acknow | ||||
ledges quickly | ||||
and the receive window is large enough compared to the RT | ||||
T. | ||||
LPP could be in the range of 5 to 90 with a proposed 15. | ||||
A smaller value provides faster feedback at the cost of the small overhead of mo | ||||
re PSNP messages. | ||||
PartialSNPInterval could be in the range 50ms to 500ms wi | ||||
th a proposed 200ms. | ||||
One may distinguish the value used locally from the value | ||||
signaled to the sender. The value used locally benefits from being small but is | ||||
not expected | ||||
to be the main parameter to improve performance. It depen | ||||
ds on how fast the IS-IS flooding process may be scheduled by the CPU. It's safe | ||||
as, even when the | ||||
receiver CPU is busy, it will naturally delay its acknowl | ||||
edgments which provides a negative feedback loop. The value advertised to the se | ||||
nder should be | ||||
conservative (high enough) as this value could be used by | ||||
the sender to send some LSPs rather than keep waiting for acknowledgments. Rece | ||||
ive Window in the range | ||||
of 30 to 200 with a proposed 60. In general, the larger t | ||||
he better the performance on links with high RTT. The higher the number and the | ||||
higher the | ||||
number of IS-IS neighbors, the higher the use of control | ||||
plane memory so it's mostly dependent on the amount of memory which may be dedic | ||||
ated to IS-IS flooding | ||||
and the number of IS-IS neighbors. From a memory usage pe | ||||
rspective, a priori, one could use the same value as the TCP receive window, but | ||||
the value | ||||
advertised should not be higher than the buffer of the "s | ||||
ocket" used.</t> | ||||
</section> | ||||
<section anchor="sec_determining_values_dynamic" title="D | Receive Window could be in the range of 30 to 200 with a | |||
ynamic values"> | proposed value of 60. In general, the larger the better the performa | |||
<t>The values may be updated dynamically, to reflect the | nce on | |||
relative change of load on the receiver, by improving the values when the receiv | links with high RTT. The higher that number and the higher the | |||
er load is getting lower and degrading the values when the receiver load is gett | number of IS-IS neighbors, the higher the use of control plane | |||
ing higher. For example, if LSPs are regularly dropped, or if the queue regularl | memory, so it's mostly dependent on the amount of memory, which may | |||
y comes close to being filled, then the values may be too high. On the other han | be dedicated to IS-IS flooding and the number of IS-IS | |||
d, if the queue is barely used (by IS-IS), then the values may be too low.</t> | neighbors. From a memory usage perspective (a priori), one could | |||
<t>The values may also be absolute value reflecting relev | use the same value as the TCP receive window, but the value | |||
ant average hardware resources that are monitored, typically the amount of buffe | advertised should not be higher than the buffer of the "socket" | |||
r space used by incoming LSPs. In this case, care must be taken when choosing th | used.</t> | |||
e parameters influencing the values in order to avoid undesirable or unstable fe | </section> | |||
edback loops. It would be undesirable to use a formula that depends, for example | <section anchor="sec_determining_values_dynamic" numbered="true" toc=" | |||
, on an active measurement of the instantaneous CPU load to modify the values ad | default"> | |||
vertised in the Flooding Parameters TLV. This could introduce feedback into the | <name>Dynamic Values</name> | |||
IGP flooding process that could produce unexpected behavior.</t> | <t>To reflect the relative change of load on the receiver, the | |||
</section> | values may be updated dynamically by improving the values when the | |||
</section> | receiver load is getting lower and by degrading the values when the | |||
receiver load is getting higher. For example, if LSPs are | ||||
regularly dropped, or if the queue regularly comes close to being | ||||
filled, then the values may be too high. On the other hand, if the | ||||
queue is barely used (by IS-IS), then the values may be too | ||||
low.</t> | ||||
<section anchor="OPS_Considerations" title="Operation considerati | <t>Alternatively, the values may be computed | |||
ons"> | to reflect the relevant average hardware resources, e.g., | |||
<t>As discussed in <xref target="TLVoperationLAN"/>, the | the amount of buffer space used by incoming | |||
solution is more effective on point-to-point adjacencies. Hence a broadcast int | LSPs. In this case, care must be taken when choosing the | |||
erface (e.g., Ethernet) only shared by two IS-IS neighbors should be configured | parameters influencing the values in order to avoid undesirable or | |||
as point-to-point in order to have more effective flooding.</t> | unstable feedback loops. For example, it would be undesirable to | |||
</section> | use a formula that depends on an active measurement of the | |||
</section> | instantaneous CPU load to modify the values advertised in the | |||
<section anchor="TxSide" title="Transmitter Based Congestion Control Appr | Flooding Parameters TLV. This could introduce feedback into the | |||
oach"> | IGP flooding process that could produce unexpected behavior.</t> | |||
<t>This section describes an approach to congestion control alg | </section> | |||
orithm based on | </section> | |||
performance measured by the transmitter without dependance on | <section anchor="OPS_Considerations" numbered="true" toc="default"> | |||
<name>Operational Considerations</name> | ||||
<t>As discussed in <xref target="TLVoperationLAN" | ||||
format="default"/>, the solution is more effective on point-to-point | ||||
adjacencies. Hence, a broadcast interface (e.g., Ethernet) only | ||||
shared by two IS-IS neighbors should be configured as point-to-point | ||||
in order to have more effective flooding.</t> | ||||
</section> | ||||
</section> | ||||
<section anchor="TxSide" numbered="true" toc="default"> | ||||
<name>Transmitter-Based Congestion Control Approach</name> | ||||
<t>This section describes an approach to the congestion control algorith | ||||
m based on | ||||
performance measured by the transmitter without dependence on | ||||
signaling from the receiver.</t> | signaling from the receiver.</t> | |||
<section anchor="Router-arch" numbered="true" toc="default"> | ||||
<section anchor="Router-arch" title="Router Architecture Discussion"> | <name>Router Architecture Discussion</name> | |||
<t>(The following description is an abstraction - implementation | <t>Note that the following description is an abstraction; | |||
details vary.)</t> | implementation details vary.</t> | |||
<t>Existing router architectures may utilize multiple input queues. | <t>Existing router architectures may utilize multiple input queues. | |||
On a given line card, IS-IS PDUs from multiple interfaces may be | On a given line card, IS-IS PDUs from multiple interfaces may be | |||
placed in a rate-limited input queue. This queue may be dedicated to | placed in a rate-limited input queue. This queue may be dedicated to | |||
IS-IS PDUs or may be shared with other routing related packets.</t> | IS-IS PDUs or may be shared with other routing related packets.</t> | |||
<t>The input queue may then pass IS-IS PDUs to a "punt queue", which | ||||
<t>The input queue may then pass IS-IS PDUs to a "punt queue" which | ||||
is used to pass PDUs from the data plane to the control plane. The | is used to pass PDUs from the data plane to the control plane. The | |||
punt queue typically also has controls on its size and the rate at | punt queue typically also has controls on its size and the rate at | |||
which packets will be punted.</t> | which packets will be punted.</t> | |||
<t>An input queue in the control plane may then be used to assemble | <t>An input queue in the control plane may then be used to assemble | |||
PDUs from multiple linecards, separate the IS-IS PDUs from other | PDUs from multiple line cards, separate the IS-IS PDUs from other | |||
types of packets, and place the IS-IS PDUs on an input queue | types of packets, and place the IS-IS PDUs in an input queue | |||
dedicated to the IS-IS protocol.</t> | dedicated to the IS-IS protocol.</t> | |||
<t>The IS-IS input queue then separates the IS-IS PDUs and directs | <t>The IS-IS input queue then separates the IS-IS PDUs and directs | |||
them to an instance-specific processing queue. The instance-specific | them to an instance-specific processing queue. The instance-specific | |||
processing queue may then further separate the IS-IS PDUs | processing queue may then further separate the IS-IS PDUs by type | |||
by type (IIHs, SNPs, and LSPs) so that separate processing threads | (IIHs, SNPs, and LSPs) so that separate processing threads with | |||
with varying priorities may be employed to process the incoming | varying priorities may be employed to process the incoming PDUs.</t> | |||
PDUs.</t> | ||||
<t>In such an architecture, it may be difficult for IS-IS in the | <t>In such an architecture, it may be difficult for IS-IS in the | |||
control plane to determine what value should be advertised as a | control plane to determine what value should be advertised as a | |||
receive window.</t> | receive window.</t> | |||
<t>The following section describes an approach to congestion control | <t>The following section describes an approach to congestion control | |||
based on performance measured by the transmitter without dependance | based on performance measured by the transmitter without dependence | |||
on signaling from the receiver.</t> | on signaling from the receiver.</t> | |||
</section> | </section> | |||
<section anchor="Ex2-tx" numbered="true" toc="default"> | ||||
<section anchor="Ex2-tx" title="Guidelines for transmitter side congesti | <name>Guidelines for Transmitter-Side Congestion Controls</name> | |||
on controls"> | <t>The approach described in this section does not depend upon | |||
<t>The approach described in this section does | direct signaling from the receiver. Instead, it adapts the | |||
not depend upon direct signaling from the receiver. Instead it | transmission rate based on measurement of the actual rate of | |||
adapts the transmission rate based on measurement of the actual | acknowledgments received.</t> | |||
rate of acknowledgments received.</t> | <t>Flow control is not used by this approach. When congestion | |||
control is necessary, it can be implemented based on knowledge of | ||||
<t>Flow control is not used by this approach. When congestion control | the current flooding rate and the current acknowledgment rate. The | |||
is necessary, it can be implemented | algorithm used is a local matter. There is no requirement to | |||
based on knowledge of the current flooding | standardize it, but there are a number of aspects that serve as | |||
rate and the current acknowledgement rate. The algorithm used is a | guidelines that can be described. Algorithms based on this approach | |||
local matter. There is no requirement to standardize it but | should follow the recommendations described below. </t> | |||
there are a number of aspects which serve as guidelines | ||||
which can be described. Algorithms based on this approach should | ||||
follow the recommendations described below. </t> | ||||
<t>A maximum LSP transmission rate (LSPTxMax) should be | <t>A maximum LSP transmission rate (LSPTxMax) should be | |||
configurable. This represents the fastest LSP transmission rate | configurable. This represents the fastest LSP transmission rate | |||
which will be attempted. This value should be applicable to all | that will be attempted. This value should be applicable to all | |||
interfaces and should be consistent network wide.</t> | interfaces and should be consistent network wide.</t> | |||
<t>When the current rate of LSP transmission (LSPTxRate) exceeds the | <t>When the current rate of LSP transmission (LSPTxRate) exceeds the | |||
capabilities of the receiver, the congestion control algorithm needs to | capabilities of the receiver, the congestion control algorithm needs to | |||
quickly and aggressively reduce the LSPTxRate. Slower | quickly and aggressively reduce the LSPTxRate. Slower | |||
responsiveness is likely to result in a larger number of | responsiveness is likely to result in a larger number of | |||
retransmissions which can introduce much longer delays in | retransmissions, which can introduce much longer delays in | |||
convergence.</t> | convergence.</t> | |||
<t>Dynamic increase of the rate of LSP transmission (LSPTxRate), | ||||
<t>Dynamic increase of the rate of LSP transmission (LSPTxRate) | i.e., making the rate faster, should be done less aggressively and on | |||
(i.e., faster) should be done less aggressively and only be | ly be | |||
done when the neighbor has demonstrated its ability to sustain the | done when the neighbor has demonstrated its ability to sustain the | |||
current LSPTxRate.</t> | current LSPTxRate.</t> | |||
<t>The congestion control algorithm should not assume that the receive | ||||
<t>The congestion control algorithm should not assume the receive | ||||
performance of a neighbor is static, i.e., it should handle | performance of a neighbor is static, i.e., it should handle | |||
transient conditions which result in a slower or faster receive rate | transient conditions that result in a slower or faster receive rate | |||
on the part of a neighbor.</t> | on the part of a neighbor.</t> | |||
<t>The congestion control algorithm should consider the expected | ||||
<t>The congestion control algorithm should consider the expected delay | delay time in receiving an acknowledgment. Therefore, it | |||
time in receiving an acknowledgment. It therefore incorporates the | incorporates the neighbor partialSNPInterval (<xref | |||
neighbor partialSNPInterval (<xref target="partialSNPI"/>) to help | target="partialSNPI" format="default"/>) to help determine whether | |||
determine whether acknowlegments are keeping pace with the rate of | acknowledgments are keeping pace with the rate of LSPs | |||
LSPs transmitted. In the absence of an advertisement of | transmitted. In the absence of an advertisement of | |||
partialSNPInterval, a locally configured value can be used.</t> | partialSNPInterval, a locally configured value can be used.</t> | |||
</section> | </section> | |||
</section> | </section> | |||
</section> | </section> | |||
<section anchor="IANA_Consideration" numbered="true" toc="default"> | ||||
<name>IANA Considerations</name> | ||||
<section anchor="IANA_Consideration1" numbered="true" toc="default"> | ||||
<name>Flooding Parameters TLV</name> | ||||
<t>IANA has made the following allocation in the "IS-IS Top-Level TLV Co | ||||
depoints" registry.</t> | ||||
<section anchor="IANA_Consideration" title="IANA Considerations"> | <table align="center"> | |||
<section anchor="IANA_Consideration1" title="Flooding Parameters TLV"> | <name></name> | |||
<thead> | ||||
<t>IANA has made the following temporary allocation from the IS-IS TLV co | <tr> | |||
depoint registry. This document requests the allocation be made permanent.</t> | <th>Value</th> | |||
<figure anchor="IANA_Registration" title=''> | <th>Name</th> | |||
<preamble></preamble> | <th>IIH</th> | |||
<artwork align="center"> | <th>LSP</th> | |||
Type Description IIH LSP SNP Purge | <th>SNP</th> | |||
---- --------------------------- --- --- --- --- | <th>Purge</th> | |||
21 Flooding Parameters TLV y n y n | </tr> | |||
</artwork> | </thead> | |||
</figure> | <tbody> | |||
<tr> | ||||
<td align="center">21</td> | ||||
<td>Flooding Parameters TLV</td> | ||||
<td>y</td> | ||||
<td>n</td> | ||||
<td>y</td> | ||||
<td>n</td> | ||||
</tr> | ||||
</tbody> | ||||
</table> | ||||
</section> | </section> | |||
<section anchor="IANA_Consideration2" numbered="true" toc="default"> | ||||
<name>Registry: IS-IS Sub-TLV for Flooding Parameters TLV</name> | ||||
<t>IANA has created the following sub-TLV registry in the "IS-IS TLV Cod | ||||
epoints" registry group.</t> | ||||
<dl newline="false" spacing="compact"> | ||||
<dt>Name:</dt> <dd>IS-IS Sub-TLVs for Flooding Parameters TLV</dd> | ||||
<dt>Registration Procedure(s):</dt> <dd>Expert Review</dd> | ||||
<dt>Description:</dt> <dd>This registry defines sub-TLVs for the Flood | ||||
ing Parameters TLV (21).</dd> | ||||
<dt>Reference:</dt> <dd>RFC 9681</dd> | ||||
</dl> | ||||
<table anchor="Registry_Flooding" align="center"> | ||||
<name>Initial Sub-TLV Allocations for Flooding Parameters TLV</name> | ||||
<thead> | ||||
<tr> | ||||
<th>Type</th> | ||||
<th>Description</th> | ||||
</tr> | ||||
</thead> | ||||
<tbody> | ||||
<tr> | ||||
<td align="center">0</td> | ||||
<td>Reserved</td> | ||||
</tr> | ||||
<tr> | ||||
<td align="center">1</td> | ||||
<td>LSP Burst Size</td> | ||||
</tr> | ||||
<tr> | ||||
<td align="center">2</td> | ||||
<td>LSP Transmission Interval</td> | ||||
</tr> | ||||
<tr> | ||||
<td align="center">3</td> | ||||
<td>LSPs per PSNP</td> | ||||
</tr> | ||||
<tr> | ||||
<td align="center">4</td> | ||||
<td>Flags</td> | ||||
</tr> | ||||
<tr> | ||||
<td align="center">5</td> | ||||
<td>PSNP Interval</td> | ||||
</tr> | ||||
<tr> | ||||
<td align="center">6</td> | ||||
<td>Receive Window</td> | ||||
</tr> | ||||
<tr> | ||||
<td align="center">7-255</td> | ||||
<td>Unassigned</td> | ||||
</tr> | ||||
</tbody> | ||||
</table> | ||||
</section> | ||||
<section anchor="IANA_Consideration3" numbered="true" toc="default"> | ||||
<name>Registry: IS-IS Bit Values for Flooding Parameters Flags Sub-TLV</ | ||||
name> | ||||
<t>IANA has created a new registry, in the "IS-IS TLV Codepoints" regist | ||||
ry group, for assigning Flag bits advertised in the Flags sub-TLV.</t> | ||||
<dl newline="false" spacing="compact"> | ||||
<dt>Name:</dt> <dd>IS-IS Bit Values for Flooding Parameters Flags Sub- | ||||
TLV</dd> | ||||
<dt>Registration Procedure:</dt> <dd>Expert Review</dd> | ||||
<dt>Description:</dt> <dd><t>This registry defines bit values for the | ||||
Flags sub-TLV (4) advertised in the Flooding Parameters TLV (21).</t></dd> | ||||
<dt>Note:</dt><dd><t>In order to minimize encoding space, a new alloca | ||||
tion should pick the smallest available value.</t></dd> | ||||
<dt>Reference:</dt> <dd>RFC 9681</dd> | ||||
</dl> | ||||
<table anchor="Registry_Flags" align="center"> | ||||
<name>Initial Bit Allocations for Flags Sub-TLV</name> | ||||
<thead> | ||||
<tr> | ||||
<th>Bit #</th> | ||||
<th>Description</th> | ||||
</tr> | ||||
</thead> | ||||
<tbody> | ||||
<tr> | ||||
<td>0</td> | ||||
<td>Ordered acknowledgment (O-flag)</td> | ||||
</tr> | ||||
<tr> | ||||
<td>1-63</td> | ||||
<td>Unassigned</td> | ||||
</tr> | ||||
</tbody> | ||||
</table> | ||||
</section> | ||||
</section> | ||||
<section anchor="Security" toc="default" numbered="true"> | ||||
<name>Security Considerations</name> | ||||
<t>Security concerns for IS-IS are addressed in <xref target="ISO10589" | ||||
format="default"/>, <xref target="RFC5304" format="default"/>, and | ||||
<xref target="RFC5310" format="default"/>. These documents describe | ||||
mechanisms that provide for the authentication and integrity of IS-IS | ||||
PDUs, including SNPs and IIHs. These authentication mechanisms are not | ||||
altered by this document.</t> | ||||
<t>With the cryptographic mechanisms described in <xref | ||||
target="RFC5304" format="default"/> and <xref target="RFC5310" | ||||
format="default"/>, an attacker wanting to advertise an incorrect | ||||
Flooding Parameters TLV would have to first defeat these mechanisms.</t> | ||||
<t>In the absence of cryptographic authentication, as IS-IS does not run | ||||
over IP but directly over the link layer, it's considered difficult to | ||||
inject a false SNP or IIH without having access to the link layer.</t> | ||||
<t>If a false SNP or IIH is sent with a Flooding Parameters TLV set to | ||||
conservative values, the attacker can reduce the flooding speed between | ||||
the two adjacent neighbors, which can result in LSDB inconsistencies and | ||||
transient forwarding loops. However, it is not significantly different | ||||
than filtering or altering LSPs, which would also be possible with access | ||||
to the link layer. In addition, if the downstream flooding neighbor has | ||||
multiple IGP neighbors (which is typically the case for reliability or | ||||
topological reasons), it would receive LSPs at a regular speed from its | ||||
other neighbors and hence would maintain LSDB consistency.</t> | ||||
<t>If a false SNP or IIH is sent with a Flooding Parameters TLV set to | ||||
aggressive values, the attacker can increase the flooding speed, which | ||||
can either overload a node or more likely cause loss of | ||||
LSPs. However, it is not significantly different than sending many LSPs, | ||||
which would also be possible with access to the link layer, even with | ||||
cryptographic authentication enabled. In addition, IS-IS has procedures | ||||
to detect the loss of LSPs and recover.</t> | ||||
<t>This TLV advertisement is not flooded across the network but only | ||||
sent between adjacent IS-IS neighbors. This would limit the consequences | ||||
in case of forged messages and also limit the dissemination of such | ||||
information.</t> | ||||
</section> | ||||
<section anchor="IANA_Consideration2" title="Registry: IS-IS Sub-TLV for | </middle> | |||
Flooding Parameters TLV"> | <back> | |||
<t>This document creates the following sub-TLV Registry under the "IS-IS | ||||
TLV Codepoints" grouping:</t> | ||||
<t>Name: IS-IS Sub-TLVs for Flooding Parameters TLV.</t> | ||||
<t>Registration Procedure(s): Expert Review</t> | ||||
<t>Expert(s): TBD</t> | ||||
<t>Description: This registry defines sub-TLVs for the Flooding Parameter | ||||
s TLV(21).</t> | ||||
<t>Reference: This document.</t> | ||||
<texttable anchor="Registry_Flooding" title="Initial Sub-TLV allocations | ||||
for Flooding Parameters TLV"> | ||||
<ttcol align='center'>Type</ttcol> | ||||
<ttcol align='left'>Description</ttcol> | ||||
<c>0</c> | ||||
<c>Reserved</c> | ||||
<c>1</c> | ||||
<c>LSP Burst Size</c> | ||||
<c>2</c> | ||||
<c>LSP Transmission Interval</c> | ||||
<c>3</c> | ||||
<c>LSPs Per PSNP</c> | ||||
<c>4</c> | ||||
<c>Flags</c> | ||||
<c>5</c> | ||||
<c>Partial SNP Interval</c> | ||||
<c>6</c> | ||||
<c>Receive Window</c> | ||||
<c>7-255</c> | ||||
<c>Unassigned</c> | ||||
</texttable> | ||||
</section> | ||||
<section anchor="IANA_Consideration3" title="Registry: IS-IS Bit Values f | <references> | |||
or Flooding Parameters Flags Sub-TLV"> | <name>References</name> | |||
<t>This document requests IANA to create a new registry, under the "IS-IS | <references> | |||
TLV Codepoints" grouping, for assigning Flag bits advertised in the Flags sub- T | <name>Normative References</name> | |||
LV.</t> | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.2 | |||
119.xml"/> | ||||
<xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.8 | ||||
174.xml"/> | ||||
<xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.5 | ||||
304.xml"/> | ||||
<xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.5 | ||||
310.xml"/> | ||||
<xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.6 | ||||
298.xml"/> | ||||
<t>Name: IS-IS Bit Values for Flooding Parameters Flags Sub-TLV.</t> | <reference anchor="ISO10589" target="https://www.iso.org/standard/30932. | |||
html"> | ||||
<front> | ||||
<title>Information technology - Telecommunications and information e | ||||
xchange between systems - Intermediate system to Intermediate system intra-domai | ||||
n routeing information exchange protocol for use in conjunction with the protoco | ||||
l for providing the connectionless-mode network service (ISO 8473)</title> | ||||
<author> | ||||
<organization abbrev="ISO/IEC">International Organization for Stan | ||||
dardization/International Electrotechnical Commission</organization> | ||||
</author> | ||||
<date month="Nov" year="2002"/> | ||||
</front> | ||||
<seriesInfo name="ISO/IEC" value="10589:2002"/> | ||||
<refcontent>Second Edition</refcontent> | ||||
</reference> | ||||
<t>Registration Procedure: Expert Review</t> | </references> | |||
<references> | ||||
<name>Informative References</name> | ||||
<t>Expert Review Expert(s): TBD</t> | <reference anchor="RFC9667" target="https://www.rfc-editor.org/info/rfc96 | |||
67"> | ||||
<front> | ||||
<title>Dynamic Flooding on Dense Graphs</title> | ||||
<author initials="T." surname="Li" fullname="Tony Li" role="editor"> | ||||
<organization>Juniper Networks</organization> | ||||
</author> | ||||
<author initials="P." surname="Psenak" fullname="Peter Psenak" role=" | ||||
editor"> | ||||
<organization>Cisco Systems, Inc.</organization> | ||||
</author> | ||||
<author initials="H." surname="Chen" fullname="Huaimo Chen"> | ||||
<organization>Futurewei</organization> | ||||
</author> | ||||
<author initials="L." surname="Jalil" fullname="Luay Jalil"> | ||||
<organization>Verizon</organization> | ||||
</author> | ||||
<author initials="S." surname="Dontula" fullname="Srinath Dontula"> | ||||
<organization>ATT</organization> | ||||
</author> | ||||
<date month="October" year="2024"/> | ||||
</front> | ||||
<seriesInfo name="RFC" value="9667"/> | ||||
<seriesInfo name="DOI" value="10.17487/RFC9667"/> | ||||
</reference> | ||||
<t>Description: This registry defines bit values for the Flags sub-TLV( | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.9 | |||
4) advertised in the Flooding Parameters TLV(21).</t> | 293.xml"/> | |||
<t>Note: In order to minimize encoding space, a new allocation should p | <xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.9 | |||
ick the smallest available value.</t> | 002.xml"/> | |||
<xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.2 | ||||
973.xml"/> | ||||
<xi:include href="https://bib.ietf.org/public/rfc/bibxml/reference.RFC.5 | ||||
681.xml"/> | ||||
</references> | ||||
</references> | ||||
<t>Reference: This document.</t> | <section anchor="Acknowledgments" numbered="false" toc="default"> | |||
<name>Acknowledgments</name> | ||||
<t>The authors would like to thank <contact fullname="Henk Smit"/>, | ||||
<contact fullname="Sarah Chen"/>, <contact fullname="Xuesong Geng"/>, | ||||
<contact fullname="Pierre Francois"/>, <contact fullname="Hannes | ||||
Gredler"/>, <contact fullname="Acee Lindem"/>, <contact fullname="Mirja | ||||
Kühlewind"/>, <contact fullname="Zaheduzzaman Sarker"/>, and <contact | ||||
fullname="John Scudder"/> for their reviews, comments, and | ||||
suggestions.</t> | ||||
<t>The authors would like to thank <contact fullname="David Jacquet"/>, | ||||
<contact fullname="Sarah Chen"/>, and <contact fullname="Qiangzhou | ||||
Gao"/> for the tests performed on commercial implementations and for | ||||
their identification of some limiting factors.</t> | ||||
</section> | ||||
<texttable anchor="Registry_Flags" title="Initial bit allocations for Fla | <section anchor="Contributors" numbered="false" toc="default"> | |||
gs Sub-TLV"> | <name>Contributors</name> | |||
<ttcol align='center'>Bit #</ttcol> | <t>The following people gave substantial contributions to the content of t | |||
<ttcol align='left'>Description</ttcol> | his document and should be considered as coauthors:</t> | |||
<c>0</c> | ||||
<c>Ordered acknowledgement (O-flag)</c> | ||||
<c>1-63</c> | ||||
<c>Unassigned</c> | ||||
</texttable> | ||||
</section> | <contact fullname="Jayesh J"> | |||
</section> | <organization>Ciena</organization> | |||
<address> | ||||
<email>jayesh.ietf@gmail.com</email> | ||||
</address> | ||||
</contact> | ||||
<section anchor="Security" title="Security Considerations" toc="default"> | <contact fullname="Chris Bowers"> | |||
<organization>Juniper Networks</organization> | ||||
<address> | ||||
<email>cbowers@juniper.net</email> | ||||
</address> | ||||
</contact> | ||||
<t> | <contact fullname="Peter Psenak"> | |||
Security concerns for IS-IS are addressed in <xref target="ISO10589"/> | <organization>Cisco Systems</organization> | |||
, | <address> | |||
<xref target="RFC5304"/> | <email>ppsenak@cisco.com</email> | |||
, and <xref target="RFC5310"/> | </address> | |||
. These documents | </contact> | |||
describe mechanisms that provide for the authentication and integrity of IS- | ||||
IS | ||||
PDUs, including SNPs and IIHs. These authentication mechanisms are not | ||||
altered by this document.</t> | ||||
<t> | ||||
With the cryptographic mechanisms described in <xref target="RFC5304"/> | ||||
and <xref target="RFC5310"/> | ||||
, an attacker wanting to advertise an incorrect | ||||
Flooding Parameters TLV would have to first defeat these mechanisms. | ||||
</t> | ||||
<t>In the absence of cryptographic authentication, as IS-IS does not run over IP | ||||
but directly over the link layer, it's considered difficult to inject false SNP | ||||
/IIH without having access to the link layer.</t> | ||||
<t>If a false SNP/IIH is sent with a Flooding Parameters TLV set to conservative | ||||
values, the attacker can reduce the flooding speed between the two adjacent nei | ||||
ghbors which can result in LSDB inconsistencies and transient forwarding loops. | ||||
However, it is not significantly different than filtering or altering LSPs which | ||||
would also be possible with access to the link layer. In addition, if the downs | ||||
tream flooding neighbor has multiple IGP neighbors, which is typically the case | ||||
for reliability or topological reasons, it would receive LSPs at a regular speed | ||||
from its other neighbors and hence would maintain LSDB consistency.</t> | ||||
<t>If a false SNP/IIH is sent with a Flooding Parameters TLV set to aggressive v | ||||
alues, the attacker can increase the flooding speed which can either overload a | ||||
node or more likely generate loss of LSPs. However, it is not significantly diff | ||||
erent than sending many LSPs which would also be possible with access to the lin | ||||
k layer, even with cryptographic authentication enabled. In addition, IS-IS has | ||||
procedures to detect the loss of LSPs and recover.</t> | ||||
<t>This TLV advertisement is not flooded across the network but only sent betwee | ||||
n adjacent IS-IS neighbors. This limits the consequences of forged messages | ||||
and also limits the dissemination of such information.</t> | ||||
</section> | ||||
<section anchor="Contributors" title="Contributors"> | </section> | |||
<t>The following people made substantial contributions to the content of this | ||||
document and should be considered coauthors:<list style="symbols"> | ||||
<t>Jayesh J, Ciena, jayesh.ietf@gmail.com</t> | ||||
<t>Chris Bowers, Juniper Networks, cbowers@juniper.net</t> | ||||
<t>Peter Psenak, Cisco Systems, ppsenak@cisco.com</t> | ||||
</list></t> | ||||
</section> | ||||
<section anchor="Acknowledgments" title="Acknowledgments"> | </back> | |||
<t>The authors would like to thank Henk Smit, Sarah Chen, Xuesong Geng, | ||||
Pierre Francois, Hannes Gredler, Acee Lindem, Mirja Kuhlewind, Zaheduzzaman | ||||
Sarker, and John Scudder for their reviews, comments, and suggestions.</t> | ||||
<t>The authors would like to thank David Jacquet, Sarah Chen, and Qiangzhou Gao | ||||
for the tests performed on commercial implementations and their identification o | ||||
f some limiting factors.</t> | ||||
</section> | ||||
</middle> | ||||
<back> | ||||
<references title="Normative References"> | ||||
<?rfc include="reference.RFC.2119"?> | ||||
<?rfc include="reference.RFC.8174"?> | ||||
<?rfc include="reference.RFC.5304"?> | ||||
<?rfc include="reference.RFC.5310"?> | ||||
<?rfc include="reference.RFC.6298"?> | ||||
<reference anchor="ISO10589"> | ||||
<front> | ||||
<title>Intermediate system to Intermediate system intra-domain routeing i | ||||
nformation exchange protocol for use in conjunction with the protocol for provid | ||||
ing the connectionless-mode Network Service (ISO 8473)</title> | ||||
<author> | ||||
<organization abbrev="ISO">International Organization for Standar | ||||
dization</organization> | ||||
</author> | ||||
<date month="Nov" year="2002"/> | ||||
</front> | ||||
<seriesInfo name="ISO/IEC" value="10589:2002, Second Edition"/> | ||||
</reference> | ||||
</references> | ||||
<references title="Informative References"> | ||||
<?rfc include="reference.I-D.ietf-lsr-dynamic-flooding"?> | ||||
<?rfc include="reference.RFC.9293"?> | ||||
<?rfc include="reference.RFC.9002"?> | ||||
<?rfc include="reference.RFC.2973"?> | ||||
<?rfc include="reference.RFC.5681"?> | ||||
</references> | ||||
<section anchor="authors-notes" title="Changes / Author Notes"> | ||||
<t>[RFC Editor: Please remove this section before publication]</t> | ||||
<t>IND 00: Initial version.</t> | ||||
<t>WG 00: No change.</t> | ||||
<t>WG 01: IANA allocated code point.</t> | ||||
<t>WG 02: No change.</t> | ||||
<t>WG 03: <list style="symbols"> | ||||
<t>Pacing section added (taken from RFC 9002).</t> | ||||
<t>Some text borrowed from RFC 9002 (QUIC Loss Detection and Congestion C | ||||
ontrol).</t> | ||||
<t>Considerations on the special role of the DIS.</t> | ||||
<t>Editorial changes.</t> | ||||
</list></t> | ||||
<t>WG 04: Update IANA section as per IANA editor comments (2023-03-23).</t> | ||||
<t>WG 06: AD review.</t> | ||||
</section> | ||||
</back> | ||||
</rfc> | </rfc> | |||
End of changes. 78 change blocks. | ||||
1194 lines changed or deleted | 1156 lines changed or added | |||
This html diff was produced by rfcdiff 1.48. |