blogs.oracle.com.html - webdump_tests - Testfiles for webdump
 (HTM) git clone git://git.codemadness.org/webdump_tests
 (DIR) Log
 (DIR) Files
 (DIR) Refs
 (DIR) README
       ---
       blogs.oracle.com.html (134476B)
       ---
            1 <!DOCTYPE html>
            2 <html lang="en-US" class="no-js">
            3 
            4 <head>
            5         <!-- Avoid FOUC issue in FF with async loading of style sheets -->
            6         <style>
            7                 body {
            8                         opacity: 1;
            9                 }
           10         </style>
           11         <title>Syscall latency...  and some uses of speculative execution</title>
           12         <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
           13         <meta charset="utf-8">
           14         <meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
           15         <link rel="alternate" type="application/rss+xml" href="https://blogs.oracle.com/rss">
           16         <!-- $meta_tags -->
           17         <meta name="country" content="">
           18         <meta name="contenttype_id" content="WM147046">
           19         <meta name="description" content="An in-depth exploration into why syscall latency increased on some x86 kernels in recent kernels. ">
            20         <link rel="canonical" href="https://blogs.oracle.com/linux/post/syscall-latency">
           21         <meta name="host_name" content="https://blogs.oracle.com">
           22         <meta name="title" content="Syscall latency...  and some uses of speculative execution">
           23         <meta name="blog_name" content="Oracle Linux Blog">
           24         <meta name="author" content="Ankur">
           25         <meta name="keywords" content="Technologies,Linux Kernel Development">
           26         <meta name="publish_date" content="September 12, 2023">
           27         <meta name="siteid" content="us">
           28         <meta name="Language" content="">
           29         <meta name="robots" content="index, follow">        
           30         <meta name="audience" content="">
           31         <meta name="product" content="">
           32         <meta property="og:type" content="blog">
           33         <meta property="og:title" content="Syscall latency...  and some uses of speculative execution">
           34         <meta property="og:image" content="https://blogs.oracle.com/content/published/api/v1.1/assets/CONTCF8836A82B014903A5283C76DE901346/Medium?format=jpg&channelToken=3189ef66cf584820b5b19e6b10792d6f">
           35         <meta property="og:description" content="An in-depth exploration into why syscall latency increased on some x86 kernels in recent kernels. ">
           36         <meta property="og:url" content="https://blogs.oracle.com/linux/post/syscall-latency">
           37         <meta name="category" content="Technologies,Linux Kernel Development">
           38         <meta name="twitter:card" content="summary_large_image">
           39         <meta name="twitter:title" content="" />
           40         <meta name="twitter:description" content="" />
           41         <meta name="twitter:image" content="" />
           42         <meta name="google-site-verification" content="OVRFC0CuVBZNzlfzelWzFIN7D4gCrVfzsfmMWvteKHs" />
           43         <link rel="alternate" type="application/rss+xml" class="rss-link" title="Oracle Blogs"
           44                 href="https://blogs.oracle.com/rss">
           45 
           46 
           47         <!-- <link data-wscss href="../_cache_8b25/_themesdelivery/Blogs-New-Theme/assets/css/blogs-style.css" rel="preload" as="style" onload="this.rel='stylesheet';" onerror="this.rel='stylesheet'"> -->
           48         <link rel="preload" onload="this.rel='stylesheet'" href="../_cache_8b25/_themesdelivery/Blogs-New-Theme/assets/css/blogs-style.css" as="style" />
           49         <link rel="preload" onload="this.rel='stylesheet'" href="../_cache_8b25/_themesdelivery/Blogs-New-Theme/assets/css/oracle-style.css" as="style" />
           50 
           51         <!-- favicon -->
           52         <link rel="icon" href="../_cache_8b25/_themesdelivery/Blogs-New-Theme/assets/img/logo.ico" type="image/x-icon" />
           53         <link rel="shortcut icon" href="../_cache_8b25/_themesdelivery/Blogs-New-Theme/assets/img/logo.ico" type="image/x-icon" />
           54 
           55         <link rel="preconnect" href="https://c.go-mpulse.net" crossorigin />
           56         <link rel="preconnect" href="https://s.go-mpulse.net" crossorigin />
           57         <link rel="preconnect" href="https://tms.oracle.com" crossorigin />
           58         <link rel="preconnect" href="https://www.facebook.com" crossorigin />
           59         <link rel="preconnect" href="https://connect.facebook.net" crossorigin />
           60         <link rel="preconnect" href="https://www.oracle.com" crossorigin />
           61         <link rel="preconnect" href="https://consent.trustarc.com" crossorigin />
           62         <link rel="preconnect" href="https://www.oracleimg.com" crossorigin />
           63         <link rel="preconnect" href="https://oracle.112.2o7.net" crossorigin />
           64         <link rel="preconnect" href="https://trial-eum-clientnsv4-s.akamaihd.net" crossorigin />
           65         <link rel="preconnect" href="https://trial-eum-clienttons-s.akamaihd.net" crossorigin />
           66         <link rel="preconnect" href="https://d.oracleinfinity.io" crossorigin />
           67         <link rel="preconnect" href="https://www.googletagmanager.com" crossorigin />
           68 
           69         <link rel="dns-prefetch" href="https://static.ocecdn.oraclecloud.com" />
           70         <link rel="dns-prefetch" href="https://c.go-mpulse.net" />
           71         <link rel="dns-prefetch" href="https://s.go-mpulse.net" />
           72         <link rel="dns-prefetch" href="https://tms.oracle.com" />
           73         <link rel="dns-prefetch" href="https://www.facebook.com" />
           74         <link rel="dns-prefetch" href="https://connect.facebook.net" />
           75         <link rel="dns-prefetch" href="https://www.oracle.com" />
           76         <link rel="dns-prefetch" href="https://consent.trustarc.com" />
           77         <link rel="dns-prefetch" href="https://www.oracleimg.com" />
           78         <link rel="dns-prefetch" href="https://oracle.112.2o7.net" />
            79         <link rel="dns-prefetch" href="https://trial-eum-clientnsv4-s.akamaihd.net" />
           80         <link rel="dns-prefetch" href="https://trial-eum-clienttons-s.akamaihd.net" />
           81         <link rel="dns-prefetch" href="https://d.oracleinfinity.io" />
           82         <link rel="dns-prefetch" href="https://www.googletagmanager.com" />
           83 
           84         <script type="text/javascript" src="../_cache_8b25/_themesdelivery/Blogs-New-Theme/assets/js/jquery/jquery-min.js"
           85                 onload="$('head link[data-reqjq][rel=preload]').each(function(){var a = document.createElement('script');a.async=false;a.src=$(this).attr('href');this.parentNode.insertBefore(a, this);});$(function(){$('script[data-reqjq][data-src]').each(function(){this.async=true;this.src=$(this).data('src');});});"></script>
           86 
           87         <!-- <script type="text/javascript" src="../_cache_8b25/_themesdelivery/Blogs-New-Theme/assets/js/blogs-script.js"></script> -->
           88 
           89 
           90         <!--<link data-wsjs data-reqjq href="../_cache_8b25/_themesdelivery/Blogs-New-Theme/assets/js/blogs-script.js" rel="preload" as="script">->
           91 
           92         <script type="text/javascript" src="http://webstandards.us.oracle.com:9292/global_assets_v22.9.3/assets/js/redwood-blogs.js"></script> -->
           93 
           94 
           95         <script id="scsRenderInfo" type="application/json">{&quot;sitePrefix&quot;:&quot;..&#x2F;&quot;,&quot;pageModel&quot;:{&quot;properties&quot;:{&quot;title&quot;:&quot;Blog Theme - Details&quot;,&quot;pageLayout&quot;:&quot;post-detail.html&quot;,&quot;mobileLayout&quot;:&quot;&quot;,&quot;pageDescription&quot;:&quot;&quot;,&quot;keywords&quot;:&quot;&quot;,&quot;hideFromSearchEngines&quot;:false,&quot;styles&quot;:[],&quot;header&quot;:&quot; &quot;,&quot;footer&quot;:&quot;&quot;,&quot;noIndex&quot;:false,&quot;noFollow&quot;:false,&quot;noArchive&quot;:false,&quot;noSnippet&quot;:false,&quot;isCobrowseEnabled&quot;:false,&quot;overrideWebAnalytics&quot;:false,&quot;webAnalyticsScript&quot;:null},&quot;slots&quot;:{&quot;post-id&quot;:{&quot;components&quot;:[&quot;a37b49d5-e11f-4e1f-a5e0-fd37af71a288&quot;],&quot;grid&quot;:&quot;&lt;div class=\&quot;scs-row\&quot;&gt;&lt;div class=\&quot;scs-col\&quot; style=\&quot;width: 100%;\&quot;&gt;&lt;div id=\&quot;a37b49d5-e11f-4e1f-a5e0-fd37af71a288\&quot;&gt;&lt;&#x2F;div&gt;&lt;&#x2F;div&gt;&lt;&#x2F;div&gt;&quot;,&quot;preRenderedByController&quot;:true},&quot;blog-search1&quot;:{&quot;components&quot;:[],&quot;grid&quot;:&quot;&quot;},&quot;homepage-banner&quot;:{&quot;components&quot;:[&quot;c30bb2b5-2186-4cd6-aeb8-2f23c0d9360c&quot;],&quot;grid&quot;:&quot;&lt;div class=\&quot;scs-row\&quot;&gt;&lt;div class=\&quot;scs-col\&quot; style=\&quot;width: 100%;\&quot;&gt;&lt;div id=\&quot;c30bb2b5-2186-4cd6-aeb8-2f23c0d9360c\&quot;&gt;&lt;&#x2F;div&gt;&lt;&#x2F;div&gt;&lt;&#x2F;div&gt;&quot;},&quot;search&quot;:{&quot;components&quot;:[],&quot;grid&quot;:&quot;&quot;},&quot;recent-posts&quot;:{&quot;components&quot;:[],&quot;grid&quot;:&quot;&quot;},&quot;category-id&quot;:{&quot;components&quot;:[],&quot;grid&quot;:&quot;&quot;},&quot;blogs-category-nav&quot;:{&quot;components&quot;:[&quot;f62eb3cd-6ac8-407d-9e06-69cbbc8d821e&quot;],&quot;grid&quot;:&quot;&lt;div 
class=\&quot;scs-row\&quot;&gt;&lt;div class=\&quot;scs-col\&quot; style=\&quot;width: 100%;\&quot;&gt;&lt;div id=\&quot;f62eb3cd-6ac8-407d-9e06-69cbbc8d821e\&quot;&gt;&lt;&#x2F;div&gt;&lt;&#x2F;div&gt;&lt;&#x2F;div&gt;&quot;,&quot;preRenderedByController&quot;:true},&quot;Next-Previous-Posts&quot;:{&quot;components&quot;:[&quot;a057a3dc-2397-4b35-88dc-e9904a3f1789&quot;],&quot;grid&quot;:&quot;&lt;div class=\&quot;scs-row\&quot;&gt;&lt;div class=\&quot;scs-col\&quot; style=\&quot;width: 100%;\&quot;&gt;&lt;div id=\&quot;a057a3dc-2397-4b35-88dc-e9904a3f1789\&quot;&gt;&lt;&#x2F;div&gt;&lt;&#x2F;div&gt;&lt;&#x2F;div&gt;&quot;,&quot;preRenderedByController&quot;:true}},&quot;componentInstances&quot;:{&quot;c30bb2b5-2186-4cd6-aeb8-2f23c0d9360c&quot;:{&quot;type&quot;:&quot;scs-component&quot;,&quot;id&quot;:&quot;Blogs-Email-Subscription&quot;,&quot;data&quot;:{&quot;actions&quot;:&quot;&quot;,&quot;alignment&quot;:&quot;fill&quot;,&quot;assets&quot;:&quot;&quot;,&quot;borderColor&quot;:&quot;#808080&quot;,&quot;borderRadius&quot;:0,&quot;borderStyle&quot;:&quot;none&quot;,&quot;borderWidth&quot;:1,&quot;componentId&quot;:&quot;Blogs-Email-Subscription&quot;,&quot;componentName&quot;:&quot;Blogs-Email-Subscription&quot;,&quot;componentFactory&quot;:&quot;&quot;,&quot;componentLayout&quot;:&quot;default&quot;,&quot;contentId&quot;:&quot;&quot;,&quot;contentLayoutCategory&quot;:&quot;&quot;,&quot;contentPlaceholder&quot;:false,&quot;contentTypes&quot;:[],&quot;contentViewing&quot;:&quot;&quot;,&quot;customRenderComplete&quot;:false,&quot;customSettingsData&quot;:{},&quot;componentConfig&quot;:{&quot;id&quot;:&quot;Blogs-Email-Subscription&quot;,&quot;settingsData&quot;:{&quot;settingsHeight&quot;:0,&quot;settingsWidth&quot;:0,&quot;settingsRenderOption&quot;:&quot;none&quot;,&quot;componentLayouts&quot;:[],&quot;styles&quot;:[{&quot;name&quot;:&quot;Medium Green&quot;,&quot;class&quot;:&quot;Ora-Paragraph-Banner-default-style&quot;},{&quot;name&quot;:&quot;Dark 
Green&quot;,&quot;class&quot;:&quot;ora-paragraph-banner-dark-green&quot;},{&quot;name&quot;:&quot;Orange&quot;,&quot;class&quot;:&quot;ora-paragraph-banner-orange&quot;},{&quot;name&quot;:&quot;Teal&quot;,&quot;class&quot;:&quot;ora-paragraph-banner-teal&quot;},{&quot;name&quot;:&quot;Medium Teal&quot;,&quot;class&quot;:&quot;ora-paragraph-banner-medium-teal&quot;},{&quot;name&quot;:&quot;Blue&quot;,&quot;class&quot;:&quot;ora-paragraph-banner-blue&quot;},{&quot;name&quot;:&quot;Medium Brown&quot;,&quot;class&quot;:&quot;ora-paragraph-banner-medium-brown&quot;},{&quot;name&quot;:&quot;Granite&quot;,&quot;class&quot;:&quot;ora-paragraph-banner-granite&quot;},{&quot;name&quot;:&quot;Ecru&quot;,&quot;class&quot;:&quot;ora-paragraph-banner-ecru&quot;},{&quot;name&quot;:&quot;Fog Blue&quot;,&quot;class&quot;:&quot;ora-paragraph-banner-fog-blue&quot;},{&quot;name&quot;:&quot;Yellow&quot;,&quot;class&quot;:&quot;ora-paragraph-banner-yellow&quot;},{&quot;name&quot;:&quot;Dark Brown&quot;,&quot;class&quot;:&quot;ora-paragraph-banner-dark-brown&quot;}],&quot;triggers&quot;:[],&quot;actions&quot;:[]}},&quot;description&quot;:&quot;&quot;,&quot;detailPageId&quot;:&quot;&quot;,&quot;height&quot;:&quot;&quot;,&quot;initialized&quot;:true,&quot;isCaaSLayout&quot;:false,&quot;linkType&quot;:&quot;scs-link-action&quot;,&quot;marginBottom&quot;:0,&quot;marginLeft&quot;:0,&quot;marginRight&quot;:0,&quot;marginTop&quot;:0,&quot;nestedComponents&quot;:[{&quot;id&quot;:&quot;oraParagraphBannerCtaText&quot;,&quot;type&quot;:&quot;scs-button&quot;,&quot;data&quot;:{&quot;marginBottom&quot;:0,&quot;marginLeft&quot;:0,&quot;marginRight&quot;:0,&quot;marginTop&quot;:0,&quot;styleClass&quot;:&quot;scs-button-secondary-style&quot;,&quot;useStyleClass&quot;:&quot;true&quot;,&quot;visible&quot;:true,&quot;width&quot;:0}}],&quot;renderOnAccess&quot;:&quot;&quot;,&quot;styleClass&quot;:&quot;&quot;,&quot;styleClassName&quot;:&quot;&quot;,&quot;seeded&quot;:false,&quot;useStyleClass&quot;:&quot;tru
e&quot;,&quot;visible&quot;:true,&quot;visibleOnMobile&quot;:&quot;&quot;,&quot;visibleNestedComponents&quot;:[],&quot;width&quot;:0}},&quot;a37b49d5-e11f-4e1f-a5e0-fd37af71a288&quot;:{&quot;type&quot;:&quot;scs-component&quot;,&quot;id&quot;:&quot;scs-contentplaceholder&quot;,&quot;data&quot;:{&quot;actions&quot;:&quot;&quot;,&quot;alignment&quot;:&quot;fill&quot;,&quot;assets&quot;:&quot;&quot;,&quot;borderColor&quot;:&quot;#808080&quot;,&quot;borderRadius&quot;:0,&quot;borderStyle&quot;:&quot;none&quot;,&quot;borderWidth&quot;:1,&quot;componentId&quot;:&quot;&quot;,&quot;componentName&quot;:&quot;&quot;,&quot;componentFactory&quot;:&quot;&quot;,&quot;componentLayout&quot;:&quot;&quot;,&quot;contentId&quot;:&quot;&quot;,&quot;contentLayoutCategory&quot;:&quot;Blogs-Redwood-Post-Detail-Layout&quot;,&quot;contentPlaceholder&quot;:true,&quot;contentTypes&quot;:[&quot;Blog-Post&quot;,&quot;Syndicated-Blog-Post&quot;],&quot;contentTypeDisplayName&quot;:&quot;&quot;,&quot;contentTypeCategory&quot;:&quot;ContentType&quot;,&quot;contentViewing&quot;:&quot;&quot;,&quot;customRenderComplete&quot;:false,&quot;customSettingsData&quot;:&quot;&quot;,&quot;componentConfig&quot;:&quot;&quot;,&quot;description&quot;:&quot;&quot;,&quot;detailPageId&quot;:&quot;105&quot;,&quot;height&quot;:&quot;&quot;,&quot;initialized&quot;:true,&quot;isCaaSLayout&quot;:true,&quot;linkType&quot;:&quot;scs-link-action&quot;,&quot;marginBottom&quot;:5,&quot;marginLeft&quot;:5,&quot;marginRight&quot;:5,&quot;marginTop&quot;:5,&quot;nestedComponents&quot;:[],&quot;renderOnAccess&quot;:&quot;&quot;,&quot;styleClass&quot;:&quot;&quot;,&quot;styleClassName&quot;:&quot;&quot;,&quot;seeded&quot;:true,&quot;useStyleClass&quot;:&quot;true&quot;,&quot;visible&quot;:true,&quot;visibleOnMobile&quot;:&quot;&quot;,&quot;visibleNestedComponents&quot;:[],&quot;width&quot;:0},&quot;preRenderedByController&quot;:true},&quot;f62eb3cd-6ac8-407d-9e06-69cbbc8d821e&quot;:{&quot;type&quot;:&quot;scs-component&quot;,&quot;i
d&quot;:&quot;Blogs-Redwood-Category-Nav&quot;,&quot;data&quot;:{&quot;actions&quot;:&quot;&quot;,&quot;alignment&quot;:&quot;fill&quot;,&quot;assets&quot;:&quot;&quot;,&quot;borderColor&quot;:&quot;#808080&quot;,&quot;borderRadius&quot;:0,&quot;borderStyle&quot;:&quot;none&quot;,&quot;borderWidth&quot;:1,&quot;componentId&quot;:&quot;sampleComp&quot;,&quot;componentName&quot;:&quot;Blogs-Redwood-Category-Nav&quot;,&quot;componentFactory&quot;:&quot;&quot;,&quot;componentLayout&quot;:&quot;default&quot;,&quot;contentId&quot;:&quot;&quot;,&quot;contentLayoutCategory&quot;:&quot;&quot;,&quot;contentPlaceholder&quot;:false,&quot;contentTypes&quot;:[],&quot;contentTypeCategory&quot;:&quot;ContentType&quot;,&quot;contentViewing&quot;:&quot;&quot;,&quot;customRenderComplete&quot;:false,&quot;customSettingsData&quot;:{&quot;taxonomyId&quot;:&quot;6BC2FEFCC79B49D7A757708E6D9BE2CB&quot;},&quot;componentConfig&quot;:{&quot;id&quot;:&quot;sample-component&quot;,&quot;settingsData&quot;:{&quot;settingsHeight&quot;:240,&quot;settingsWidth&quot;:300,&quot;settingsRenderOption&quot;:&quot;dialog&quot;,&quot;componentLayouts&quot;:[{&quot;name&quot;:&quot;default&quot;,&quot;displayName&quot;:&quot;IMAGE_LEFT_LAYOUT&quot;},{&quot;name&quot;:&quot;right&quot;,&quot;displayName&quot;:&quot;IMAGE_RIGHT_LAYOUT&quot;},{&quot;name&quot;:&quot;top&quot;,&quot;displayName&quot;:&quot;IMAGE_TOP_LAYOUT&quot;}],&quot;styles&quot;:[{&quot;name&quot;:&quot;Bold&quot;,&quot;class&quot;:&quot;sample-component-bold-style&quot;},{&quot;name&quot;:&quot;Italic&quot;,&quot;class&quot;:&quot;sample-component-italic-style&quot;}],&quot;triggers&quot;:[{&quot;triggerName&quot;:&quot;imageClicked&quot;,&quot;triggerDescription&quot;:&quot;Image clicked&quot;,&quot;triggerPayload&quot;:[{&quot;name&quot;:&quot;payloadData&quot;,&quot;displayName&quot;:&quot;Trigger Payload Data&quot;}]}],&quot;actions&quot;:[{&quot;actionName&quot;:&quot;setImageWidth&quot;,&quot;actionDescription&quot;:&quot;Update the 
image width&quot;,&quot;actionPayload&quot;:[{&quot;name&quot;:&quot;imageWidth&quot;,&quot;description&quot;:&quot;Image Width in pixels&quot;,&quot;type&quot;:{&quot;ojComponent&quot;:{&quot;component&quot;:&quot;ojInputText&quot;}},&quot;value&quot;:&quot;&quot;}]}]}},&quot;description&quot;:&quot;&quot;,&quot;detailPageId&quot;:&quot;&quot;,&quot;height&quot;:&quot;&quot;,&quot;initialized&quot;:true,&quot;isCaaSLayout&quot;:false,&quot;linkType&quot;:&quot;scs-link-action&quot;,&quot;marginBottom&quot;:5,&quot;marginLeft&quot;:5,&quot;marginRight&quot;:5,&quot;marginTop&quot;:5,&quot;nestedComponents&quot;:[{&quot;id&quot;:&quot;imageId&quot;,&quot;type&quot;:&quot;scs-image&quot;,&quot;data&quot;:{&quot;imageUrl&quot;:&quot;[!--$SCS_DIST_FOLDER--]&#x2F;renderer&#x2F;app&#x2F;sdk&#x2F;images&#x2F;sample-image.png&quot;,&quot;marginBottom&quot;:0,&quot;marginLeft&quot;:0,&quot;marginRight&quot;:20,&quot;marginTop&quot;:0}},{&quot;id&quot;:&quot;titleId&quot;,&quot;type&quot;:&quot;scs-title&quot;,&quot;data&quot;:{&quot;userText&quot;:&quot;&lt;div&gt;Local Component&lt;&#x2F;div&gt;&quot;,&quot;fontColor&quot;:&quot;#000000&quot;,&quot;fontFamily&quot;:&quot;&#x27;Helvetica Neue Light&#x27;, Helvetica, Arial, sans-serif&quot;,&quot;fontSize&quot;:20,&quot;marginBottom&quot;:14,&quot;marginLeft&quot;:0,&quot;marginRight&quot;:0,&quot;marginTop&quot;:0,&quot;useStyleClass&quot;:&quot;false&quot;}},{&quot;id&quot;:&quot;paragraphId&quot;,&quot;type&quot;:&quot;scs-paragraph&quot;,&quot;data&quot;:{&quot;userText&quot;:&quot;&lt;p style=\&quot;line-height:1.4em;margin-bottom:4px;\&quot;&gt;As a page author, you can edit the content and settings for this component. 
To change settings, including triggers and actions, click the component menu and choose Settings.&lt;&#x2F;p&gt;&lt;p style=\&quot;line-height:1.4em;\&quot;&gt;As a component developer, you can change the component and its defaults (such as this text) either by working with the files directly through the components browser or by using the desktop app to work with the files on your local system.&lt;&#x2F;p&gt;&quot;,&quot;fontColor&quot;:&quot;#333333&quot;,&quot;fontFamily&quot;:&quot;&#x27;Helvetica Neue Regular&#x27;, Helvetica, Arial, sans-serif&quot;,&quot;fontSize&quot;:14,&quot;marginBottom&quot;:10,&quot;marginLeft&quot;:0,&quot;marginRight&quot;:0,&quot;marginTop&quot;:0,&quot;useStyleClass&quot;:&quot;false&quot;}}],&quot;renderOnAccess&quot;:&quot;&quot;,&quot;styleClass&quot;:&quot;&quot;,&quot;styleClassName&quot;:&quot;&quot;,&quot;seeded&quot;:true,&quot;useStyleClass&quot;:&quot;true&quot;,&quot;visible&quot;:true,&quot;visibleOnMobile&quot;:&quot;&quot;,&quot;visibleNestedComponents&quot;:[],&quot;width&quot;:0},&quot;preRenderedByController&quot;:true},&quot;a057a3dc-2397-4b35-88dc-e9904a3f1789&quot;:{&quot;type&quot;:&quot;scs-component&quot;,&quot;id&quot;:&quot;scs-contentplaceholder&quot;,&quot;data&quot;:{&quot;actions&quot;:&quot;&quot;,&quot;alignment&quot;:&quot;fill&quot;,&quot;assets&quot;:&quot;&quot;,&quot;borderColor&quot;:&quot;#808080&quot;,&quot;borderRadius&quot;:0,&quot;borderStyle&quot;:&quot;none&quot;,&quot;borderWidth&quot;:1,&quot;componentId&quot;:&quot;&quot;,&quot;componentName&quot;:&quot;&quot;,&quot;componentFactory&quot;:&quot;&quot;,&quot;componentLayout&quot;:&quot;&quot;,&quot;contentId&quot;:&quot;&quot;,&quot;contentLayoutCategory&quot;:&quot;Blogs-Redwood-Next-Prev-Post&quot;,&quot;contentPlaceholder&quot;:true,&quot;contentTypes&quot;:[&quot;Blog-Post&quot;,&quot;Syndicated-Blog-Post&quot;],&quot;contentTypeDisplayName&quot;:&quot;&quot;,&quot;contentTypeCategory&quot;:&quot;ContentType&quot;,&quot;contentViewing
&quot;:&quot;&quot;,&quot;customRenderComplete&quot;:false,&quot;customSettingsData&quot;:&quot;&quot;,&quot;componentConfig&quot;:&quot;&quot;,&quot;description&quot;:&quot;&quot;,&quot;detailPageId&quot;:&quot;105&quot;,&quot;height&quot;:&quot;&quot;,&quot;initialized&quot;:true,&quot;isCaaSLayout&quot;:true,&quot;linkType&quot;:&quot;scs-link-action&quot;,&quot;marginBottom&quot;:5,&quot;marginLeft&quot;:5,&quot;marginRight&quot;:5,&quot;marginTop&quot;:5,&quot;nestedComponents&quot;:[],&quot;renderOnAccess&quot;:&quot;&quot;,&quot;styleClass&quot;:&quot;&quot;,&quot;styleClassName&quot;:&quot;&quot;,&quot;seeded&quot;:true,&quot;useStyleClass&quot;:&quot;true&quot;,&quot;visible&quot;:true,&quot;visibleOnMobile&quot;:&quot;&quot;,&quot;visibleNestedComponents&quot;:[],&quot;width&quot;:0},&quot;preRenderedByController&quot;:true}}},&quot;navigationCurr&quot;:105}</script>
           96 <script id="scsRenderObject" type="text/javascript">var require = {waitSeconds: 0};</script>
           97         
           98          
           99 
          100         <script type="text/javascript">
          101                 window.SCSMacros = window.SCSMacros || {};
          102                 var url = window.location.href.split('?')[0];
          103                 var slug = url.substring(url.lastIndexOf('/') + 1);
          104                 //var slug = window.location.href.substring(window.location.href.lastIndexOf('/') + 1);
          105                 window.SCSMacros.getSlugMacro = slug;
          106         </script>
          107 
          108 
          109         <script type="text/javascript">
          110                 function expand() {
          111                         var x = document.getElementsByClassName("u03-collapsed");
          112                         var y = document.getElementsByClassName("u03-expanded");
          113                         for (var i = 0; i < x.length; i += 1) {
          114                                 x[i].style.display = 'none';
          115                         }
          116                         for (var i = 0; i < y.length; i += 1) {
          117                                 y[i].style.display = 'inline';
          118                         }
          119                 }
          120 
          121                 function collapse() {
          122                         var x = document.getElementsByClassName("u03-collapsed");
          123                         var y = document.getElementsByClassName("u03-expanded");
          124                         for (var i = 0; i < x.length; i += 1) {
          125                                 x[i].style.display = 'inline';
          126                         }
          127                         for (var i = 0; i < y.length; i += 1) {
          128                                 y[i].style.display = 'none';
          129                         }
          130                 }
          131         </script>
          132         <!--DTM/Launch embed code - Header -->
          133 
          134 
          135                               <script>!function(e){var n="https://s.go-mpulse.net/boomerang/";if("False"=="True")e.BOOMR_config=e.BOOMR_config||{},e.BOOMR_config.PageParams=e.BOOMR_config.PageParams||{},e.BOOMR_config.PageParams.pci=!0,n="https://s2.go-mpulse.net/boomerang/";if(window.BOOMR_API_key="G52AM-AGLAF-9JTSA-TBAP5-PCJJE",function(){function e(){if(!o){var e=document.createElement("script");e.id="boomr-scr-as",e.src=window.BOOMR.url,e.async=!0,i.parentNode.appendChild(e),o=!0}}function t(e){o=!0;var n,t,a,r,d=document,O=window;if(window.BOOMR.snippetMethod=e?"if":"i",t=function(e,n){var t=d.createElement("script");t.id=n||"boomr-if-as",t.src=window.BOOMR.url,BOOMR_lstart=(new Date).getTime(),e=e||d.body,e.appendChild(t)},!window.addEventListener&&window.attachEvent&&navigator.userAgent.match(/MSIE [67]\./))return window.BOOMR.snippetMethod="s",void t(i.parentNode,"boomr-async");a=document.createElement("IFRAME"),a.src="about:blank",a.title="",a.role="presentation",a.loading="eager",r=(a.frameElement||a).style,r.width=0,r.height=0,r.border=0,r.display="none",i.parentNode.appendChild(a);try{O=a.contentWindow,d=O.document.open()}catch(_){n=document.domain,a.src="javascript:var d=document.open();d.domain='"+n+"';void(0);",O=a.contentWindow,d=O.document.open()}if(n)d._boomrl=function(){this.domain=n,t()},d.write("<bo"+"dy onload='document._boomrl();'>");else if(O._boomrl=function(){t()},O.addEventListener)O.addEventListener("load",O._boomrl,!1);else if(O.attachEvent)O.attachEvent("onload",O._boomrl);d.close()}function a(e){window.BOOMR_onload=e&&e.timeStamp||(new Date).getTime()}if(!window.BOOMR||!window.BOOMR.version&&!window.BOOMR.snippetExecuted){window.BOOMR=window.BOOMR||{},window.BOOMR.snippetStart=(new Date).getTime(),window.BOOMR.snippetExecuted=!0,window.BOOMR.snippetVersion=12,window.BOOMR.url=n+"G52AM-AGLAF-9JTSA-TBAP5-PCJJE";var 
i=document.currentScript||document.getElementsByTagName("script")[0],o=!1,r=document.createElement("link");if(r.relList&&"function"==typeof r.relList.supports&&r.relList.supports("preload")&&"as"in r)window.BOOMR.snippetMethod="p",r.href=window.BOOMR.url,r.rel="preload",r.as="script",r.addEventListener("load",e),r.addEventListener("error",function(){t(!0)}),setTimeout(function(){if(!o)t(!0)},3e3),BOOMR_lstart=(new Date).getTime(),i.parentNode.appendChild(r);else t(!1);if(window.addEventListener)window.addEventListener("load",a,!1);else if(window.attachEvent)window.attachEvent("onload",a)}}(),"".length>0)if(e&&"performance"in e&&e.performance&&"function"==typeof e.performance.setResourceTimingBufferSize)e.performance.setResourceTimingBufferSize();!function(){if(BOOMR=e.BOOMR||{},BOOMR.plugins=BOOMR.plugins||{},!BOOMR.plugins.AK){var n=""=="true"?1:0,t="",a="jwstvjqx2o5kqziiqb3q-f-48999fc27-clientnsv4-s.akamaihd.net",i="false"=="true"?2:1,o={"ak.v":"36","ak.cp":"87563","ak.ai":parseInt("165106",10),"ak.ol":"0","ak.cr":11,"ak.ipv":4,"ak.proto":"h2","ak.rid":"208b9d63","ak.r":43514,"ak.a2":n,"ak.m":"dscx","ak.n":"essl","ak.bpcip":"77.165.58.0","ak.cport":51967,"ak.gh":"23.209.124.154","ak.quicv":"","ak.tlsv":"tls1.3","ak.0rtt":"","ak.csrc":"-","ak.acc":"","ak.t":"1695055991","ak.ak":"hOBiQwZUYzCg5VSAfCLimQ==vTvvCcS7yUvMFUwhArQjprHmS4SyRkG4kcqkubHf0SBAfLCipu8Z3GPJ9e1GyMuPUYO3XEA5R7RpN9uJTq4BkLQbssemRdXUWVprB4rKDAEYqcmRCULA0ABeQ3qfODyP2aGzM6krlmzdUN3sDRsfq+Nqtt/d3qfpf8l9Q/pZ/isYuJ22cZkbqcxVDrtC3ZfyKoGxC4nBvQYyr+3eRkNd8Mn4h1+thrx2qiKO5Edi+DH9ERMG1glOjsIjPS83+W6/oMyxNe216KPSO85XtLtxCjStIG+EsWHRDNn8MY7U1+NNOw66FTYI/LJSo1dChisD40fj1vqVfOoGWFXZRyiJ2eXHK16Azfupm2/vPIH7sRubbBznZp4fKYBBPzIHbMC7CN7dAPBdYhG7M0KpFWkrEWxSbdA9McNb2A+jwoaq3mo=","ak.pv":"262","ak.dpoabenc":"","ak.tf":i};if(""!==t)o["ak.ruds"]=t;var r={i:!1,av:function(n){var t="http.initiator";if(n&&(!n[t]||"spa_hard"===n[t]))o["ak.feo"]=void 0!==e.aFeoApplied?1:0,BOOMR.addVar(o)},rv:function(){var 
e=["ak.bpcip","ak.cport","ak.cr","ak.csrc","ak.gh","ak.ipv","ak.m","ak.n","ak.ol","ak.proto","ak.quicv","ak.tlsv","ak.0rtt","ak.r","ak.acc","ak.t","ak.tf"];BOOMR.removeVar(e)}};BOOMR.plugins.AK={akVars:o,akDNSPreFetchDomain:a,init:function(){if(!r.i){var e=BOOMR.subscribe;e("before_beacon",r.av,null,null),e("onbeacon",r.rv,null,null),r.i=!0}return this},is_complete:function(){return!0}}}}()}(window);</script></head>
          136 
          137 <body class="f20 f20v1" style="opacity:0">
          138         <script src="https://tms.oracle.com/main/prod/utag.sync.js"></script>
          139 
          140         <!-- Loading script asynchronously -->
          141         <script type="text/javascript">
          142                 (function (a, b, c, d) {
          143                         if (location.href.indexOf("tealium=dev") == -1) {
          144                                 a = 'https://tms.oracle.com/main/prod/utag.js';
          145                         } else {
          146                                 a = 'https://tms.oracle.com/main/dev/utag.js';
          147                         }
          148                         b = document; c = 'script'; d = b.createElement(c); d.src = a; d.type = 'text/java' + c; d.async = true;
          149                         a = b.getElementsByTagName(c)[0]; a.parentNode.insertBefore(d, a);
          150                 })();
          151         </script>
          152         <div class="f20w1">
          153 
          154 
          155                 <!-- U18v2 -->
          156                 <div class="u18 u18v2">
          157 
          158                         <div id="u18skip2content">
          159                                 <ul>
          160                                         <li><a id="u18skip2c" href="#maincontent">Skip to content</a></li>
          161                                         <li><a id="u18acc" href="https://www.oracle.com/corporate/accessibility/">Accessibility Policy</a></li>
          162                                 </ul>
          163                         </div>
          164 
          165                         <nav role="banner">
          166                                 <div class="u18w1 cwidth">
          167 
          168                                         <div class="u18w2">
          169                                                 <div class="u18-logo"><a href="https://blogs.oracle.com"><span>Oracle</span></a></div>
          170                                                 <div class="u18-title"><a href="" class="blog-name"></a></div>
          171                                         </div>
          172 
          173                                         <div class="u18w3">
          174 
          175                                                 <div class="u18-search">
          176                             <div class="u18-searchlink">
          177                                 <a href="#search" aria-label="Open Search Field"><span>Search</span></a>
          178                             </div>
          179                             <div class="u18-searchform">
          180                                 <a class="u18-search-action" id="u18exitsearch" href="#exitsearch"
          181                                     aria-label="Exit Search Field"><span>Exit Search Field</span></a>
          182                                 <div id="search" class="scs-slot" data-allowed-items="[ 'scs-contentsearch' ]">
          183                                 </div>
          184                                 <a class="u18-search-action" id="u18clearsearch" href="#clearsearch"
          185                                     aria-label="Clear Search Field" tabindex="0"><span>Clear Search Field</span></a>
          186                             </div>
          187                         </div>
          188                                                 <div class="u18-langdd u18-dd">
          189                                                         <div class="u18-langselect u18-ddlink">
          190                                                                 <a href="#select-language" aria-label="Select Language" role="button"><span
          191                                                                                 class="globe">Select Language</span></a>
          192                                                         </div>
          193 
          194                                                         <div class="u18-langoptions u18-menu">
          195                                                                 <ul class="languagelist" id="languagelist">
          196                                                                         <li><a href="#" class="u18v1w5v1"></a></li>
          197                                                                 </ul>
          198                                                         </div>
          199                                                 </div>
          200 
          201                                                 <div class="u18-menudd u18-dd">
          202                                                         <div class="u18-hamburger u18-ddlink">
          203                                                                 <a href="#menu" aria-label="Menu" aria-haspopup="true"
          204                                                                         role="button"><span>Menu</span></a>
          205                                                         </div>
          206                                                         <div class="u18-menuoptions u18-menu" aria-hidden="true">
          207                                                                 <div id="menu" class="slide-menu">
          208                                                                         <!-- <ul class="ul.icn-list" id="u18-subview"> -->
          209                                                                         <div id="blogs-category-nav" class="scs-slot"
          210                                                                                 data-allowed-items="[ 'Blogs-Redwood-Category-Nav' ]"><div class="scs-row"><div class="scs-col" style="width: 100%;"><div id="f62eb3cd-6ac8-407d-9e06-69cbbc8d821e"><div class="scs-component-bounding-box"><!-- -->
          211 <div>
          212         <div class="scs-custom-component scs-component sampleComp-default-style" style="margin-top:5px;margin-right:5px;margin-bottom:5px;margin-left:5px;">
          213                 <div class="scs-component-content" style="width:100%;">
          214                         <div style="" class="scs-custom-component-wrapper">
          215                                 <div id="f62eb3cd-6ac8-407d-9e06-69cbbc8d821ecustomComponentDiv" data-scs-hydrate="true" data-asset-operation="view:CORE8B88E20204C04A0DADCEBC0499683C49">
          216                                         <div class="blogs-nav">
          217 
          218   <span class="h2-nav categories-text">CATEGORIES</span>
          219   <ul class="ul.icn-list" id="u18-subview">
          220 
          221 
          222 
          223     <li class="mainMenu hasNoMenu">
          224       <a class="categ-menu" href="../category/lnx-announcements">Announcements</a>
          225     </li>
          226     
          227 
          228 
          229 
          230     <li class="mainMenu hasNoMenu">
          231       <a class="categ-menu" href="../category/lnx-events">Events</a>
          232     </li>
          233     
          234 
          235 
          236 
          237     <li class="mainMenu hasNoMenu">
          238       <a class="categ-menu" href="../category/lnx-oracle-cloud-infrastructure">Oracle Cloud Infrastructure</a>
          239     </li>
          240     
          241 
          242 
          243 
          244     <li class="mainMenu hasNoMenu">
          245       <a class="categ-menu" href="../category/lnx-partners">Partners</a>
          246     </li>
          247     
          248 
          249 
          250 
          251     <li class="mainMenu hasNoMenu">
          252       <a class="categ-menu" href="../category/lnx-perspectives">Perspectives</a>
          253     </li>
          254     
          255 
          256 
          257     
          258     <li class="mainMenu">
          259       <a class="hasMenu active categ-menu" href="../category/lnx-ksplice">Technologies</a>
          260       <div class="sub-categories">
          261         <span class="back-btn" style="display: none;"><a href="javascript:void(0)">Back</a></span>
          262        
          263         <ul style="margin: 0 !important">
          264           <li>
          265             <a href="../category/lnx-technologies">Technologies</a>
          266           </li>
          267           <li>
          268             <a href="../category/lnx-ksplice">Ksplice</a>
          269           </li>
          270           <li>
          271             <a href="../category/lnx-linux-kernel-development">Linux Kernel Development</a>
          272           </li>
          273           <li>
          274             <a href="../category/lnx-linux-toolchain-and-tracing">Linux Toolchain &amp; Tracing</a>
          275           </li>
          276         </ul>
          277      
          278       </div>
          279     </li>
          280 
          281 
          282 
          283     <li class="mainMenu hasNoMenu">
          284       <a class="categ-menu" href="../category/lnx-training">Training</a>
          285     </li>
          286     
          287 
          288   </ul>
          289 
          290   <div class="u18-navdivider"></div>
          291   
          292  
          293   <ul>
          294     <li class="h2-nav related-content">RELATED CONTENT</li>
          295     <div id="related-content">
          296       <li><a href="#">Wim Coekaert's blog</a></li>
          297       <li><a href="#">Hardware Cert. List</a></li>
          298       <li><a href="#">ISV Catalog </a></li>
          299       <li><a href="#">Validated Configs  </a></li>
          300       <li><a href="#">Developers</a></li>
          301       <li><a href="#">GitHub</a></li>
          302       <li><a href="#">Open Source</a></li>
          303     </div>
          304   </ul>
          305   <div class="u18-navdivider"></div>
          306 </div>
          307 
          308 
          309 <div class="hydrated-container" data-hydrated="{&quot;contentId&quot;:&quot;CORE8B88E20204C04A0DADCEBC0499683C49&quot;,&quot;categories&quot;:[&quot;Announcements&quot;,&quot;Events&quot;,&quot;Oracle Cloud Infrastructure&quot;,&quot;Partners&quot;,&quot;Perspectives&quot;,&quot;Technologies&quot;,&quot;Training&quot;],&quot;compiledSite&quot;:true}"></div>
          310 <!-- <script>
          311 
          312   function showCategories() {
          313     document.getElementsByClassName("categ-menu").classList.remove("categ-active");
          314     document.getElementsByClassName("mainMenu").style.display = "";
          315     document.getElementsByClassName("back-btn").classList.remove("show");
          316     document.getElementsByClassName("categories-text").style.display = "";
          317     document.getElementsByClassName("back-btn").style.display = "none";
          318     document.getElementsByClassName("sub-categories").classList.remove("active");
          319     document.getElementsByClassName("hasMenu").classList.add("active");
          320   }
          321 
          322   function showSubCategories(eventTarget) {
          323     eventTarget.classList.add("categ-active");
          324     document.getElementsByClassName("mainMenu").not(eventTarget).each(function () {
          325       this.style.display = "none";
          326     });
          327     document.querySelectorAll('.sub-categories.active li:first-child a').focus();
          328     document.getElementsByClassName("categories-text").style.display = "none";
          329     document.getElementsByClassName("categ-active").next('.back-btn').classList.add("show");
          330     eventTarget.parent('.mainMenu').style.display = "";
          331     eventTarget.siblings('.sub-categories').classList.add("active");
          332     eventTarget.classList.remove("active");
          333     document.getElementsByClassName("back-btn").style.display = "";
          334     document.querySelectorAll(".sub-categories.active li:last-child a").addEventListener('keydown', function (e) {
          335       if (e.keyCode == 9) {
          336         showCategories();
          337         document.getElementsByClassName("hasMenu").classList.add("active");
          338       }
          339     });
          340 
          341   }
          342 
          343   document.on('click', '.hasMenu', function (e) {
          344     e.preventDefault();
          345     showSubCategories(this);
          346   });
          347   document.on('click', '.back-btn', function (e) {
          348     showCategories();
          349 
          350   });
          351   document.getElementsByClassName("back-btn").keydown(function (e) {
          352     if (e.keyCode == 9) {
          353       showCategories();
          354       $('.categ-menu.active').parent('.mainMenu').next('li').find('a').focus();
          355     }
          356   });
          357 
          358   document.getElementsByClassName("hasMenu").keydown(function (e) {
          359     if (e.keyCode == 9) {
          360       showSubCategories(this);
          361     }
          362   });
          363   document.getElementsByClassName("mainMenu").keydown(function (e) {
          364     if (e.shiftKey && e.keyCode == 9) {
          365       let hasMenuElement = this.prev('li').find('a').classList.contains("hasMenu");
          366       if (hasMenuElement) {
          367         e.preventDefault();
          368         showSubCategories(this.prev('li').find('a.hasMenu'));
          369         document.querySelectorAll(".sub-categories.active li:last-child a").focus();
          370       }
          371     }
          372   })
          373 
          374 
          375   let hydrateData = document.getElementsByClassName("hydrated-container")[0].getAttribute('data-hydrated');
          376 
          377   if (hydrateData) {
          378 
          379     var data = JSON.parse(hydrateData);
          380     var postCategories = data.categories ? data.categories : [];
          381     var metatags = document.getElementsByTagName("meta");
          382     for (var i = 0; i < metatags.length; i++) {
          383       if (metatags[i].name === "category" && postCategories.length !== 0) {
          384         document.getElementsByTagName("meta")[i].content = postCategories.join();
          385       }
          386       if (metatags[i].name === "keywords" && postCategories.length !== 0) {
          387         document.getElementsByTagName("meta")[i].content = postCategories.join();
          388       }
          389 
          390     }
          391   }
          392 
          393 </script> -->
          394                                 </div>
          395                         </div>
          396                 </div>
          397         </div>
          398 </div>
          399 </div></div></div></div></div>
          400                                                                         <!-- </ul> -->
          401                                                                 </div>
          402 
          403                                                                 <ul>
          404                                                                         <li><a href="" class="homepage">Blogs Home</a></li>
          405                                                                         <li><a href="" class="blogdirectory">Blogs Directory</a></li>
          406                                                                         <li><a href="" class="authordirectory">Featured Authors</a></li>
          407 
          408                                                                         <li><a href="" class="rss-link">RSS</a></li>
          409                                                                 </ul>
          410                                                         </div>
          411 
          412                                                 </div>
          413 
          414                                         </div>
          415 
          416                                 </div>
          417                         </nav>
          418                         <a id="maincontent"></a>
          419                 </div>
          420                 <!-- /U18v2 -->
          421                 <!-- RH03v5 -->
          422                 <section class="rh03 rh03v5 rw-ocean-150bg rw-pattern16w rw-pattern-15p rw-strip rw-strip-custom social">
          423                         <div class="rh03w1 cwidth social-wrapper">
          424 
          425                                 <!-- <div class="rh03bc">
          426 
          427                                         
          428                                         <div class="rh03bc">
          429                                                 <div class="rh03bc1">
          430                                         <ol>
          431                                                 <li><a href="placeholder.html">Oracle blogs</a></li>
          432                                                 <li><a href="placeholder.html">Lorem ipsum dolor</a></li>
          433                                         </ol>
          434                                 </div> 
          435                                         </div>
          436                                         
          437 
          438                                 </div> -->
          439 
          440                                 <div class="rh03pgtitle">
          441                                         <div class="blog-name"></div>
          442                                         <div class="rh03subtitle">
          443                                                 <p></p>
          444                                         </div>
          445                                         <!-- <div id="bannerdescription"></div> -->
          446                                 </div>
          447                                 <div class="social-share-wrapper">
          448                                         <label id="social-share">Follow: </label>
          449                                         <ul class="social-share" aria-labelledby="social-share">
          450                                                 <li>
          451                                                         <a href="" title="Oracle blog RSS" class="icn-rss" target="_blank">
          452                                                                 <span class="sr-only">RSS</span>
          453                                                         </a>
          454                                                 </li>
          455                                                 <li>
          456                                                         <a href="" title="Oracle blog on Facebook" class="icn-facebook" id="facebook-url" target="_blank">
          457                                                                 <span class="sr-only">Facebook</span>
          458                                                         </a>
          459                                                 </li>
          460                                                 <li>
          461                                                         <a href="" title="Oracle blog on Twitter" class="icn-twitter" id="twitter-url" target="_blank">
          462                                                                 <span class="sr-only">Twitter</span>
          463                                                         </a>
          464                                                 </li>
          465                                                 <li>
          466                                                         <a href="" title="Oracle blog on Linkedin" class="icn-linkedin" id="linkedin-url" target="_blank">
          467                                                                 <span class="sr-only">LinkedIn</span>
          468                                                         </a>
          469                                                 </li>
          470                                                 <li>
          471                                                         <a href="" title="Oracle blog on Youtube" class="icn-youtube" id="youtube-url" target="_blank">
          472                                                                 <span class="sr-only">Youtube</span>
          473                                                         </a>
          474                                                 </li>
          475                                                 <li>
          476                                                         <a href="" title="Oracle blog on Instagram" class="icn-instagram" id="instagram-url" target="_blank">
          477                                                                 <span class="sr-only">Instagram</span>
          478                                                         </a>
          479                                                 </li>
          480                                         </ul>
          481                                 </div>
          482 
          483                         </div>
          484                         <div class="rh03customstrip" data-bgimg="../_cache_8b25/_themesdelivery/Blogs-New-Theme/assets/img/rwstrip-blogs-fpo.png"></div>
          485                 </section>
          486                 <!-- /RH03v5 -->
          487                 <div class="scs-slot" id="post-id"><div class="scs-row"><div class="scs-col" style="width: 100%;"><div id="a37b49d5-e11f-4e1f-a5e0-fd37af71a288"><div class="scs-component-bounding-box"><!-- -->
          488 <div>
          489         <div class="scs-custom-component scs-component scs-component-default-style" style="margin-top:5px;margin-right:5px;margin-bottom:5px;margin-left:5px;">
          490                 <div class="scs-component-content" style="width:100%;">
          491                         <div style="" class="scs-custom-component-wrapper">
          492                                 <div id="a37b49d5-e11f-4e1f-a5e0-fd37af71a288customComponentDiv" data-scs-hydrate="true" data-scs-contenttype="Blog-Post" data-asset-operation="view:CORE8B88E20204C04A0DADCEBC0499683C49">
          493                                         <style>
          494     .title {
          495         background-color: #fff;
          496         border: 1px solid #F1EFED;
          497         border-radius: 22px;
          498         max-width: 940px;
          499         margin: 0 auto;
          500         padding: 5px 25px;
          501     }
          502 </style>
          503 <!-- RC81v1 -->
          504 
          505 <section class="rc81 rc81v1 cpad">
          506 
          507     <div class="rc81w1 bwidth">
          508         
          509             <div class="rc81">
          510                 <ul>
          511                   <li class="post-categories"><a href="../category/lnx-technologies" class="rc81accent"> Technologies<span>, </span> </a></li> 
          512                   <li class="post-categories"><a href="../category/lnx-linux-kernel-development" class="rc81accent"> Linux Kernel Development<span>, </span> </a></li> 
          513                 </ul>
          514                 
          515               </div>
          516               <p class="rc81accent" id="categories"></p>
          517         <h1>Syscall latency...  and some uses of speculative execution</h1>
          518         <span id="publishdate">September 12, 2023 |</span><span id="publishdate"> 23 minute read</span>
          519 
          520         <div class="rc81sub ">
          521             <img src="../_cache_8b25/_themesdelivery/Blogs-New-Theme/assets/img/ui_defaultuserimage.jpg" alt="">
          522 
          523             <span><a id="postAuthorName" href="/authors/ankur-arora">Ankur Arora</a>
          524                 <div><span class="rc81title rw-neutral-200txt"></span>
          525             
          526         </div>
          527     </span></div>
          528 
          529      
          530     <!--
          531         <div class="rc81photo">
          532              <img src=""> 
          533         </div>
          534         -->
          535 
          536     </div>
          537 </section>
          538 <!-- /RC81v1 --><!-- RC82v0 -->
          539 <!-- /RC82v0 --><!-- RC86v0 -->
          540 <section class="rc86 rc86v0 cpad">
          541     <div class="rc86w1 bwidth">
          542         <div class="rc86social">
          543             <a href="https://www.facebook.com/dialog/share?app_id=209650819625026&amp;href=../post/syscall-latency" class="sharelink icn-img icn-facebook" aria-label="Share post on Facebook" data-sharetype="facebook">
          544                 <!-- <span>Facebook</span> -->
          545             </a>
          546             <a href="https://twitter.com/share?url=../post/syscall-latency" class="sharelink icn-img icn-twitter" aria-label="Share post on Twitter" data-sharetype="twitter">
          547                 <!-- <span>Twitter</span> -->
          548             </a>
          549             <a href="https://www.linkedin.com/shareArticle?url=../post/syscall-latency" aria-label="Share post on Linkedin" class="sharelink icn-img icn-linkedin" data-sharetype="linked-in">
          550                 <!-- <span>LinkedIn</span> -->
          551             </a>
          552             <a href="placeholder.html" class="sharelink icn-img icn-email" aria-label="Share post on Email" data-sharetype="email">
          553                 <!-- <span>Email</span> -->
          554             </a>
          555         </div>
          556     </div>
          557 </section>
          558 <!-- /RC86v0 -->
          559 
          560 <!-- RC84v0 -->
          561 <section class="rc84v0 rc84zoom ">
          562     <div class="rc84w1 bwidth">
          563         <div class="rc84zoomui">
          564             <b>Text Size <span id="rc84fs">100%</span>:</b>
          565             <div>
          566                 <a href="#smaller-text" class="rc84-smaller" aria-label="decrease font size to 90%">-</a>
          567                 <a href="#larger-text" class="rc84-larger" aria-label="increase font size to 110%">+</a>
          568             </div>
          569         </div>
          570 
          571 
          572         <div class="rc84post">
          573 
          574             <!-- RC84v1 -->
          575             <section class="rc84 rc84v1">
          576 
          577                  <h2 id="introduction">Introduction</h2>
          578 
          579 <p>Moving from UEK5 to UEK6 brought about an unwelcome surprise: an increase in syscall latency on some x86 systems. The root cause, as we will see, was slightly slower evaluation of audit rules, which, given that they are evaluated for every syscall, is not great.</p>
          580 
          581 <p>In this post we start off by exploring the root cause, which turns out not to be UEK-specific: it impacts upstream kernels as well. Then we detail the fixes and how they take advantage of the speculative, out-of-order nature of the CPU pipeline.</p>
          582 
          583 <p>The changes, even though they target low-level optimizations, are quite straightforward, almost trivial.</p>
          584 
          585 <h3 id="background">Background</h3>
          586 
          587 <p>Execution latency of <code style="background:#eeeeee;border:1px solid #cccccc;">getpid()</code>[1] increased by about 15% (measured on an Intel Skylake-X system), from 191 ns on UEK5 to 217 ns on UEK6.</p>
          588 
          589 <p>This was measured in the usual way:</p>
          590 
          591 <pre class="brush: bash;" style="background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;">clock_gettime(CLOCK_MONOTONIC, &amp;start);
          592 for (i = 0; i &lt; large_number; i++)
          593         syscall(SYS_getpid);
          594 clock_gettime(CLOCK_MONOTONIC, &amp;stop);</pre>
          595 
          596 <p>A quick <code style="background:#eeeeee;border:1px solid #cccccc;">perf record</code> showed that almost all of the increased latency was in <code style="background:#eeeeee;border:1px solid #cccccc;">audit_filter_syscall()</code>, which was more expensive in UEK6.</p>
          597 
          598 <p>Oracle Exadata, where this problem was seen, has 37 audit rules that are evaluated in the syscall path. Since audit only wants to log unusual or exceptional events, the benchmark would evaluate these rules in every iteration, but never generate any output. Essentially, this is purely local computation that became slower without any material changes to the audit code or to the audit rules.</p>
          599 
          600 <h3 id="cpu-parameters">CPU-parameters</h3>
          601 
          602 <p>Some Intel Skylake-X parameters that we'll make use of later:</p>
          603 
          604 <pre class="brush: bash;" style="background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;">L1-load-latency: 4-6 cycles
          605 L2-load-latency: 14 cycles 
          606 L1-cache-size: 32K (512 cachelines: 64 sets, 8 ways each)
          607 
          608 ROB size: 224 micro-ops</pre>
          609 
          610 <p>The parameters are taken from the <a href="https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html">Intel SDM</a>.</p>
          611 
          612 <p><strong>Note:</strong> L1 and L2 are the respective data-cache levels, and the ROB is the Reorder Buffer, where instructions are staged for in-order retirement.</p>
          613 
          614 <h2 id="root-cause-analysis">Root cause analysis</h2>
          615 
          616 <p>Drilling down with <code style="background:#eeeeee;border:1px solid #cccccc;">perf stat -d</code>:</p>
          617 
          618 <p>UEK5 (191 ns):</p>
          619 
          620 <pre class="brush: bash;" style="background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;"># perf stat -d -r 5 ./getpid
          621 # output normalized for a single getpid() call
          622 
          623  677.9    cycles                #  3.542 GHz
          624 1635.0    instructions          #  2.40  insn per cycle
          625  325.0    branches
          626    0.5    branch-misses         #  0.16% of all branches
          627  404.0    L1-dcache-loads
          628    0.4    L1-dcache-load-misses #  0.10% of all L1-dcache accesses</pre>
          629 
          630 <p>UEK6 (217 ns):</p>
          631 
          632 <pre class="brush: bash;" style="background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;"># perf stat -d -r 5 ./getpid
          633 # output normalized for a single getpid() call
          634 
          635  770.4    cycles                #    3.545 GHz
          636 1652.0    instructions          #    2.14  insn per cycle
          637  332.2    branches
          638    1.5    branch-misses         #    0.45% of all branches
          639  407.3    L1-dcache-loads
          640    8.6    L1-dcache-load-misses #    2.13% of all L1-dcache accesses</pre>
          641 
          642 <p>Comparing the two, this is an increase of ~100 cycles, with the L1d-load and instruction counts being almost identical across UEK5 and UEK6. This underscores the fact that the audit code, which forms the bulk of the instructions executed, hasn’t changed all that much.</p>
          643 
          644 <p>The IPC is commensurately lower[2]. The proximal cause seems to be the increased L1d-load-misses and the one extra branch-miss.</p>
          645 
          646 <p>These observations were confirmed via enough non-correlated runs (with intervening reboot for each) and so are statistically significant. The L1d-load-miss numbers are somewhat variable across boot cycles, but the trend is close to what we see above.</p>
          647 
          648 <h3 id="audit_filter_syscall"><code style="background:#eeeeee;border:1px solid #cccccc;">audit_filter_syscall()</code></h3>
          649 
          650 <p>From <code style="background:#eeeeee;border:1px solid #cccccc;">perf record</code> we know that the bulk of the increased runtime went to <code style="background:#eeeeee;border:1px solid #cccccc;">audit_filter_syscall()</code>. The procedure itself is primarily a loop that walks the list of rules, calling <code style="background:#eeeeee;border:1px solid #cccccc;">audit_in_mask()</code> for each rule to check if it needs to be evaluated for the current syscall. For <code style="background:#eeeeee;border:1px solid #cccccc;">getpid()</code> the answer will be <code style="background:#eeeeee;border:1px solid #cccccc;">false</code> most of the time (32 of 37 times).</p>
          651 
          652 <pre class="brush: bash;" style="background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;">audit_filter_syscall(...) {
          653     struct audit_entry *e; 
          654     struct audit_context *ctx;
          655 
          656     list = audit_filter_list[AUDIT_FILTER_EXIT]; 
          657 
          658     list_for_each_entry_rcu(e, list, list) {
          659 
          660         if (audit_in_mask(&amp;e-&gt;rule, ctx-&gt;major) &amp;&amp; 
          661             audit_filter_rules(tsk, &amp;e-&gt;rule, ctx, NULL,
          662                                &amp;state, false, x)) { 
          663                 rcu_read_unlock(); 
          664                 ctx-&gt;current_state = state;
          665                 return state;
          666         }
          667     }
          668 
          669 
          670 }
          671  
          672 audit_in_mask(const struct audit_krule *rule, unsigned long val) {
          673     if (val &gt; 0xffffffff)
          674         return false; 
          675 
          676     /*
          677      * val contains the current syscall number. AUDIT_WORD does
          678      * some bit shifting on it.
          679     */
          680     word = AUDIT_WORD(val);
          681     if (word &gt;= AUDIT_BITMASK_SIZE)
          682         return false;
          683 
          684     bit = AUDIT_BIT(val);
          685 
          686     /*
          687      * The load in rule-&gt;mask[word] depends on the audit_krule (which
          688      * hangs off the current rule entry) and the syscall number.
          689      */
          690     return rule-&gt;mask[word] &amp; bit;
          691 }
          692 audit_filter_rules(...) {
          693     /*
          694      * Large switch statement which we ignore for the rest of this
          695      * analysis because, as we will see later, loads executed in it don't
          696      * have an "interesting" alignment and so their latency should be easy
          697      * enough to hide.
          698      */
          699 }</pre>
          700 
          701 <h3 id="memory-accesses">Memory accesses</h3>
          702 
          703 <p>Next let’s look at the data structures accessed in the <code style="background:#eeeeee;border:1px solic #cccccc;">audit_filter_syscall()</code> loop and where the L1d-load-misses might be coming from.</p>
          704 
          705 <pre class="brush: bash;" style="background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;">/* Data structure layout annotated with size and cacheline occupancy
          706  * information using pahole. */
          707 
          708 struct audit_entry {    /* via audit_filter_list[AUDIT_FILTER_EXIT] */
          709 
          710         struct list_head           list;                 /*     0    16 */
          711         struct callback_head       rcu;                  /*    16    16 */
          712         struct audit_krule         rule;                 /*    32   376 */
          713         ...
          714         /* size: 408, cachelines: 7, members: 3 */
          715         /* last cacheline: 24 bytes */
          716 };
          717 
          718 struct audit_krule {    /* inlined in struct audit_entry */
          719         ...
          720         u32                        mask[64];             /*    16   256 */
          721         ...
          722         /* size: 376, cachelines: 6, members: 17 */
          723         /* last cacheline: 56 bytes */
          724 };
          725 
          726 struct audit_context {
          727         ...
          728         int                        major;                /*    20     4 */
          729         ...
          730         /* size: 920, cachelines: 15, members: 46 (slightly larger on UEK6) */
          731         /* sum members: 912, holes: 2, sum holes: 8 */
          732         /* last cacheline: 24 bytes */
          733 };</pre>
          734 
          735 <p>The effective execution loop in <code style="background:#eeeeee;border:1px solic #cccccc;">audit_filter_syscall()</code> (with cacheline access annotations):</p>
          736 
          737 <pre class="brush: bash;" style="background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;">struct audit_entry *e = &amp;audit_filter_list[AUDIT_FILTER_EXIT];
          738 
          739 for_each_iteration {
          740     e = e-&gt;next;                    /* cacheline-0 of audit_entry */
          741     if (e == list)
          742         jmp out;
          743     if (audit_in_mask(e-&gt;rule.mask, /* cacheline-0 of audit_entry */
          744                       ctx-&gt;major))  /* cacheline-0 of audit_context */
          745         audit_filter_rules(e-&gt;rule);
          746 }
          747 out:</pre>
          748 
          749 <p>As the annotations above mention, there are a total of three loads:</p>
          750 
          751 <ol type="1">
          752         <li>Pointer chasing in <code style="background:#eeeeee;border:1px solic #cccccc;">e-&gt;next</code>: the first cacheline of <code style="background:#eeeeee;border:1px solic #cccccc;">struct audit_entry</code>.</li>
          753         <li><code style="background:#eeeeee;border:1px solic #cccccc;">e-&gt;rule.mask[]</code>: accesses the same cacheline as load (1) above.</li>
          754         <li><code style="background:#eeeeee;border:1px solic #cccccc;">ctx-&gt;major</code>: accesses the first cacheline of <code style="background:#eeeeee;border:1px solic #cccccc;">struct audit_context</code>.</li>
          755 </ol>
          756 
          757 <p>Loads (1) and (2) will access a total of 37 cachelines, corresponding to a rule per iteration. Also notice that every single basic block in the rest of the iteration (apart from some error checking in <code style="background:#eeeeee;border:1px solic #cccccc;">audit_in_mask()</code>) has data dependence on the evaluation of <code style="background:#eeeeee;border:1px solic #cccccc;">e=e-&gt;next</code>. Worse, this is a loop-carried dependency, so each iteration depends on the previous one.</p>
          758 
          759 <p>The cacheline for load (3) is accessed once every iteration. This load is unnecessary: <code style="background:#eeeeee;border:1px solic #cccccc;">ctx-&gt;major</code> contains the syscall number, which is a constant for the duration of the syscall. However, because the compiler’s alias analysis cannot prove that <code style="background:#eeeeee;border:1px solic #cccccc;">ctx-&gt;major</code> is not modified, it does not get cached in a register. This also means that <code style="background:#eeeeee;border:1px solic #cccccc;">audit_in_mask()</code> will do out-of-bounds validation checks related to <code style="background:#eeeeee;border:1px solic #cccccc;">ctx-&gt;major</code> over and over.</p>
          760 
          761 <p>Recalling the <code style="background:#eeeeee;border:1px solic #cccccc;">perf-stat -d</code> output above there are a total of around 400 L1d-loads for each <code style="background:#eeeeee;border:1px solic #cccccc;">getpid()</code> call. Of those, the loop does a total of 37*3 loads which map to a total of 38 unique cachelines.</p>
          762 
          763 <p>Alright, I hear you think: granted, walking linked-lists is difficult, there are a lot of cachelines in a lot of iterations or whatever, life is hard and the compiler doesn’t know what it is doing[3]. Even given all of that, nothing here has changed from UEK5 to UEK6, so none of this explains why UEK6 would incur more L1d-load-misses[4].</p>
          764 
          765 <p>Which is true, so that’s next.</p>
          766 
          767 <h3 id="theory-of-the-case">Theory of the case</h3>
          768 
          769 <p>From the background above, we know that the loop is pure computation, and purely local computation at that, so code changes elsewhere should have no effect. And there were no significant code changes from UEK5 to UEK6, so the loop is unchanged (which also applies to the generated assembly.)</p>
          770 
          771 <p>Now insofar as L1d-load-misses are concerned: the number of cachelines accessed (from about 400 L1d-loads per <code style="background:#eeeeee;border:1px solic #cccccc;">getpid()</code> call, not all of which are to unique cachelines) amounts to a number comfortably below the Skylake-X L1d-cache capacity of 512 cachelines. So this loop should not incur any capacity misses.</p>
          772 
          773 <p>Which leaves conflict misses as the probable cause[5]. Skylake-X has an 8-way associative L1: if more than 8 loads in the loop map to the same cache-set some accesses would incur conflict misses.</p>
          774 
          775 <p>Accesses in the loop and how they map to cache-sets:</p>
          776 
          777 <ul>
          778         <li><code style="background:#eeeeee;border:1px solic #cccccc;">struct audit_entry</code>: aligns at a 512B boundary, which limits it to cache-sets <code style="background:#eeeeee;border:1px solic #cccccc;">{0, 8, 16, ... 56}</code>, for a total of 8*8 cache-slots.</li>
          779         <li><code style="background:#eeeeee;border:1px solic #cccccc;">struct audit_context</code>: aligns at a 1024B boundary, which resolves to cache-sets <code style="background:#eeeeee;border:1px solic #cccccc;">{0, 16, 32, 48}</code>, for a total of 4*8 cache-slots. As described earlier, this is a single cacheline which competes with a subset of the <code style="background:#eeeeee;border:1px solic #cccccc;">struct audit_entry</code> cachelines.</li>
          780 </ul>
          781 
          782 <p>Even then, this is 37 cachelines slotted into 64 slots and another slotting into 32 of those 64. This should be easy enough to satisfy, assuming that the kernel allocator has a reasonably sane distribution and isn’t skewed towards a particular set of cachelines (or is similarly skewed on both UEK5 and UEK6.)</p>
          783 
          784 <h3 id="allocation-skew">Allocation skew</h3>
          785 
          786 <p>If allocations for <code style="background:#eeeeee;border:1px solic #cccccc;">struct audit_entry</code> were distributed uniformly, they would map into cache-sets uniformly, ending with similar populations across the cache-sets. This would give a cacheline-spread metric of ~0 (obtained by calculating the standard-deviation of populations across cache-sets.)</p>
          787 
          788 <p>What we see:</p>
          789 
          790 <pre class="brush: bash;" style="background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;">cacheline-spread on UEK5:   1.58
          791 cacheline-spread on UEK6:   1.91</pre>
          792 
          793 <p>(These results are from a large number (&gt; 100) of non-correlated runs. <code style="background:#eeeeee;border:1px solic #cccccc;">auditd</code> allocates at boot, so this was done by rebooting between each run.)</p>
          794 
          795 <p>From these numbers, UEK5 is far from a flat distribution, and UEK6 is somewhat worse, but not dispositively so. Additionally, a slight imbalance will not cause performance degradation: that happens only after cache conflicts kick in, which is after cache-set population crosses the associativity threshold.</p>
          796 
          797 <p>To validate this, we measure how well cycles correlate[6] with 1) L1d-misses, and 2) cacheline-spread:</p>
          798 <style type="text/css">.divTable {
          799   display: table;
          800   width: 80%;
          801 }
          802 .divTableRow {
          803   display: table-row;
          804 }
          805 .divTableHeading {
          806   display: table-header-group;
          807   background-color: #ddd;
          808   font-weight: bold;
          809 }
          810 .divTableCell {
          811   display: table-cell;
          812   padding: 3px 10px;
          813   border: 1px solid #999999;
          814 }
          815 </style>
          816 <p>&nbsp;</p>
          817 
          818 <div class="divTable">
          819 <div class="divTableHeading">
          820 <div class="divTableCell">Kernel</div>
          821 
          822 <div class="divTableCell">cycles:L1d-misses</div>
          823 
          824 <div class="divTableCell">cycles:cacheline-spread</div>
          825 </div>
          826 
          827 <div class="divTableRow">
          828 <div class="divTableCell">UEK5</div>
          829 
          830 <div class="divTableCell">0.74</div>
          831 
          832 <div class="divTableCell">0.22</div>
          833 </div>
          834 
          835 <div class="divTableRow">
          836 <div class="divTableCell">UEK6</div>
          837 
          838 <div class="divTableCell">0.74</div>
          839 
          840 <div class="divTableCell">0.61</div>
          841 </div>
          842 </div>
          843 
          844 <p>&nbsp;</p>
          845 
          846 <p>For both UEK5 and UEK6, “cycles:L1d-misses” is tightly correlated (though the value of 0.74 for both is happenstance) which makes sense. “cycles:cacheline-spread”, however, is well correlated only on UEK6, not UEK5. This suggests that the UEK6 allocator skew is meaningfully worse, enough to cause lower performance.</p>
          847 
          848 <p>Alright, having beaten this dead horse enough, let’s figure out how to fix it next[7].</p>
          849 
          850 <h2 id="speeding-it-up">Speeding it up</h2>
          851 
          852 <p>To get back our lost performance, our task is simple: optimize a hot-loop[8] which is itself executed in the hot syscall path. Compounding the problem, the critical load in the loop is accessed via a linked list.</p>
          853 
          854 <p>Stated like that, it sounds pretty bad. But, as we will see the structure of the problem helps quite a bit:</p>
          855 
          856 <ol type="1">
          857         <li>On a sane system, the common-case is extremely common, syscalls are frequent, and audit logging is unusual. This means that low branch mispreds are not unusual and something we might even depend on.</li>
          858         <li>We are optimizing a no-op loop: the loop walks a bunch of rules, does error checking, and decides if it needs to log. In the common-case, it will conclude that it doesn’t. (This is really (1) restated to stress the no-op nature of the loop.)</li>
          859 </ol>
          860 
          861 <p>A no-op loop implies that the code does not actually care about most of the values it computes. It just inches towards a foregone conclusion.</p>
          862 
          863 <p>This it does (as all code does) by means of dependency chains that transform the input state to output. Here, most dependency chains are short and are really <em>only used to predict the control flow</em>. The only long dependency chain, woven through all the loop iterations, is the one walking the linked-list.</p>
          864 
          865 <p>Now, critically since the branches are predicted perfectly or almost so, the control flow can run quite a bit further than any loads and dependent computation. The control flow thus essentially feeds these loads and other instructions to the ROB, where they wait until resources/dependencies become available, compute the output from their chain which, to reiterate, will only be used to predict the control flow.</p>
          866 
          867 <p>Given that the control flow is already feeding instructions from the correct direction, these are in effect orphan chains that eventually retire without anyone having cared for the output they compute or how long that took.</p>
          868 
          869 <p>Except: this happy state continues only until we run into a resource constraint. For instance, the size of the ROB on Skylake-X is 224 entries and each loop iteration is ~20 instructions. This means instructions worth around 10 loop iterations can be present in the ROB. Now, given that instructions retire on x86 in-order, long running instructions (L1d-load-misses of course, but also L1d-load hits[9]) with long dependence chains would slow retirement down, even were control-flow to be predicted perfectly.</p>
          870 
          871 <p>Bearing these observations in mind, our fixes will try to reduce the amount and cost of work per loop iteration. This allows the loop to retire as close to the gating latency of any long running instructions in the loop.</p>
          872 
          873 <h3 id="cache-ctx-major-in-audit_filter_syscall">Cache <code style="background:#eeeeee;border:1px solic #cccccc;">ctx-&gt;major</code> in <code style="background:#eeeeee;border:1px solic #cccccc;">audit_filter_syscall()</code></h3>
          874 
          875 <pre class="brush: bash;" style="background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;">@@ -785,13 +785,14 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk,
          876  {
          877         struct audit_entry *e;
          878         enum audit_state state;
          879 +       unsigned long major = ctx-&gt;major;
          880 
          881         if (auditd_test_task(tsk))
          882                 return AUDIT_DISABLED;
          883 
          884         rcu_read_lock();
          885         list_for_each_entry_rcu(e, list, list) {
          886 -               if (audit_in_mask(&amp;e-&gt;rule, ctx-&gt;major) &amp;&amp;
          887 +               if (audit_in_mask(&amp;e-&gt;rule, major) &amp;&amp;
          888                     audit_filter_rules(tsk, &amp;e-&gt;rule, ctx, NULL,
          889                                        &amp;state, false)) {
          890                         rcu_read_unlock();</pre>
          891 
          892 <p>Caching <code style="background:#eeeeee;border:1px solic #cccccc;">ctx-&gt;major</code> in a local variable helps in two ways:</p>
          893 
          894 <ul>
          895         <li>Explicitly indicates to the compiler that there are no stores to the cached value. <code style="background:#eeeeee;border:1px solic #cccccc;">audit_in_mask()</code> operates on <code style="background:#eeeeee;border:1px solic #cccccc;">ctx-&gt;major</code> doing some bit-shifting and error checking. Now that the compiler knows that <code style="background:#eeeeee;border:1px solic #cccccc;">major</code> is not modified, it can hoist most of that logic out of the loop so it is not reevaluated over-and-over in every loop iteration.</li>
          896         <li>As described earlier, <code style="background:#eeeeee;border:1px solic #cccccc;">struct audit_context</code> has similar natural alignment concerns as <code style="background:#eeeeee;border:1px solic #cccccc;">struct audit_entry</code>. Allowing the compiler to cache <code style="background:#eeeeee;border:1px solic #cccccc;">ctx-&gt;major</code> in a register (or on the stack) reduces one potential source of contention.</li>
          897 </ul>
          898 
          899 <p>With this change, the number of instructions executed/loop-iteration reduces by 8 (of 20.) Note that most of those were almost free ALU instructions.</p>
          900 
          901 <p>L1d-loads: we removed one L1d-load but added two (due to the compiler now spilling and reloading some state to/from the stack.) However, given that stack accesses are much less likely to have conflicting alignment constraints, the increased loads are less of a concern than the one we got rid of.</p>
          902 
          903 <p>cycles: improve by about 40 cycles. This is because the greater room in the ROB allows our almost perfect branch prediction to speculatively run even further ahead of other instructions.</p>
          904 
          905 <p>Change in latency for UEK6:</p>
          906 
          907 <div class="divTable">
          908 <div class="divTableHeading">
          909 <div class="divTableCell">Version</div>
          910 
          911 <div class="divTableCell">Min<br>
          912 (ns)</div>
          913 
          914 <div class="divTableCell">Mean<br>
          915 (ns)</div>
          916 
          917 <div class="divTableCell">Median<br>
          918 (ns)</div>
          919 
          920 <div class="divTableCell">Max<br>
          921 (ns)</div>
          922 </div>
          923 
          924 <div class="divTableRow">
          925 <div class="divTableCell">baseline</div>
          926 
          927 <div class="divTableCell">196.26</div>
          928 
          929 <div class="divTableCell">212.00</div>
          930 
          931 <div class="divTableCell">207.80</div>
          932 
          933 <div class="divTableCell">240.52</div>
          934 </div>
          935 
          936 <div class="divTableRow">
          937 <div class="divTableCell">ctx-&gt;major</div>
          938 
          939 <div class="divTableCell">183.50</div>
          940 
          941 <div class="divTableCell">201.41</div>
          942 
          943 <div class="divTableCell">198.80</div>
          944 
          945 <div class="divTableCell">226.93</div>
          946 </div>
          947 </div>
          948 
          949 <p>&nbsp;</p>
          950 
          951 <p>From the min-max range, there is a rather large variation in latency that’s caused by variations in allocation resulting in high or low cacheline-spread. In almost all cases though, the latency improves by ~10ns or thereabouts.</p>
          952 
          953 <p>That said, after removing 8 instructions and one load (and adding two less consequential loads), the performance gain is rather minuscule: ~1 cycle/iteration. It is just that the loop executes 37 times, so we make it up in volume.</p>
          954 
          955 <p>More details (<code style="background:#eeeeee;border:1px solic #cccccc;">perf-stat</code> and the before/after versions of the generated code) in <a href="https://github.com/oracle/linux-uek/commit/87a39a3d2ca9a5c7e4d35e4cf4b839c53cc0678d">UEK6 commit-1</a> and in <a href="https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/kernel/auditsc.c?id=069545997510833281f45f83e097017b9fef19b7">Upstream commit-1</a>.</p>
          956 
          957 <h3 id="annotate-branch-direction-for-audit_in_mask">Annotate branch direction for <code style="background:#eeeeee;border:1px solic #cccccc;">audit_in_mask()</code></h3>
          958 
          959 <pre class="brush: bash;" style="background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;">@@ -790,12 +790,13 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk,
          960          rcu_read_lock();
          961          list_for_each_entry_rcu(e, list, list) {
          962  -               if (audit_in_mask(&amp;e-&gt;rule, major) &amp;&amp;
          963  -                   audit_filter_rules(tsk, &amp;e-&gt;rule, ctx, NULL,
          964  -                                      &amp;state, false)) {
          965                                         ...
          966  +               if (unlikely(audit_in_mask(&amp;e-&gt;rule, major))) {
          967  +                       if (audit_filter_rules(tsk, &amp;e-&gt;rule, ctx, NULL,
          968  +                                              &amp;state, false)) {</pre>
          969 
          970 <p>Annotate <code style="background:#eeeeee;border:1px solic #cccccc;">audit_in_mask()</code> as <code style="background:#eeeeee;border:1px solic #cccccc;">unlikely()</code> to allow the compiler to pessimize the call to <code style="background:#eeeeee;border:1px solic #cccccc;">audit_filter_rules()</code>. Two reasons for this change:</p>
          971 
          972 <ul>
          973         <li>The primary motivation was to get rid of the extra branch mispred. This change succeeds in that task but it is unclear why: there’s no significant change in the basic-block structure. The only change is from a branch inversion due to the unlikely clause.</li>
          974         <li>The branch inversion means that the not-taken direction is chosen more often: 32/37 times (changing from 5/37 earlier.) The issue-latency for not-taken branches (0.5-1 cycles) is slightly cheaper than that for taken branches (1-2 cycles)[10].</li>
          975 </ul>
          976 
          977 <p>L1d-loads: reduce by 2 for each loop iteration. This is because the spills and reloads introduced in the “Cache <code style="background:#eeeeee;border:1px solic #cccccc;">ctx-&gt;major</code>…” patch have now been shifted to the unlikely path (the prologue and epilogue of the <code style="background:#eeeeee;border:1px solic #cccccc;">audit_filter_rules()</code> call.)</p>
          978 
          979 <p>cycles: performance improves on average by ~30 cycles/call.</p>
          980 
          981 <p>Change in latency for UEK6:</p>
          982 
          983 <div class="divTable">
          984 <div class="divTableHeading">
          985 <div class="divTableCell">Version</div>
          986 
          987 <div class="divTableCell">Min<br>
          988 (ns)</div>
          989 
          990 <div class="divTableCell">Mean<br>
          991 (ns)</div>
          992 
          993 <div class="divTableCell">Median<br>
          994 (ns)</div>
          995 
          996 <div class="divTableCell">Max<br>
          997 (ns)</div>
          998 </div>
          999 
         1000 <div class="divTableRow">
         1001 <div class="divTableCell">ctx-&gt;major</div>
         1002 
         1003 <div class="divTableCell">183.50</div>
         1004 
         1005 <div class="divTableCell">201.41</div>
         1006 
         1007 <div class="divTableCell">198.80</div>
         1008 
         1009 <div class="divTableCell">226.93</div>
         1010 </div>
         1011 
         1012 <div class="divTableRow">
         1013 <div class="divTableCell">ctx-&gt;major+annot</div>
         1014 
         1015 <div class="divTableCell">165.26</div>
         1016 
         1017 <div class="divTableCell">188.72</div>
         1018 
         1019 <div class="divTableCell">184.25</div>
         1020 
         1021 <div class="divTableCell">230.34</div>
         1022 </div>
         1023 </div>
         1024 
         1025 <p>&nbsp;</p>
         1026 
         1027 <p>More details (<code style="background:#eeeeee;border:1px solic #cccccc;">perf-stat</code> and the before/after versions of the generated code) in <a href="https://github.com/oracle/linux-uek/commit/0288dbdbfb5768ad8ae8a445c72f523bcb99eca0">UEK6 commit-2</a>.</p>
         1028 
         1029 <h3 id="remove-static-linkage-from-audit_filter_syscall">Remove static linkage from <code style="background:#eeeeee;border:1px solic #cccccc;">audit_filter_syscall()</code></h3>
         1030 
         1031 <pre class="brush: bash;" style="background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;">@@ -777,7 +777,7 @@ static bool audit_in_mask(const struct audit_krule *rule, unsigned long
         1032    * also not high enough that we already know we have to write an audit
         1033    * record (i.e., the state is AUDIT_SETUP_CONTEXT or AUDIT_BUILD_CONTEXT).
         1034    */
         1035  -static enum audit_state audit_filter_syscall(struct task_struct *tsk,
         1036  +enum audit_state audit_filter_syscall(struct task_struct *tsk,
         1037                                               struct audit_context *ctx,
         1038                                               struct list_head *list)</pre>
         1039 
         1040 <p><code style="background:#eeeeee;border:1px solic #cccccc;">audit_filter_syscall()</code> is only used locally in the file and so is marked <code style="background:#eeeeee;border:1px solic #cccccc;">static</code>. Additionally, it’s only ever called with a fixed <code style="background:#eeeeee;border:1px solic #cccccc;">list</code> value of <code style="background:#eeeeee;border:1px solic #cccccc;">&amp;audit_filter_list[AUDIT_FILTER_EXIT]</code>.</p>
         1041 
         1042 <p>GCC’s constant propagation pass makes use of these two things to, quite reasonably, const-propagate the third argument to the point of use.</p>
         1043 
         1044 <p>This causes the exit check in the <code style="background:#eeeeee;border:1px solic #cccccc;">list_for_each</code> loop to look like this:</p>
         1045 
         1046 <pre class="brush: bash;" style="background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;">audit_filter_syscall.constprop.18(task, ctx):
         1047    0:       48 8b 1b                mov    (%rbx),%rbx
         1048    3:       48 81 fb e0 67 ac 82    cmp    $0xffffffff82ac67e0,%rbx
         1049                     ffffffff8118b5ed: R_X86_64_32S  audit_filter_list+0x40
         1050   10:       75 e2                   jne    start_iter</pre>
         1051 
         1052 <p>while, without const-propagation it would have looked like this:</p>
         1053 
         1054 <pre class="brush: bash;" style="background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;">audit_filter_syscall(task, ctx, list):
         1055    0:       48 8b 1b                mov    (%rbx),%rbx
         1056    3:       4c 39 e3                cmp    %r12,%rbx
         1057    6:       75 e6                   jne    start_iter</pre>
         1058 
         1059 <p>Now either one ought to be alright, both <code style="background:#eeeeee;border:1px solic #cccccc;">cmp imm32,r</code> and <code style="background:#eeeeee;border:1px solic #cccccc;">cmp r,r</code> forms are equivalent with a latency of 1 cycle, and both are a single micro-op each.</p>
         1060 
         1061 <p>The second form of the <code style="background:#eeeeee;border:1px solic #cccccc;">cmp</code>, however, can be macro-op fused with the <code style="background:#eeeeee;border:1px solic #cccccc;">jne</code>; not entirely sure if the first form can be[11]. The second form is also denser, though that’s not a concern here.</p>
         1062 
         1063 <p>Disallowing GCC from making assumptions about calling contexts by removing the <code style="background:#eeeeee;border:1px solic #cccccc;">static</code> linkage from <code style="background:#eeeeee;border:1px solic #cccccc;">audit_filter_syscall()</code> forces it to pass the <code style="background:#eeeeee;border:1px solic #cccccc;">list</code> parameter in a register which results in a small performance improvement: ~20 cycles (about 0.5 cycles/loop iteration.)</p>
         1064 
         1065 <p>Change in latency for UEK6:</p>
         1066 
         1067 <div class="divTable">
         1068 <div class="divTableHeading">
         1069 <div class="divTableCell">Version</div>
         1070 
         1071 <div class="divTableCell">Min<br>
         1072 (ns)</div>
         1073 
         1074 <div class="divTableCell">Mean<br>
         1075 (ns)</div>
         1076 
         1077 <div class="divTableCell">Median<br>
         1078 (ns)</div>
         1079 
         1080 <div class="divTableCell">Max<br>
         1081 (ns)</div>
         1082 </div>
         1083 
         1084 <div class="divTableRow">
         1085 <div class="divTableCell">ctx-&gt;major+annot</div>
         1086 
         1087 <div class="divTableCell">165.26</div>
         1088 
         1089 <div class="divTableCell">188.72</div>
         1090 
         1091 <div class="divTableCell">184.25</div>
         1092 
         1093 <div class="divTableCell">230.34</div>
         1094 </div>
         1095 
         1096 <div class="divTableRow">
         1097 <div class="divTableCell">ctx-&gt;major+annot+extern</div>
         1098 
         1099 <div class="divTableCell">159.88</div>
         1100 
         1101 <div class="divTableCell">184.35</div>
         1102 
         1103 <div class="divTableCell">177.62</div>
         1104 
         1105 <div class="divTableCell">250.82</div>
         1106 </div>
         1107 </div>
         1108 
         1109 <p>&nbsp;</p>
         1110 
         1111 <p>More details (<code style="background:#eeeeee;border:1px solic #cccccc;">perf-stat</code> and the before/after versions of the generated code) in <a href="https://github.com/oracle/linux-uek/commit/5a74015e20bff63d1052359fbc2c3418e0f6bc4e">UEK6 commit-3</a> and, <a href="https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/kernel/auditsc.c?id=50979953c0c41e929e5f955800da68e1bb24c7ab">Upstream commit-3</a>.</p>
         1112 
         1113 <h2 id="summary">Summary</h2>
         1114 
         1115 <p>The audit subsystem is fairly stable in the Linux kernel, not given to frequent changes. So it was puzzling when it became slower in recent kernels, and because a primary user is the syscall path, concerning[12].</p>
         1116 
         1117 <p>The cause turned out to be higher skew in allocated buffers which results in more lopsided cache-set distribution.</p>
         1118 
         1119 <p>The fixes compensate for the higher costs in the loop by taking advantage of the peculiarities of the execution path and optimizing for the speculative nature of the CPU pipeline.</p>
         1120 
         1121 <p>The three patches, in sum, reduce the overhead by about 30ns (~100 cycles).</p>
         1122 
         1123 <p>The final <code style="background:#eeeeee;border:1px solic #cccccc;">perf stat -d -r 5</code> numbers go from:</p>
         1124 
         1125 <pre class="brush: bash;" style="background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;"># perf stat -d -r 5 ./getpid
         1126 # output normalized for a single getpid() call
         1127 
         1128 cycles                  761.65  (  +- 5.22% )
         1129 instructions           1639.17  (  +- 0.00% )
         1130 IPC                       2.18  (  +- 5.50% )
         1131 branches                328.21  (  +- 0.00% )
         1132 branch-misses             1.37  (  +- 6.56% )
         1133 L1-dcache-loads         404.35  (  +- 0.00% )
         1134 L1-dcache-load-misses     7.99  (  +- 70.71% )</pre>
         1135 
         1136 <p>to:</p>
         1137 
         1138 <pre class="brush: bash;" style="background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;"># perf stat -d -r 5 ./getpid
         1139 # output normalized for a single getpid() call
         1140 
         1141 cycles                  669.09  (  +- 11.23% )
         1142 instructions           1342.04  (  +-  0.00% )
         1143 IPC                       2.03  (  +-  9.85% )
         1144 branches                328.19  (  +-  0.00% )
         1145 branch-misses             0.56  (  +-  5.35% )
         1146 L1-dcache-loads         384.31  (  +-  0.00% )
         1147 L1-dcache-load-misses     5.77  (  +- 84.57% )</pre>
         1148 
         1149 <p>This compares quite well to the UEK5-baseline:</p>
         1150 
         1151 <pre class="brush: bash;" style="background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;"># perf stat -d -r 5 ./getpid
         1152 # output normalized for a single getpid() call
         1153 
         1154 cycles                  672.90  (  +-  1.65% )
         1155 instructions           1622.08  (  +-  0.00% )
         1156 IPC                       2.41  (  +-  1.65% )
         1157 branches                321.20  (  +-  0.00% )
         1158 branch-misses             0.51  (  +-  0.00% )
         1159 L1-dcache-loads         401.32  (  +-  0.00% )
         1160 L1-dcache-load-misses     2.28  (  +- 59.62% )</pre>
         1161 
         1162 <p>Note for non-Skylake-X architectures: Intel Icelake and AMD Milan (the other architectures tested) cope with L1d-load-misses much better so the baseline performance is much better.</p>
         1163 
         1164 <p>With these patches, they only show a small improvement (~10ns): Icelake has a bigger L1d-cache (48K), and a much bigger ROB. Milan also has a bigger ROB and does memory renaming and a bunch of other pipeline optimizations that limit the effect of these optimizations.</p>
         1165 
         1166 <p><strong>Endnote:</strong> what I found personally instructive was how much C really is “a portable assembler” and the significant codegen (and performance) changes that can result from minimal changes to the code.</p>
         1167 
         1168 <h2 id="references">References</h2>
         1169 
         1170 <ol type="1">
         1171         <li>
         1172         <p><code style="background:#eeeeee;border:1px solid #cccccc;">getpid()</code> has a minimal kernel execution path (only does a PID lookup), and so is generally used to measure the overhead of the syscall path.</p>
         1173         </li>
         1174         <li>
         1175         <p>Comparing the IPC for the audit-only portion shows a starker drop:</p>
         1176 
         1177         <pre class="brush: bash;" style="background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;">UEK5: 1427.0  instructions  #  3.41  insn per cycle
         1178 UEK6: 1432.0  instructions  #  2.84  insn per cycle</pre>
         1179         </li>
         1180         <li>
         1181         <p>Alas no, alias analysis is an undecidable problem.</p>
         1182         </li>
         1183         <li>
         1184         <p>Or for that matter, what causes the extra branch-miss.</p>
         1185         </li>
         1186         <li>
         1187         <p>Another possibility is out-of-line code -- frequent interrupts, vmexits etc -- trashing the cache but from profiling these were a non-issue.</p>
         1188         </li>
         1189         <li>
         1190         <p>Measured using the pearson-quotient(x, y): correlation coefficient between quantities x and y.</p>
         1191         </li>
         1192         <li>
         1193         <p>You might notice that this analysis does not address the extra branch-miss. That's because I still have no clue what causes it.</p>
         1194         </li>
         1195         <li>
         1196         <p>The correct fix would be to fix whatever ails the allocator. However, from a quick look at the changes that have gone into related code, it seems non-trivial to find a particular commit which points to the root cause of the skew (especially given that the skew is not constant, but varies from run-to-run.) Also, notably, the fixes described below also apply to UEK5, which means that even if UEK6 becomes faster, UEK5 will also improve somewhat.</p>
         1197         </li>
         1198         <li>
         1199         <p>As mentioned in <a href="#cpu-parameters">CPU-parameters</a>, L1d-loads take 4-6 cycles on Skylake-X. We also know that in the good case (UEK5), this loop is capable of an IPC of 3.41 insn per cycle. So, hiding L1d-load latency is critical for good performance.</p>
         1200         </li>
         1201         <li>
         1202         <p><a href="https://www.agner.org/optimize/instruction_tables.pdf">https://www.agner.org/optimize/instruction_tables.pdf</a>, pg 298 (Skylake-X)</p>
         1203         </li>
         1204         <li>
         1205         <p>The first form fused, needs three inputs: <code style="background:#eeeeee;border:1px solid #cccccc;">%rbx</code>, an <code style="background:#eeeeee;border:1px solid #cccccc;">imm32</code> encoding the distance to the address being compared, and an <code style="background:#eeeeee;border:1px solid #cccccc;">imm8</code> encoding the distance to the branch-dest; the second needs two registers: <code style="background:#eeeeee;border:1px solid #cccccc;">%rbx</code>, <code style="background:#eeeeee;border:1px solid #cccccc;">%r12</code> and only the <code style="background:#eeeeee;border:1px solid #cccccc;">imm8</code>.</p>
         1206         </li>
         1207         <li>
         1208         <p>Just for context, a kernel build (x86-defconfig) makes an aggregate of 27M syscalls, with a syscall every 44us.</p>
         1209         </li>
         1210 </ol>
         1211 
         1212 
         1213             </section>
         1214             <!-- /RC84v1 -->
         1215 
         1216             <!-- RC84v2 -->
         1217             <section class="rc84v2 cpad">
         1218                 <div class="rc84w1 cwidth">
         1219 
         1220                     <div class="rc84bio">
         1221                         <div class="rc84img">
         1222                             <img src="../_cache_8b25/_themesdelivery/Blogs-New-Theme/assets/img/ui_defaultuserimage.jpg" alt="">
         1223                         </div>
         1224                         <div class="rc84blurb">
         1225                             <div class="blogtile-w2-inner text-wrap">
         1226                                 <h4>Ankur Arora</h4>
         1227 
         1228                                 <p></p>
         1229                             </div>
         1230                         </div>
         1231                     </div>
         1232 
         1233                 </div>
         1234             </section>
         1235             <!-- /RC84v2 -->
         1236 
         1237 
         1238         </div>
         1239 
         1240     </div>
         1241 </section>
         1242 <!-- /RC84v0 -->
         1243 
         1244 <!-- /RC83v0 -->
         1245 <input type="hidden" name="hiddenField" value="September 12, 2023" id="pubdate">
         1246 <input type="hidden" name="hiddenField" value="linux" id="primarychannel">
         1247 <div class="hydrate-container" data-hydrate="{&quot;metaItems&quot;:[{&quot;translatable&quot;:true,&quot;createdDate&quot;:{&quot;value&quot;:&quot;2023-09-12T15:00:01.308Z&quot;,&quot;timezone&quot;:&quot;UTC&quot;},&quot;fileExtension&quot;:&quot;contentItem&quot;,&quot;name&quot;:&quot;Ankur&quot;,&quot;description&quot;:&quot;&quot;,&quot;language&quot;:&quot;en-US&quot;,&quot;links&quot;:[{&quot;href&quot;:&quot;https://orasites-prodapp.cec.ocp.oraclecloud.com/content/published/api/v1.1/items/COREF415334566DE45208D79D6CD6FA88629?channelToken=3189ef66cf584820b5b19e6b10792d6f&quot;,&quot;rel&quot;:&quot;self&quot;,&quot;method&quot;:&quot;GET&quot;,&quot;mediaType&quot;:&quot;application/json&quot;}],&quot;id&quot;:&quot;COREF415334566DE45208D79D6CD6FA88629&quot;,&quot;updatedDate&quot;:{&quot;value&quot;:&quot;2023-09-12T15:00:01.308Z&quot;,&quot;timezone&quot;:&quot;UTC&quot;},&quot;type&quot;:&quot;Blog-Author&quot;,&quot;fields&quot;:{&quot;twitter_handle&quot;:null,&quot;facebook_url&quot;:null,&quot;profile_image&quot;:null,&quot;timezone&quot;:&quot;EST&quot;,&quot;last_name&quot;:&quot;Arora&quot;,&quot;bio&quot;:null,&quot;linkedin_url&quot;:null,&quot;middle_name&quot;:null,&quot;first_name&quot;:&quot;Ankur&quot;,&quot;job_title&quot;:null,&quot;email&quot;:&quot;ankur.a.arora&quot;},&quot;slug&quot;:&quot;ankur-arora&quot;}],&quot;contentData&quot;:{&quot;featured_image_display_option&quot;:null,&quot;featured_image_alt_text&quot;:null,&quot;attachments&quot;:null,&quot;og_title&quot;:&quot;Syscall latency...  and some uses of speculative execution&quot;,&quot;featured_image_alternate_text&quot;:&quot;&quot;,&quot;industry&quot;:null,&quot;title&quot;:&quot;Syscall latency...  and some uses of speculative execution&quot;,&quot;body&quot;:&quot;<!DOCTYPE html> <h2 id=\&quot;introduction\&quot;>Introduction</h2>\n\n<p>Moving from UEK5 to UEK6 brought about an unwelcome surprise: an increase in syscall latency on some x86 systems. 
The root cause, as we will see, was slightly slower evaluation of audit rules, which, given that they are evaluated for every syscall, is not great.</p>\n\n<p>In this post we start off by exploring the root cause which turns out to not be UEK specific, it also impacts upstream kernels as well. Then we detail the fixes and how they take advantage of the speculative out-of-order nature of the CPU pipeline.</p>\n\n<p>The changes, even though they target low-level optimizations, are quite straight-forward, almost trivial.</p>\n\n<h3 id=\&quot;background\&quot;>Background</h3>\n\n<p>Execution latency of the <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>getpid()</code>[1] increased by about 15% (measured on an Intel Skylake-X system), from 191ns on UEK5, to 217ns on UEK6.</p>\n\n<p>This was measured in the usual way:</p>\n\n<pre class=\&quot;brush: bash;\&quot; style=\&quot;background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;\&quot;>\nclock_gettime(CLOCK_MONOTONIC, &amp;amp;start);\nfor (i = 0; i &amp;lt; large_number; i++)\n        syscall(SYS_getpid);\nclock_gettime(CLOCK_MONOTONIC, &amp;amp;stop);</pre>\n\n<p>A quick <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>perf record</code>, showed that almost all of the increased latency was in <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>audit_filter_syscall()</code> which was more expensive in UEK6.</p>\n\n<p>Oracle Exadata, where this problem was seen has 37 audit rules that are evaluated in the syscall path. Since audit only wants to log unusual or exceptional events, the benchmark would evaluate these rules in every iteration, but never generate any output. 
Essentially, purely local computation that became slower without there having been any material changes to the audit code or in the audit rules.</p>\n\n<h3 id=\&quot;cpu-parameters\&quot;>CPU-parameters</h3>\n\n<p>Some Intel Skylake-X parameters that we&amp;#39;ll make use of later:</p>\n\n<pre class=\&quot;brush: bash;\&quot; style=\&quot;background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;\&quot;>\nL1-load-latency: 4-6 cycles\nL2-load-latency: 14 cycles \nL1-cache-size: 32K (512 cachelines: 64 sets, 8 ways each)\n\nROB size: 224 micro-ops</pre>\n\n<p>The parameters are taken from the <a href=\&quot;https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sdm.html\&quot;>Intel SDM</a>.</p>\n\n<p><strong>Note:</strong> L1/L2 being the respective data-cache level and ROB, being the Reorder Buffer, where instructions are staged for in-order retirement.</p>\n\n<h2 id=\&quot;root-cause-analysis\&quot;>Root cause analysis</h2>\n\n<p>Drilling down with <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>perf stat -d</code>:</p>\n\n<p>UEK5 (191 ns):</p>\n\n<pre class=\&quot;brush: bash;\&quot; style=\&quot;background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;\&quot;>\n# perf stat -d -r 5 ./getpid\n# output normalized for a single getpid() call\n\n 677.9    cycles                #  3.542 GHz\n1635.0    instructions          #  2.40  insn per cycle\n 325.0    branches\n   0.5    branch-misses         #  0.16% of all branches\n 404.0    L1-dcache-loads\n   0.4    L1-dcache-load-misses #  0.10% of all L1-dcache accesses</pre>\n\n<p>UEK6 (217ns):</p>\n\n<pre class=\&quot;brush: bash;\&quot; style=\&quot;background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;\&quot;>\n# perf stat -d -r 5 ./getpid\n# output normalized for a single getpid() call\n\n 770.4    cycles                #    3.545 GHz\n1652.0    instructions          #    2.14  insn per cycle\n 332.2    branches\n   1.5    branch-misses         #    0.45% of 
all branches\n 407.3    L1-dcache-loads\n   8.6    L1-dcache-load-misses #    2.13% of all L1-dcache accesses</pre>\n\n<p>Comparing, this is an increase of ~100 cycles with the L1d-loads and instruction counts being almost identical across UEK5 and UEK6. This underscores the fact that audit code which forms the bulk of instructions executed hasn&amp;rsquo;t changed all that much.</p>\n\n<p>The IPC is commensurately lower[2]. The proximal cause seems to be the increased L1d-load-misses and the one extra branch-miss.</p>\n\n<p>These observations were confirmed via enough non-correlated runs (with intervening reboot for each) and so are statistically significant. The L1d-load-miss numbers are somewhat variable across boot cycles, but the trend is close to what we see above.</p>\n\n<h3 id=\&quot;audit_filter_syscall\&quot;><code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>audit_filter_syscall()</code></h3>\n\n<p>From <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>perf record</code> we know that the bulk of the increased runtime went to <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>audit_filter_syscall()</code>. The procedure itself is primarily a loop that walks the list of rules, calling <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>audit_in_mask()</code> for each rule to check if it needs to be evaluated for the current syscall. For <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>getpid()</code> the answer will be <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>false</code> most of the time (32 of 37 times.)</p>\n\n<pre class=\&quot;brush: bash;\&quot; style=\&quot;background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;\&quot;>\naudit_filter_syscall(...) 
{\n    struct audit_entry *e; \n    struct audit_entry *ctx;\n\n    list = audit_filter_list[AUDIT_FILTER_EXIT]; \n\n    list_for_each_entry_rcu(e, list, list) {\n\n        if (audit_in_mask(&amp;amp;e-&amp;gt;rule, ctx-&amp;gt;major) &amp;amp;&amp;amp; \n            audit_filter_rules(tsk, &amp;amp;e-&amp;gt;rule, ctx, NULL,\n                               &amp;amp;state, false, x)) { \n                rcu_read_unlock(); \n                ctx-&amp;gt;current_state = state;\n                return state;\n        }\n    }\n\n\n}\n \naudit_in_mask(const struct audit_krule *rule, unsigned long val) {\n    if (val &amp;gt; 0xffffffff)\n        return false; \n\n    /*\n     * val contains the current syscall number. AUDIT_WORD does\n     * some bit shifting on it.\n    */\n    word = AUDIT_WORD(val);\n    if (word &amp;gt;= AUDIT_BITMASK_SIZE)\n        return false;\n\n    bit = AUDIT_BIT(val);\n\n    /*\n     * The load in rule-&amp;gt;mask[word] depends on the audit_krule (which\n     * hangs off the current rule entry) and the syscall number.\n     */\n    return rule-&amp;gt;mask[word] &amp;amp; bit;\n}\naudit_filter_rules(...) {\n    /*\n     * Large switch statement which we ignore for the rest of this\n     * analysis because, as we will see later, loads executed in it don&amp;#39;t\n     * have an &amp;quot;interesting&amp;quot; alignment and so their latency should be easy\n     * enough to hide.\n     */\n}</pre>\n\n<h3 id=\&quot;memory-accesses\&quot;>Memory accesses</h3>\n\n<p>Next let&amp;rsquo;s look at the data structures accessed in the <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>audit_filter_syscall()</code> loop and where the L1d-load-misses might be coming from.</p>\n\n<pre class=\&quot;brush: bash;\&quot; style=\&quot;background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;\&quot;>\n/* Data structure layout annotated with size and cacheline occupancy\n * information using pahole. 
*/\n\nstruct audit_entry {    /* via audit_filter_list[AUDIT_FILTER_EXIT] */\n\n        struct list_head           list;                 /*     0    16 */\n        struct callback_head       rcu;                  /*    16    16 */\n        struct audit_krule         rule;                 /*    32   376 */\n        ...\n        /* size: 408, cachelines: 7, members: 3 */\n        /* last cacheline: 24 bytes */\n};\n\nstruct audit_krule {    /* inlined in struct audit_entry */\n        ...\n        u32                        mask[64];             /*    16   256 */\n        ...\n        /* size: 376, cachelines: 6, members: 17 */\n        /* last cacheline: 56 bytes */\n};\n\nstruct audit_context {\n        ...\n        int                        major;                /*    20     4 */\n        ...\n        /* size: 920, cachelines: 15, members: 46 (slightly larger on UEK6) */\n        /* sum members: 912, holes: 2, sum holes: 8 */\n        /* last cacheline: 24 bytes */\n};</pre>\n\n<p>The effective execution loop in <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>audit_filter_syscall()</code> (with cacheline access annotations):</p>\n\n<pre class=\&quot;brush: bash;\&quot; style=\&quot;background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;\&quot;>\nstruct audit_entry *e = &amp;amp;audit_filter_list[AUDIT_FILTER_EXIT];\n\nfor_each_iteration {\n    e = e-&amp;gt;next;                    /* cacheline-0 of audit_entry */\n    if (e == list)\n        jmp out;\n    if (audit_in_mask(e-&amp;gt;rule.mask, /* cacheline-0 of audit_entry */\n                      ctx-&amp;gt;major))  /* cacheline-0 of audit_context */\n        audit_filter_rules(e-&amp;gt;rule);\n}\nout:</pre>\n\n<p>As the annotations above mention, there are a total of three loads:</p>\n\n<ol type=\&quot;1\&quot;>\n\t<li>Pointer chasing in <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>e-&amp;gt;next</code>: the first cacheline of <code 
style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>struct audit_entry</code>.</li>\n\t<li><code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>e-&amp;gt;rule.mask[]</code>: accesses the same cacheline as load (1) above.</li>\n\t<li><code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>ctx-&amp;gt;major</code>: accesses the first cacheline of <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>struct audit_context</code>.</li>\n</ol>\n\n<p>Loads (1) and (2) will access a total of 37 cachelines, corresponding to a rule per iteration. Also notice that every single basic block in the rest of the iteration (apart from some error checking in <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>audit_in_mask()</code>) has data dependence on the evaluation of <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>e=e-&amp;gt;next</code>. Worse this is a loop carried dependency, so each iteration depends on the previous one.</p>\n\n<p>The cacheline for load (3) is accessed once every iteration. This load is unnecessary, <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>ctx-&amp;gt;major</code> contains the syscall number, which is a constant for the duration of the syscall. However, because the compiler&amp;rsquo;s alias analysis cannot prove that <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>ctx-&amp;gt;major</code> is not mutilated, it does not get cached in a register. 
This also means that <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>audit_in_mask()</code> will do out-of-bound validation checks related to <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>ctx-&amp;gt;major</code> over and over.</p>\n\n<p>Recalling the <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>perf-stat -d</code> output above there are a total of around 400 L1d-loads for each <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>getpid()</code> call. Of those, the loop does a total of 37*3 loads which map to a total of 38 unique cachelines.</p>\n\n<p>Alright, I hear you think: granted, walking linked-lists is difficult, there are a lot of cachelines in a lot of iterations or whatever, life is hard and the compiler doesn&amp;rsquo;t know what it is doing[3]. Even given all of that, nothing here has changed from UEK5 to UEK6, so none of this explains why UEK6 would incur more L1d-load-misses[4].</p>\n\n<p>Which is true, so that&amp;rsquo;s next.</p>\n\n<h3 id=\&quot;theory-of-the-case\&quot;>Theory of the case</h3>\n\n<p>From the background above, we know that the loop is pure computation, and purely local computation at that, so code changes elsewhere should have no effect. And there were no significant code changes from UEK5 to UEK6, so the loop is unchanged (which also applies to the generated assembly.)</p>\n\n<p>Now insofar as L1d-load-misses are concerned: the number of cachelines accessed (from about 400 L1d-loads per <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>getpid()</code> call, not all of which are to unique cachelines) amount to a number comfortably below the Skylake-X L1d-cache capacity of 512 cachelines. So this loop should not incur any capacity misses.</p>\n\n<p>Which leaves conflict misses as the probable cause[5]. 
Skylake-X has an 8-way associative L1: if more than 8 loads in the loop map to the same cache-set some accesses would incur conflict misses.</p>\n\n<p>Accesses in the loop and how they map to cache-sets:</p>\n\n<ul>\n\t<li><code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>struct audit_entry</code>: aligns at a 512B boundary, which limits it to cache-sets <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>{0, 8, 16, ... 56}</code>, for a total of 8*8 cache-slots.</li>\n\t<li><code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>struct audit_context</code>: aligns at a 1024B boundary, which resolves to cache-sets <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>{0, 16, 32, 48}</code>, for a total of 4*8 cache-slots. As described earlier, this is a single cacheline which competes with a subset of the <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>struct audit_entry</code> cachelines.</li>\n</ul>\n\n<p>Even then, this is 37 cachelines slotted into 64 slots and another slotting into 32 of those 64. This should be easy enough to satisfy, assuming that the kernel allocator has a reasonably sane distribution and isn&amp;rsquo;t skewed towards a particular set of cachelines (or is similarly skewed on both UEK5 and UEK6.)</p>\n\n<h3 id=\&quot;allocation-skew\&quot;>Allocation skew</h3>\n\n<p>If, allocations for <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>struct audit_entry</code> were distributed uniformly, they would map into cache-sets uniformly, ending with similar populations across the cache-sets. 
This would give a cacheline-spread metric of ~0 (obtained by calculating the standard-deviation of populations across cache-sets.)</p>\n\n<p>What we see:</p>\n\n<pre class=\&quot;brush: bash;\&quot; style=\&quot;background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;\&quot;>\ncacheline-spread on UEK5:   1.58\ncacheline-spread on UEK6:   1.91</pre>\n\n<p>(These results are from a large number (&amp;gt; 100) of non-correlated runs. <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>auditd</code> allocates at boot, so this was done by rebooting between each run.)</p>\n\n<p>From these numbers, UEK5 is far from a flat distribution, and UEK6 is somewhat worse, but not dispositively so. Additionally, a slight imbalance will not cause performance degradation: that happens only after cache conflicts kick in, which is after cache-set population crosses the associativity threshold.</p>\n\n<p>To validate this, we measure how well cycles correlate[6] with 1) with L1d-misses, and 2) cacheline-spread:</p>\n<style type=\&quot;text/css\&quot;>.divTable {\n  display: table;\n  width: 80%;\n}\n.divTableRow {\n  display: table-row;\n}\n.divTableHeading {\n  display: table-header-group;\n  background-color: #ddd;\n  font-weight: bold;\n}\n.divTableCell {\n  display: table-cell;\n  padding: 3px 10px;\n  border: 1px solid #999999;\n}\n</style>\n<p>&amp;nbsp;</p>\n\n<div class=\&quot;divTable\&quot;>\n<div class=\&quot;divTableHeading\&quot;>\n<div class=\&quot;divTableCell\&quot;>Kernel</div>\n\n<div class=\&quot;divTableCell\&quot;>cycles:L1d-misses</div>\n\n<div class=\&quot;divTableCell\&quot;>cycles:cacheline-spread</div>\n</div>\n\n<div class=\&quot;divTableRow\&quot;>\n<div class=\&quot;divTableCell\&quot;>UEK5</div>\n\n<div class=\&quot;divTableCell\&quot;>0.74</div>\n\n<div class=\&quot;divTableCell\&quot;>0.22</div>\n</div>\n\n<div class=\&quot;divTableRow\&quot;>\n<div class=\&quot;divTableCell\&quot;>UEK6</div>\n\n<div 
class=\&quot;divTableCell\&quot;>0.74</div>\n\n<div class=\&quot;divTableCell\&quot;>0.61</div>\n</div>\n</div>\n\n<p>&amp;nbsp;</p>\n\n<p>For both UEK5 and UEK6, &amp;ldquo;cycles:L1d-misses&amp;rdquo; is tightly correlated (though the value of 0.74 for both is happenstance) which makes sense. &amp;ldquo;cycles:cacheline-spread&amp;rdquo;, however, is well correlated only on UEK6, not UEK5. This suggests that the UEK6 allocator skew is meaningfully worse, enough to cause lower performance.</p>\n\n<p>Alright, having beaten this dead horse enough, let&amp;rsquo;s figure out how to fix it next[7].</p>\n\n<h2 id=\&quot;speeding-it-up\&quot;>Speeding it up</h2>\n\n<p>To get back our lost performance, our task is simple: optimize a hot-loop[8] which is itself executed in the hot syscall path. Compounding the problem, the critical load in the loop is accessed via a linked list.</p>\n\n<p>Stated like that, it sounds pretty bad. But, as we will see the structure of the problem helps quite a bit:</p>\n\n<ol type=\&quot;1\&quot;>\n\t<li>On a sane system, the common-case is extremely common, syscalls are frequent, and audit logging is unusual. This means that low branch mispreds are not unusual and something we might even depend on.</li>\n\t<li>We are optimizing a no-op loop: the loop walks a bunch of rules, does error checking, and decides if it needs to log. In the common-case, it will conclude that it doesn&amp;rsquo;t. (This is really (1) restated to stress the no-op nature of the loop.)</li>\n</ol>\n\n<p>A no-op loop implies that the code does not actually care about most of the values it computes. It just inches towards a foregone conclusion.</p>\n\n<p>This it does (as all code does) by means of dependency chains that transform the input state to output. Here, most dependency chains are short and, are really <em>only used to predict the control flow</em>. 
The only long dependency chain, woven through all the loop iterations, is the one walking the linked-list.</p>\n\n<p>Now, critically since the branches are predicted perfectly or almost so, the control flow can run quite a bit further than any loads and dependent computation. The control flow thus essentially feeds these loads and other instructions to the ROB, where they wait until resources/dependencies become available, compute the output from their chain which, to reiterate, will only be used to predict the control flow.</p>\n\n<p>Given that the control flow is already feeding instructions from the correct direction, these are in effect orphan chains that eventually retire without anyone having cared for the output they compute or how long that took.</p>\n\n<p>Except: this happy state continues only until we run into a resource constraint. For instance, the size of the ROB on Skylake-X is 224 entries and each loop iteration is ~20 instructions. This means instructions worth around 10 loop iterations can be present in the ROB. Now, given that instructions retire on x86 in-order, long running instructions (L1d-load-misses of course, but also L1d-load hits[9]) with long dependence chains would slow retirement down, even were control-flow to be predicted perfectly.</p>\n\n<p>Bearing these observations in mind, our fixes will try to reduce the amount and cost of work per loop iteration. 
This allows the loop to retire as close to the gating latency of any long running instructions in the loop.</p>\n\n<h3 id=\&quot;cache-ctx-major-in-audit_filter_syscall\&quot;>Cache <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>ctx-&amp;gt;major</code> in <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>audit_filter_syscall()</code></h3>\n\n<pre class=\&quot;brush: bash;\&quot; style=\&quot;background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;\&quot;>\n@@ -785,13 +785,14 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk,\n {\n        struct audit_entry *e;\n        enum audit_state state;\n+       unsigned long major = ctx-&amp;gt;major;\n\n        if (auditd_test_task(tsk))\n                return AUDIT_DISABLED;\n\n        rcu_read_lock();\n        list_for_each_entry_rcu(e, list, list) {\n-               if (audit_in_mask(&amp;amp;e-&amp;gt;rule, ctx-&amp;gt;major) &amp;amp;&amp;amp;\n+               if (audit_in_mask(&amp;amp;e-&amp;gt;rule, major) &amp;amp;&amp;amp;\n                    audit_filter_rules(tsk, &amp;amp;e-&amp;gt;rule, ctx, NULL,\n                                       &amp;amp;state, false)) {\n                        rcu_read_unlock();</pre>\n\n<p>Caching <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>ctx-&amp;gt;major</code> in a local variable helps in two ways:</p>\n\n<ul>\n\t<li>Explicitly indicates to the compiler that there are no stores to the cached value. <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>audit_in_mask()</code> operates on <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>ctx-&amp;gt;major</code> doing some bit-shifting and error checking. 
Now that the compiler knows that <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>major</code> is not modified, it can hoist most of that logic out of the loop so it is not reevaluated over-and-over in every loop iteration.</li>\n\t<li>As described earlier, <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>struct audit_context</code> has similar natural alignment concerns as <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>struct audit_entry</code>. Allowing the compiler to cache <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>ctx-&amp;gt;major</code> in a register (or on the stack) reduces one potential source of contention.</li>\n</ul>\n\n<p>With this change the number of instructions executed/loop-iteration reduce by 8 (of 20.) Note that most of those were almost free ALU instructions.</p>\n\n<p>L1d-loads: we removed one L1d-load but added two (due to the compiler now spilling and reloading some state to/from the stack.) However, given that stack accesses are much less likely to have conflicting alignment constraints, the increased loads are less of a concern than the one we got rid of.</p>\n\n<p>cycles: improve by about 40 cycles. 
This is because the greater room in the ROB allows our almost perfect branch prediction to speculatively run even further ahead of other instructions.</p>\n\n<p>Change in latency for UEK6:</p>\n\n<div class=\&quot;divTable\&quot;>\n<div class=\&quot;divTableHeading\&quot;>\n<div class=\&quot;divTableCell\&quot;>Version</div>\n\n<div class=\&quot;divTableCell\&quot;>Min<br />\n(ns)</div>\n\n<div class=\&quot;divTableCell\&quot;>Mean<br />\n(ns)</div>\n\n<div class=\&quot;divTableCell\&quot;>Median<br />\n(ns)</div>\n\n<div class=\&quot;divTableCell\&quot;>Max<br />\n(ns)</div>\n</div>\n\n<div class=\&quot;divTableRow\&quot;>\n<div class=\&quot;divTableCell\&quot;>baseline</div>\n\n<div class=\&quot;divTableCell\&quot;>196.26</div>\n\n<div class=\&quot;divTableCell\&quot;>212.00</div>\n\n<div class=\&quot;divTableCell\&quot;>207.80</div>\n\n<div class=\&quot;divTableCell\&quot;>240.52</div>\n</div>\n\n<div class=\&quot;divTableRow\&quot;>\n<div class=\&quot;divTableCell\&quot;>ctx-&amp;gt;major</div>\n\n<div class=\&quot;divTableCell\&quot;>183.50</div>\n\n<div class=\&quot;divTableCell\&quot;>201.41</div>\n\n<div class=\&quot;divTableCell\&quot;>198.80</div>\n\n<div class=\&quot;divTableCell\&quot;>226.93</div>\n</div>\n</div>\n\n<p>&amp;nbsp;</p>\n\n<p>From the min-max range, there is a rather large variation in latency that&amp;rsquo;s caused by variations in allocation resulting in high or low cacheline-spread. In almost all cases though, the latency improves by ~10ns or thereabouts.</p>\n\n<p>That said, after removing 8 instructions and one load (and adding two less consequential loads), the performance gain is rather minuscule: ~1 cycle/iteration. 
Just that the loop executes 37 times, so we make it up in volume.</p>\n\n<p>More details (<code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>perf-stat</code> and the before/after versions of the generated code) in <a href=\&quot;https://github.com/oracle/linux-uek/commit/87a39a3d2ca9a5c7e4d35e4cf4b839c53cc0678d\&quot;>UEK6 commit-1</a> and in <a href=\&quot;https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/kernel/auditsc.c?id=069545997510833281f45f83e097017b9fef19b7\&quot;>Upstream commit-1</a>.</p>\n\n<h3 id=\&quot;annotate-branch-direction-for-audit_in_mask\&quot;>Annotate branch direction for <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>audit_in_mask()</code></h3>\n\n<pre class=\&quot;brush: bash;\&quot; style=\&quot;background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;\&quot;>\n@@ -790,12 +790,13 @@ static enum audit_state audit_filter_syscall(struct task_struct *tsk,\n         rcu_read_lock();\n         list_for_each_entry_rcu(e, list, list) {\n -               if (audit_in_mask(&amp;amp;e-&amp;gt;rule, major) &amp;amp;&amp;amp;\n -                   audit_filter_rules(tsk, &amp;amp;e-&amp;gt;rule, ctx, NULL,\n -                                      &amp;amp;state, false)) {\n                                        ...\n +               if (unlikely(audit_in_mask(&amp;amp;e-&amp;gt;rule, major))) {\n +                       if (audit_filter_rules(tsk, &amp;amp;e-&amp;gt;rule, ctx, NULL,\n +                                              &amp;amp;state, false)) {</pre>\n\n<p>Annotate <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>audit_in_mask()</code> as <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>unlikely()</code> to allow the compiler to pessimize the call to <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>audit_filter_rules()</code>. 
Two reasons for this change:</p>\n\n<ul>\n\t<li>The primary motivation was to get rid of the extra branch mispred. This change succeeds in that task but it is unclear why: there&amp;rsquo;s no significant change in the basic-block structure. The only change is from a branch inversion due to the unlikely clause.</li>\n\t<li>The branch inversion means that the not-taken direction is chosen more often: 32/37 times (changing from 5/37 earlier.) The issue-latency for not-taken branches is 0.5-1 cycles, for taken branches 1-2 cycles[10] is slightly cheaper.</li>\n</ul>\n\n<p>L1d-loads: reduce by 2 for each loop iteration. This is because the spills and reloads introduced in the &amp;ldquo;Cache <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>ctx-&amp;gt;major</code>&amp;hellip;&amp;rdquo; patch have now been shifted to the unlikely path (the prologue and epilogue of the <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>audit_filter_rules()</code> call.)</p>\n\n<p>cycles: performance improves on average by ~30 cycles/call.</p>\n\n<p>Change in latency for UEK6:</p>\n\n<div class=\&quot;divTable\&quot;>\n<div class=\&quot;divTableHeading\&quot;>\n<div class=\&quot;divTableCell\&quot;>Version</div>\n\n<div class=\&quot;divTableCell\&quot;>Min<br />\n(ns)</div>\n\n<div class=\&quot;divTableCell\&quot;>Mean<br />\n(ns)</div>\n\n<div class=\&quot;divTableCell\&quot;>Median<br />\n(ns)</div>\n\n<div class=\&quot;divTableCell\&quot;>Max<br />\n(ns)</div>\n</div>\n\n<div class=\&quot;divTableRow\&quot;>\n<div class=\&quot;divTableCell\&quot;>ctx-&amp;gt;major</div>\n\n<div class=\&quot;divTableCell\&quot;>183.50</div>\n\n<div class=\&quot;divTableCell\&quot;>201.41</div>\n\n<div class=\&quot;divTableCell\&quot;>198.80</div>\n\n<div class=\&quot;divTableCell\&quot;>226.93</div>\n</div>\n\n<div class=\&quot;divTableRow\&quot;>\n<div class=\&quot;divTableCell\&quot;>ctx-&amp;gt;major+annot</div>\n\n<div 
class=\&quot;divTableCell\&quot;>165.26</div>\n\n<div class=\&quot;divTableCell\&quot;>188.72</div>\n\n<div class=\&quot;divTableCell\&quot;>184.25</div>\n\n<div class=\&quot;divTableCell\&quot;>230.34</div>\n</div>\n</div>\n\n<p>&amp;nbsp;</p>\n\n<p>More details (<code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>perf-stat</code> and the before/after versions of the generated code) in <a href=\&quot;https://github.com/oracle/linux-uek/commit/0288dbdbfb5768ad8ae8a445c72f523bcb99eca0\&quot;>UEK6 commit-2</a>.</p>\n\n<h3 id=\&quot;remove-static-linkage-from-audit_filter_syscall\&quot;>Remove static linkage from <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>audit_filter_syscall()</code></h3>\n\n<pre class=\&quot;brush: bash;\&quot; style=\&quot;background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;\&quot;>\n@@ -777,7 +777,7 @@ static bool audit_in_mask(const struct audit_krule *rule, unsigned long\n   * also not high enough that we already know we have to write an audit\n   * record (i.e., the state is AUDIT_SETUP_CONTEXT or AUDIT_BUILD_CONTEXT).\n   */\n -static enum audit_state audit_filter_syscall(struct task_struct *tsk,\n +enum audit_state audit_filter_syscall(struct task_struct *tsk,\n                                              struct audit_context *ctx,\n                                              struct list_head *list)</pre>\n\n<p><code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>audit_filter_syscall()</code> is only used locally in the file and so is marked <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>static</code>. 
Additionally, it&amp;rsquo;s only ever called with a fixed <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>list</code> value of <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>&amp;amp;audit_filter_list[AUDIT_FILTER_EXIT])</code>.</p>\n\n<p>GCC&amp;rsquo;s constant propagation pass makes use of these two things to, quite reasonably, const-propagate the third argument to the point of use.</p>\n\n<p>This causes the exit check in the <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>list_for_each</code> loop to look like this:</p>\n\n<pre class=\&quot;brush: bash;\&quot; style=\&quot;background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;\&quot;>\naudit_filter_syscall.constprop.18(task, ctx):\n   0:       48 8b 1b                mov    (%rbx),%rbx\n   3:       48 81 fb e0 67 ac 82    cmp    $0xffffffff82ac67e0,%rbx\n                    ffffffff8118b5ed: R_X86_64_32S  audit_filter_list+0x40\n  10:       75 e2                   jne    start_iter</pre>\n\n<p>while, without const-propagation it would have looked like this:</p>\n\n<pre class=\&quot;brush: bash;\&quot; style=\&quot;background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;\&quot;>\naudit_filter_syscall(task, ctx, list):\n   0:       48 8b 1b                mov    (%rbx),%rbx\n   3:       4c 39 e3                cmp    %r12,%rbx\n   6:       75 e6                   jne    start_iter</pre>\n\n<p>Now either one ought to be alright, both <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>cmp imm32,r</code> and <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>cmp r,r</code> forms are equivalent with a latency of 1 cycle, and both are a single micro-op each.</p>\n\n<p>The second form of the <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>cmp</code>, however, can be macro-op fused with the <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>jne</code>; not 
entirely sure if the first form can be[11]. The second form is also denser, though that&amp;rsquo;s not a concern here.</p>\n\n<p>Disallowing GCC from making assumptions about calling contexts by removing the <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>static</code> linkage from <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>audit_filter_syscall()</code> forces it to pass the <code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>list</code> parameter in a register which results in a small performance improvement: ~20 cycles (about 0.5 cycles/loop iteration.)</p>\n\n<p>Change in latency for UEK6:</p>\n\n<div class=\&quot;divTable\&quot;>\n<div class=\&quot;divTableHeading\&quot;>\n<div class=\&quot;divTableCell\&quot;>Version</div>\n\n<div class=\&quot;divTableCell\&quot;>Min<br />\n(ns)</div>\n\n<div class=\&quot;divTableCell\&quot;>Mean<br />\n(ns)</div>\n\n<div class=\&quot;divTableCell\&quot;>Median<br />\n(ns)</div>\n\n<div class=\&quot;divTableCell\&quot;>Max<br />\n(ns)</div>\n</div>\n\n<div class=\&quot;divTableRow\&quot;>\n<div class=\&quot;divTableCell\&quot;>ctx-&amp;gt;major+annot</div>\n\n<div class=\&quot;divTableCell\&quot;>165.26</div>\n\n<div class=\&quot;divTableCell\&quot;>188.72</div>\n\n<div class=\&quot;divTableCell\&quot;>184.25</div>\n\n<div class=\&quot;divTableCell\&quot;>230.34</div>\n</div>\n\n<div class=\&quot;divTableRow\&quot;>\n<div class=\&quot;divTableCell\&quot;>ctx-&amp;gt;major+annot+extern</div>\n\n<div class=\&quot;divTableCell\&quot;>159.88</div>\n\n<div class=\&quot;divTableCell\&quot;>184.35</div>\n\n<div class=\&quot;divTableCell\&quot;>177.62</div>\n\n<div class=\&quot;divTableCell\&quot;>250.82</div>\n</div>\n</div>\n\n<p>&amp;nbsp;</p>\n\n<p>More details (<code style=\&quot;background:#eeeeee;border:1px solic #cccccc;\&quot;>perf-stat</code> and the before/after versions of the generated code) in <a 
href=\&quot;https://github.com/oracle/linux-uek/commit/5a74015e20bff63d1052359fbc2c3418e0f6bc4e\&quot;>UEK6 commit-3</a> and, <a href=\&quot;https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/kernel/auditsc.c?id=50979953c0c41e929e5f955800da68e1bb24c7ab\&quot;>Upstream commit-3</a>.</p>\n\n<h2 id=\&quot;summary\&quot;>Summary</h2>\n\n<p>The audit subsystem is fairly stable in the Linux kernel, not given to frequent changes. So it was puzzling when it became slower in recent kernels, and because a primary user is the syscall path, concerning[12].</p>\n\n<p>The cause turned out to be higher skew in allocated buffers which results in more lopsided cache-set distribution.</p>\n\n<p>The fixes compensate for the higher costs in the loop by taking advantage of the peculiarities of the execution path and optimizing for the speculative nature of the CPU pipeline.</p>\n\n<p>The three patches, in sum, reduce the overhead by about 30ns (~100 cycles).</p>\n\n<p>Final <code style=\&quot;background:#eeeeee;border:1px solid #cccccc;\&quot;>perf stat -d -r 5</code> go from:</p>\n\n<pre class=\&quot;brush: bash;\&quot; style=\&quot;background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;\&quot;>\n# perf stat -d -r 5 ./getpid\n# output normalized for a single getpid() call\n\ncycles                  761.65  (  +- 5.22% )\ninstructions           1639.17  (  +- 0.00% )\nIPC                       2.18  (  +- 5.50% )\nbranches                328.21  (  +- 0.00% )\nbranch-misses             1.37  (  +- 6.56% )\nL1-dcache-loads         404.35  (  +- 0.00% )\nL1-dcache-load-misses     7.99  (  +- 70.71% )</pre>\n\n<p>to:</p>\n\n<pre class=\&quot;brush: bash;\&quot; style=\&quot;background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;\&quot;>\n# perf stat -d -r 5 ./getpid\n# output normalized for a single getpid() call\n\ncycles                  669.09  (  +- 11.23% )\ninstructions           1342.04  (  +-  0.00% )\nIPC                       2.03  (  +-  9.85% 
)\nbranches                328.19  (  +-  0.00% )\nbranch-misses             0.56  (  +-  5.35% )\nL1-dcache-loads         384.31  (  +-  0.00% )\nL1-dcache-load-misses     5.77  (  +- 84.57% )</pre>\n\n<p>This compares quite well to the UEK5-baseline:</p>\n\n<pre class=\&quot;brush: bash;\&quot; style=\&quot;background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;\&quot;>\n# perf stat -d -r 5 ./getpid\n# output normalized for a single getpid() call\n\ncycles                  672.90  (  +-  1.65% )\ninstructions           1622.08  (  +-  0.00% )\nIPC                       2.41  (  +-  1.65% )\nbranches                321.20  (  +-  0.00% )\nbranch-misses             0.51  (  +-  0.00% )\nL1-dcache-loads         401.32  (  +-  0.00% )\nL1-dcache-load-misses     2.28  (  +- 59.62% )</pre>\n\n<p>Note for non-Skylake-X architectures: Intel Icelake and AMD Milan (the other architectures tested) cope with L1d-load-misses much better so the baseline performance is much better.</p>\n\n<p>With these patches, they only show a small improvement (~10ns): Icelake has a bigger L1d-cache (48K), and a much bigger ROB. 
Milan also has a bigger ROB and does memory renaming and a bunch of other pipeline optimizations that limit the effect of these optimizations.</p>\n\n<p><strong>Endnote:</strong> what I found personally instructive was how much C really is &amp;ldquo;a portable assembler&amp;rdquo; and the significant codegen (and performance) changes that can result from minimal changes to the code.</p>\n\n<h2 id=\&quot;references\&quot;>References</h2>\n\n<ol type=\&quot;1\&quot;>\n\t<li>\n\t<p><code style=\&quot;background:#eeeeee;border:1px solid #cccccc;\&quot;>getpid()</code> has a minimal kernel execution path (only does a PID lookup), and so is generally used to measure the overhead of the syscall path.</p>\n\t</li>\n\t<li>\n\t<p>Comparing the IPC for the audit-only portion shows a starker drop:</p>\n\n\t<pre class=\&quot;brush: bash;\&quot; style=\&quot;background:#eeeeee;border:1px solid #cccccc;padding:5px 10px;\&quot;>\nUEK5: 1427.0  instructions  #  3.41  insn per cycle\nUEK6: 1432.0  instructions  #  2.84  insn per cycle</pre>\n\t</li>\n\t<li>\n\t<p>Alas no, alias analysis is an undecidable problem.</p>\n\t</li>\n\t<li>\n\t<p>Or for that matter, what causes the extra branch-miss.</p>\n\t</li>\n\t<li>\n\t<p>Another possibility is out-of-line code -- frequent interrupts, vmexits etc -- trashing the cache but from profiling these were a non-issue.</p>\n\t</li>\n\t<li>\n\t<p>Measured using the pearson-quotient(x, y): correlation coefficient between quantities x and y.</p>\n\t</li>\n\t<li>\n\t<p>You might notice that this analysis does not address the extra branch-miss. That&amp;#39;s because I still have no clue what causes it.</p>\n\t</li>\n\t<li>\n\t<p>The correct fix would be to fix whatever ails the allocator. However, from a quick look at the changes that have gone into related code, it seems non-trivial to find a particular commit which points to the root cause of the skew (especially given that the skew is not constant, but varies from run-to-run.) 
Also, notably, the fixes described below also apply to UEK5, which means that even if UEK6 becomes faster, UEK5 will also improve somewhat.</p>\n\t</li>\n\t<li>\n\t<p>As mentioned in <a href=\&quot;#cpu-parameters\&quot;>CPU-parameters</a>, L1d-loads take 4-6 cycles on Skylake-X. We also know that in the good case (UEK5), this loop is capable of an IPC of 3.41 insn per cycle. So, hiding L1d-load latency is critical for good performance.</p>\n\t</li>\n\t<li>\n\t<p><a href=\&quot;https://www.agner.org/optimize/instruction_tables.pdf\&quot;>https://www.agner.org/optimize/instruction_tables.pdf</a>, pg 298 (Skylake-X)</p>\n\t</li>\n\t<li>\n\t<p>The first form fused, needs three inputs: <code style=\&quot;background:#eeeeee;border:1px solid #cccccc;\&quot;>%rbx</code>, an <code style=\&quot;background:#eeeeee;border:1px solid #cccccc;\&quot;>imm32</code> encoding the distance to the address being compared, and an <code style=\&quot;background:#eeeeee;border:1px solid #cccccc;\&quot;>imm8</code> encoding the distance to the branch-dest; the second needs two registers: <code style=\&quot;background:#eeeeee;border:1px solid #cccccc;\&quot;>%rbx</code>, <code style=\&quot;background:#eeeeee;border:1px solid #cccccc;\&quot;>%r12</code> and only the <code style=\&quot;background:#eeeeee;border:1px solid #cccccc;\&quot;>imm8</code>.</p>\n\t</li>\n\t<li>\n\t<p>Just for context, a kernel build (x86-defconfig) makes an aggregate of 27M syscalls, with a syscall every 44us.</p>\n\t</li>\n</ol>\n&quot;,&quot;translate&quot;:false,&quot;og_description&quot;:&quot;An in-depth exploration into why syscall latency increased on some x86 kernels in recent kernels. &quot;,&quot;featuredpost&quot;:false,&quot;audience&quot;:null,&quot;product&quot;:null,&quot;comments&quot;:true,&quot;meta_title&quot;:&quot;Syscall latency...  
and some uses of speculative execution&quot;,&quot;time_to_read&quot;:23,&quot;author&quot;:[{&quot;id&quot;:&quot;COREF415334566DE45208D79D6CD6FA88629&quot;,&quot;type&quot;:&quot;Blog-Author&quot;,&quot;typeCategory&quot;:&quot;ContentType&quot;,&quot;links&quot;:[]}],&quot;translated-pages&quot;:null,&quot;canonical_url&quot;:null,&quot;featured_image&quot;:{&quot;id&quot;:&quot;CONTCF8836A82B014903A5283C76DE901346&quot;,&quot;type&quot;:&quot;DigitalAsset&quot;,&quot;typeCategory&quot;:&quot;DigitalAssetType&quot;,&quot;links&quot;:[]},&quot;og_image&quot;:{&quot;id&quot;:&quot;CONTCF8836A82B014903A5283C76DE901346&quot;,&quot;type&quot;:&quot;DigitalAsset&quot;,&quot;typeCategory&quot;:&quot;DigitalAssetType&quot;,&quot;links&quot;:[]},&quot;meta_description&quot;:&quot;An in-depth exploration into why syscall latency increased on some x86 kernels in recent kernels. &quot;,&quot;meta_robots&quot;:&quot;index, follow&quot;,&quot;primary_channel&quot;:&quot;linux&quot;,&quot;globalhomepagefeaturedpost&quot;:false,&quot;publish_date&quot;:{&quot;value&quot;:&quot;2023-09-12T15:00:00.000Z&quot;,&quot;timezone&quot;:&quot;UTC&quot;,&quot;formated&quot;:&quot;September 12, 2023&quot;},&quot;desc&quot;:&quot;An in-depth exploration into why syscall latency increased on some x86 kernels in recent kernels. &quot;,&quot;author_id&quot;:&quot;CORE8B88E20204C04A0DADCEBC0499683C49&quot;,&quot;categories&quot;:[{&quot;category&quot;:&quot;Technologies&quot;,&quot;pageUrl&quot;:&quot;../category/lnx-technologies&quot;},{&quot;category&quot;:&quot;Linux Kernel Development&quot;,&quot;pageUrl&quot;:&quot;../category/lnx-linux-kernel-development&quot;}]}}"></div>
         1248 <!--  -->
         1249 <script>
         1250     window.SCSMacros = window.SCSMacros || {}; // reuse window.SCSMacros if it is already set, otherwise start from an empty object
         1251     var pubdate = document.getElementById("pubdate").value; // reads .value of the element with id "pubdate"; assumes that element exists (not visible in this part of the page) - a null lookup would throw here
         1252     window.SCSMacros.getCreationdateMacro = pubdate; // stores the value itself (not a function) under SCSMacros.getCreationdateMacro
         1253    
         1254 
         1255 </script>
         1256 <script>
         1257     if (SCS && SCS['siteId'] === "Blogs-Home") { // only acts when SCS.siteId is "Blogs-Home"; assumes a global SCS is declared elsewhere - an undeclared SCS would throw a ReferenceError
         1258         var Primarychannel = document.getElementById("primarychannel").value; // assumes an element with id "primarychannel" exists (not visible in this part of the page)
         1259         if(Primarychannel != 'undefined' && Primarychannel != 'Blogs-Home'){ // note: compares against the string 'undefined', not the undefined value
         1260         var slug = window.location.pathname.split('/')[2]; // piece at index 2 of the "/"-split path (index 0 is the empty string before the leading slash)
         1261         var domain = window.location.origin;
         1262         if(domain === "https://blogs.oracle.com"){ // exact match on the production origin
         1263              window.location.replace("https://blogs.oracle.com/"+Primarychannel+"/post/"+slug); // replace() navigates without adding a history entry
         1264         }else{
         1265             window.location.replace("https://blogs-stage.oracle.com/"+Primarychannel+"/post/"+slug); // every other origin is sent to the blogs-stage host
         1266         }
         1267     }
         1268     }
         1269 </script>
         1270 
         1271 <script type="text/javascript">
         1272     /*! ORACLE - TRACKING URL */
         1273     $(document).ready(function() { // runs from jQuery's document-ready callback; assumes jQuery ($) is loaded elsewhere on the page
         1274         const oracleLinks = document.querySelectorAll('a[href*="go.oracle.com"][data-trackas]'); // anchors whose href contains "go.oracle.com" and that carry a data-trackas attribute
         1275 
         1276 oracleLinks.forEach(link => { // one pass per matching link
         1277   const url = new URL(link.href); // parse the link's resolved href
         1278 
         1279   if (!url.searchParams.has('source') && !url.searchParams.has('src1')) { // links that already carry "source" or "src1" are left untouched
         1280     const currentUrlParams = new URLSearchParams(window.location.search); // query parameters of the current page
         1281     const existingUrlParams = new URLSearchParams(url.search); // query parameters already on the link
         1282 
         1283     if (currentUrlParams.has('source')) { // forward the page's "source" value under the name "src1"
         1284       currentUrlParams.set('src1', currentUrlParams.get('source'));
         1285       currentUrlParams.delete('source');
         1286     }
         1287 
         1288     let mergedParams = existingUrlParams.toString(); // serialized form has no leading "?"
         1289     if (mergedParams) {
         1290       mergedParams += '&'; // link already has parameters: add a separator (it stays as a trailing "&" when the page has no parameters)
         1291     } else {
         1292       mergedParams += '?'; // link has no parameters: start the query string
         1293     }
         1294     mergedParams += currentUrlParams.toString(); // append the page's (possibly renamed) parameters
         1295 
         1296     url.search = mergedParams;
         1297 
         1298     link.href = decodeURIComponent(url.toString()); // NOTE(review): decoding the whole URL also un-escapes reserved characters such as %26 or %3D inside values, and decodeURIComponent throws URIError on a malformed % sequence - confirm this is intended
         1299    
         1300   }
         1301 });
         1302     });
         1303 </script>
         1304 
         1305                                 </div>
         1306                         </div>
         1307                 </div>
         1308         </div>
         1309 </div>
         1310 </div></div></div></div></div>
         1311                 <!-- <div class="scs-slot" id="category-id"></div> -->
         1312                 <!-- RH03v5 -->
         1313                 <div id="Next-Previous-Posts" class="scs-slot"><div class="scs-row"><div class="scs-col" style="width: 100%;"><div id="a057a3dc-2397-4b35-88dc-e9904a3f1789"><div class="scs-component-bounding-box"><!-- -->
         1314 <div>
         1315         <div class="scs-custom-component scs-component scs-component-default-style" style="margin-top:5px;margin-right:5px;margin-bottom:5px;margin-left:5px;">
         1316                 <div class="scs-component-content" style="width:100%;">
         1317                         <div style="" class="scs-custom-component-wrapper">
         1318                                 <div id="a057a3dc-2397-4b35-88dc-e9904a3f1789customComponentDiv" data-scs-hydrate="true" data-scs-contenttype="Blog-Post" data-asset-operation="view:CORE8B88E20204C04A0DADCEBC0499683C49">
         1319                                         <section class="rc83 rc83v0 rw-neutral-00bg cpad xwidth">
         1320     <div class="rc83w1 cwidth">
         1321         <div class="rc83pagenav">
         1322             <div class="rc83nav-lt">
         1323                                 <a href="oracle-linux-automation-manager-21" class="rc83arrow-lt">
         1324                     <div class="icn-img icn-chevron-left"><br></div>
         1325                     <p id="PreviousPostText">Previous Post</p>
         1326                 </a>
         1327                 <h4>Discover the Latest Advancements in Automation with Oracle Linux Automation Manager 2.1</h4>
         1328                 <div class="rc83sub">
         1329                     <span><a href="/authors/monica-s">Monica S</a> | </span><span>3</span><span> min read</span>
         1330                 </div>
         1331              </div>
         1332             <div class="rc83nav-rt">
         1333                 <a href="get-inspired-at-oracle-cloudworld-2023hear-from-customers-technical-industry-experts-and-executives-and-get-your-questions-answered" class="rc83arrow-rt">
         1334                     <p id="NextPostText">Next Post</p>
         1335                     <div class="icn-img icn-chevron-right"><br></div>
         1336                 </a>
         1337                 <h4>Get inspired at Oracle CloudWorld 2023—hear from customers, technical industry experts, and executives and get your questions answered</h4>
         1338                 <div class="rc83sub">
         1339                     <span><a href="/authors/michele-resta">Michele Resta</a> | </span><span>3</span><span> min read</span>
         1340                 </div>
         1341              </div>
         1342         </div>
         1343     </div>
         1344 </section>
         1345 
         1346                                 </div>
         1347                         </div>
         1348                 </div>
         1349         </div>
         1350 </div>
         1351 </div></div></div></div></div>
         1352                 <!-- <div class="scs-slot" id="recent-posts"></div> -->
         1353                 <!-- U10v6 -->
         1354                 <div class="u10 u10v6" data-trackas="ffooter" data-ocomid="redwood">
         1355 
         1356                         <div class="u10w1">
         1357 
         1358                                 <div class="u10w2">
         1359                                         <div class="u10w3">
         1360                                                 <h5>Resources for</h5>
         1361                                                 <ul>
         1362                                                         <li><a href="https://www.oracle.com/corporate/">About</a></li>
         1363                                                         <li><a href="https://www.oracle.com/corporate/careers/"
         1364                                                                         data-lbl="about-oracle:careers">Careers</a></li>
         1365                                                         <li><a href="https://developer.oracle.com">Developers</a></li>
         1366                                                         <li><a href="https://investor.oracle.com/home/default.aspx">Investors</a></li>
         1367                                                         <li><a href="https://www.oracle.com/partner/">Partners</a></li>
         1368                                                         <li><a href="https://www.oracle.com/startup/">Startups</a></li>
         1369                                                 </ul>
         1370                                         </div>
         1371                                 </div>
         1372                                 <div class="u10w2">
         1373                                         <div class="u10w3">
         1374                                                 <h5>Why Oracle</h5>
         1375                                                 <ul>
         1376                                                         <li><a href="https://www.oracle.com/corporate/analyst-reports.html">Analyst Reports</a></li>
         1377                                                         <li><a href="https://www.oracle.com/cx/what-is-crm/ ">Best CRM</a></li>
         1378                                                         <li><a href="https://www.oracle.com/cloud/economics/">Cloud Economics</a></li>
         1379                                                         <li><a href="https://www.oracle.com/corporate/citizenship/">Corporate Responsibility</a>
         1380                                                         </li>
         1381                                                         <li><a href="https://www.oracle.com/corporate/careers/diversity-inclusion/">Diversity and
         1382                                                                         Inclusion</a></li>
         1383                                                         <li><a href="https://www.oracle.com/corporate/security-practices/">Security Practices</a>
         1384                                                         </li>
         1385                                                 </ul>
         1386                                         </div>
         1387                                 </div>
         1388                                 <div class="u10w2">
         1389                                         <div class="u10w3">
         1390                                                 <h5>Learn</h5>
         1391                                                 <ul>
         1392                                                         <li><a href="https://www.oracle.com/cx/service/what-is-customer-service/ ">What is Customer
         1393                                                                         Service?</a></li>
         1394                                                         <li><a href=" https://www.oracle.com/erp/what-is-erp/">What is ERP?</a></li>
         1395                                                         <li><a
         1396                                                                         href=" https://www.oracle.com/cx/marketing/automation/what-is-marketing-automation/ ">What
         1397                                                                         is Marketing Automation?</a></li>
         1398                                                         <li><a href="https://www.oracle.com/erp/what-is-procurement/ ">What is Procurement?</a></li>
         1399                                                         <li><a
         1400                                                                         href="https://www.oracle.com/human-capital-management/talent-management/what-is-talent-management/ ">What
         1401                                                                         is Talent Management?</a></li>
         1402                                                         <li><a
         1403                                                                         href=" https://www.oracle.com/cloud/compute/virtual-machines/what-is-virtual-machine/ ">What
         1404                                                                         is VM?</a></li>
         1405                                                 </ul>
         1406                                         </div>
         1407                                 </div>
         1408                                 <div class="u10w2"><!-- Footer column: "What's New" promotional links -->
         1409                                         <div class="u10w3">
         1410                                                 <h5>What's New</h5>
         1411                                                 <ul>
         1412                                                         <li><a
         1413                                                                         href="https://www.oracle.com/cloud/free/?source=:ow:o:h:nav:050120SiteFooter&amp;intcmp=:ow:o:h:nav:050120SiteFooter">Try
         1414                                                                         Oracle Cloud Free Tier</a></li>
         1415                                                         <li><a href="https://www.oracle.com/solutions/green/">Oracle Sustainability</a></li>
         1416                                                         <li><a href="https://www.oracle.com/corporate/covid-19.html">Oracle COVID-19 Response</a>
         1417                                                         </li>
         1418                                                         <li><a href="https://www.oracle.com/sailgp/">Oracle and SailGP</a></li>
         1419                                                         <li><a href="https://www.oracle.com/premier-league/">Oracle and Premier League</a></li>
         1420                                                         <li><a href="https://www.oracle.com/redbullracing/">Oracle and Red Bull Racing Honda</a>
         1421                                                         </li>
         1422                                                 </ul>
         1423                                         </div>
         1424                                 </div>
         1425                                 <div class="u10w2"><!-- Footer column: contact, subscription and news links -->
         1426                                         <div class="u10w3">
         1427                                                 <h5>Contact Us</h5>
         1428                                                 <ul>
         1429                                                         <li><a href="tel:+18006330738">US Sales 1.800.633.0738</a></li>
         1430                                                         <li><a href="https://www.oracle.com/corporate/contact/">How can we help?</a></li>
         1431                                                         <li><a href="https://go.oracle.com/subscriptions">Subscribe to Oracle Content</a></li>
         1432                                                         <li><a
         1433                                                                         href="https://www.oracle.com/cloud/free/?source=:ow:o:h:nav:050120SiteFooter&amp;intcmp=:ow:o:h:nav:050120SiteFooter">Try
         1434                                                                         Oracle Cloud Free Tier</a></li>
         1435                                                         <li><a href="https://www.oracle.com/events/">Events</a></li>
         1436                                                         <li><a href="https://www.oracle.com/news/" data-lbl="news-events:newsroom">News</a></li>
         1437                                                 </ul>
         1438                                         </div>
         1439                                 </div>
         1440                                 <div class="u10w4"><!-- Rule between the link columns and the legal links row -->
         1441                                         <hr>
         1442                                 </div>
         1443 
         1444                                 <div class="u10w5 "><!-- Legal and utility links row -->
         1445                                         <ul class="u10-links u10w10">
         1446                                                 <li><a data-lbl="copyright" href="https://www.oracle.com/legal/copyright.html">&#169; 2022 Oracle</a></li>
         1447                                                 <!-- The two privacy links share one list item, separated by a slash -->
         1448                                                 <li><a data-lbl="privacy"
         1449                                                                 href="https://www.oracle.com/legal/privacy/">Privacy</a><span>/</span><a
         1450                                                                 data-lbl="do-not-sell-my-info"
         1451                                                                 href="https://www.oracle.com/legal/privacy/privacy-choices.html">Do Not Sell My Info</a>
         1452                                                 </li>
         1453                                                 <li>
         1454                                                         <div id="teconsent"> </div>
         1455                                                 </li>
         1456                                                 <li><a data-lbl="ad-choices"
         1457                                                                 href="https://www.oracle.com/legal/privacy/privacy-policy.html#advertising">Ad Choices</a></li>
         1458                                                 <li><a data-lbl="careers" href="https://www.oracle.com/corporate/careers/">Careers</a></li>
         1459                                         </ul>
         1460 
         1461                                 </div>
         1462                         </div>
         1463 
         1464                 </div>
         1465                 <!-- /U10v6 -->
         1466 
         1467 
         1468         </div>
         1469         <script type="text/javascript" src="https://www.oracle.com/us/assets/metrics/ora_compendiumblogs.js"></script>
         1470         <script type="text/javascript" src="https://www.oracle.com/assets/truste-oraclelib.js"></script><!-- Next: consent notice script. Explicit https scheme so it also resolves when the page is not served over http(s); query separators are escaped so "&gtm" and "&noticeType" cannot be read as the legacy &gt and &not references. -->
         1471         <script async="async" type="text/javascript"
         1472                 src="https://consent.trustarc.com/notice?domain=oracle.com&amp;c=teconsent&amp;js=bb&amp;noticeType=bb&amp;text=true&amp;gtm=1&amp;cdn=1&amp;pcookie"
         1473                 crossorigin=""></script>
         1474         <script type="text/javascript" src="../_cache_8b25/siteinfo-common.js" charset="utf-8"></script><script type="text/javascript" src="../siteinfo-dynamic.js"></script>
         1475         <script src="../_cache_8b25/_sitesclouddelivery/renderer/renderer.js"></script>
         1476         
         1477         
         1478 
         1479         <!-- Blogs-New-Theme scripts -->
         1480         <script src="../_cache_8b25/_themesdelivery/Blogs-New-Theme/assets/js/blogs-script.js"></script>
         1481         <script src="../_cache_8b25/_themesdelivery/Blogs-New-Theme/assets/js/oracle-script.js"></script>
         1482 
         1483         <!-- Avoid a flash of unstyled content (FOUC) in Firefox (FF) when style sheets load asynchronously: force the body to full opacity -->
         1484         <style>
         1485                 body {
         1486                         opacity: 1;
         1487                 }
         1488         </style>
         1489 
         1490         <script>
         1491                 // Once the DOM is ready, replace the static footer copyright text with the current year.
         1492                 $(function () {
         1493                         var currentYear = new Date().getFullYear();
         1494                         $('a[data-lbl="copyright"]').html("&copy; " + currentYear + " Oracle ");
         1495                 });
         1496         </script>
         1496         <!--DTM/Launch embed code - Footer -->
         1497 
         1498 </body>
         1499 
         1500