diff --git a/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/.vscode/c_cpp_properties.json b/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/.vscode/c_cpp_properties.json
new file mode 100644
index 0000000000..2b8e56ba4b
--- /dev/null
+++ b/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/.vscode/c_cpp_properties.json
@@ -0,0 +1,17 @@
+{
+    "configurations": [
+        {
+            "name": "Linux",
+            "includePath": [
+                "${workspaceFolder}/**"
+            ],
+            "defines": [],
+            "compilerPath": "/opt/intel/oneapi/compiler/latest/linux/bin/icpx",
+            "compilerArgs": [ "-fsycl" ],
+            "cStandard": "gnu17",
+            "cppStandard": "gnu++17",
+            "intelliSenseMode": "linux-gcc-x64"
+        }
+    ],
+    "version": 4
+}
\ No newline at end of file
diff --git a/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/.vscode/launch.json b/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/.vscode/launch.json
new file mode 100644
index 0000000000..df305ab886
--- /dev/null
+++ b/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/.vscode/launch.json
@@ -0,0 +1,35 @@
+{
+    "configurations": [
+    {
+        "name": "C/C++: Intel icpx build and debug gaussian_blur",
+        "type": "cppdbg",
+        "request": "launch",
+        "program": "${workspaceFolder}/bin/${config:programName}_d",
+        "args": ["${workspaceFolder}/bin/sample_image.jpg"],
+        "stopAtEntry": true,
+        "cwd": "${fileDirname}",
+        "environment": [],
+        "externalConsole": false,
+        "MIMode": "gdb",
+        "setupCommands": [
+            {
+                "description": "Enable pretty-printing for gdb",
+                "text": "-enable-pretty-printing",
+                "ignoreFailures": true
+            },
+            {
+                "description":  "Set Disassembly Flavor to Intel",
+                "text": "-gdb-set disassembly-flavor intel",
+                "ignoreFailures": true
+            },
+            {
+                "description": "Needed by Intel oneAPI: Disable target async",
+                "text": "set target-async off",
+                "ignoreFailures": true
+            }
+        ],
+        "preLaunchTask": "gaussian_blur Debug C/C++: Intel icpx build active file",
+        "miDebuggerPath": "/opt/intel/oneapi/debugger/latest/gdb/intel64/bin/gdb-oneapi"
+    }
+    ]
+}
\ No newline at end of file
diff --git a/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/.vscode/settings.json b/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/.vscode/settings.json
new file mode 100644
index 0000000000..d7ab33f09f
--- /dev/null
+++ b/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+    "programName": "gaussian_blur"
+}
\ No newline at end of file
diff --git a/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/.vscode/tasks.json b/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/.vscode/tasks.json
new file mode 100644
index 0000000000..70aca5a0ce
--- /dev/null
+++ b/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/.vscode/tasks.json
@@ -0,0 +1,49 @@
+{
+	"version": "2.0.0",
+	"tasks": [
+		{
+			"type": "cppbuild",
+			"label": "gaussian_blur Debug C/C++: Intel icpx build active file",
+			"command": "/opt/intel/oneapi/compiler/latest/linux/bin/icpx",
+			"args": [
+				"-fsycl",
+				"-fdiagnostics-color=always",
+				"-fsycl-device-code-split=per_kernel",
+				"-fno-limit-debug-info",
+				"-g",
+				"-O0",
+				"${workspaceFolder}/src/${config:programName}.cpp",
+				"-o",
+				"${workspaceFolder}/bin/${config:programName}_d"
+			],
+			"options": {
+				"cwd": "${workspaceFolder}"
+			},
+			"problemMatcher": [
+				"$gcc"
+			],
+			"group": "build",
+			"detail": "compiler: /opt/intel/oneapi/compiler/latest/linux/bin/icpx"
+		},
+		{
+			"type": "cppbuild",
+			"label": "gaussian_blur Release C/C++: Intel icpx build active file",
+			"command": "/opt/intel/oneapi/compiler/latest/linux/bin/icpx",
+			"args": [
+				"-fsycl",
+				"-DNDEBUG",
+                "${workspaceFolder}/src/${config:programName}.cpp",
+				"-o",
+				"${workspaceFolder}/bin/${config:programName}"
+			],
+			"options": {
+				"cwd": "${workspaceFolder}"
+			},
+			"problemMatcher": [
+				"$gcc"
+			],
+			"group": "build",
+			"detail": "compiler: /opt/intel/oneapi/compiler/latest/linux/bin/icpx"
+		}
+	]
+}
\ No newline at end of file
diff --git a/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/CMakeLists.txt b/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/CMakeLists.txt
new file mode 100644
index 0000000000..9a59e896ed
--- /dev/null
+++ b/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/CMakeLists.txt
@@ -0,0 +1,16 @@
+if(WIN32)
+        set(CMAKE_CXX_COMPILER "dpcpp-cl")
+        set(CMAKE_C_COMPILER "dpcpp-cl")
+else()
+        set(CMAKE_CXX_COMPILER "icpx")
+endif()
+set(CMAKE_CXX_STANDARD 17)
+if(NOT DEFINED ${CMAKE_BUILD_TYPE})
+	set(CMAKE_BUILD_TYPE "RELEASE")
+endif()
+if( CMAKE_BUILD_TYPE STREQUAL "DEBUG" )
+	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g")
+endif()
+cmake_minimum_required (VERSION 3.4)
+project (gaussian_blur)
+add_subdirectory (src)
diff --git a/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/Images/sample_image-blurred.png b/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/Images/sample_image-blurred.png
new file mode 100644
index 0000000000..6decbd0b6c
Binary files /dev/null and b/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/Images/sample_image-blurred.png differ
diff --git a/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/Images/sample_image.jpg b/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/Images/sample_image.jpg
new file mode 100644
index 0000000000..be810ad43e
Binary files /dev/null and b/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/Images/sample_image.jpg differ
diff --git a/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/LICENSE.txt b/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/LICENSE.txt
new file mode 100644
index 0000000000..d645695673
--- /dev/null
+++ b/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/LICENSE.txt
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/README.md b/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/README.md
new file mode 100644
index 0000000000..06f7782232
--- /dev/null
+++ b/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/README.md
@@ -0,0 +1,101 @@
+# Image Gaussian Blur example program
+
+## Purpose
+This SYCL code example implements a Gaussian blur filter, blurring
+either a JPG or PNG image from the command line. The original file is not modified.
+The output file is in a PNG format.
+
+__Output Image:__
+
+
+
+
+## Prerequisites
+
+| Minimum Requirements              | Description
+|:---                               |:---
+| OS                                | Linux* Ubuntu* 20.04.5 LTS
+| Hardware                          | Intel® 11th Gen Intel Core i7-1185G7 + Mesa Intel Xe Graphics
+| Compiler Toolchain                | Visual Studio Code IDE, Intel oneAPI Base Toolkit (inc its prerequisite)
+| Libraries                         | Install Intel oneAPI Base Toolkit 
+| Tools                             | Visual Studio Code 1.73.1, VSCode Microsoft C/C++ extns, a .png capable image viewer
+
+## Build and Run using Visual Code Studio
+
+### Linux*
+
+Within a terminal window change directory to this project's folder. At the 
+terminal prompt type:
+
+```
+cd ImageGuassianBlur
+code .
+```
+
+Visual Studio Code will open this project displaying its files in the Explorer 
+pane. 
+The project is already set up with build configurations to build either a
+debug build or a release build of the program. When a program is built, it is
+placed in the bin directory of this project's top folder. 
+
+To build the program hit Ctrl+Shift+b and choose the type of program to build.
+The debug executable will have a '_d' appended to its name.
+
+To blur an image, copy the images/sample_image.jpg to the bin directory.
+To execute the program, type in the Visual Studio Code terminal window:
+```
+cd bin
+./gaussian_blur_d sample_image.jpg
+```
+A new image file will appear in the bin directory 'sample_image-blurred.png'.
+To view the image, select it in the directory folder app and hit return.
+Ubuntu will display the image using the preview app.
+
+## Build and Run using CMake
+### Linux*
+```
+mkdir build
+cd build
+cmake ..
+make
+```
+
+To blur an image, copy the images/sample_image.jpg to the directory of the new
+executable. Type in the terminal window:
+
+```
+cd build/src
+./gaussian_blur sample_image.jpg
+```
+Open the resulting file: `sample_image-blurred.png` with an image viewer.
+
+## Debug the program using Visual Studio Code
+
+### Linux*
+
+Due to an issue with the image load library function stbi_load, make the 
+directory bin (if it does not exist already) and copy the sample_image.jpg
+file into it. This will allow the program to find the file and continue the
+debug session.
+
+To debug the program, either choose from the IDE's run menu 
+'Start debugging' or hit F5 on the keyboard.
+The debug launch.json configuration file defines the debug session to:
+* To halt the program at the first line of code after main().
+Use the GUI debug panel's buttons to step over code (key F10) lines to see the 
+program advance. 
+Breakpoints can be set either in the main code or the kernel code.
+
+Note: Setting breakpoints in the kernel code does not present the normal 
+      step through code behavior. Instead a breakpoint event is occurring
+      on each thread being executed and so switches to the context of 
+      that thread. To step through the code of a single thread, use the 
+      Intel gdb-oneapi command 'set scheduler-locking step' or 'on' in the 
+      IDE's debug console prompt. As this is not the main thread, be sure
+      to revert this setting on returning to debug any host side code. 
+      Use the command 'set scheduler-locking replay' or 'off'.  
+
+## License
+
+Code samples are licensed under the Apache 2.0 license. See
+[LICENSE.txt](LICENSE.txt) for details.
diff --git a/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/src/CMakeLists.txt b/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/src/CMakeLists.txt
new file mode 100644
index 0000000000..53d0c0958a
--- /dev/null
+++ b/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/src/CMakeLists.txt
@@ -0,0 +1,6 @@
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -fsycl")
+set(CMAKE_BUILD_TYPE "RelWithDebInfo")
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}")
+add_executable (gaussian_blur gaussian_blur.cpp)
+target_link_libraries(gaussian_blur OpenCL sycl)
+
diff --git a/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/src/gaussian_blur.cpp b/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/src/gaussian_blur.cpp
new file mode 100644
index 0000000000..8fd35018dd
--- /dev/null
+++ b/DirectProgramming/DPC++/DenseLinearAlgebra/GaussianImageFilter/src/gaussian_blur.cpp
@@ -0,0 +1,325 @@
+//============================================================================
+// Copyright © 2022 Intel Corporation
+//
+// SPDX-License-Identifier: MIT
+// ===========================================================================
+
+//****************************************************************************
+// 
+// Description:
+// This advanced SYCL code example implements a Gaussian blur filter, blurring
+// a JPG or PNG image from the command line. The original file is not modified.
+// The output file is a PNG image. 
+//
+// Usage:
+// The program blurs an image provided on the command line.
+//
+//*****************************************************************************
+
+// SYCL or oneAPI toolkit headers:
+#include 
+
+// Third party headers:
+#include 
+#include 
+// These public domain headers implement useful image reading and writing
+// functions. Find in ${oneAPI}/dev-utilities/include
+#define STB_IMAGE_IMPLEMENTATION
+#include "stb/stb_image.h"
+#define STB_IMAGE_WRITE_IMPLEMENTATION
+#include "stb/stb_image_write.h"
+
+// Forward declaration of this example's SYCL kernels
+class KernelFillGaussian;
+class KernelGaussian;
+
+using namespace sycl;
+using namespace std;
+
+// Attempts to determine a good local size. The best way to *control* 
+// performance is to choose the sizes. The method here is to choose the 
+// largest number, leq 64, which is a power-of-two, and divides the global
+// work size evenly. In this code, it might prove most optimal to pad the
+// image along one dimension so that the local size could be 64, but this 
+// introduces other complexities.
+range< 2 > GetOptimalLocalRange( range< 2 > globalSize, device hw ) 
+{
+  range< 2 > optimalLocalSize{ 0, 0 };
+
+  // 64 is a good local size on GPU-like devices, as each compute unit is
+  // made of many smaller processors. On non-GPU devices, 4 is a common vector
+  // width. 
+  if( hw.is_gpu() ) 
+  {
+    optimalLocalSize = range< 2 >( 64, 1 );
+  } 
+  else 
+  {
+    optimalLocalSize = range< 2 >( 4, 1 );  
+  }
+
+  // Here, for each dimension, we make sure that it divides the global size
+  // evenly. If it doesn't, we try the next lowest power of two. Eventually
+  // it will reach one, if the global size has no power of two component.
+  for( int i = 0; i < 2; ++i ) 
+  {
+    while( globalSize[ i ] % optimalLocalSize[ i ] != 0 ) 
+    {
+      optimalLocalSize[ i ] = optimalLocalSize[ i ] >> 1;
+    }
+  }
+
+  return optimalLocalSize;
+}
+
+// Asynchronous errors hander, catch faults in asynchronously executed code
+// inside a command group or a kernel. They can occur in a different stackframe, 
+// asynchronous error cannot be propagated up the stack. 
+// By default, they are considered 'lost'. The way in which we can retrieve them
+// is by providing an error handler function.
+auto exception_handler = []( sycl::exception_list exceptions ) 
+{
+    for( std::exception_ptr const &e : exceptions ) 
+    {
+        try 
+        {
+          std::rethrow_exception( e );
+        } 
+        catch( sycl::exception const &e ) 
+        {
+          std::cout << 
+          "Queue handler caught asynchronous SYCL exception:\n" << 
+          e.what() << std::endl;
+        }
+    }
+};
+
+// The Gaussian program
+int main( int argc, char* argv[] ) 
+{
+  bool bProgramError = false;
+
+  // Validate user input
+  if( argc < 2 ) 
+  {
+    std::cout
+        << "Please provide a JPEG or PNG image as an argument to this program."
+        << std::endl;
+  }
+
+  // ********************
+  // Input image handling
+  // ********************
+  // The image dimensions will be set by the library, as will the number of
+  // channels. However, passing a number of channels will force the image
+  // data to be returned in that format, regardless of what the original image
+  // looked like. The header has a mapping from int values to types - 4 means
+  // RGBA. 
+  int inputWidth = 0;
+  int inputHeight = 0;
+  int inputChannels = 0;
+  
+  // Number of color channels RGBA// Project files:
+  const int numChannels = 4;
+  const char *pImageFileName = argv[ 1 ];
+  unique_ptr< unsigned char [] > pInputImg( stbi_load( pImageFileName, 
+                     &inputWidth, &inputHeight, &inputChannels, numChannels ) );
+  if( pInputImg == nullptr ) 
+  {
+    bProgramError = true;
+    std::cout << "Failed to load image file (is argv[1] a valid image file?)"
+              << std::endl;
+    exit(-1);
+  }
+
+  // RAII resource
+  unique_ptr< unsigned char [] > pOutputImg( 
+    new unsigned char[ inputWidth * inputHeight * numChannels ] );
+         
+  try
+  {
+    sycl::device hw = device( sycl::cpu_selector_v );
+    queue myQueue( hw, exception_handler );
+    
+    // *******************************************
+    // Create gaussian convolution matrix and fill
+    // *******************************************
+    const float pi = std::atan( 1 ) * 4;
+    constexpr auto guasStdDev = 2;     
+    constexpr auto guasDelta = 6; 
+    const int guasMatrixRange = (guasDelta * guasStdDev);
+    const float guasStdDevFactor = 2 * guasStdDev * guasStdDev;
+    const float piFactor = guasStdDevFactor * pi;
+    const int gaussianBlurRange = guasMatrixRange * guasMatrixRange;
+    vector< float > gaussianBlurMatrix( gaussianBlurRange );
+
+    // The nd_range contains the total work (as mentioned previously) as
+    // well as the local work size (i.e. the number of threads in the local
+    // group). Here, we attempt to find a range close to the device's
+    // preferred size that also divides the global size neatly.
+    auto optRange = GetOptimalLocalRange( 
+      range< 2 >{ guasMatrixRange, guasMatrixRange }, myQueue.get_device() );
+    const nd_range< 2 > gaussianBlurNDRange( 
+      range< 2 >{ guasMatrixRange, guasMatrixRange }, optRange );
+    buffer bufGaussian( gaussianBlurMatrix );
+
+    // Enqueue KernelFillGaussian
+    myQueue.submit( [&]( handler &cgh ) 
+    {
+      const auto ptrGBlur = 
+        bufGaussian.get_access< access::mode::discard_write >( cgh );
+      cgh.parallel_for< KernelFillGaussian >( gaussianBlurNDRange, 
+      [=]( nd_item< 2 > item ) 
+      {
+        // Get the 2D x and y indicies
+        const auto idX = item.get_global_id( 0 );
+        const auto idY = item.get_global_id( 1 );
+        const auto width = item.get_group_range( 0 ) * 
+                           item.get_local_range( 0 );
+        const auto index = idX * width + idY;
+        const auto x = idX - guasDelta;
+        const auto y = idY - guasDelta;
+        float gausVallue = sycl::exp( -1.0f * (x*x + y*y) / guasStdDevFactor );
+        gausVallue /= piFactor;
+        ptrGBlur[ index ] = gausVallue;
+      });
+    });
+
+    // ********************************************************
+    // Using gaussian convolution matrix, blur the input image.
+    // ********************************************************
+    
+    // Images need a void * pointing to the data, and enums describing the
+    // type of the image (since a void * carries no type information). It
+    // also needs a range which describes the image's dimensions.
+    using co = sycl::image_channel_order;
+    using ct = sycl::image_channel_type;
+    // The image data has been returned us an unsigned char [], but due to 
+    // OpenCL restrictions, we must use it as a void *.
+    void *pInputData = (void *) pInputImg.get();
+    void *pOutputData = (void *) pOutputImg.get();
+    // This range represents the full amount of work to be done across the
+    // image. We dispatch one thread per pixel.
+    range< 2 > imgRange( inputWidth, inputHeight );
+    image< 2 > imageIn( pInputData, co::rgba, ct::unorm_int8, imgRange );
+    image< 2 > imageOut( pOutputData, co::rgba, ct::unorm_int8, imgRange );
+    optRange = GetOptimalLocalRange( imgRange, myQueue.get_device() );
+    auto myRange = nd_range< 2 >( imgRange, optRange );
+    constexpr auto offset = guasDelta; 
+
+    // Enqueue KernelGaussian
+    // Because of the dependency on the gaussian convolution grid, the call
+    // graph will automatically schedule this kernel to run after the
+    // KernelFillGaussian is complete.
+    myQueue.submit( [&]( handler &cgh ) 
+    {
+      // Images still require accessors, like buffers, except the target is
+      // always access::target::image.
+      accessor< float4, 2, access::mode::read, access::target::image > 
+                accImgInPtr( imageIn, cgh );
+      accessor< float4, 2, access::mode::discard_write, access::target::image > 
+                accImgOutPtr( imageOut, cgh );
+      const auto ptrGBlur = 
+                bufGaussian.get_access< access::mode::read >( cgh );
+      
+      // The sampler is used to map user-provided co-ordinates to pixels in
+      // the image.
+      sampler smpl( coordinate_normalization_mode::unnormalized,
+                    addressing_mode::none, filtering_mode::nearest );
+
+      // Setting breakpoints in the kernel code does not present the normal 
+      // step through code behavior. Instead a breakpoint event is occurring
+      // on each thread being executed and so switches to the context of 
+      // that thread. To step through the code of a single thread, use the 
+      // Intel gdb-oneapi command 'set scheduler-locking step' or 'on' in the 
+      // IDE's debug console prompt. As this is not the main thread, be sure
+      // to revert this setting on returning to debug any host side code. 
+      // Use the command 'set scheduler-locking replay' or 'off'.  
+      cgh.parallel_for< KernelGaussian >( myRange, [=](nd_item< 2 > item) 
+      {
+         const auto idY = item.get_global_id( 1 );
+         const auto idX = item.get_global_id( 0 );
+         const auto outputCoords = int2( idX, idY );
+         // A boundary is used so the convolution grid does not fall off the
+         // sides of the image. Keep it simple, just copy those pixels at the
+         // edges of the image.
+         const int hitY1 = idY - offset;
+         const int hitY2 = inputHeight - idY - offset;
+         const int hitX1 = idX - offset;
+         const int hitX2 = inputWidth - idX - offset;
+         const bool bBoundryY = (hitY1 < 0) || (hitY2 < 0);
+         const bool bBoundryX = (hitX1 < 0) || (hitX2 < 0);
+         float4 newPixel = float4( 0.0f, 0.0f, 0.0f, 0.0f );
+                             
+         if( !(bBoundryX || bBoundryY) )
+         {
+          // Perform a convolution on a central pixel at idX idY
+          for( int x = 0; x < guasMatrixRange; x++ ) 
+          {
+            for( int y = 0; y < guasMatrixRange; y++ ) 
+            {
+              const auto index = x * guasMatrixRange + y;
+              const float value = ptrGBlur[ index ];
+              const auto inputCoords = 
+                int2( idX + x - offset, idY + y - offset );
+              newPixel += accImgInPtr.read( inputCoords, smpl ) * value;
+            }
+          }
+         }
+         else
+         {
+            // Just duplicate the pixel at idX idY
+            const auto inputCoords = int2( idX, idY );
+            newPixel = accImgInPtr.read( inputCoords, smpl );
+         }
+         newPixel.w() = 1.0f;
+         accImgOutPtr.write( outputCoords, newPixel );
+
+      });
+    });
+    // The host/main thread is asked to wait here until all enqueued kernels 
+    // have completed execution.
+    myQueue.wait_and_throw();
+  }
+  // Synchronous errors are classical C++ exceptions
+  catch( sycl::exception const &e )
+  {
+
+    bProgramError = true;
+    cout << 
+    "Wrap catch caught synchronous SYCL exception:\n" << e.what() << std::endl;
+  }
+
+  if( bProgramError )
+  {
+    std::cout << "Program failed." << std::endl;
+    return -1;
+  }
+
+  // ****************************
+  // Output the new blurred image
+  // ****************************
+  
+  // Attempt to change the name from x.png or x.jpg to x-blurred.png. 
+  // If the code cannot find a '.', it simply appends "-blurred" to the name.
+  std::string outputFilePath;
+  std::string inputName( argv[ 1 ] );
+  auto pos = inputName.find_last_of( "." );
+  if( pos == std::string::npos ) 
+  {
+    outputFilePath = inputName + "-blurred";
+  } 
+  else 
+  {
+    inputName.erase( pos, inputName.size() );
+    outputFilePath = inputName + "-blurred" + ".png";
+  }
+
+  stbi_write_png( outputFilePath.c_str(), inputWidth, inputHeight, numChannels,
+                  pOutputImg.get(), 0 );
+
+  std::cout << 
+    "Program success, the image is successfully blurred!" << std::endl;
+  
+  return 0;
+}
\ No newline at end of file
diff --git a/DirectProgramming/DPC++/MapReduce/MonteCarloPi_v2/.vscode/c_cpp_properties.json b/DirectProgramming/DPC++/MapReduce/MonteCarloPi_v2/.vscode/c_cpp_properties.json
new file mode 100644
index 0000000000..2b8e56ba4b
--- /dev/null
+++ b/DirectProgramming/DPC++/MapReduce/MonteCarloPi_v2/.vscode/c_cpp_properties.json
@@ -0,0 +1,17 @@
+{
+    "configurations": [
+        {
+            "name": "Linux",
+            "includePath": [
+                "${workspaceFolder}/**"
+            ],
+            "defines": [],
+            "compilerPath": "/opt/intel/oneapi/compiler/latest/linux/bin/icpx",
+            "compilerArgs": [ "-fsycl" ],
+            "cStandard": "gnu17",
+            "cppStandard": "gnu++17",
+            "intelliSenseMode": "linux-gcc-x64"
+        }
+    ],
+    "version": 4
+}
\ No newline at end of file
diff --git a/DirectProgramming/DPC++/MapReduce/MonteCarloPi_v2/.vscode/launch.json b/DirectProgramming/DPC++/MapReduce/MonteCarloPi_v2/.vscode/launch.json
new file mode 100644
index 0000000000..6d3bc25b7d
--- /dev/null
+++ b/DirectProgramming/DPC++/MapReduce/MonteCarloPi_v2/.vscode/launch.json
@@ -0,0 +1,48 @@
+{
+    "configurations": [
+    {
+        "name": "C/C++: Intel icpx build and debug MonteCarloPi",
+        "type": "cppdbg",
+        "request": "launch",
+        "program": "${workspaceFolder}/bin/${config:programName}_d",
+        "args": ["${input:args}"],
+        "stopAtEntry": true,
+        "cwd": "${fileDirname}",
+        "environment": [],
+        "externalConsole": false,
+        "MIMode": "gdb",
+        "setupCommands": [
+            {
+                "description": "Enable pretty-printing for gdb",
+                "text": "-enable-pretty-printing",
+                "ignoreFailures": true
+            },
+            {
+                "description":  "Set Disassembly Flavor to Intel",
+                "text": "-gdb-set disassembly-flavor intel",
+                "ignoreFailures": true
+            },
+            {
+                "description": "Needed by Intel oneAPI: Disable target async",
+                "text": "set target-async off",
+                "ignoreFailures": true
+            }
+        ],
+        "preLaunchTask": "MonteCarloPi Debug C/C++: Intel icpx build active file",
+        "miDebuggerPath": "/opt/intel/oneapi/debugger/latest/gdb/intel64/bin/gdb-oneapi"
+    }
+    ],
+    "inputs" : [
+        {
+            "id": "args",
+            "type": "pickString",
+            "description": "Program args",
+            "default": "cpu",
+            "options": [
+                "cpu",
+                "gpu",
+                "accelerator"
+            ]
+        }
+    ]
+}
\ No newline at end of file
diff --git a/DirectProgramming/DPC++/MapReduce/MonteCarloPi_v2/.vscode/settings.json b/DirectProgramming/DPC++/MapReduce/MonteCarloPi_v2/.vscode/settings.json
new file mode 100644
index 0000000000..a2e868b496
--- /dev/null
+++ b/DirectProgramming/DPC++/MapReduce/MonteCarloPi_v2/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+    "programName": "MonteCarloPi"
+}
\ No newline at end of file
diff --git a/DirectProgramming/DPC++/MapReduce/MonteCarloPi_v2/.vscode/tasks.json b/DirectProgramming/DPC++/MapReduce/MonteCarloPi_v2/.vscode/tasks.json
new file mode 100644
index 0000000000..4383e21898
--- /dev/null
+++ b/DirectProgramming/DPC++/MapReduce/MonteCarloPi_v2/.vscode/tasks.json
@@ -0,0 +1,48 @@
+{
+	"version": "2.0.0",
+	"tasks": [
+		{
+			"type": "cppbuild",
+			"label": "MonteCarloPi Debug C/C++: Intel icpx build active file",
+			"command": "/opt/intel/oneapi/compiler/latest/linux/bin/icpx",
+			"args": [
+				"-fsycl",
+				"-fdiagnostics-color=always",
+				"-fno-limit-debug-info",
+				"-g",
+				"-O0",
+				"${workspaceFolder}/src/${config:programName}.cpp",
+				"-o",
+				"${workspaceFolder}/bin/${config:programName}_d"
+			],
+			"options": {
+				"cwd": "${workspaceFolder}"
+			},
+			"problemMatcher": [
+				"$gcc"
+			],
+			"group": "build",
+			"detail": "compiler: /opt/intel/oneapi/compiler/latest/linux/bin/icpx"
+		},
+		{
+			"type": "cppbuild",
+			"label": "MonteCarloPi Release C/C++: Intel icpx build active file",
+			"command": "/opt/intel/oneapi/compiler/latest/linux/bin/icpx",
+			"args": [
+				"-fsycl",
+				"-DNDEBUG",
+                "${workspaceFolder}/src/${config:programName}.cpp",
+				"-o",
+				"${workspaceFolder}/bin/${config:programName}"
+			],
+			"options": {
+				"cwd": "${workspaceFolder}"
+			},
+			"problemMatcher": [
+				"$gcc"
+			],
+			"group": "build",
+			"detail": "compiler: /opt/intel/oneapi/compiler/latest/linux/bin/icpx"
+		}
+	]
+}
\ No newline at end of file
diff --git a/DirectProgramming/DPC++/MapReduce/MonteCarloPi_v2/CMakeLists.txt b/DirectProgramming/DPC++/MapReduce/MonteCarloPi_v2/CMakeLists.txt
new file mode 100644
index 0000000000..482e1862b1
--- /dev/null
+++ b/DirectProgramming/DPC++/MapReduce/MonteCarloPi_v2/CMakeLists.txt
@@ -0,0 +1,16 @@
+if(WIN32)
+        set(CMAKE_CXX_COMPILER "dpcpp-cl")
+        set(CMAKE_C_COMPILER "dpcpp-cl")
+else()
+        set(CMAKE_CXX_COMPILER "icpx")
+endif()
+set(CMAKE_CXX_STANDARD 17)
+if(NOT DEFINED ${CMAKE_BUILD_TYPE})
+	set(CMAKE_BUILD_TYPE "RELEASE")
+endif()
+if( CMAKE_BUILD_TYPE STREQUAL "DEBUG" )
+	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g")
+endif()
+cmake_minimum_required (VERSION 3.4)
+project (MonteCarloPi)
+add_subdirectory (src)
diff --git a/DirectProgramming/DPC++/MapReduce/MonteCarloPi_v2/LICENSE.txt b/DirectProgramming/DPC++/MapReduce/MonteCarloPi_v2/LICENSE.txt
new file mode 100644
index 0000000000..d645695673
--- /dev/null
+++ b/DirectProgramming/DPC++/MapReduce/MonteCarloPi_v2/LICENSE.txt
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/DirectProgramming/DPC++/MapReduce/MonteCarloPi_v2/README.md b/DirectProgramming/DPC++/MapReduce/MonteCarloPi_v2/README.md
new file mode 100644
index 0000000000..9b5ac8f04b
--- /dev/null
+++ b/DirectProgramming/DPC++/MapReduce/MonteCarloPi_v2/README.md
@@ -0,0 +1,85 @@
+# Monte Carlo Pi example program
+
+## Purpose
+Monte Carlo Simulation is a broad category of computation that utilizes
+statistical analysis to reach a result. This  `Monte Carlo Pi` sample uses the
+Monte Carlo Procedure to estimate the value of pi.
+
+## Prerequisites
+
+| Minimum Requirements              | Description
+|:---                               |:---
+| OS                                | Linux* Ubuntu* 20.04.5 LTS
+| Hardware                          | Intel® 11th Gen Intel Core i7-1185G7 + Mesa Intel Xe Graphics
+| Compiler Toolchain                | Visual Studio Code IDE, Intel oneAPI Base Toolkit (inc its prerequisite)
+| Libraries                         | Install Intel oneAPI Base Toolkit 
+| Tools                             | Visual Studio Code 1.73.1, VSCode Microsoft C/C++ extns
+
+## Build and Run using Visual Code Studio
+
+### Linux*
+
+Within a terminal window change directory to this project's folder. At the 
+terminal prompt type:
+
+```
+cd MonteCarloPi_v2
+code .
+```
+
+Visual Studio Code will open this project displaying its files in the Explorer 
+pane. 
+The project is already set up with build configurations to build either a
+debug build or a release build of the program. When a program is built, it is
+placed in the bin directory of this project's top folder. 
+
+To build the program hit Ctrl+Shift+b and choose the type of program to build.
+The debug executable will have a '_d' appended to its name.
+
+To execute the program, type in the Visual Studio Code terminal window:
+```
+cd bin
+./MonteCarloPi_d cpu
+```
+
+## Build and Run using CMake
+### Linux*
+```
+mkdir build
+cd build
+cmake ..
+make
+```
+
+To execute the program type in the terminal window:
+
+```
+cd build/src
+./MonteCarloPi cpu
+```
+
+## Debug the program using Visual Studio Code
+
+### Linux*
+
+To debug the program, either choose from the IDE's run menu 
+'Start debugging' or hit F5 on the keyboard.
+The debug launch.json configuration file defines the debug session to:
+* To halt the program at the first line of code after main().
+Use the GUI debug panel's buttons to step over code (key F10) lines to see the 
+program advance. 
+Breakpoints can be set either in the main code or the kernel code.
+
+Note: Setting breakpoints in the kernel code does not present the normal 
+      step through code behavior. Instead a breakpoint event is occurring
+      on each thread being executed and so switches to the context of 
+      that thread. To step through the code of a single thread, use the 
+      Intel gdb-oneapi command 'set scheduler-locking step' or 'on' in the 
+      IDE's debug console prompt. As this is not the main thread, be sure
+      to revert this setting on returning to debug any host side code. 
+      Use the command 'set scheduler-locking replay' or 'off'.  
+
+## License
+
+Code samples are licensed under the Apache 2.0 license. See
+[LICENSE.txt](LICENSE.txt) for details.
diff --git a/DirectProgramming/DPC++/MapReduce/MonteCarloPi_v2/src/CMakeLists.txt b/DirectProgramming/DPC++/MapReduce/MonteCarloPi_v2/src/CMakeLists.txt
new file mode 100644
index 0000000000..a40a39873d
--- /dev/null
+++ b/DirectProgramming/DPC++/MapReduce/MonteCarloPi_v2/src/CMakeLists.txt
@@ -0,0 +1,6 @@
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -fsycl")
+set(CMAKE_BUILD_TYPE "RelWithDebInfo")
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}")
+add_executable (MonteCarloPi MonteCarloPi.cpp)
+target_link_libraries(MonteCarloPi OpenCL sycl)
+
diff --git a/DirectProgramming/DPC++/MapReduce/MonteCarloPi_v2/src/MonteCarloPi.cpp b/DirectProgramming/DPC++/MapReduce/MonteCarloPi_v2/src/MonteCarloPi.cpp
new file mode 100644
index 0000000000..e8553405d3
--- /dev/null
+++ b/DirectProgramming/DPC++/MapReduce/MonteCarloPi_v2/src/MonteCarloPi.cpp
@@ -0,0 +1,338 @@
+//============================================================================
+// Copyright © 2022 Intel Corporation
+//
+// SPDX-License-Identifier: MIT
+// ===========================================================================
+
+//****************************************************************************
+// 
+// Description:
+// Example of Monte-Carlo Pi approximation algorithm in SYCL. Also,
+// demonstrating how to query the maximum number of work-items in a
+// work-group to check if a kernel can be executed with the initially
+// desired work-group size.
+//
+// Usage:
+// The program takes one argument: host / cpu / gpu / accelerator.
+//
+//*****************************************************************************
+
+// SYCL or oneAPI toolkit headers:
+#include 
+
+// Third party headers:
+#include 
+#include 
+#include 
+#include 
+#include 
+
+// In-house headers:
+#include "device_selector.hpp"
+
+using namespace std;
+using namespace sycl;
+
+// Forward declerations:
+size_t GetBestWorkGroupSize( size_t work_group_size, 
+                             const sycl::device &device,
+                             const sycl::kernel &kernel );
+
+// Monte-Carlo Pi SYCL C++ functor
+class CMonteCarloPiKernel 
+{
+  template< typename dataT >
+  using readGlobalAccessor = sycl::accessor< 
+                                dataT, 1, 
+                                sycl::access::mode::read,
+                                sycl::access::target::global_buffer >;
+  template < typename dataT >
+  using writeGlobalAccessor = sycl::accessor< 
+                                dataT, 1, 
+                                sycl::access::mode::write,
+                                sycl::access::target::global_buffer >;
+  template< typename dataT >
+  using readWriteLocalAccessor = sycl::accessor<
+                                    dataT, 1, 
+                                    sycl::access::mode::read_write,
+                                    sycl::access::target::local >;
+ public:
+  CMonteCarloPiKernel( readGlobalAccessor< sycl::cl_float2 > ptrPoints,
+                       writeGlobalAccessor< sycl::cl_int > ptrResults,
+                       readWriteLocalAccessor< sycl::cl_int > ptrResultsLocal )
+  : m_ptrPoints( ptrPoints ),
+    m_ptrResults( ptrResults ),
+    m_ptrResultsLocal( ptrResultsLocal )
+  {}
+
+  // Functor kernel using a 1D ND-range of work items
+  void operator()( sycl::nd_item< 1 > item ) const 
+  {
+    // Setting breakpoints in the kernel code does not present the normal 
+    // step through code behavior. Instead a breakpoint event is occurring
+    // on each thread being executed and so switches to the context of 
+    // that thread. To step through the code of a single thread, use the 
+    // gdb-oneapi command 'set scheduler-locking step' or 'on' in the 
+    // IDE's debug console prompt. As this is not the main thread, be sure
+    // to revert this setting on returning to debug any host side code. 
+    // Use the command 'set scheduler-locking replay' or 'off'.
+
+    const size_t idGlobal = item.get_global_id( 0 );
+    const size_t idLocal = item.get_local_id( 0 );
+    const size_t localDim = item.get_local_range( 0 );
+    const size_t idGroup = item.get_group( 0 );
+
+    // Get the point to work on
+    const sycl::float2 point = m_ptrPoints[ idGlobal ];
+
+    // Calculate the length - built-in SYCL function
+    // length: sqrt(point.x * point.x + point.y * point.y)
+    const float len = sycl::length( point );
+
+    // Result is either 1 or 0
+    m_ptrResultsLocal[ idLocal ] = (len <= 1.0f) ? 1 : 0;
+
+    // Wait for the entire work group to get here.
+    item.barrier( sycl::access::fence_space::local_space );
+
+    // If work item 0 in work group, sum local values
+    if( idLocal == 0 ) 
+    {
+      int sum = 0;
+      for( size_t i = 0; i < localDim; i++ ) 
+      {
+        if( m_ptrResultsLocal[ i ] == 1 ) 
+        {
+          ++sum;
+        }
+      }
+
+      // Store the sum in global memory
+      m_ptrResults[ idGroup ] = sum;
+    }
+  }
+
+ private:
+  readGlobalAccessor< sycl::cl_float2 >   m_ptrPoints;
+  writeGlobalAccessor< sycl::cl_int >     m_ptrResults;
+  readWriteLocalAccessor< sycl::cl_int >  m_ptrResultsLocal;
+};
+
+
+// Asynchronous errors hander, catch faults in asynchronously executed code
+// inside a command group or a kernel. They can occur in a different stackframe, 
+// asynchronous error cannot be propagated up the stack. 
+// By default, they are considered 'lost'. The way in which we can retrieve them
+// is by providing an error handler function.
+auto exception_handler = []( sycl::exception_list exceptions ) 
+{
+    for( std::exception_ptr const &e : exceptions ) 
+    {
+        try 
+        {
+          std::rethrow_exception( e );
+        } 
+        catch( sycl::exception const &e ) 
+        {
+          std::cout << "Queue handler caught asynchronous SYCL exception:\n" 
+          << e.what() << std::endl;
+        }
+    }
+};
+
+// The Monto Carlo Pi program
+int main( int argc, char *argv[] ) 
+{
+  CUtilDeviceTargets utilsDev;
+  FnResult fnResult = utilsDev.DiscoverDevsWeWant();
+  if( !fnResult.bSuccess )
+  {
+    cerr << "Program failure: Unable to discover target devices on this platform.\n";
+    exit( -1 );
+  }
+
+  fnResult = UserCheckTheirInput( utilsDev, argc, argv ); 
+  if( !fnResult.bSuccess ) 
+  {
+    cerr << fnResult.strErrMsg << "\n";
+    exit( 1 );
+  }
+
+  bool bDoDevDiscovery = false;
+  fnResult = UserWantsToDiscoverPossibleTargets( argv, bDoDevDiscovery );
+  if( !fnResult.bSuccess )
+  {
+    cerr << fnResult.strErrMsg << "\n";
+    exit( -1 );
+  }
+  if( bDoDevDiscovery ) exit( 1 );
+
+  const SDeviceFoundProxy *pUsersChosenDevice = utilsDev.GetDevUsersFirstChoice();
+  if( pUsersChosenDevice == nullptr )
+  {
+    cerr << "Program failure: Did not create a valid target device object.\n";
+    exit( -1 );
+  }
+
+  constexpr size_t iterations = 1 << 20;
+  size_t workGroupSize = 1 << 10;
+
+  // Container for the sum calculated per each work-group.
+  std::vector< sycl::cl_int > arrayResults;
+
+  // Generate random points on the host - one point for each work item (thread)
+  std::vector< sycl::float2 > arrayPoints( iterations );
+  // Fill up with (pseudo) random values in the range: [0, 1]
+  std::random_device r;
+  std::default_random_engine e( r() );
+  std::uniform_real_distribution< float > dist;
+  std::generate( arrayPoints.begin(), arrayPoints.end(),
+                [&r, &e, &dist]() 
+                { 
+                  return sycl::float2( dist( e ), dist( e ) ); 
+                });
+
+  try 
+  {
+    // Create a SYCL queue
+    queue queue( pUsersChosenDevice->theDevice, exception_handler );
+
+    string strTheDeviceBeingUsed;
+    fnResult = CUtilDeviceTargets::GetQueuesCurrentDevice( queue, strTheDeviceBeingUsed );
+    if( !fnResult.bSuccess )
+    {
+      cerr << fnResult.strErrMsg << "\n";
+      exit( -1 );
+    }
+    cout << strTheDeviceBeingUsed << "\n";
+
+    // Get device and display information: name and platform
+    const sycl::device hw = queue.get_device();
+    cout << "Selected " << hw.get_info< sycl::info::device::name >()
+         << " on platform "
+         << hw.get_info< sycl::info::device::platform >()
+              .get_info< sycl::info::platform::name >()
+         << std::endl;
+
+    // Force online compilation of all kernels in the hwCntext now,
+    // unless already compiled for the device ahead-of-time.
+    const auto hwContext = queue.get_context();
+    const sycl::kernel_id kernelID = 
+        sycl::get_kernel_id< CMonteCarloPiKernel >();
+    const auto hwKernelBundle = 
+        sycl::get_kernel_bundle< sycl::bundle_state::executable >( hwContext );
+    const sycl::kernel kernel = hwKernelBundle.get_kernel( kernelID );
+    
+    // If the desired work-group size doesn't satisfy the device, define a
+    // perfect/max work-group depending on the selected device and kernel
+    // maximum size allowance.
+    workGroupSize = GetBestWorkGroupSize( workGroupSize, hw, kernel );
+
+    // Size of the total sums that are going to be stored in the results vector
+    // is set based on the defined work-group size.
+    arrayResults.resize( iterations / workGroupSize );
+
+    // Allocate device memory
+    sycl::buffer< sycl::cl_float2 > buffPoints( arrayPoints.data(),
+                                                sycl::range<1>( iterations ) );
+    sycl::buffer< sycl::cl_int > buffResults( arrayResults.data(), 
+                              sycl::range< 1 >( iterations / workGroupSize ) );
+
+    queue.submit( [&](sycl::handler& cgh) 
+    {
+      const size_t global_size = iterations;
+      const size_t local_size = workGroupSize;
+
+      // Get access to the data (points and results) on the device
+      const auto ptrPoints =
+          buffPoints.get_access( cgh );
+      const auto ptrResults = 
+          buffResults.get_access< sycl::access::mode::write >( cgh );
+      
+      // Allocate local memory on the device (to compute results)
+      const sycl::accessor< sycl::cl_int, 1, sycl::access::mode::read_write,
+                     sycl::access::target::local >
+          ptrResultsLocal( sycl::range< 1 >( local_size ), cgh );
+
+      // Run the kernel
+      cgh.parallel_for(
+          sycl::nd_range< 1 >( sycl::range< 1 >( global_size ),
+                               sycl::range< 1 >( local_size ) ),
+          CMonteCarloPiKernel( ptrPoints, ptrResults, ptrResultsLocal ) );
+    });
+  } 
+  catch( const sycl::exception &e ) 
+  {
+    std::cerr << "SYCL exception caught: " << e.what() << std::endl;
+    return 1;
+  } 
+  catch( const std::exception &e ) 
+  {
+    std::cerr << "C++ exception caught: " << e.what() << std::endl;
+    return 2;
+  }
+
+  // Sum the results (auto copied back to host)
+  int inCircle = 0;
+  for( int &result : arrayResults ) 
+  {
+    inCircle += result;
+  }
+
+  // Calculate the final result of "pi"
+  float pi = (4.0f * inCircle) / iterations;
+  std::cout << "pi = " << pi << std::endl;
+
+  return 0;
+}
+
+
+// A helper to define a "perfect" work-group size dependant on selected device
+// and kernel maximum allowance.
+size_t GetBestWorkGroupSize( const size_t workGroupSize,
+                             const sycl::device &device,
+                             const sycl::kernel &kernel ) 
+{
+  if( device.is_cpu() ) 
+  {
+    const size_t maxDeviceWorkGroupSize =
+        device.get_info< sycl::info::device::max_work_group_size >();
+
+    // Check if the desired work-group size will be allowed on the host device
+    // and query the maximum possible size on that device in case the desired
+    // one is more than the allowed.
+    if( workGroupSize > maxDeviceWorkGroupSize ) 
+    {
+      cout << "Maximum work-group size for device "
+           << device.get_info< sycl::info::device::name >() << ": "
+           << maxDeviceWorkGroupSize << std::endl;
+      
+      return maxDeviceWorkGroupSize;
+    }
+
+    return workGroupSize;
+  } 
+  else 
+  {
+    const size_t maxKernelWorkGroupSize = kernel.get_info<
+       sycl::info::kernel_device_specific::work_group_size >( device );
+
+    // Verify if the kernel can be executed with our desired work-group size,
+    // and if it can't use the maximum allowed kernel work-group size for the
+    // selected device.
+    if( workGroupSize > maxKernelWorkGroupSize ) 
+    {
+      cout << "Maximum work-group size for "
+           << typeid( CMonteCarloPiKernel ).name() << " on device "
+           << device.get_info() << ": "
+           << maxKernelWorkGroupSize << "\n";
+      
+      return maxKernelWorkGroupSize;
+    }
+    
+    // Otherwise, the work-size will stay the originally desired one
+    return workGroupSize;
+  }
+}
+
diff --git a/DirectProgramming/DPC++/MapReduce/MonteCarloPi_v2/src/device_selector.hpp b/DirectProgramming/DPC++/MapReduce/MonteCarloPi_v2/src/device_selector.hpp
new file mode 100644
index 0000000000..a515c547be
--- /dev/null
+++ b/DirectProgramming/DPC++/MapReduce/MonteCarloPi_v2/src/device_selector.hpp
@@ -0,0 +1,421 @@
+//==============================================================
+// Copyright (C) Intel Corporation
+//
+// SPDX-License-Identifier: MIT
+// =============================================================
+
+#pragma once
+
+// SYCL or oneAPI toolkit headers:
+#include 
+
+// Third party headers:
+#include 
+
+using namespace std;
+using namespace sycl;
+
+//++
+//============================================================================
+// Details: Common code utility. User defined enumerate of the SYCL device 
+//          target typesdesired.
+//          An 'eHost' is admitted because SYCL 2020 depricates host device
+//          selection.
+//--
+enum EDevsWeWant
+{
+  eNotValid = 0,      // Default
+  eCPU = 1,
+  eGPU = 2,
+  eAccelerator = 3,   // i.e. a FPGA type device
+  eCount = 4          //  Always the last one
+};
+  
+//++
+//============================================================================
+// Details: Common code utility. User defined target device proxy.
+//          After the utility has discoverd available devices on the system,
+//          this structure holds/caches information about the device. 
+//          Forms a proxy device object representing an actual possible target
+//          device found on the system.
+//--
+struct SDeviceFoundProxy final
+{
+  EDevsWeWant   eDevice = eNotValid;    // The type of real device we want to
+                                        // use to run kernels on. 
+  string        strDeviceName = "";     // THe proxy label (ID) for a real 
+                                        // device.
+  bool          bAvailable = false;     // True = can be used, 
+                                        // False = not found on the system.
+  bool          bActiveTarget = false;  // True = use it, false = stop using.
+  int           nScore = 0;             // User defined score of the device.
+  sycl::device  theDevice;              // Copy of the real device found.
+}; 
+
+//++
+//============================================================================
+// Details: Common code utility. Rudimentry error reporting system. Used by
+//          utility class to explicity aid the user or the programmer of any
+//          issues that have occurred.
+//--
+struct FnResult final
+{
+  bool bSuccess = true;
+  string strErrMsg = "";
+};
+
+//++
+//============================================================================
+// Details: Common code utility. A basic utility class to wrap up functions
+//          that can discover, then target the acceleration devices found on 
+//          a system. 
+//
+// Docs:  https://www.intel.com/content/www/us/en/developer/articles/
+//        technical/device-discovery-with-sycl.html#gs.nhyd7s
+//        https://registry.khronos.org/SYCL/specs/sycl-2020/html/
+//        sycl-2020.html#sec:device-selection
+//
+// It can find all the available device targets on a system.
+// But it will only store a list of the first device found of the following
+// type and criteria: cpu, gpu and accelerator.
+// Each device in the list can be set to be an active target.
+// The last or latest call to function SetDevToActive() will change the 
+// device proxy object returned by GetDevUsersFirstChoice() to be that device.  
+// -- 
+class CUtilDeviceTargets final
+{
+  // Definitions:
+  public:
+    typedef std::vector< SDeviceFoundProxy > ListDevicesFound_t;
+
+  // Static method:
+  public:
+    static FnResult       DiscoverPlatformsDevicesAvailable( string &vrstrPlatformAndDevices );
+    static const string&  GetInputOptionDiscoverDevice();
+    static FnResult       GetQueuesCurrentDevice( const queue &vrQ, string &vrstr );
+ 
+  // Methods:
+  public:
+    CUtilDeviceTargets();
+    ~CUtilDeviceTargets();
+ 
+    FnResult                  DiscoverDevsWeWant();
+    const ListDevicesFound_t &GetListDevs() const;
+    const SDeviceFoundProxy  *GetDevUsersFirstChoice() const;
+    FnResult                  SetDevToActive( const string &rvDeviceName, const bool vbActive );
+  
+  // Attributes:
+  private:
+    ListDevicesFound_t  m_listDeviceTargets;  
+    static string       m_strDiscoverDeviceInputOption;
+    SDeviceFoundProxy  *m_pDeviceUserFirstChoice;       // NULL = a choice has not been made
+};
+
+// Instantiations:
+string CUtilDeviceTargets::m_strDiscoverDeviceInputOption = "discover_devices";
+
+//++
+// Details: CUtilDeviceTargets constructor.
+// Type:    Method.
+// Args:    None.
+// Return:  None.
+// Throws:  None.
+//--
+CUtilDeviceTargets::CUtilDeviceTargets()
+: m_pDeviceUserFirstChoice( nullptr ) 
+{}
+
+//++
+// Details: CUtilDeviceTargets destructor.
+// Type:    Method.
+// Args:    None.
+// Return:  None.
+// Throws:  None.
+//--
+CUtilDeviceTargets::~CUtilDeviceTargets()
+{
+  // Release
+  m_pDeviceUserFirstChoice = nullptr;
+}
+
+//++
+// Details:  Return a report on the specified SYCL queue stating its current
+//           real target device and the device platform.
+//
+// Type:    Method.
+// Args:    vrQ - (R) The queue to query.
+//          vrstrReport - (W) The report text. 
+// Return:  FnResult - Status of the function's operational success.
+// Throws:  SYCL implemenation may throw.
+//--
+FnResult CUtilDeviceTargets::GetQueuesCurrentDevice( const queue &vrQ, string &vrstrReport )
+{
+  FnResult status;
+
+  vrstrReport = "[SYCL] Using device: [";
+  vrstrReport += vrQ.get_device().get_info< info::device::name >();
+  vrstrReport += "] from [";
+  vrstrReport += vrQ.get_device().get_platform().get_info< info::platform::name >();
+  vrstrReport += "]";
+
+  return status;
+}
+
+//++
+// Details: Returns the program's text label for the user input option
+//          to choose to discover all the available device on a system.
+// Type:    Method.
+// Args:    None.
+// Return:  string& - Text label.
+// Throws:  None.
+//--
+const string& CUtilDeviceTargets::GetInputOptionDiscoverDevice()
+{
+  return m_strDiscoverDeviceInputOption;
+}
+
+//++
+// Details: Returns the pointer to the proxy object in the list of 
+//          discovery device proxies.
+// Type:    Method.
+// Args:    None.
+// Return:  SDeviceFoundProxy* - pointer to object.
+// Throws:  None.
+//--
+const SDeviceFoundProxy * CUtilDeviceTargets::GetDevUsersFirstChoice() const
+{
+  return m_pDeviceUserFirstChoice;
+}
+
+//++
+// Details:  A pointer to the current user's choice of target is made on this
+//           function being successful. 
+//
+// If this function fails, the pointer retains the last or
+// remains NULL (a choice was never made).
+//
+// Type:    Method.
+// Args:    string &rvDeviceName - (R) Proxy's label or ID text.
+//          bool vbActive - (R) True = Use the device, False = disable use. 
+// Return:  FnResult - Status of the function's operational success.
+// Throws:  None.
+//--
+FnResult CUtilDeviceTargets::SetDevToActive( const string &rvDeviceName, const bool vbActive )
+{
+  FnResult status;
+
+  bool bFoundDevice = false;
+  for( SDeviceFoundProxy &rDev : m_listDeviceTargets )
+  {
+      if( rDev.strDeviceName == rvDeviceName )
+      {
+        bFoundDevice = true;
+        rDev.bActiveTarget = vbActive;
+        m_pDeviceUserFirstChoice = &rDev;
+        break;
+      }
+  }
+  if( !bFoundDevice )
+  {
+    status.bSuccess = false;
+    status.strErrMsg = "Device '" + rvDeviceName;
+    status.strErrMsg += "' not found in list of available device targets";
+  }
+
+  return status;
+}
+ 
+//++
+// Details:  Discovers all the SYCL target devices available and assigns them
+//           to a target device proxy object. A device proxy object holds
+//           the criteria for a real device. All proxies created are
+//           disabled until a real device is found to match it. A programmer
+//           has to still set the proxy device as active to target that 
+//           device it represent.
+//
+// Call this function before at the earliest opportunity and 
+// before other functions in this class as it makes a list
+// of the target devices we are aiming to use.
+//
+// A limitation of this function it will only assign the first real device that
+// matches the proxy criteria. Any subsequent same or similar devices are
+// ignored.
+//
+// Type:    Method.
+// Args:    None. 
+// Return:  FnResult - Status of the function's operational success.
+// Throws:  SYCL implemenation may throw.
+//--
+FnResult CUtilDeviceTargets::DiscoverDevsWeWant()
+{ 
+  FnResult status;
+
+  SDeviceFoundProxy accelerator{ eAccelerator, "accelerator", false };
+  SDeviceFoundProxy cpu{ eCPU, "cpu", false };
+  SDeviceFoundProxy gpu{ eGPU, "gpu", false };
+ 
+  for( const auto platform : platform::get_platforms() )
+  {
+    for( const auto device : platform.get_devices() )
+    {
+      // Get first available device of each type
+      if( !accelerator.bAvailable && device.is_accelerator() )
+      {
+        accelerator.bAvailable = true;
+        accelerator.theDevice = device;
+      }
+      else if( !cpu.bAvailable && device.is_cpu() )
+      {
+        cpu.bAvailable = true;
+        cpu.theDevice = device;
+      }
+      else if( !gpu.bAvailable && device.is_gpu() )
+      {
+        gpu.bAvailable = true;
+        gpu.theDevice = device;
+      }
+    }
+  }
+
+  m_listDeviceTargets.push_back( accelerator );
+  m_listDeviceTargets.push_back( cpu );
+  m_listDeviceTargets.push_back( gpu );
+
+  return status;
+}
+
+//++
+// Details: Returns the list of proxy device objects the programmer has 
+//          defined and wants found on the system. Some proxy objects
+//          may be set to not available (and inactive) if not matching
+//          devices has been found on the system.
+// Type:    Method.
+// Args:    None. 
+// Return:  ListDevicesFound_t - List of proxy device objects.
+// Throws:  None.
+//--
+const CUtilDeviceTargets::ListDevicesFound_t & CUtilDeviceTargets::GetListDevs() const
+{
+  return m_listDeviceTargets;
+}
+
+//++
+// Details: Prints to std out all the SYCL device targets discovered on the
+//          wanted to be used.  
+// Type:    Method.
+// Args:    string& vrstrPlatformAndDevices - (W) A report of found devices. 
+// Return:  FnResult - Status of the function's operational success.
+// Throws:  SYCL implemenation may throw.
+//--
+FnResult CUtilDeviceTargets::DiscoverPlatformsDevicesAvailable( string &vrstrPlatformAndDevices )
+{
+  FnResult status;
+
+  vrstrPlatformAndDevices = "";
+  bool bFoundPlatforms = false;
+  bool bFoundDevices = false;
+  for( const auto platform : platform::get_platforms() )
+  {
+    bFoundPlatforms = true;
+    vrstrPlatformAndDevices += "Platform: ";
+    vrstrPlatformAndDevices += platform.get_info< info::platform::name >();
+    vrstrPlatformAndDevices += "\n";
+
+    for( const auto device : platform.get_devices() )
+    {
+      bFoundDevices = true;
+      vrstrPlatformAndDevices += "\tDevice: ";
+      vrstrPlatformAndDevices += device.get_info< info::device::name >();
+      vrstrPlatformAndDevices += "\n";
+    }
+  }
+  if( !bFoundPlatforms && !bFoundDevices )
+  {
+    vrstrPlatformAndDevices = "No SYCL targeted platforms or devices found.";
+  }
+
+  return status;
+}
+
+//++
+// Details: Checks the user's input is valid. If not a help message if formed
+//          and returned. If valid, the matching proxy device object
+//          discovered earlier is made active for use by the program.  
+// Type:    Function.
+// Args:    vrDevList- (RW) Utililty object managing proxy device objects.
+//          argc - (R) Program's input arguments count. 
+//          argv - (R) Program's list of input arguments.
+// Return:  FnResult - Status of the function's operational success.
+// Throws:  None.
+//--
+FnResult UserCheckTheirInput( CUtilDeviceTargets &vrDevList, int argc, char* argv[] ) 
+{
+  FnResult status;
+
+  const CUtilDeviceTargets::ListDevicesFound_t &rDevs = vrDevList.GetListDevs();
+  string strListDevsOptionsToUser;
+  for( const SDeviceFoundProxy d : rDevs )
+  {
+    strListDevsOptionsToUser += d.strDeviceName + "|";
+  }
+  strListDevsOptionsToUser += CUtilDeviceTargets::GetInputOptionDiscoverDevice();
+
+  if( argc < 2 ) 
+  {
+    status.bSuccess = false;
+    status.strErrMsg = "Usage: " + string( argv[ 0 ] ) + " <";
+    status.strErrMsg += strListDevsOptionsToUser;
+    status.strErrMsg += ">";
+    return status;
+  }
+
+  bool bTargetDevMatch = false;
+  const string strArg{ argv[ 1 ] };
+  for( const SDeviceFoundProxy d : rDevs )
+  {
+    if( strArg == d.strDeviceName )
+    {
+      bTargetDevMatch = true;
+      status = vrDevList.SetDevToActive( strArg, true );
+      break;
+    }
+  }
+  if( status.bSuccess && !bTargetDevMatch && 
+     (strArg != CUtilDeviceTargets::GetInputOptionDiscoverDevice() ) )
+  {
+    status.bSuccess = false;
+    status.strErrMsg = "The device type cannot be found. Please enter a device type name from the list: ";
+    status.strErrMsg += strListDevsOptionsToUser;
+  }
+
+  return status;
+}
+
+//++
+// Details: Checks the user's input is the option to 'discover device target'
+//          on the system.  
+// Type:    Function.
+// Args:    argv - (R) Program's list of input arguments.
+//          bool rbDoDiscovery - (W) True = yes, the discovery option choosen.
+// Return:  FnResult - Status of the function's operational success.
+// Throws:  SYCL implemenation may throw.
+//--
+FnResult UserWantsToDiscoverPossibleTargets( char* argv[], bool &rbDoDiscovery )
+{
+  FnResult status;
+
+  rbDoDiscovery = false;
+  const string strArg{ argv[ 1 ] };
+  if( strArg == CUtilDeviceTargets::GetInputOptionDiscoverDevice() )
+  {
+    string strPlatformAndDevicesReport;
+    status = CUtilDeviceTargets::DiscoverPlatformsDevicesAvailable( strPlatformAndDevicesReport );
+    if( status.bSuccess )
+    {
+      rbDoDiscovery = true;
+      cout << strPlatformAndDevicesReport << std::endl;
+    }
+  }
+
+  return status;
+}
\ No newline at end of file
diff --git a/DirectProgramming/DPC++/ParallelPatterns/ParallelPrefixSumScan/.vscode/c_cpp_properties.json b/DirectProgramming/DPC++/ParallelPatterns/ParallelPrefixSumScan/.vscode/c_cpp_properties.json
new file mode 100644
index 0000000000..2b8e56ba4b
--- /dev/null
+++ b/DirectProgramming/DPC++/ParallelPatterns/ParallelPrefixSumScan/.vscode/c_cpp_properties.json
@@ -0,0 +1,17 @@
+{
+    "configurations": [
+        {
+            "name": "Linux",
+            "includePath": [
+                "${workspaceFolder}/**"
+            ],
+            "defines": [],
+            "compilerPath": "/opt/intel/oneapi/compiler/latest/linux/bin/icpx",
+            "compilerArgs": [ "-fsycl" ],
+            "cStandard": "gnu17",
+            "cppStandard": "gnu++17",
+            "intelliSenseMode": "linux-gcc-x64"
+        }
+    ],
+    "version": 4
+}
\ No newline at end of file
diff --git a/DirectProgramming/DPC++/ParallelPatterns/ParallelPrefixSumScan/.vscode/launch.json b/DirectProgramming/DPC++/ParallelPatterns/ParallelPrefixSumScan/.vscode/launch.json
new file mode 100644
index 0000000000..325c8901a1
--- /dev/null
+++ b/DirectProgramming/DPC++/ParallelPatterns/ParallelPrefixSumScan/.vscode/launch.json
@@ -0,0 +1,50 @@
+{
+    "configurations": [
+    {
+        "name": "C/C++: dpc++ build and debug scan",
+        "type": "cppdbg",
+        "request": "launch",
+        "program": "${workspaceFolder}/bin/${config:programName}_d",
+        "args": [
+            "${input:args}"
+        ],
+        "stopAtEntry": true,
+        "cwd": "${fileDirname}",
+        "environment": [],
+        "externalConsole": false,
+        "MIMode": "gdb",
+        "setupCommands": [
+            {
+                "description": "Enable pretty-printing for gdb",
+                "text": "-enable-pretty-printing",
+                "ignoreFailures": true
+            },
+            {
+                "description":  "Set Disassembly Flavor to Intel",
+                "text": "-gdb-set disassembly-flavor intel",
+                "ignoreFailures": true
+            },
+            {
+                "description": "Needed by Intel oneAPI: Disable target async",
+                "text": "set target-async off",
+                "ignoreFailures": true
+            }
+        ],
+        "preLaunchTask": "scan Debug C/C++: Intel icpx build active file",
+        "miDebuggerPath": "/opt/intel/oneapi/debugger/latest/gdb/intel64/bin/gdb-oneapi"
+    }
+    ],
+    "inputs" : [
+        {
+            "id": "args",
+            "type": "pickString",
+            "description": "Program args",
+            "default": "cpu",
+            "options": [
+                "cpu",
+                "gpu",
+                "accelerator"
+            ]
+        }
+    ]
+}
\ No newline at end of file
diff --git a/DirectProgramming/DPC++/ParallelPatterns/ParallelPrefixSumScan/.vscode/settings.json b/DirectProgramming/DPC++/ParallelPatterns/ParallelPrefixSumScan/.vscode/settings.json
new file mode 100644
index 0000000000..150d2ba955
--- /dev/null
+++ b/DirectProgramming/DPC++/ParallelPatterns/ParallelPrefixSumScan/.vscode/settings.json
@@ -0,0 +1,14 @@
+{
+    "programName": "scan",
+    "files.associations": {
+        "stdexcept": "cpp",
+        "array": "cpp",
+        "bitset": "cpp",
+        "string_view": "cpp",
+        "initializer_list": "cpp",
+        "regex": "cpp",
+        "utility": "cpp",
+        "algorithm": "cpp",
+        "iostream": "cpp"
+    }
+}
\ No newline at end of file
diff --git a/DirectProgramming/DPC++/ParallelPatterns/ParallelPrefixSumScan/.vscode/tasks.json b/DirectProgramming/DPC++/ParallelPatterns/ParallelPrefixSumScan/.vscode/tasks.json
new file mode 100644
index 0000000000..0706d2eebb
--- /dev/null
+++ b/DirectProgramming/DPC++/ParallelPatterns/ParallelPrefixSumScan/.vscode/tasks.json
@@ -0,0 +1,49 @@
+{
+	"version": "2.0.0",
+	"tasks": [
+		{
+			"type": "cppbuild",
+			"label": "scan Debug C/C++: Intel icpx build active file",
+			"command": "/opt/intel/oneapi/compiler/latest/linux/bin/icpx",
+			"args": [
+				"-fsycl",
+				"-fdiagnostics-color=always",
+				"-fno-limit-debug-info",
+				"-fsycl-device-code-split=per_kernel",
+				"-g",
+				"-O0",
+				"${workspaceFolder}/src/${config:programName}.cpp",
+				"-o",
+				"${workspaceFolder}/bin/${config:programName}_d"
+			],
+			"options": {
+				"cwd": "${workspaceFolder}"
+			},
+			"problemMatcher": [
+				"$gcc"
+			],
+			"group": "build",
+			"detail": "compiler: /opt/intel/oneapi/compiler/latest/linux/bin/dpcpp"
+		},
+		{
+			"type": "cppbuild",
+			"label": "scan Release C/C++: Intel icpx build active file",
+			"command": "/opt/intel/oneapi/compiler/latest/linux/bin/icpx",
+			"args": [
+				"-fsycl",
+				"-DNDEBUG",
+                "${workspaceFolder}/src/${config:programName}.cpp",
+				"-o",
+				"${workspaceFolder}/bin/${config:programName}"
+			],
+			"options": {
+				"cwd": "${workspaceFolder}"
+			},
+			"problemMatcher": [
+				"$gcc"
+			],
+			"group": "build",
+			"detail": "compiler: /opt/intel/oneapi/compiler/latest/linux/bin/icpx"
+		}
+	]
+}
\ No newline at end of file
diff --git a/DirectProgramming/DPC++/ParallelPatterns/ParallelPrefixSumScan/CMakeLists.txt b/DirectProgramming/DPC++/ParallelPatterns/ParallelPrefixSumScan/CMakeLists.txt
new file mode 100644
index 0000000000..24aa89e2f2
--- /dev/null
+++ b/DirectProgramming/DPC++/ParallelPatterns/ParallelPrefixSumScan/CMakeLists.txt
@@ -0,0 +1,16 @@
+if(WIN32)
+        set(CMAKE_CXX_COMPILER "dpcpp-cl")
+        set(CMAKE_C_COMPILER "dpcpp-cl")
+else()
+        set(CMAKE_CXX_COMPILER "icpx")
+endif()
+set(CMAKE_CXX_STANDARD 17)
+if(NOT DEFINED ${CMAKE_BUILD_TYPE})
+	set(CMAKE_BUILD_TYPE "RELEASE")
+endif()
+if( CMAKE_BUILD_TYPE STREQUAL "DEBUG" )
+	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g")
+endif()
+cmake_minimum_required (VERSION 3.4)
+project (scan)
+add_subdirectory (src)
diff --git a/DirectProgramming/DPC++/ParallelPatterns/ParallelPrefixSumScan/README.md b/DirectProgramming/DPC++/ParallelPatterns/ParallelPrefixSumScan/README.md
new file mode 100644
index 0000000000..3f1e3f3720
--- /dev/null
+++ b/DirectProgramming/DPC++/ParallelPatterns/ParallelPrefixSumScan/README.md
@@ -0,0 +1,86 @@
+# Parallel Prefix Sum (Scan) program
+
+## Purpose
+A simple and common parallel algorithm building block is the all-prefix-sums operation. Blelloch (1990) describes all-prefix-sums as a good example of a computation that seems inherently sequential, but for which there is an efficient parallel algorithm. 
+
+## Prerequisites
+
+| Minimum Requirements              | Description
+|:---                               |:---
+| OS                                | Linux* Ubuntu* 20.04.5 LTS
+| Hardware                          | Intel® 11th Gen Intel Core i7-1185G7 + Mesa Intel Xe Graphics
+| Compiler Toolchain                | Visual Studio Code IDE, Intel oneAPI Base Toolkit (inc its prerequisite)
+| Libraries                         | Install Intel oneAPI Base Toolkit 
+| Tools                             | Visual Studio Code 1.73.1, VSCode Microsoft C/C++ extns
+
+## Build and Run using Visual Code Studio
+
+### Linux*
+
+Within a terminal window change directory to this project's folder. At the 
+terminal prompt type:
+
+```
+cd ParallelPrefixSumScan
+code .
+```
+
+Visual Studio Code will open this project displaying its files in the Explorer 
+pane. 
+The project is already set up with build configurations to build either a
+debug build or a release build of the program. When a program is built, it is
+placed in the bin directory of this project's top folder.
+
+To build the program hit Ctrl+Shift+b and choose the type of program to build.
+The debug executable will have a '_d' appended to its name.
+
+To execute the program, type in the Visual Studio Code terminal window:
+```
+cd bin
+./scan_d cpu
+```
+
+## Build and Run using CMake
+### Linux*
+```
+mkdir build
+cd build
+cmake ..
+make
+```
+
+To execute the program, type in the terminal window:
+
+```
+cd build/src
+./scan cpu
+```
+
+
+## Debug the program using Visual Studio Code
+
+### Linux*
+
+To debug the program, either choose from the IDE's run menu 
+'Start debugging' or hit F5 on the keyboard.
+The debug launch.json configuration file defines the debug session to:
+* Provide a list of accelerator type to choose from. Picking a device will 
+  be used as the first argument to the program.
+* To halt the program at the first line of code after main().
+Use the GUI debug panel's buttons to step over code (key F10) lines to see the 
+program advance. 
+Breakpoints can be set either in the main code or the kernel code.
+
+Note: Setting breakpoints in the kernel code does not present the normal 
+      step through code behavior. Instead a breakpoint event is occurring
+      on each thread being executed and so switches to the context of 
+      that thread. To step through the code of a single thread, use the 
+      Intel gdb-oneapi command 'set scheduler-locking step' or 'on' in the 
+      IDE's debug console prompt. As this is not the main thread, be sure
+      to revert this setting on returning to debug any host side code. 
+      Use the command 'set scheduler-locking replay' or 'off'.  
+
+## License
+
+Code samples are licensed under the Apache 2.0 license. See
+[LICENSE.txt](LICENSE.txt) for details.
diff --git a/DirectProgramming/DPC++/ParallelPatterns/ParallelPrefixSumScan/src/CMakeLists.txt b/DirectProgramming/DPC++/ParallelPatterns/ParallelPrefixSumScan/src/CMakeLists.txt
new file mode 100644
index 0000000000..7dbc6eff3d
--- /dev/null
+++ b/DirectProgramming/DPC++/ParallelPatterns/ParallelPrefixSumScan/src/CMakeLists.txt
@@ -0,0 +1,6 @@
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2 -fsycl")
+set(CMAKE_BUILD_TYPE "RelWithDebInfo")
+set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}")
+add_executable (scan scan.cpp)
+target_link_libraries(scan OpenCL sycl)
+
diff --git a/DirectProgramming/DPC++/ParallelPatterns/ParallelPrefixSumScan/src/device_selector.hpp b/DirectProgramming/DPC++/ParallelPatterns/ParallelPrefixSumScan/src/device_selector.hpp
new file mode 100644
index 0000000000..a515c547be
--- /dev/null
+++ b/DirectProgramming/DPC++/ParallelPatterns/ParallelPrefixSumScan/src/device_selector.hpp
@@ -0,0 +1,421 @@
+//==============================================================
+// Copyright (C) Intel Corporation
+//
+// SPDX-License-Identifier: MIT
+// =============================================================
+
+#pragma once
+
+// SYCL or oneAPI toolkit headers:
+#include 
+
+// Third party headers:
+#include 
+
+using namespace std;
+using namespace sycl;
+
+//++
+//============================================================================
+// Details: Common code utility. User defined enumerate of the SYCL device 
+//          target typesdesired.
+//          An 'eHost' is admitted because SYCL 2020 depricates host device
+//          selection.
+//--
+enum EDevsWeWant
+{
+  eNotValid = 0,      // Default
+  eCPU = 1,
+  eGPU = 2,
+  eAccelerator = 3,   // i.e. a FPGA type device
+  eCount = 4          //  Always the last one
+};
+  
+//++
+//============================================================================
+// Details: Common code utility. User defined target device proxy.
+//          After the utility has discoverd available devices on the system,
+//          this structure holds/caches information about the device. 
+//          Forms a proxy device object representing an actual possible target
+//          device found on the system.
+//--
+struct SDeviceFoundProxy final
+{
+  EDevsWeWant   eDevice = eNotValid;    // The type of real device we want to
+                                        // use to run kernels on. 
+  string        strDeviceName = "";     // THe proxy label (ID) for a real 
+                                        // device.
+  bool          bAvailable = false;     // True = can be used, 
+                                        // False = not found on the system.
+  bool          bActiveTarget = false;  // True = use it, false = stop using.
+  int           nScore = 0;             // User defined score of the device.
+  sycl::device  theDevice;              // Copy of the real device found.
+}; 
+
+//++
+//============================================================================
+// Details: Common code utility. Rudimentry error reporting system. Used by
+//          utility class to explicity aid the user or the programmer of any
+//          issues that have occurred.
+//--
+struct FnResult final
+{
+  bool bSuccess = true;
+  string strErrMsg = "";
+};
+
+//++
+//============================================================================
+// Details: Common code utility. A basic utility class to wrap up functions
+//          that can discover, then target the acceleration devices found on 
+//          a system. 
+//
+// Docs:  https://www.intel.com/content/www/us/en/developer/articles/
+//        technical/device-discovery-with-sycl.html#gs.nhyd7s
+//        https://registry.khronos.org/SYCL/specs/sycl-2020/html/
+//        sycl-2020.html#sec:device-selection
+//
+// It can find all the available device targets on a system.
+// But it will only store a list of the first device found of the following
+// type and criteria: cpu, gpu and accelerator.
+// Each device in the list can be set to be an active target.
+// The last or latest call to function SetDevToActive() will change the 
+// device proxy object returned by GetDevUsersFirstChoice() to be that device.  
+// -- 
+class CUtilDeviceTargets final
+{
+  // Definitions:
+  public:
+    typedef std::vector< SDeviceFoundProxy > ListDevicesFound_t;
+
+  // Static method:
+  public:
+    static FnResult       DiscoverPlatformsDevicesAvailable( string &vrstrPlatformAndDevices );
+    static const string&  GetInputOptionDiscoverDevice();
+    static FnResult       GetQueuesCurrentDevice( const queue &vrQ, string &vrstr );
+ 
+  // Methods:
+  public:
+    CUtilDeviceTargets();
+    ~CUtilDeviceTargets();
+ 
+    FnResult                  DiscoverDevsWeWant();
+    const ListDevicesFound_t &GetListDevs() const;
+    const SDeviceFoundProxy  *GetDevUsersFirstChoice() const;
+    FnResult                  SetDevToActive( const string &rvDeviceName, const bool vbActive );
+  
+  // Attributes:
+  private:
+    ListDevicesFound_t  m_listDeviceTargets;  
+    static string       m_strDiscoverDeviceInputOption;
+    SDeviceFoundProxy  *m_pDeviceUserFirstChoice;       // NULL = a choice has not been made
+};
+
+// Instantiations:
+string CUtilDeviceTargets::m_strDiscoverDeviceInputOption = "discover_devices";
+
+//++
+// Details: CUtilDeviceTargets constructor.
+// Type:    Method.
+// Args:    None.
+// Return:  None.
+// Throws:  None.
+//--
+CUtilDeviceTargets::CUtilDeviceTargets()
+: m_pDeviceUserFirstChoice( nullptr ) 
+{}
+
+//++
+// Details: CUtilDeviceTargets destructor.
+// Type:    Method.
+// Args:    None.
+// Return:  None.
+// Throws:  None.
+//--
+CUtilDeviceTargets::~CUtilDeviceTargets()
+{
+  // Release
+  m_pDeviceUserFirstChoice = nullptr;
+}
+
+//++
+// Details:  Return a report on the specified SYCL queue stating its current
+//           real target device and the device platform.
+//
+// Type:    Method.
+// Args:    vrQ - (R) The queue to query.
+//          vrstrReport - (W) The report text. 
+// Return:  FnResult - Status of the function's operational success.
+// Throws:  SYCL implemenation may throw.
+//--
+FnResult CUtilDeviceTargets::GetQueuesCurrentDevice( const queue &vrQ, string &vrstrReport )
+{
+  FnResult status;
+
+  vrstrReport = "[SYCL] Using device: [";
+  vrstrReport += vrQ.get_device().get_info< info::device::name >();
+  vrstrReport += "] from [";
+  vrstrReport += vrQ.get_device().get_platform().get_info< info::platform::name >();
+  vrstrReport += "]";
+
+  return status;
+}
+
+//++
+// Details: Returns the program's text label for the user input option
+//          to choose to discover all the available device on a system.
+// Type:    Method.
+// Args:    None.
+// Return:  string& - Text label.
+// Throws:  None.
+//--
+const string& CUtilDeviceTargets::GetInputOptionDiscoverDevice()
+{
+  return m_strDiscoverDeviceInputOption;
+}
+
+//++
+// Details: Returns the pointer to the proxy object in the list of 
+//          discovery device proxies.
+// Type:    Method.
+// Args:    None.
+// Return:  SDeviceFoundProxy* - pointer to object.
+// Throws:  None.
+//--
+const SDeviceFoundProxy * CUtilDeviceTargets::GetDevUsersFirstChoice() const
+{
+  return m_pDeviceUserFirstChoice;
+}
+
+//++
+// Details:  A pointer to the current user's choice of target is made on this
+//           function being successful. 
+//
+// If this function fails, the pointer retains the last or
+// remains NULL (a choice was never made).
+//
+// Type:    Method.
+// Args:    string &rvDeviceName - (R) Proxy's label or ID text.
+//          bool vbActive - (R) True = Use the device, False = disable use. 
+// Return:  FnResult - Status of the function's operational success.
+// Throws:  None.
+//--
+FnResult CUtilDeviceTargets::SetDevToActive( const string &rvDeviceName, const bool vbActive )
+{
+  FnResult status;
+
+  bool bFoundDevice = false;
+  for( SDeviceFoundProxy &rDev : m_listDeviceTargets )
+  {
+      if( rDev.strDeviceName == rvDeviceName )
+      {
+        bFoundDevice = true;
+        rDev.bActiveTarget = vbActive;
+        m_pDeviceUserFirstChoice = &rDev;
+        break;
+      }
+  }
+  if( !bFoundDevice )
+  {
+    status.bSuccess = false;
+    status.strErrMsg = "Device '" + rvDeviceName;
+    status.strErrMsg += "' not found in list of available device targets";
+  }
+
+  return status;
+}
+ 
+//++
+// Details:  Discovers all the SYCL target devices available and assigns them
+//           to a target device proxy object. A device proxy object holds
+//           the criteria for a real device. All proxies created are
+//           disabled until a real device is found to match it. A programmer
+//           has to still set the proxy device as active to target that 
+//           device it represent.
+//
+// Call this function before at the earliest opportunity and 
+// before other functions in this class as it makes a list
+// of the target devices we are aiming to use.
+//
+// A limitation of this function it will only assign the first real device that
+// matches the proxy criteria. Any subsequent same or similar devices are
+// ignored.
+//
+// Type:    Method.
+// Args:    None. 
+// Return:  FnResult - Status of the function's operational success.
+// Throws:  SYCL implemenation may throw.
+//--
+FnResult CUtilDeviceTargets::DiscoverDevsWeWant()
+{ 
+  FnResult status;
+
+  SDeviceFoundProxy accelerator{ eAccelerator, "accelerator", false };
+  SDeviceFoundProxy cpu{ eCPU, "cpu", false };
+  SDeviceFoundProxy gpu{ eGPU, "gpu", false };
+ 
+  for( const auto platform : platform::get_platforms() )
+  {
+    for( const auto device : platform.get_devices() )
+    {
+      // Get first available device of each type
+      if( !accelerator.bAvailable && device.is_accelerator() )
+      {
+        accelerator.bAvailable = true;
+        accelerator.theDevice = device;
+      }
+      else if( !cpu.bAvailable && device.is_cpu() )
+      {
+        cpu.bAvailable = true;
+        cpu.theDevice = device;
+      }
+      else if( !gpu.bAvailable && device.is_gpu() )
+      {
+        gpu.bAvailable = true;
+        gpu.theDevice = device;
+      }
+    }
+  }
+
+  m_listDeviceTargets.push_back( accelerator );
+  m_listDeviceTargets.push_back( cpu );
+  m_listDeviceTargets.push_back( gpu );
+
+  return status;
+}
+
+//++
+// Details: Returns the list of proxy device objects the programmer has 
+//          defined and wants found on the system. Some proxy objects
+//          may be set to not available (and inactive) if not matching
+//          devices has been found on the system.
+// Type:    Method.
+// Args:    None. 
+// Return:  ListDevicesFound_t - List of proxy device objects.
+// Throws:  None.
+//--
+const CUtilDeviceTargets::ListDevicesFound_t & CUtilDeviceTargets::GetListDevs() const
+{
+  return m_listDeviceTargets;
+}
+
+//++
+// Details: Prints to std out all the SYCL device targets discovered on the
+//          wanted to be used.  
+// Type:    Method.
+// Args:    string& vrstrPlatformAndDevices - (W) A report of found devices. 
+// Return:  FnResult - Status of the function's operational success.
+// Throws:  SYCL implemenation may throw.
+//--
+FnResult CUtilDeviceTargets::DiscoverPlatformsDevicesAvailable( string &vrstrPlatformAndDevices )
+{
+  FnResult status;
+
+  vrstrPlatformAndDevices = "";
+  bool bFoundPlatforms = false;
+  bool bFoundDevices = false;
+  for( const auto platform : platform::get_platforms() )
+  {
+    bFoundPlatforms = true;
+    vrstrPlatformAndDevices += "Platform: ";
+    vrstrPlatformAndDevices += platform.get_info< info::platform::name >();
+    vrstrPlatformAndDevices += "\n";
+
+    for( const auto device : platform.get_devices() )
+    {
+      bFoundDevices = true;
+      vrstrPlatformAndDevices += "\tDevice: ";
+      vrstrPlatformAndDevices += device.get_info< info::device::name >();
+      vrstrPlatformAndDevices += "\n";
+    }
+  }
+  if( !bFoundPlatforms && !bFoundDevices )
+  {
+    vrstrPlatformAndDevices = "No SYCL targeted platforms or devices found.";
+  }
+
+  return status;
+}
+
+//++
+// Details: Checks the user's input is valid. If not a help message if formed
+//          and returned. If valid, the matching proxy device object
+//          discovered earlier is made active for use by the program.  
+// Type:    Function.
+// Args:    vrDevList- (RW) Utililty object managing proxy device objects.
+//          argc - (R) Program's input arguments count. 
+//          argv - (R) Program's list of input arguments.
+// Return:  FnResult - Status of the function's operational success.
+// Throws:  None.
+//--
+FnResult UserCheckTheirInput( CUtilDeviceTargets &vrDevList, int argc, char* argv[] ) 
+{
+  FnResult status;
+
+  const CUtilDeviceTargets::ListDevicesFound_t &rDevs = vrDevList.GetListDevs();
+  string strListDevsOptionsToUser;
+  for( const SDeviceFoundProxy d : rDevs )
+  {
+    strListDevsOptionsToUser += d.strDeviceName + "|";
+  }
+  strListDevsOptionsToUser += CUtilDeviceTargets::GetInputOptionDiscoverDevice();
+
+  if( argc < 2 ) 
+  {
+    status.bSuccess = false;
+    status.strErrMsg = "Usage: " + string( argv[ 0 ] ) + " <";
+    status.strErrMsg += strListDevsOptionsToUser;
+    status.strErrMsg += ">";
+    return status;
+  }
+
+  bool bTargetDevMatch = false;
+  const string strArg{ argv[ 1 ] };
+  for( const SDeviceFoundProxy d : rDevs )
+  {
+    if( strArg == d.strDeviceName )
+    {
+      bTargetDevMatch = true;
+      status = vrDevList.SetDevToActive( strArg, true );
+      break;
+    }
+  }
+  if( status.bSuccess && !bTargetDevMatch && 
+     (strArg != CUtilDeviceTargets::GetInputOptionDiscoverDevice() ) )
+  {
+    status.bSuccess = false;
+    status.strErrMsg = "The device type cannot be found. Please enter a device type name from the list: ";
+    status.strErrMsg += strListDevsOptionsToUser;
+  }
+
+  return status;
+}
+
+//++
+// Details: Checks the user's input is the option to 'discover device target'
+//          on the system.  
+// Type:    Function.
+// Args:    argv - (R) Program's list of input arguments.
+//          bool rbDoDiscovery - (W) True = yes, the discovery option choosen.
+// Return:  FnResult - Status of the function's operational success.
+// Throws:  SYCL implemenation may throw.
+//--
+FnResult UserWantsToDiscoverPossibleTargets( char* argv[], bool &rbDoDiscovery )
+{
+  FnResult status;
+
+  rbDoDiscovery = false;
+  const string strArg{ argv[ 1 ] };
+  if( strArg == CUtilDeviceTargets::GetInputOptionDiscoverDevice() )
+  {
+    string strPlatformAndDevicesReport;
+    status = CUtilDeviceTargets::DiscoverPlatformsDevicesAvailable( strPlatformAndDevicesReport );
+    if( status.bSuccess )
+    {
+      rbDoDiscovery = true;
+      cout << strPlatformAndDevicesReport << std::endl;
+    }
+  }
+
+  return status;
+}
\ No newline at end of file
diff --git a/DirectProgramming/DPC++/ParallelPatterns/ParallelPrefixSumScan/src/scan.cpp b/DirectProgramming/DPC++/ParallelPatterns/ParallelPrefixSumScan/src/scan.cpp
new file mode 100644
index 0000000000..d179444ee4
--- /dev/null
+++ b/DirectProgramming/DPC++/ParallelPatterns/ParallelPrefixSumScan/src/scan.cpp
@@ -0,0 +1,520 @@
+//============================================================================
+// Copyright © 2022 Intel Corporation
+//
+// SPDX-License-Identifier: MIT
+// ===========================================================================
+
+//****************************************************************************
+// 
+// Description:
+// Example of a parallel inclusive scan in SYCL. Based on the two-phase 
+// exclusive scan algorithm paper by Guy E. Blelloch titled "Prefix Sums and 
+// Their Applications", 1990.
+//
+// Usage:
+// The program takes one argument: host / cpu / gpu / accelerator.
+//
+//*****************************************************************************
+
+// SYCL / Intel oneAPI files:
+#include 
+#include "dpc_common.hpp"
+
+// Third party files:
+#include 
+#include 
+#include 
+#include 
+
+// This project's files:
+#include "device_selector.hpp"
+
+using namespace sycl;
+using namespace std;
+
+// Asynchronous errors hander, catch faults in asynchronously executed code
+// inside a command group or a kernel. They can occur in a different stackframe, 
+// asynchronous error cannot be propagated up the stack. 
+// By default, they are considered 'lost'. The way in which we can retrieve them
+// is by providing an error handler function.
+auto exception_handler = []( sycl::exception_list exceptions ) 
+{
+    for( std::exception_ptr const &e : exceptions ) 
+    {
+        try 
+        {
+          std::rethrow_exception( e );
+        } 
+        catch( sycl::exception const &e ) 
+        {
+          std::cout << "Queue handler caught asynchronous SYCL exception:\n" << e.what() << std::endl;
+        }
+    }
+};
+
+// Forward decleration of functions
+template< typename T, typename OP >
+void ParallelScan( sycl::buffer< T, 1 > &bufIn, sycl::queue &q );
+int TestSum( sycl::queue &q );
+int TestFactorial( sycl::queue &q );
+
+int main( int argc, char *argv[] ) 
+{
+  CUtilDeviceTargets utilsDev;
+  FnResult fnResult = utilsDev.DiscoverDevsWeWant();
+  if( !fnResult.bSuccess )
+  {
+    cerr << "Program failure: Unable to discover target devices on this platform.\n";
+    exit( -1 );
+  }
+
+  fnResult = UserCheckTheirInput( utilsDev, argc, argv ); 
+  if( !fnResult.bSuccess ) 
+  {
+    cerr << fnResult.strErrMsg << "\n";
+    exit( 1 );
+  }
+
+  bool bDoDevDiscovery = false;
+  fnResult = UserWantsToDiscoverPossibleTargets( argv, bDoDevDiscovery );
+  if( !fnResult.bSuccess )
+  {
+    cerr << fnResult.strErrMsg << "\n";
+    exit( -1 );
+  }
+  if( bDoDevDiscovery ) exit( 1 );
+
+  const SDeviceFoundProxy *pUsersChosenDevice = utilsDev.GetDevUsersFirstChoice();
+  if( pUsersChosenDevice == nullptr )
+  {
+    cerr << "Program failure: Did not create a valid target device object.\n";
+    exit( -1 );
+  }
+
+  int retResultSum = 0;
+  int retResultFactorial = 0;
+
+  try
+  {
+    queue myQueue( pUsersChosenDevice->theDevice, exception_handler );
+
+    string strTheDeviceBeingUsed;
+    fnResult = CUtilDeviceTargets::GetQueuesCurrentDevice( myQueue, strTheDeviceBeingUsed );
+    if( !fnResult.bSuccess )
+    {
+      cerr << fnResult.strErrMsg << "\n";
+      exit( -1 );
+    }
+    cout << strTheDeviceBeingUsed << "\n";
+
+    retResultSum = TestSum( myQueue );
+    retResultFactorial =  (retResultSum == 0) && TestFactorial( myQueue );
+  }
+  catch( sycl::exception const &e ) 
+  {
+    cout << "Fail; SYCL synchronous exception occurred: " << e.what() << "\n";
+    return -1;
+  }
+  catch( std::exception const &e ) 
+  {
+    cout << "Fail; Runtime synchronous exception occurred: " << e.what() << "\n";
+    return -1;
+  }
+
+  if( (retResultSum != 0) || (retResultFactorial != 0) ) 
+  {
+    return 1;
+  }
+
+  cout << "Results are correct." << std::endl;
+  
+  return 0;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+
+// The identity element for a given operation.
+template< typename T, typename OP >
+struct SIdentity {};
+
+template< typename T >
+struct SIdentity< T, std::plus< T > > 
+{
+  static constexpr T value = 0;
+};
+
+template< typename T >
+struct SIdentity< T, std::multiplies< T > > 
+{
+  static constexpr T value = 1;
+};
+
+template< typename T >
+struct SIdentity< T, std::logical_or< T > > 
+{
+  static constexpr T value = false;
+};
+
+template< typename T >
+struct SIdentity< T, std::logical_and< T > > 
+{
+  static constexpr T value = true;
+};
+
+// Dummy struct to generate unique kernel name types
+template< typename T, typename U, typename V >
+struct SKernelNameType {};
+
+
+///////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+// Performs an inclusive scan with the given associative binary operation `OP`
+// on the data in the `bufIn` buffer. Runs in parallel on the provided 
+// accelerated hardware queue. Modifies the input buffer to contain the 
+// results of the scan.
+// Input size has to be a power of two. If the size isn't so, the input can
+// easily be padded to the nearest power of two with any values, and the scan
+// on the meaningful part of the data will stay the same.
+template< typename T, typename OP >
+void ParallelScan( sycl::buffer< T, 1 > &bufIn, sycl::queue &q ) 
+{
+  // Retrieve the device associated with the given queue.
+  const sycl::device dev = q.get_device();
+  const bool bHwIsCpu = dev.is_cpu();
+
+  // Check if local memory is available. On host no local memory is fine, since
+  if( !bHwIsCpu && 
+      (dev.get_info< sycl::info::device::local_mem_type >() ==
+       sycl::info::local_mem_type::none) ) 
+  {
+    throw std::runtime_error( "Non host device does not have local memory." );
+  }
+
+  const size_t bufSize = bufIn.size();
+  if( ((bufSize & (bufSize - 1)) != 0) || (bufSize == 0) ) 
+  {
+    throw std::runtime_error( 
+      "Given input buffer size is not a power of two." );
+  }
+  
+  // Check if there is enough global memory.
+  const size_t globalMemSize = 
+      dev.get_info< sycl::info::device::global_mem_size >();
+  if( !bHwIsCpu && (bufSize > (globalMemSize * 0.5) ) ) 
+  {
+    throw std::runtime_error( 
+      "Non host device input size exceeds device global memory size." );
+  }
+
+  // Obtain device limits.
+  const size_t maxWgroupSize =
+      dev.get_info< sycl::info::device::max_work_group_size >();
+  const size_t localMemSize = 
+      dev.get_info< sycl::info::device::local_mem_size >();
+
+  // Find a work-group size that is guaranteed to fit in local memory and is
+  // below the maximum work-group size of the device.
+  const size_t wgroupSizeLim =
+      sycl::min( maxWgroupSize, localMemSize / (2 * sizeof( T )) );
+
+  // Every work-item processes two elements, so the work-group size has to
+  // divide this number evenly. 
+  const size_t halfInBufSize = bufSize * 0.5;
+
+  // Find the largest power of two that divides half_in_size and is within the
+  // device limit.
+  size_t wgroupSize = 0;
+  size_t pow = size_t( 1 ) << (sizeof( size_t ) * 8 - 1);
+  for( ; pow > 0; pow >>= 1 ) 
+  {
+    if( (halfInBufSize / pow) * pow == 
+      halfInBufSize && (pow <= wgroupSizeLim) ) 
+    {
+      wgroupSize = pow;
+      break;
+    }
+  }
+  if( wgroupSize == 0 ) 
+  {
+    throw std::runtime_error(
+        "Could not find an appropriate work-group size for the given input." );
+  }
+  const size_t dblWgrpSize = wgroupSize * 2;
+
+  q.submit( [&]( sycl::handler &cgh ) 
+  {
+    const auto ptrData = 
+      bufIn.template get_access< sycl::access::mode::read_write >( cgh );
+
+    // Using scratch/local memory (to a work group) for faster memory 
+    // access to compute the results
+    sycl::accessor< T, 1, sycl::access::mode::read_write, 
+                          sycl::access::target::local > 
+                          scratch( wgroupSize * 2, cgh);
+
+    // Use dummy struct as the unique kernel name.
+    cgh.parallel_for< SKernelNameType< T, OP, class CScanSegments > >(
+        sycl::nd_range< 1 >( halfInBufSize, wgroupSize ),
+        [=]( sycl::nd_item< 1 > item ) 
+        {
+          const size_t gid = item.get_global_linear_id();
+          const size_t lid = item.get_local_linear_id();
+
+          // Read data into local memory.
+          scratch[ 2 * lid ] = ptrData[ 2 * gid ];
+          scratch[ 2 * lid + 1 ] = ptrData[ 2 * gid + 1 ];
+
+          // Preserve the second input element to add at the end.
+          const auto secondInput = scratch[ 2 * lid + 1 ];
+
+          // Perform partial reduction (up-sweep) on the data. The `off`
+          // variable is 2 to the power of the current depth of the
+          // reduction tree. In the paper, this corresponds to 2^d.
+          for( size_t off = 1; off < (wgroupSize * 2); off *= 2 ) 
+          {
+            // Synchronize local memory to observe the previous writes.
+            item.barrier( sycl::access::fence_space::local_space );
+
+            const size_t i = lid * off * 2;
+            if( i < dblWgrpSize ) 
+            {
+              const size_t index = i + (off * 2) - 1;
+              scratch[ index ] =
+                  OP{}( scratch[ index ], scratch[ i + off - 1 ] );
+            }
+          }
+
+          // Clear the last element to the identity before down-sweeping.
+          if( lid == 0 ) 
+          {
+            scratch[ dblWgrpSize - 1 ] = SIdentity< T, OP >::value;
+          }
+
+          // Perform down-sweep on the tree to compute the whole scan.
+          // Again, `off` is 2^d. 
+          for( size_t off = wgroupSize; off > 0; off >>= 1 ) 
+          {
+            item.barrier( sycl::access::fence_space::local_space );
+
+            const size_t i = lid * off * 2;
+            if( i < dblWgrpSize ) 
+            {
+              const size_t indexT = i + off - 1;
+              const size_t indexU = i + (off * 2) - 1;
+              const auto t = scratch[ indexT ];
+              const auto u = scratch[ indexU ];
+              scratch[ indexT ] = u;
+              scratch[ indexU ] = OP{}( t, u );
+            }
+          }
+
+          // Synchronize again to observe results.
+          item.barrier( sycl::access::fence_space::local_space );
+
+          // To return an inclusive rather than exclusive scan result, shift
+          // each element left by 1 when writing back into global memory. If
+          // we are the last work-item, also add on the final element. 
+          const size_t indexL1 = 2 * lid + 1;
+          const size_t indexL2 = 2 * lid + 2;
+          const size_t indexG1 = 2 * gid;
+          const size_t indexG2 = 2 * gid + 1;
+          ptrData[ indexG1 ] = scratch[ indexL1 ];
+          if( lid == wgroupSize - 1 ) 
+          {
+            ptrData[ indexG2 ] = OP{}( scratch[ indexL1 ], secondInput );
+          } 
+          else 
+          {
+            ptrData[ indexG2 ] = scratch[ indexL2 ];
+          }
+        }    // [=]( sycl::nd_item< 1 > item )
+      );  // cgh.parallel_for< SKernelNameType< T, OP, class CScanSegments > >(
+  }); // q.submit( [&]( sycl::handler &cgh )
+
+  // At this point we have computed the inclusive scans of this many segments.
+  const size_t nSegments = halfInBufSize / wgroupSize;
+
+  if( nSegments == 1 ) 
+  {
+    // If all of the data is in one segment, we're done.
+    return;
+  }
+  // Otherwise we have to propagate the scan results forward into later
+  // segments.
+
+  // Allocate space for one (last) element per segment.
+  sycl::buffer< T, 1 > bufEndSegment{ sycl::range< 1 >( nSegments ) };
+
+  // Store the elements in this space.
+  q.submit( [&](sycl::handler &cgh ) 
+  {
+    const auto ptrScans = bufIn.template get_access< 
+                                   sycl::access::mode::read >( cgh );
+    const auto ptrElems = bufEndSegment.template get_access< 
+                                   sycl::access::mode::discard_write >( cgh );
+
+    cgh.parallel_for< SKernelNameType< T, OP, class CCopyEndSeg > >(
+        sycl::range< 1 >( nSegments ), 
+        [=]( sycl::item< 1 > item ) 
+        {
+          const size_t id = item.get_linear_id();
+          // Offset into the last element of each segment.
+          ptrElems[ item ] = ptrScans[ (id + 1) * 2 * wgroupSize - 1 ];
+        });
+  });
+
+  // Recursively scan the array of last elements.
+  ParallelScan< T, OP >( bufEndSegment, q );
+
+  // Add the results of the scan to each segment.
+  q.submit( [&]( sycl::handler &cgh ) 
+  {
+    const auto ptrEndSegScan = bufEndSegment.template get_access< 
+                                      sycl::access::mode::read >( cgh );
+    const auto ptrDataIn = bufIn.template get_access< 
+                                      sycl::access::mode::read_write >( cgh );
+
+    cgh.parallel_for< SKernelNameType< T, OP, class CAddEndSeg > >(
+        // Work with one less work-group, since the first segment is correct.
+        sycl::nd_range< 1 >( halfInBufSize - wgroupSize, wgroupSize ),
+        [=](sycl::nd_item< 1 > item) 
+        {
+          const size_t grpLinId = item.get_group_linear_id();
+
+          // Start with the second segment.
+          const size_t glbIdOff = item.get_global_linear_id() + wgroupSize;
+
+          // Each work-group adds the corresponding number in the
+          // "last element scan" array to every element in the group's
+          // segment.
+          ptrDataIn[ glbIdOff * 2 ] = OP{}( ptrDataIn[ glbIdOff * 2 ], 
+                                            ptrEndSegScan[ grpLinId ] );
+          ptrDataIn[ glbIdOff * 2 + 1 ] = OP{}( ptrDataIn[ glbIdOff * 2 + 1 ], 
+                                                ptrEndSegScan[ grpLinId ] );
+        });
+  });
+}
+
+///////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+// Tests the scan with an addition operation, which is its most common use.
+// Returns 0 if successful, a nonzero value otherwise.
+int TestSum( sycl::queue &q ) 
+{
+  constexpr size_t size = 64;
+
+  // Initializes a vector of sequentially increasing values.
+  std::vector< int32_t > arrayIn( size );
+  std::iota( arrayIn.begin(), arrayIn.end(), 1 );
+
+  // Compute the prefix sum using SYCL.
+  std::vector< int32_t > arraySum( arrayIn.size() );
+  
+  {
+    // Read from `arrayIn`, but write into `arraySum`.
+    buffer< int32_t, 1 > bufArrayIn( sycl::range< 1 >( arrayIn.size() ) );
+    bufArrayIn.set_final_data( arraySum.data() );
+
+    q.submit( [&](sycl::handler &cgh) 
+    {
+      const auto acc = 
+          bufArrayIn.get_access< sycl::access::mode::write >( cgh );
+      cgh.copy( arrayIn.data(), acc );
+    });
+
+    ParallelScan< int32_t, std::plus< int32_t > >( bufArrayIn, q );
+  }
+
+  // Compute the same operation using the standard library.
+  std::vector < int32_t > arrayTestSum( arrayIn.size() );
+  std::partial_sum( arrayIn.begin(), arrayIn.end(), arrayTestSum.begin() );
+
+  cout << "\nSYCL compute's sum results:\n";
+  for( auto a : arraySum ) 
+  {
+    cout << a << " ";
+  }
+  cout << std::endl;
+
+  // Check if the results are correct.
+  const bool bEqual = 
+    std::equal( arraySum.begin(), arraySum.end(), arrayTestSum.begin() );
+  if( !bEqual ) 
+  {
+    cout << "SYCL sum computation incorrect!\n";
+    cout << "std::partial_sum's results:\n";
+        
+    for( auto a : arrayTestSum ) 
+    {
+      cout << a << " ";
+    }
+    
+    return 1;
+  }
+
+  return 0;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+// Tests the scan with a multiply operation, which is a sequence of factorials.
+// Returns 0 if successful, a nonzero value otherwise.
+int TestFactorial( sycl::queue &q ) 
+{
+  // Anything above this size overflows the int64_t type
+  constexpr size_t size = 16;
+
+  // Initializes a vector of sequentially increasing values.
+  std::vector< int64_t > arrayIn( size );
+  std::iota( arrayIn.begin(), arrayIn.end(), 1 ); 
+
+  // Compute a sequence of factorials using SYCL.
+  std::vector< int64_t > arrayFact( arrayIn.size() );
+  {
+    // Read from `arrayIn`, but write into `arrayFact`.
+    sycl::buffer< int64_t, 1 > bufArrayIn( sycl::range< 1 >( arrayIn.size() ));
+    bufArrayIn.set_final_data( arrayFact.data() );
+    q.submit( [&](sycl::handler &cgh ) 
+    {
+      const auto acc = bufArrayIn.get_access< sycl::access::mode::write >( cgh );
+      cgh.copy( arrayIn.data(), acc );
+    });
+
+    ParallelScan< int64_t, std::multiplies< int64_t > >( bufArrayIn, q );
+  }
+
+  // Compute the same operation using the standard library.
+  std::vector< int64_t > arrayTestFact( arrayIn.size() );
+  std::partial_sum( arrayIn.begin(), arrayIn.end(), arrayTestFact.begin(),
+                    std::multiplies< int64_t >{} );
+
+  cout << "\nSYCL compute's factorial results:\n";
+  for( auto a : arrayFact ) 
+  {
+    cout << a << " ";
+  }
+  cout << std::endl;
+    
+  // Check if the results are correct.
+  const bool bEqual = std::equal( arrayFact.begin(), arrayFact.end(), 
+                                  arrayTestFact.begin() );
+  if( !bEqual ) 
+  {
+    cout << "SYCL factorial computation incorrect!\n";
+    cout << "std::partial_sum's results:\n";
+    
+    for( auto a : arrayTestFact ) 
+    {
+      cout << a << " ";
+    }
+    
+    return 1;
+  }
+
+  return 0;
+}
\ No newline at end of file