Skip to content

@shared in DPCPP incorrect output #237

@Kyrylo-Mazhara

Description

@Kyrylo-Mazhara

In the example below, the converted @shared declaration is emitted in the same place where it was originally declared (inside the outer loop). However, when the same test code is run through OCCA, the converted @shared declaration is emitted before the loop.

Input:

@kernel void test_kern() {
    @tile(4, @outer) for (int i = 0; i < 10; ++i) {
        @shared int shm[10];
        @tile(4, @inner, @inner) for (int j = 0; j < 10; ++j) {
            shm[j] = j;
        }
    }
}

Output:

#include <CL/sycl.hpp>
using namespace sycl;

extern "C" [[sycl::reqd_work_group_size(1, 3, 4)]] void _occa_test_kern_0(sycl::queue *queue_,
                                  sycl::nd_range<3> *range_) {
  queue_->submit([&](sycl::handler &handler_) {
    handler_.parallel_for(*range_, [=](sycl::nd_item<3> item_) {
      {
        int _occa_tiled_i = (0) + ((4) * item_.get_group(2));
        for (int i = _occa_tiled_i; i < (_occa_tiled_i + (4)); ++i) {
          if (i < 10) {
   ->       auto &shm =
                *(sycl::ext::oneapi::group_local_memory_for_overwrite<int[10]>(
                    item_.get_group()));
            {
              int _occa_tiled_j = (0) + ((4) * item.get_local_id(1));
              {
                int j = _occa_tiled_j + item.get_local_id(2);
                if (j < 10) {
                  shm[j] = j;
                }
              }
            }
            item_.barrier(sycl::access::fence_space::local_space);
          }
        }
      }
    });
  });
}

Expected output:

#include <CL/sycl.hpp>
 using namespace sycl;

extern "C" void _occa_test_kern_0(sycl::queue * queue_,
                                  sycl::nd_range<3> * range_) {
  queue_->submit(
    [&](sycl::handler & handler_) {
      handler_.parallel_for(
        *range_,
        [=](sycl::nd_item<3> item_)  {
 ->       auto & shm = *(sycl::ext::oneapi::group_local_memory_for_overwrite<int[10]>(item_.get_group()));
          {
            int _occa_tiled_i = 0 + (4 * item_.get_group(2));
            for (int i = _occa_tiled_i; i < (_occa_tiled_i + 4); ++i) {
              if (i < 10) {
                {
                  int _occa_tiled_j = 0 + (4 * item_.get_local_id(1));
                  {
                    int j = _occa_tiled_j + item_.get_local_id(2);
                    if (j < 10) {
                      shm[j] = j;
                    }
                  }
                }
                item_.barrier(sycl::access::fence_space::local_space);
              }
            }
          }
        }
      );
    }
  );
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions