/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/epilogue/chainer/cshuffle_epilogue_schedule.hpp Source File

/home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/epilogue/chainer/cshuffle_epilogue_schedule.hpp Source File

Composable Kernel: /home/docs/checkouts/readthedocs.org/user_builds/advanced-micro-devices-composable-kernel/checkouts/develop/include/ck_tile/ops/epilogue/chainer/cshuffle_epilogue_schedule.hpp Source File
cshuffle_epilogue_schedule.hpp
Go to the documentation of this file.
1 // Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
2 // SPDX-License-Identifier: MIT
3 
4 #pragma once
5 
9 
10 namespace ck_tile {
11 
15 
// Empty tag type used to select a schedule at compile time; the page index describes this
// definition as "Schedule type tags for epilogue selection".
// NOTE(review): the declaration line naming the type (listing line 17) is not shown in this
// listing; presumably `struct DefaultScheduleTag`, the default ScheduleTag argument - confirm.
18 {
19 };
20 
// Empty tag type; the page index describes this definition as the RowCol quantization schedule:
// Slice -> ScaleWindow -> CastStore -> Load -> ApplyD -> Store -> Move.
// NOTE(review): the declaration line naming the type (listing line 22) is not shown in this
// listing; presumably `struct RowColQuantScheduleTag` - confirm.
23 {
24 };
25 
// Empty tag type; the page index describes this definition as the Tensor quantization schedule:
// Slice -> ScaleScalar -> CastStore -> Load -> ApplyD -> Store -> Move.
// NOTE(review): the declaration line naming the type (listing line 27) is not shown in this
// listing; presumably `struct TensorQuantScheduleTag` - confirm.
28 {
29 };
30 
// CShuffle epilogue scheduler providing pre-built schedules.
//
// Template parameters:
//   Problem     - problem description type, re-exported below as ProblemType.
//   ScheduleTag - tag type choosing which schedule make_schedule() builds; defaults to
//                 DefaultScheduleTag.
//
// NOTE(review): this listing omits several lines (the embedded numbering jumps). The declaration
// line naming the struct (listing line 41) is not shown, and neither are listing lines 44 and 46.
// The members below use `BaseOp` and `NumAccess`, whose declarations are not visible here; the
// page index lists NumAccess as a `static constexpr index_t` defined at listing line 46, and
// BaseOp is presumably declared on listing line 44 - confirm against the real header.
40 template <typename Problem, typename ScheduleTag = DefaultScheduleTag>
42 {
    // Re-export of the Problem template argument.
43  using ProblemType = Problem;
45 
47 
    // Create context for epilogue operations.
    //
    // Default-constructs a BaseOp, invokes it with the four arguments unchanged, and returns
    // whatever that call returns (the return type is deduced).
    //
    //   out_window  - passed by non-const reference.
    //   acc_tile    - passed by const reference.
    //   aux_windows - passed by const reference.
    //   p_smem      - untyped pointer handed to BaseOp as-is (presumably a shared-memory/LDS
    //                 workspace - confirm against BaseOp).
49  template <typename OutWindow, typename AccTile, typename AuxWindows>
50  CK_TILE_DEVICE static auto create_context(OutWindow& out_window,
51  const AccTile& acc_tile,
52  const AuxWindows& aux_windows,
53  void* p_smem)
54  {
55  return BaseOp{}(out_window, acc_tile, aux_windows, p_smem);
56  }
57 
    // Make schedule based on compile-time tag selection.
    //
    // Exactly one `if constexpr` branch is chosen by comparing ScheduleTag with the three tag
    // types. Each branch checks the number of arguments with a static_assert and returns the
    // result of make_graph<NumAccess>(...):
    //   DefaultScheduleTag     - no arguments.
    //   RowColQuantScheduleTag - exactly 2 arguments, forwarded to the CShuffleScaleWindowOp node.
    //   TensorQuantScheduleTag - exactly 2 arguments, forwarded to the ScaleScalarOp node.
    // Any other tag hits the static_assert in the final else branch.
    //
    // NOTE(review): the argument lists of the make_graph calls are truncated in this listing
    // (several listing lines are missing inside each call), so the full node sequence must be
    // read from the real header; the "Schedule:" comments in each branch give the intended order.
59  template <typename... Args>
60  CK_TILE_DEVICE static auto make_schedule(Args&&... args)
61  {
62  if constexpr(std::is_same_v<ScheduleTag, DefaultScheduleTag>)
63  {
64  // Standard epilogue
65  // Schedule: Slice -> CastAndStoreLds -> Load -> ApplyD -> Store -> MoveWindows
66  static_assert(sizeof...(args) == 0, "DefaultSchedule expects no arguments");
67  return make_graph<NumAccess>(
        // (listing line 68 not shown)
69  typename BaseOp::CWarpDstr,
        // (listing lines 70-78 not shown: remaining make_graph arguments and the closing of the call)
79  }
80  else if constexpr(std::is_same_v<ScheduleTag, RowColQuantScheduleTag>)
81  {
82  // RowCol quantization schedule with tensor windows
83  // Schedule: Slice -> ScaleWindow -> CastAndStoreLds -> Load -> ApplyD -> Store ->
84  // MoveWindows
85  static_assert(sizeof...(args) == 2,
86  "RowColQuantSchedule requires exactly 2 scale tensor arguments");
87  return make_graph<NumAccess>(
        // (listing line 88 not shown)
89  typename BaseOp::CWarpDstr,
        // (listing lines 90-93 not shown)
        // Both caller-supplied arguments are forwarded into the scale-window node.
94  make_node<CShuffleScaleWindowOp<typename BaseOp::SFC>>(std::forward<Args>(args)...),
        // (listing lines 95-99 not shown: remaining make_graph arguments and the closing of the call)
100  }
101  else if constexpr(std::is_same_v<ScheduleTag, TensorQuantScheduleTag>)
102  {
103  // Tensor quantization schedule with scalar values
104  // Schedule: Slice -> ScaleScalar -> CastAndStoreLds -> Load -> ApplyD -> Store ->
105  // MoveWindows
106  static_assert(sizeof...(args) == 2,
107  "TensorQuantSchedule requires exactly 2 scalar arguments");
108  return make_graph<NumAccess>(
        // (listing line 109 not shown)
110  typename BaseOp::CWarpDstr,
        // (listing lines 111-114 not shown)
        // Both caller-supplied arguments are forwarded into the scalar-scale node.
115  make_node<ScaleScalarOp>(std::forward<Args>(args)...),
        // (listing lines 116-120 not shown: remaining make_graph arguments and the closing of the call)
121  }
122  else
123  {
        // NOTE(review): a literal `static_assert(false, ...)` in a discarded `if constexpr`
        // branch is only guaranteed well-formed on compilers implementing P2593 (C++23, applied
        // retroactively by recent GCC/Clang); older toolchains may reject the template even when
        // this branch is never selected. A condition that depends on ScheduleTag would avoid
        // that - confirm the supported compiler range before changing.
124  static_assert(false, "Unknown schedule tag");
125  }
126  }
127 };
128 
129 } // namespace ck_tile
Reusable simple epilogue operations that can be composed into more complex ones.
#define CK_TILE_DEVICE
Definition: config.hpp:45
Definition: cluster_descriptor.hpp:13
int32_t index_t
Definition: integer.hpp:9
constexpr auto make_node(Args... args)
Helper function for creating epilogue nodes.
Definition: epilogue_chainer.hpp:201
Definition: cshuffle_epilogue_chainer_ops.hpp:181
static constexpr index_t NumMXdlPerWavePerShuffle
Definition: cshuffle_epilogue_chainer_ops.hpp:326
static constexpr index_t MPerIterationShuffle
Definition: cshuffle_epilogue_chainer_ops.hpp:338
typename WG::CWarpDstr CWarpDstr
Definition: cshuffle_epilogue_chainer_ops.hpp:349
static constexpr index_t NumNXdlPerWavePerShuffle
Definition: cshuffle_epilogue_chainer_ops.hpp:327
static constexpr index_t NPerIterationShuffle
Definition: cshuffle_epilogue_chainer_ops.hpp:339
Scale working tile using tensor windows (CShuffle-specific)
Definition: cshuffle_epilogue_chainer_ops.hpp:79
Slice accumulator tile for CShuffle epilogue.
Definition: cshuffle_epilogue_chainer_ops.hpp:39
Cast working tile and store to LDS.
Definition: common_epilogue_ops.hpp:58
CShuffle epilogue scheduler providing pre-built schedules.
Definition: cshuffle_epilogue_schedule.hpp:42
static constexpr index_t NumAccess
Definition: cshuffle_epilogue_schedule.hpp:46
static CK_TILE_DEVICE auto create_context(OutWindow &out_window, const AccTile &acc_tile, const AuxWindows &aux_windows, void *p_smem)
Create context for epilogue operations.
Definition: cshuffle_epilogue_schedule.hpp:50
static CK_TILE_DEVICE auto make_schedule(Args &&... args)
Make schedule based on compile-time tag selection.
Definition: cshuffle_epilogue_schedule.hpp:60
Problem ProblemType
Definition: cshuffle_epilogue_schedule.hpp:43
Schedule type tags for epilogue selection.
Definition: cshuffle_epilogue_schedule.hpp:18
Apply elementwise operation with auxiliary tensors.
Definition: common_epilogue_ops.hpp:114
Load output tile from LDS with synchronization.
Definition: common_epilogue_ops.hpp:85
Move output and auxiliary windows by step from space-filling curve.
Definition: common_epilogue_ops.hpp:180
RowCol quantization schedule: Slice → ScaleWindow → CastStore → Load → ApplyD → Store → Move.
Definition: cshuffle_epilogue_schedule.hpp:23
Store output tile to global memory.
Definition: common_epilogue_ops.hpp:147
Tensor quantization schedule: Slice → ScaleScalar → CastStore → Load → ApplyD → Store → Move.
Definition: cshuffle_epilogue_schedule.hpp:28
Definition: space_filling_curve.hpp:20
static constexpr CK_TILE_HOST_DEVICE index_t get_num_of_access()
Definition: space_filling_curve.hpp:46