54#if defined(HAVE_GOOGLE_BENCHMARK) && defined(HAVE_NV_BENCH)
55#warning "Please only use one Plugin."
58#if defined(HAVE_GOOGLE_BENCHMARK)
62#if defined(HAVE_NV_BENCH)
102 template <
typename T>
105 asm volatile(
"" : :
"r,m"(value) :
"memory");
116 template <
typename T>
119#if defined(__clang__)
120 asm volatile(
"" :
"+r,m"(value) : :
"memory");
122 asm volatile(
"" :
"+m,r"(value) : :
"memory");
134 template <
typename T>
137#if defined(__clang__)
138 asm volatile(
"" :
"+r,m"(value) : :
"memory");
140 asm volatile(
"" :
"+m,r"(value) : :
"memory");
148 std::atomic_signal_fence(std::memory_order_acq_rel);
191 template <
typename Value,
typename Policy =
void>
214 template <
typename Value>
241 template <
typename Value,
typename Policy>
257 template <
typename Value,
typename Policy>
266 template <
typename Value,
typename Policy>
270 template <
typename T>
273 typename outspec<T>::outtype_t,
274 typename outspec<T>::policy_t>;
281 template <
typename... Inputs>
290 template <
OutSpec... OutputSpecs>
298 template <
typename S>
304 template <
typename S>
324 using InTup = std::tuple<std::decay_t<Inputs>...>;
328 using OutTup = std::tuple<outtype_t<OutputSpecs>...>;
366 static constexpr size_t NUM_OUT =
sizeof...(OutputSpecs);
374 std::shared_ptr<plugin::Plugin<InTup, OutTup>>
plugin_;
386 throw std::logic_error(
"Multiple plugins are not supported");
441#ifdef HAVE_GOOGLE_BENCHMARK
475 template <
template <
class,
class>
class Plugin>
479 auto adp = Plugin<InTup, OutTup>::instance();
504 template <std::
size_t I>
505 static constexpr std::size_t
spec_metric_count() {
return std::tuple_element_t<I, PolicyTup>::metric_count(); }
514 template <std::
size_t I>
515 static constexpr std::string_view
spec_metric_name(std::size_t m) {
return std::tuple_element_t<I, PolicyTup>::metric_name(m); }
532 std::cout << std::left << comppare::internal::ansi::BOLD
533 <<
"*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=\n============ "
534 << comppare::internal::ansi::ITALIC(
"ComPPare Framework")
535 <<
" ============\n=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*"
536 << comppare::internal::ansi::BOLD_OFF <<
"\n\n";
538 << std::left << std::setw(30) <<
"Number of implementations: "
539 << std::right << std::setw(10) <<
impls_.size() <<
"\n"
540 << std::left << std::setw(30) <<
"Warmup iterations: "
542 << std::left << std::setw(30) <<
"Benchmark iterations: "
544 << std::left << comppare::internal::ansi::BOLD(
"=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*") <<
"\n\n";
547 std::cout << comppare::internal::ansi::UNDERLINE << comppare::internal::ansi::BOLD
556 auto &&_print_metric_header = [
this]<std::size_t I>()
558 for (std::size_t m = 0; m < this->
template spec_metric_count<I>(); ++m)
561 << comppare::internal::ansi::UNDERLINE(
562 comppare::internal::ansi::BOLD(
563 std::string(this->
template spec_metric_name<I>(m)) +
"[" + std::to_string(I) +
"]"));
568 [&]<std::size_t... I>(std::index_sequence<I...>)
573 (_print_metric_header.template operator()<I>(), ...);
574 }(std::make_index_sequence<NUM_OUT>{});
576 std::cout << std::endl;
589 auto &&_compute_errors = [&]<std::size_t I>()
592 [&]<std::size_t... I>(std::index_sequence<I...>)
594 (_compute_errors.template operator()<I>(), ...);
595 }(std::make_index_sequence<NUM_OUT>{});
606 auto &&_any_fail = [&]<std::size_t I>() ->
bool
611 return [&]<std::size_t... I>(std::index_sequence<I...>) ->
bool
614 ((fail |= _any_fail.template operator()<I>()), ...);
616 }(std::make_index_sequence<NUM_OUT>{});
626 auto &&_print_metrics = [&errs]<std::size_t I>()
628 for (std::size_t m = 0; m < spec_metric_count<I>(); ++m)
629 std::cout << std::setw(
PRINT_COL_WIDTH) << std::scientific << std::get<I>(errs).metric(m);
632 [&]<std::size_t... I>(std::index_sequence<I...>)
634 (_print_metrics.template operator()<I>(), ...);
635 }(std::make_index_sequence<NUM_OUT>{});
647 throw std::logic_error(
"run() has not been executed");
649 throw std::out_of_range(
"Index out of range for outputs");
663 throw std::logic_error(
"run() has not been executed");
664 for (
size_t i = 0; i <
impls_.size(); ++i)
666 if (
impls_[i].name == name)
669 std::stringstream os;
670 os <<
"Output with name '" << name <<
"' not found";
671 throw std::invalid_argument(os.str());
683 [&](
auto &...outtup_elem)
685 ((*outs = outtup_elem), ...);
700 template <
typename... Ins>
702 :
inputs_(std::forward<Ins>(ins)...) {}
724 template <
typename F>
728 impls_.insert(
impls_.begin(), {std::move(name), Func(std::forward<F>(f)), &inputs_, this});
743 template <
typename F>
744 requires std::invocable<F, const std::decay_t<Inputs> &..., outtype_t<OutputSpecs> &...>
805 requires(
sizeof...(OutputSpecs) > 0)
824 requires(
sizeof...(OutputSpecs) > 0)
843 requires(
sizeof...(OutputSpecs) > 0)
856 char **argv =
nullptr)
862 std::cerr <<
"\n*----------*\nNo implementations added to the ComPPare Framework.\n*----------*\n";
871 for (
size_t k = 0; k <
impls_.size(); ++k)
878 double func_duration;
884 std::apply([&](
auto const &...in)
888 auto func_start = comppare::config::clock_t::now();
889 impl.fn(in..., out...);
890 auto func_end = comppare::config::clock_t::now();
891 func_duration = std::chrono::duration<double, std::micro>(func_end - func_start).count();
899 double func_us = func_duration - warmup_us;
900 double ovhd_us = func_us - roi_us;
909 outputs_.push_back(std::make_shared<OutTup>(std::move(outs)));
911 std::cout << comppare::internal::ansi::RESET
912 << std::left << std::setw(
PRINT_COL_WIDTH) << comppare::internal::ansi::GREEN(impl.name)
913 << std::fixed << std::setprecision(2) << std::right
914 << comppare::internal::ansi::YELLOW
916 << comppare::internal::ansi::DIM
919 << comppare::internal::ansi::RESET;
923 std::cout << comppare::internal::ansi::BG_RED(
"<-- FAIL");
931 plugin_->initialize(argc, argv);
952 template <
typename... Outputs,
typename... Inputs>
956 std::forward<Inputs>(ins)...);
964#define HOTLOOPSTART \
965 auto &&hotloop_body = [&]() {
971#define COMPPARE_HOTLOOP_BENCH \
973 auto warmup_t0 = comppare::config::clock_t::now(); \
974 for (std::size_t i = 0; i < comppare::config::warmup_iters(); ++i) \
976 auto warmup_t1 = comppare::config::clock_t::now(); \
977 comppare::config::set_warmup_us(warmup_t0, warmup_t1); \
980 comppare::config::reset_roi_us(); \
981 auto t0 = comppare::config::clock_t::now(); \
982 for (std::size_t i = 0; i < comppare::config::bench_iters(); ++i) \
984 auto t1 = comppare::config::clock_t::now(); \
986 if (comppare::config::get_roi_us() == double(0.0)) \
987 comppare::config::set_roi_us(t0, t1);
989#ifdef PLUGIN_HOTLOOP_BENCH
994 if (comppare::current_state::using_plugin()) \
996 PLUGIN_HOTLOOP_BENCH; \
1000 COMPPARE_HOTLOOP_BENCH; \
1007 COMPPARE_HOTLOOP_BENCH;
1013#define HOTLOOP(LOOP_BODY) \
1014 HOTLOOPSTART LOOP_BODY HOTLOOPEND
1019#define MANUAL_TIMER_START \
1020 auto t_manual_start = comppare::config::clock_t::now();
1025#define MANUAL_TIMER_END \
1026 auto t_manual_stop = comppare::config::clock_t::now(); \
1027 SET_ITERATION_TIME(t_manual_stop - t_manual_start);
1029#ifdef PLUGIN_HOTLOOP_BENCH
1030#define SET_ITERATION_TIME(TIME) \
1031 if (comppare::current_state::using_plugin()) \
1033 PLUGIN_SET_ITERATION_TIME(TIME); \
1037 comppare::config::increment_roi_us(TIME); \
1040#define SET_ITERATION_TIME(TIME) \
1041 comppare::config::increment_roi_us(TIME);
1044#if defined(__CUDACC__)
1045#define GPU_PREFIX cuda
1046#elif defined(__HIPCC__)
1047#define GPU_PREFIX hip
1050#if defined(GPU_PREFIX)
1052#if defined(HAVE_GOOGLE_BENCHMARK)
1053#warning "Not Recommended to use Google Benchmark with GPU_HOTLOOPEND macro. Use SET_ITERATION_TIME and manual timing instead."
1056#define CONCAT_IMPL(x, y) x##y
1057#define CONCAT(x, y) CONCAT_IMPL(x, y)
1063#define GPU_HOTLOOPSTART \
1064 auto &&hotloop_body = [&]() {
1070#define GPU_COMPPARE_HOTLOOP_BENCH \
1072 CONCAT(GPU_PREFIX, Event_t) \
1074 CONCAT(GPU_PREFIX, EventCreate)(&start_); \
1075 CONCAT(GPU_PREFIX, EventCreate)(&stop_); \
1076 CONCAT(GPU_PREFIX, EventRecord)(start_); \
1077 for (std::size_t i = 0; i < comppare::config::warmup_iters(); ++i) \
1079 CONCAT(GPU_PREFIX, EventRecord)(stop_); \
1080 CONCAT(GPU_PREFIX, EventSynchronize)(stop_); \
1082 CONCAT(GPU_PREFIX, EventElapsedTime)(&ms_warmup_, start_, stop_); \
1083 comppare::config::set_warmup_us(1e3 * ms_warmup_); \
1086 comppare::config::reset_roi_us(); \
1087 CONCAT(GPU_PREFIX, EventRecord)(start_); \
1088 for (std::size_t i = 0; i < comppare::config::bench_iters(); ++i) \
1090 CONCAT(GPU_PREFIX, EventRecord)(stop_); \
1091 CONCAT(GPU_PREFIX, EventSynchronize)(stop_); \
1093 CONCAT(GPU_PREFIX, EventElapsedTime)(&ms_, start_, stop_); \
1094 if (comppare::config::get_roi_us() == double(0.0)) \
1095 comppare::config::set_roi_us(1e3 * ms_); \
1096 CONCAT(GPU_PREFIX, EventDestroy)(start_); \
1097 CONCAT(GPU_PREFIX, EventDestroy)(stop_);
1099#if defined(GPU_PLUGIN_HOTLOOP_BENCH)
1101#define GPU_HOTLOOPEND \
1104 if (comppare::current_state::using_plugin()) \
1106 GPU_PLUGIN_HOTLOOP_BENCH; \
1110 GPU_COMPPARE_HOTLOOP_BENCH; \
1114#define GPU_HOTLOOPEND \
1117 GPU_COMPPARE_HOTLOOP_BENCH;
1125#define GPU_MANUAL_TIMER_START \
1126 CONCAT(GPU_PREFIX, Event_t) \
1127 start_manual_timer, stop_manual_timer; \
1128 CONCAT(GPU_PREFIX, EventCreate)(&start_manual_timer); \
1129 CONCAT(GPU_PREFIX, EventCreate)(&stop_manual_timer); \
1130 CONCAT(GPU_PREFIX, EventRecord)(start_manual_timer);
1136#define GPU_MANUAL_TIMER_END \
1137 CONCAT(GPU_PREFIX, EventRecord)(stop_manual_timer); \
1138 CONCAT(GPU_PREFIX, EventSynchronize)(stop_manual_timer); \
1140 CONCAT(GPU_PREFIX, EventElapsedTime)(&ms_manual, start_manual_timer, stop_manual_timer); \
1141 SET_ITERATION_TIME(1e3 * ms_manual); \
1142 CONCAT(GPU_PREFIX, EventDestroy)(start_manual_timer); \
1143 CONCAT(GPU_PREFIX, EventDestroy)(stop_manual_timer);
This file contains utilities for applying ANSI styles and colors to console output.
OutputContext class template to hold output parameters and manage implementations.
Definition comppare.hpp:292
OutputContext(OutputContext &&other)=delete
Deleted move constructor.
void register_plugin(const std::shared_ptr< plugin::Plugin< InTup, OutTup > > &p)
Register a plugin for the output context.
Definition comppare.hpp:381
InTup inputs_
Tuple instance storing all current input arguments.
Definition comppare.hpp:344
std::shared_ptr< OutTup > OutPtr
Shared pointer to an output tuple.
Definition comppare.hpp:339
void get_reference_output(outtype_t< OutputSpecs > *...outs) const
Copies the reference output into provided pointer to variables.
Definition comppare.hpp:804
std::tuple< outtype_t< OutputSpecs >... > OutTup
Tuple type holding all output values (one element per outspec).
Definition comppare.hpp:328
static constexpr std::size_t spec_metric_count()
Get the number of metrics reported by the error policy of output specification I.
Definition comppare.hpp:505
void run(int argc=0, char **argv=nullptr)
Runs the comparison for all added implementations.
Definition comppare.hpp:855
OutputContext(Ins &&...ins)
Construct a new OutputContext.
Definition comppare.hpp:701
OutPtr get_output_by_name_(const std::string_view name) const
Get the output by implementation name.
Definition comppare.hpp:660
PolicyTup policies_ref_
Tuple of policy objects for the reference outputs.
Definition comppare.hpp:357
void unpack_output_(const OutTup &outtup, outtype_t< OutputSpecs > *...outs) const
Unpack the output tuple into the provided output pointers.
Definition comppare.hpp:680
void print_header() const
Print the header for the output table.
Definition comppare.hpp:530
OutPtr get_output_by_index_(const size_t idx) const
Get the output by index.
Definition comppare.hpp:644
OutputContext & operator=(OutputContext &&other)=delete
Deleted move assignment operator.
OutputContext & operator=(const OutputContext &other)=delete
Deleted copy assignment operator.
OutputContext(const OutputContext &other)=delete
Deleted copy constructor.
const OutPtr get_reference_output() const
Get the reference output by pointer.
Definition comppare.hpp:761
std::tuple< pol_t< OutputSpecs >... > PolicyTup
Tuple type holding the error/policy object associated with each output outspec.
Definition comppare.hpp:332
void get_output(const size_t idx, outtype_t< OutputSpecs > *...outs) const
Copies the outputs of a specific implementation by index into provided pointer to variables.
Definition comppare.hpp:823
typename outspec< S >::policy_t pol_t
Extracts the policy type from an outspec.
Definition comppare.hpp:305
const OutPtr get_output(const std::string_view name) const
Get the output for a specific implementation by name.
Definition comppare.hpp:787
static constexpr int PRINT_COL_WIDTH
Set the width of the print columns.
Definition comppare.hpp:520
std::vector< OutPtr > outputs_
Storage for reference and comparison outputs.
Definition comppare.hpp:351
Impl & set_reference(std::string name, F &&f)
Set a reference implementation.
Definition comppare.hpp:726
bool any_fail(const PolicyTup &errs) const
Check if any of the error policies indicate a failure.
Definition comppare.hpp:604
void print_metrics(const PolicyTup &errs) const
Print the metrics for each output specification.
Definition comppare.hpp:624
std::function< void(const std::decay_t< Inputs > &..., outtype_t< OutputSpecs > &...)> Func
Alias for the function signature of a user-provided implementation.
Definition comppare.hpp:319
static constexpr size_t NUM_OUT
Number of output specifications.
Definition comppare.hpp:366
static constexpr std::string_view spec_metric_name(std::size_t m)
Get the name of metric m from the error policy of output specification I.
Definition comppare.hpp:515
typename outspec< S >::outtype_t outtype_t
Extracts the value type from an outspec.
Definition comppare.hpp:299
const OutPtr get_output(const size_t idx) const
Get a shared pointer to the output of a specific implementation, selected by index.
Definition comppare.hpp:774
Impl & add(std::string name, F &&f)
Add a new implementation to the comparison framework.
Definition comppare.hpp:745
void get_output(const std::string_view name, outtype_t< OutputSpecs > *...outs) const
Copies the outputs of a specific implementation by name into provided pointer to variables.
Definition comppare.hpp:842
std::shared_ptr< plugin::Plugin< InTup, OutTup > > plugin_
Shared pointer to the plugin instance.
Definition comppare.hpp:374
std::tuple< std::decay_t< Inputs >... > InTup
Tuple type holding all input arguments.
Definition comppare.hpp:324
void compute_errors(PolicyTup &errs, const OutTup &test, const OutTup &ref)
Compute the error metrics for each output specification.
Definition comppare.hpp:587
std::vector< Impl > impls_
Vector to hold all implementations.
Definition comppare.hpp:495
InputContext class template to hold input parameters for the comparison framework.
Definition comppare.hpp:283
static double get_roi_us()
Get the current roi us value.
Definition config.hpp:163
static uint64_t bench_iters()
Get the number of benchmark iterations.
Definition config.hpp:71
static double get_warmup_us()
Get the current warmup us value.
Definition config.hpp:165
static uint64_t warmup_iters()
Get the number of warmup iterations.
Definition config.hpp:66
static void set_using_plugin(bool v)
Set if a plugin is being used currently.
Definition config.hpp:257
Base class for plugins in the ComPPare framework.
Definition plugin.hpp:60
Concept for output specifications, each being a pair of an output type and an error policy.
Definition comppare.hpp:271
Concept for a valid Error Policy.
Definition policy.hpp:192
Concept for types supported by automatic error policy selection.
Definition policy.hpp:254
Concept for a valid plugin class.
Definition plugin.hpp:81
This file contains configuration settings for the ComPPare library.
This file contains the Google Benchmark plugin for the ComPPare framework.
static void parse_args(int argc, char **argv)
Definition helper.hpp:98
typename AutoPolicy< T >::type AutoPolicy_t
Helper alias to get the automatic error policy type for a given type T.
Definition policy.hpp:545
bool is_fail(const EP &ep)
Wrapper function to check if the error policy indicates a failure.
Definition policy.hpp:232
void compute_error(EP &ep, const V &a, const V &b)
Wrapper function to compute the error using the given instance of the error policy.
Definition policy.hpp:219
ComPPare framework main namespace.
void DoNotOptimize(T const &value)
Prevents the compiler from optimizing away the given value.
Definition comppare.hpp:103
void ClobberMemory()
Definition comppare.hpp:146
auto make_comppare(Inputs &&...ins)
Helper function to create a comppare object.
Definition comppare.hpp:953
This file contains the NVBench plugin for the ComPPare framework.
This file contains the base Plugin class and concept of a valid Plugin class for the ComPPare framework.
This file contains error policies for comparing values in the ComPPare library.
Internal container representing one registered implementation.
Definition comppare.hpp:406
OutputContext * parent_ctx
Pointer to the owning OutputContext. This allows the implementation to register plugins.
Definition comppare.hpp:430
decltype(auto) nvbench()
Attach the nvbench plugin.
Definition comppare.hpp:459
Func fn
The user-provided function implementing the operation.
Definition comppare.hpp:420
decltype(auto) attach()
Attach a plugin to the output context.
Definition comppare.hpp:477
InTup * inputs_ptr
Pointer to the input tuple inputs_.
Definition comppare.hpp:425
std::unique_ptr< OutTup > plugin_output
Unique pointer to the output tuple for plugin runs.
Definition comppare.hpp:439
std::string name
Name of the implementation.
Definition comppare.hpp:412
decltype(auto) google_benchmark()
Attach the Google Benchmark plugin.
Definition comppare.hpp:447
Partial specialization of outspec for user-defined error policy selection.
Definition comppare.hpp:260
std::decay_t< Value > outtype_t
Definition comppare.hpp:261
Policy policy_t
Definition comppare.hpp:262
internal::policy::autopolicy::AutoPolicy_t< Value > policy_t
Definition comppare.hpp:219
std::decay_t< Value > outtype_t
Definition comppare.hpp:218
std::decay_t< Value > outtype_t
Definition comppare.hpp:244
Policy policy_t
Definition comppare.hpp:245
Specification struct for pairing an output type with an error policy.
Definition comppare.hpp:192