//===----------------------------------------------------------------------===//
// Copyright © 2024-2025 Apple Inc. and the Pkl project authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//===----------------------------------------------------------------------===//
/// A template for writing and running benchmarks.
///
/// To write benchmarks, amend this module and define [microbenchmarks],
/// [outputBenchmarks], and/or [parserBenchmarks].
/// To run benchmarks, evaluate the amended module.
///
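/// For example, an amending module might define a single microbenchmark like
/// this (an illustrative sketch; the benchmark name and expression are
/// placeholders):
///
/// ```
/// amends "pkl:Benchmark"
///
/// microbenchmarks {
///   ["sum integers"] {
///     expression = List(1, 2, 3, 4, 5).fold(0, (acc, n) -> acc + n)
///   }
/// }
/// ```
///
/// Evaluating such a module, for instance with `pkl eval myBenchmarks.pkl`
/// (where `myBenchmarks.pkl` is the amending module), runs the benchmarks and
/// renders the [report].
///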
/// Each benchmark run consists of *m* [iterations] of *n* repetitions.
/// The number of repetitions is controlled indirectly via [iterationTime].
///
/// The reported metric is time per repetition.
/// It is calculated as the measured iteration time divided by the number of repetitions.
///
/// Benchmarks are warmed up for approximately the same time they are measured.
/// The goal is to measure steady state performance.
///
/// The benchmark [report] details benchmark [results][BenchmarkResult]
/// and the [platform][_platform.Platform] that benchmarks were run on.
/// By default, the report is rendered in *Pcf*.
/// To render the report in a different format, override `output.renderer`.
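///
/// For example, to render the report as JSON instead, an amending module
/// might contain (a minimal sketch using the standard library's `JsonRenderer`):
///
/// ```
/// output {
///   renderer = new JsonRenderer {}
/// }
/// ```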
///
/// Warning: Although this module is ready for initial use,
/// benchmark results may be inaccurate or inconsistent.
@ModuleInfo { minPklVersion = "0.31.0" }
module pkl.Benchmark

import "pkl:platform" as _platform

/// The number of benchmark iterations to run.
///
/// This value can be [overridden][Benchmark.iterations] per benchmark.
iterations: UInt32 = 15

/// The approximate time to spend on each benchmark iteration.
///
/// This value indirectly controls the number of repetitions per iteration.
///
/// This value can be [overridden][Benchmark.iterationTime] per benchmark.
iterationTime: Duration = 100.ms

/// Whether to report detailed results such as [BenchmarkResult.samples].
///
/// This value can be [overridden][Benchmark.isVerbose] per benchmark.
isVerbose: Boolean = false

/// Benchmarks that measure the time taken to evaluate an expression.
///
/// Mapping keys are the benchmarks' descriptive names.
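///
/// In an amending module, each entry defines one benchmark, and module-level
/// defaults such as [iterations] can be overridden per entry.
/// A hypothetical example:
///
/// ```
/// microbenchmarks {
///   ["concatenate strings"] {
///     iterations = 20
///     expression = "foo" + "bar" + "baz"
///   }
/// }
/// ```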
microbenchmarks: Mapping<String, Microbenchmark> = new {
  default {
    iterations = module.iterations
    iterationTime = module.iterationTime
    isVerbose = module.isVerbose
  }
}

/// Benchmarks that measure the time taken to render a module's output.
///
/// Mapping keys are the benchmarks' descriptive names.
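///
/// A hypothetical example, where `"myConfig.pkl"` stands in for a module in
/// your own project:
///
/// ```
/// outputBenchmarks {
///   ["render my config"] {
///     sourceModule = import("myConfig.pkl")
///   }
/// }
/// ```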
outputBenchmarks: Mapping<String, OutputBenchmark> = new {
  default {
    iterations = module.iterations
    iterationTime = module.iterationTime
    isVerbose = module.isVerbose
  }
}

/// Benchmarks that measure the time taken to parse a module's source code into an abstract syntax tree.
///
/// Mapping keys are the benchmarks' descriptive names.
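///
/// A hypothetical example that parses an inline source snippet:
///
/// ```
/// parserBenchmarks {
///   ["parse small module"] {
///     sourceText = """
///       foo = 1
///       bar = foo + 1
///       """
///   }
/// }
/// ```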
parserBenchmarks: Mapping<String, ParserBenchmark> = new {
  default {
    iterations = module.iterations
    iterationTime = module.iterationTime
    isVerbose = module.isVerbose
  }
}

/// The report containing benchmark results.
///
/// Typically not configured from user code.
report: BenchmarkReport = new {
  platform = _platform.current
  when (!module.microbenchmarks.isEmpty) {
    microbenchmarks {
      for (name, benchmark in module.microbenchmarks) {
        [name] = benchmark.run()
      }
    }
  }
  when (!module.outputBenchmarks.isEmpty) {
    outputBenchmarks {
      for (name, benchmark in module.outputBenchmarks) {
        [name] = benchmark.run()
      }
    }
  }
  when (!module.parserBenchmarks.isEmpty) {
    parserBenchmarks {
      for (name, benchmark in module.parserBenchmarks) {
        [name] = benchmark.run()
      }
    }
  }
}

/// Common base class for benchmarks.
abstract class Benchmark {
  /// The number of benchmark iterations to run.
  iterations: UInt32

  /// The approximate time to spend on each benchmark iteration.
  ///
  /// This value indirectly controls the number of repetitions per iteration.
  iterationTime: Duration

  /// Whether to report detailed results such as [BenchmarkResult.samples].
  isVerbose: Boolean

  /// Runs this benchmark.
  ///
  /// Typically not called from user code.
  abstract function run(): BenchmarkResult
}

/// The result of running a benchmark.
class BenchmarkResult {
  /// The number of iterations run.
  iterations: UInt32

  /// The number of repetitions run per iteration.
  repetitions: UInt

  /// The average time that a repetition took to complete in each iteration,
  /// calculated as the measured iteration time divided by the number of repetitions.
  ///
  /// This property is only set if [Benchmark.isVerbose] is [true].
  samples: List<Duration>(length == iterations)?

  /// The minimum of [samples].
  min: Duration

  /// The maximum of [samples].
  max: Duration

  /// The mean of [samples].
  mean: Duration

  /// The standard deviation of [samples].
  stdev: Duration

  /// The margin of error for [mean] at the 99% confidence level.
  error: Duration
}

/// A benchmark that measures the time taken to evaluate an expression.
class Microbenchmark extends Benchmark {
  /// The expression to evaluate.
  expression: Any

  external function run(): BenchmarkResult
}

/// A benchmark that measures the time taken to render a module's output.
class OutputBenchmark extends Benchmark {
  /// The module to evaluate.
  sourceModule: Module

  external function run(): BenchmarkResult
}

/// A benchmark that measures the time taken to parse a module's source code.
class ParserBenchmark extends Benchmark {
  /// The module source code to parse.
  @SourceCode { language = "Pkl" }
  sourceText: String

  /// The effective URI of [sourceText].
  ///
  /// If [sourceText] contains relative imports, set [sourceUri] so that these imports may be
  /// resolved.
  sourceUri: String = "repl:text"

  external function run(): BenchmarkResult
}

/// The results of running a module's benchmarks.
class BenchmarkReport {
  /// The results of the microbenchmarks that were run.
  ///
  /// Mapping keys are the benchmarks' descriptive names.
  microbenchmarks: Mapping<String, BenchmarkResult>?

  /// The results of the output benchmarks that were run.
  ///
  /// Mapping keys are the benchmarks' descriptive names.
  outputBenchmarks: Mapping<String, BenchmarkResult>?

  /// The results of the parser benchmarks that were run.
  ///
  /// Mapping keys are the benchmarks' descriptive names.
  parserBenchmarks: Mapping<String, BenchmarkResult>?

  /// The platform that benchmarks were run on.
  platform: _platform.Platform
}

output {
  value = report
  renderer = new PcfRenderer {
    omitNullProperties = true
    converters {
      // round durations to two decimal places
      [Duration] = (it: Duration) -> it.value.toFixed(2).toFloat().toDuration(it.unit)
    }
  }
}