I am done

This commit is contained in:
2024-10-30 22:14:35 +01:00
parent 720dc28c09
commit 40e2a747cf
36901 changed files with 5011519 additions and 0 deletions

View File

@ -0,0 +1,120 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <chrono>
#include <map>
#include <memory>
#include <string>
#include <vector>
namespace libkineto {
// Base class for libkineto configuration objects.
//
// Besides its own timestamp and source string, an AbstractConfig holds a map
// of named "feature" configs. The feature objects are stored as raw pointers
// and are deleted by this class's destructor.
class AbstractConfig {
 public:
  AbstractConfig& operator=(const AbstractConfig&) = delete;
  AbstractConfig(AbstractConfig&&) = delete;
  AbstractConfig& operator=(AbstractConfig&&) = delete;

  // Deletes every feature config stored in featureConfigs_.
  virtual ~AbstractConfig() {
    for (const auto& p : featureConfigs_) {
      delete p.second;
    }
  }

  // Return a copy of the full derived class
  virtual AbstractConfig* cloneDerived(AbstractConfig& parent) const = 0;

  // Returns true if successfully parsed the config string
  bool parse(const std::string& conf);

  // Default setup for signal-triggered profiling.
  // The base implementation forwards the call to every feature config.
  virtual void setSignalDefaults() {
    for (auto& p : featureConfigs_) {
      p.second->setSignalDefaults();
    }
  }

  // Default setup for client-triggered profiling.
  // The base implementation forwards the call to every feature config.
  virtual void setClientDefaults() {
    for (auto& p : featureConfigs_) {
      p.second->setClientDefaults();
    }
  }

  // Time config was created / updated
  std::chrono::time_point<std::chrono::system_clock> timestamp() const {
    return timestamp_;
  }

  // Source config string that this was parsed from
  const std::string& source() const {
    return source_;
  }

  // Returns the feature config registered under `name`.
  // Throws std::out_of_range if no feature with that name was added;
  // an unchecked lookup here would dereference the map's end iterator,
  // which is undefined behavior.
  AbstractConfig& feature(std::string name) const {
    return *featureConfigs_.at(name);
  }

  // Transfers ownership of cfg arg.
  // NOTE(review): an entry already stored under `name` is overwritten
  // without being deleted.
  void addFeature(const std::string& name, AbstractConfig* cfg) {
    featureConfigs_[name] = cfg;
  }

 protected:
  AbstractConfig() {}
  // Member-wise copy: the feature map is copied pointer-by-pointer, so the
  // copy initially refers to the same feature objects as `other`
  // (see cloneFeaturesInto, which replaces them with clones).
  AbstractConfig(const AbstractConfig& other) = default;

  // Return true if the option was recognized and successfully parsed.
  // Throw std::invalid_argument if val is invalid.
  virtual bool handleOption(const std::string& name, std::string& val);

  // Perform post-validation checks, typically conditions involving
  // multiple options.
  // Throw std::invalid_argument if automatic correction can not be made.
  //
  // @param fallbackProfileStartTime Specify a fallback profile start timestamp in case it was never specified by the client
  virtual void validate(const std::chrono::time_point<std::chrono::system_clock>& fallbackProfileStartTime) = 0;

  // TODO: Separate out each profiler type into features?
  virtual void printActivityProfilerConfig(std::ostream& s) const;
  virtual void setActivityDependentConfig();

  // Helpers for use in handleOption

  // Split a string by delimiter and remove external white space
  std::vector<std::string> splitAndTrim(const std::string& s, char delim) const;
  // Lowercase for case-insensitive comparisons
  std::string toLower(std::string& s) const;
  // Does string end with suffix
  bool endsWith(const std::string& s, const std::string& suffix) const;

  // Conversions
  int64_t toIntRange(const std::string& val, int64_t min, int64_t max) const;
  int32_t toInt32(const std::string& val) const;
  int64_t toInt64(const std::string& val) const;
  bool toBool(std::string& val) const;

  // Stores a cloneDerived(cfg) copy of each of this object's features in
  // `cfg` under the same name. Entries already present in `cfg` under those
  // names are overwritten without being deleted.
  void cloneFeaturesInto(AbstractConfig& cfg) const {
    for (const auto& feature : featureConfigs_) {
      cfg.featureConfigs_[feature.first] = feature.second->cloneDerived(cfg);
    }
  }

 private:
  // Time config was created / updated
  std::chrono::time_point<std::chrono::system_clock> timestamp_{};

  // Original configuration string, used for comparison
  std::string source_{""};

  // Configuration objects for optional features
  std::map<std::string, AbstractConfig*> featureConfigs_{};
};
} // namespace libkineto

View File

@ -0,0 +1,109 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <memory>
#include <set>
#include <thread>
#include <vector>
#include "ActivityType.h"
#include "ActivityTraceInterface.h"
#include "IActivityProfiler.h"
namespace libkineto {
class ActivityProfilerController;
struct CpuTraceBuffer;
class Config;

// Client-facing interface to the activity profiler.
//
// Every operation except addMetadata() has a do-nothing base implementation
// (returning false / nullptr where a value is expected), so implementations
// only need to override what they support.
class ActivityProfilerInterface {
 public:
  virtual ~ActivityProfilerInterface() = default;

  virtual void init() {}

  // Base implementation reports "not initialized".
  virtual bool isInitialized() {
    return false;
  }

  // Base implementation reports "not active".
  virtual bool isActive() {
    return false;
  }

  // *** Asynchronous API ***
  // Rather than starting and stopping the trace by hand, supply a start time
  // together with a duration and / or an iteration stop criterion.
  // Tracing ends as soon as either condition is met.
  virtual void scheduleTrace(const std::string& configStr) {}

  // *** Synchronous API ***
  // Required call order:
  //   prepareTrace -> startTrace -> stopTrace.
  // Many tracing structures are lazily initialized during trace collection,
  // with potentially high overhead.
  // Call prepareTrace to enable tracing, then run the region to trace
  // at least once (ideally the very code that will be traced) so that
  // those structures get initialized.
  virtual void prepareTrace(
      const std::set<ActivityType>& activityTypes,
      const std::string& configStr = "") {}

  // Toggle GPU tracing while a trace is running, to omit certain parts of a
  // graph.
  virtual void toggleCollectionDynamic(bool enable) {}

  // Start recording, potentially reusing any buffers allocated since
  // prepareTrace was called.
  virtual void startTrace() {}

  // Stop and process the trace, producing an in-memory list of trace records.
  // Processing is done synchronously, on the calling thread.
  // The base implementation returns nullptr.
  virtual std::unique_ptr<ActivityTraceInterface> stopTrace() {
    return nullptr;
  }

  // Re-evaluate internal state to allow triggering operations based on the
  // number of iterations. Each call implicitly increments the iteration
  // count.
  virtual void step() {}

  // *** TraceActivity API ***
  // FIXME: Pass activityProfiler interface into clientInterface?
  virtual void pushCorrelationId(uint64_t id) {}
  virtual void popCorrelationId() {}
  virtual void transferCpuTrace(std::unique_ptr<CpuTraceBuffer> traceBuffer) {}

  // Correlation ids for user defined spans
  virtual void pushUserCorrelationId(uint64_t) {}
  virtual void popUserCorrelationId() {}

  // Saves information for the current thread to be used in profiler output.
  // The client must record any new kernel thread where the activity has
  // occurred.
  virtual void recordThreadInfo() {}

  // Record trace metadata. Only string keys and values are currently
  // supported; a value recorded under an existing key overwrites it.
  virtual void addMetadata(
      const std::string& key,
      const std::string& value) = 0;

  // Add a child activity profiler, which lets frameworks in the application
  // enable custom framework events.
  virtual void addChildActivityProfiler(
      std::unique_ptr<IActivityProfiler> profiler) {}

  // Log Invariant Violation to factories enabled. This helps record
  // instances when the profiler behaves unexpectedly.
  virtual void logInvariantViolation(
      const std::string&,
      const std::string&,
      const std::string&,
      const std::string& = "") {}
};
} // namespace libkineto

View File

@ -0,0 +1,28 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <memory>
#include <string>
#include <vector>
namespace libkineto {
struct ITraceActivity;

// Interface to a collected trace: exposes the recorded activities and a
// save(path) operation. The base implementations are inert.
class ActivityTraceInterface {
 public:
  virtual ~ActivityTraceInterface() = default;

  // Base implementation has no activity list and returns nullptr.
  virtual const std::vector<const ITraceActivity*>* activities() {
    return nullptr;
  }

  // Base implementation ignores the path and does nothing.
  virtual void save(const std::string& /*path*/) {}
};
} // namespace libkineto

View File

@ -0,0 +1,63 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <array>
#include <string>
#include <set>
namespace libkineto {
// Not every activity type is enabled by default. Enumerators listed before
// OPTIONAL_ACTIVITY_TYPE_START make up the default set, the ones from there
// up to ENUM_COUNT are optional. Please add new types at the correct
// position in the enum.
enum class ActivityType {
  // Activity types enabled by default
  CPU_OP = 0, // cpu side ops
  USER_ANNOTATION,
  GPU_USER_ANNOTATION,
  GPU_MEMCPY,
  GPU_MEMSET,
  CONCURRENT_KERNEL, // on-device kernels
  EXTERNAL_CORRELATION,
  CUDA_RUNTIME, // host side cuda runtime events
  CUDA_DRIVER, // host side cuda driver events
  CPU_INSTANT_EVENT, // host side point-like events
  PYTHON_FUNCTION,
  OVERHEAD, // CUPTI induced overhead events sampled from its overhead API.
  MTIA_RUNTIME, // host side MTIA runtime events
  MTIA_CCP_EVENTS, // MTIA ondevice CCP events
  CUDA_SYNC, // synchronization events between runtime and kernels

  // Optional Activity types
  GLOW_RUNTIME, // host side glow runtime events
  CUDA_PROFILER_RANGE, // CUPTI Profiler range for performance metrics
  HPU_OP, // HPU host side runtime event
  XPU_RUNTIME, // host side xpu runtime events
  COLLECTIVE_COMM, // collective communication
  MTIA_WORKLOADD, // MTIA workloadd events

  // PRIVATEUSE1 Activity types are used for custom backends.
  // The corresponding device type is `DeviceType::PrivateUse1` in PyTorch.
  PRIVATEUSE1_RUNTIME, // host side privateUse1 runtime events
  PRIVATEUSE1_DRIVER, // host side privateUse1 driver events

  ENUM_COUNT, // This is to add buffer and not used for any profiling logic. Add your new type before it.
  OPTIONAL_ACTIVITY_TYPE_START = GLOW_RUNTIME,
};

// Conversions between an ActivityType and its string form.
const char* toString(ActivityType t);
ActivityType toActivityType(const std::string& str);

// Number of real activity types (ENUM_COUNT itself is excluded).
constexpr int activityTypeCount = static_cast<int>(ActivityType::ENUM_COUNT);
// Number of activity types that precede the optional ones.
constexpr int defaultActivityTypeCount =
    static_cast<int>(ActivityType::OPTIONAL_ACTIVITY_TYPE_START);

// Return an array of all activity types except COUNT
const std::array<ActivityType, activityTypeCount> activityTypes();
// Return an array sized for the default (non-optional) activity types
const std::array<ActivityType, defaultActivityTypeCount> defaultActivityTypes();
} // namespace libkineto

View File

@ -0,0 +1,22 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
namespace libkineto {
// Pure interface implemented by clients of the profiler: every operation
// below must be provided by the implementing class.
class ClientInterface {
 public:
  virtual ~ClientInterface() = default;

  virtual void init() = 0;

  // Takes five boolean switches.
  // NOTE(review): their meaning is not named in this header - confirm
  // against the implementations before relying on a particular order.
  virtual void prepare(bool, bool, bool, bool, bool) = 0;

  virtual void start() = 0;

  virtual void stop() = 0;
};
} // namespace libkineto

View File

@ -0,0 +1,505 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include "AbstractConfig.h"
#include "ActivityType.h"
#include <assert.h>
#include <chrono>
#include <functional>
#include <set>
#include <string>
#include <vector>
namespace libkineto {
// Top-level libkineto configuration.
//
// Holds the settings for the event profiler, the activity profiler, logging
// and on-demand requests, and exposes them through the accessors below.
// Copies are only made through clone(); copy/move assignment and move
// construction are disabled.
class Config : public AbstractConfig {
 public:
  Config();
  Config& operator=(const Config&) = delete;
  Config(Config&&) = delete;
  Config& operator=(Config&&) = delete;

  // Return a full copy including feature config object
  std::unique_ptr<Config> clone() const {
    // The copy constructor is private, hence the explicit `new`.
    auto cfg = std::unique_ptr<Config>(new Config(*this));
    cloneFeaturesInto(*cfg);
    return cfg;
  }

  bool handleOption(const std::string& name, std::string& val) override;

  void setClientDefaults() override;

  // Log events to this file
  const std::string& eventLogFile() const {
    return eventLogFile_;
  }

  // True if the activity profiler was enabled explicitly or an on-demand
  // activity request timestamp has been recorded.
  bool activityProfilerEnabled() const {
    return activityProfilerEnabled_ ||
        activitiesOnDemandTimestamp_.time_since_epoch().count() > 0;
  }

  // Log activity trace to this file
  const std::string& activitiesLogFile() const {
    return activitiesLogFile_;
  }

  // Log activity trace to this url
  const std::string& activitiesLogUrl() const {
    return activitiesLogUrl_;
  }

  void setActivitiesLogUrl(const std::string& url) {
    activitiesLogUrl_ = url;
  }

  bool activitiesLogToMemory() const {
    return activitiesLogToMemory_;
  }

  // True if at least one event or metric name has been configured.
  bool eventProfilerEnabled() const {
    return !eventNames_.empty() || !metricNames_.empty();
  }

  // Is profiling enabled for the given device?
  // The device mask holds one bit per device and is 8 bits wide, so only
  // devices 0..7 can be enabled; any other index reports false. The explicit
  // range check keeps the shift well defined for large `dev` values
  // (shifting by the operand width or more is undefined behavior).
  bool eventProfilerEnabledForDevice(uint32_t dev) const {
    constexpr uint32_t kDeviceMaskBits = 8;
    return dev < kDeviceMaskBits &&
        ((eventProfilerDeviceMask_ >> dev) & 1u) != 0;
  }

  // Take a sample (read hardware counters) at this frequency.
  // This controls how often counters are read - if all counters cannot
  // be collected simultaneously then multiple samples are needed to
  // collect all requested counters - see multiplex period.
  std::chrono::milliseconds samplePeriod() const {
    return samplePeriod_;
  }

  void setSamplePeriod(std::chrono::milliseconds period) {
    samplePeriod_ = period;
  }

  // When all requested counters cannot be collected simultaneously,
  // counters will be multiplexed at this frequency.
  // Multiplexing can have a large performance impact if done frequently.
  // To avoid a perf impact, keep this at 1s or above.
  std::chrono::milliseconds multiplexPeriod() const {
    return multiplexPeriod_;
  }

  void setMultiplexPeriod(std::chrono::milliseconds period) {
    multiplexPeriod_ = period;
  }

  // Report counters at this frequency. Note that several samples can
  // be reported each time, see samplesPerReport.
  std::chrono::milliseconds reportPeriod() const {
    return reportPeriod_;
  }

  void setReportPeriod(std::chrono::milliseconds msecs);

  // Number of samples dispatched each report period.
  // Must be in the range [1, report period / sample period].
  // In other words, aggregation is supported but not interpolation.
  int samplesPerReport() const {
    return samplesPerReport_;
  }

  void setSamplesPerReport(int count) {
    samplesPerReport_ = count;
  }

  // The names of events to collect
  const std::set<std::string>& eventNames() const {
    return eventNames_;
  }

  // Add additional events to be profiled
  void addEvents(const std::set<std::string>& names) {
    eventNames_.insert(names.begin(), names.end());
  }

  // The names of metrics to collect
  const std::set<std::string>& metricNames() const {
    return metricNames_;
  }

  // Add additional metrics to be profiled
  void addMetrics(const std::set<std::string>& names) {
    metricNames_.insert(names.begin(), names.end());
  }

  const std::vector<int>& percentiles() const {
    return eventReportPercentiles_;
  }

  // Profile for this long, then revert to base config
  std::chrono::seconds eventProfilerOnDemandDuration() const {
    return eventProfilerOnDemandDuration_;
  }

  void setEventProfilerOnDemandDuration(std::chrono::seconds duration) {
    eventProfilerOnDemandDuration_ = duration;
  }

  // Too many event profilers on a single system can overload the driver.
  // At some point, latencies shoot through the roof and collection of samples
  // becomes impossible. To avoid this situation we have a limit of profilers
  // per GPU.
  // NOTE: Communication with a daemon is needed for this feature.
  // Library must be built with an active DaemonConfigLoader.
  int maxEventProfilersPerGpu() const {
    return eventProfilerMaxInstancesPerGpu_;
  }

  // On Cuda11 we've seen occasional hangs when reprogramming counters
  // Monitor profiling threads and report when a thread is not responding
  // for a given number of seconds.
  // A period of 0 means disable.
  std::chrono::seconds eventProfilerHeartbeatMonitorPeriod() const {
    return eventProfilerHeartbeatMonitorPeriod_;
  }

  // The types of activities selected in the configuration file
  const std::set<ActivityType>& selectedActivityTypes() const {
    return selectedActivityTypes_;
  }

  void setSelectedActivityTypes(const std::set<ActivityType>& types) {
    selectedActivityTypes_ = types;
  }

  bool isReportInputShapesEnabled() const {
    return enableReportInputShapes_;
  }

  bool isProfileMemoryEnabled() const {
    return enableProfileMemory_;
  }

  bool isWithStackEnabled() const {
    return enableWithStack_;
  }

  bool isWithFlopsEnabled() const {
    return enableWithFlops_;
  }

  bool isWithModulesEnabled() const {
    return enableWithModules_;
  }

  // Trace for this long
  std::chrono::milliseconds activitiesDuration() const {
    return activitiesDuration_;
  }

  // Trace for this many iterations, determined by external API
  int activitiesRunIterations() const {
    return activitiesRunIterations_;
  }

  int activitiesMaxGpuBufferSize() const {
    return activitiesMaxGpuBufferSize_;
  }

  std::chrono::seconds activitiesWarmupDuration() const {
    return activitiesWarmupDuration_;
  }

  int activitiesWarmupIterations() const {
    return activitiesWarmupIterations_;
  }

  // Show CUDA Synchronization Stream Wait Events
  bool activitiesCudaSyncWaitEvents() const {
    return activitiesCudaSyncWaitEvents_;
  }

  void setActivitiesCudaSyncWaitEvents(bool enable) {
    activitiesCudaSyncWaitEvents_ = enable;
  }

  // Timestamp at which the profiling to start, requested by the user.
  // An explicit profile start time takes precedence; otherwise the value is
  // derived from the deprecated request timestamp.
  const std::chrono::time_point<std::chrono::system_clock> requestTimestamp()
      const {
    if (profileStartTime_.time_since_epoch().count()) {
      return profileStartTime_;
    }
    // If no one requested timestamp, return 0.
    if (requestTimestamp_.time_since_epoch().count() == 0) {
      return requestTimestamp_;
    }

    // TODO(T94634890): Deprecate requestTimestamp
    return requestTimestamp_ + maxRequestAge() + activitiesWarmupDuration();
  }

  bool hasProfileStartTime() const {
    return requestTimestamp_.time_since_epoch().count() > 0 ||
        profileStartTime_.time_since_epoch().count() > 0;
  }

  int profileStartIteration() const {
    return profileStartIteration_;
  }

  // A start iteration only counts when a positive number of run iterations
  // is configured as well.
  bool hasProfileStartIteration() const {
    return profileStartIteration_ >= 0 && activitiesRunIterations_ > 0;
  }

  void setProfileStartIteration(int iter) {
    profileStartIteration_ = iter;
  }

  int profileStartIterationRoundUp() const {
    return profileStartIterationRoundUp_;
  }

  // calculate the start iteration accounting for warmup.
  // Returns -1 when no start iteration is configured.
  int startIterationIncludingWarmup() const {
    if (!hasProfileStartIteration()) {
      return -1;
    }
    return profileStartIteration_ - activitiesWarmupIterations_;
  }

  const std::chrono::seconds maxRequestAge() const;

  // All VLOG* macros will log if the verbose log level is >=
  // the verbosity specified for the verbose log message.
  // Default value is -1, so messages with log level 0 will log by default.
  int verboseLogLevel() const {
    return verboseLogLevel_;
  }

  // Modules for which verbose logging is enabled.
  // If empty, logging is enabled for all modules.
  const std::vector<std::string>& verboseLogModules() const {
    return verboseLogModules_;
  }

  bool sigUsr2Enabled() const {
    return enableSigUsr2_;
  }

  bool ipcFabricEnabled() const {
    return enableIpcFabric_;
  }

  std::chrono::seconds onDemandConfigUpdateIntervalSecs() const {
    return onDemandConfigUpdateIntervalSecs_;
  }

  // Rounds `duration + alignment` down to a multiple of `alignment`.
  // Note that a duration which is already a multiple of `alignment` is
  // therefore moved up to the next multiple. `alignment` must be non-zero
  // (it is used as a modulus).
  static std::chrono::milliseconds alignUp(
      std::chrono::milliseconds duration,
      std::chrono::milliseconds alignment) {
    duration += alignment;
    return duration - (duration % alignment);
  }

  std::chrono::time_point<std::chrono::system_clock>
  eventProfilerOnDemandStartTime() const {
    return eventProfilerOnDemandTimestamp_;
  }

  // On-demand start time plus the on-demand duration.
  std::chrono::time_point<std::chrono::system_clock>
  eventProfilerOnDemandEndTime() const {
    return eventProfilerOnDemandTimestamp_ + eventProfilerOnDemandDuration_;
  }

  std::chrono::time_point<std::chrono::system_clock>
  activityProfilerRequestReceivedTime() const {
    return activitiesOnDemandTimestamp_;
  }

  static constexpr std::chrono::milliseconds kControllerIntervalMsecs{1000};

  // Users may request and set trace id and group trace id.
  const std::string& requestTraceID() const {
    return requestTraceID_;
  }

  void setRequestTraceID(const std::string& tid) {
    requestTraceID_ = tid;
  }

  const std::string& requestGroupTraceID() const {
    return requestGroupTraceID_;
  }

  void setRequestGroupTraceID(const std::string& gtid) {
    requestGroupTraceID_ = gtid;
  }

  size_t cuptiDeviceBufferSize() const {
    return cuptiDeviceBufferSize_;
  }

  size_t cuptiDeviceBufferPoolLimit() const {
    return cuptiDeviceBufferPoolLimit_;
  }

  void updateActivityProfilerRequestReceivedTime();

  void printActivityProfilerConfig(std::ostream& s) const override;
  void setActivityDependentConfig() override;

  void validate(const std::chrono::time_point<std::chrono::system_clock>&
                    fallbackProfileStartTime) override;

  static void addConfigFactory(
      std::string name,
      std::function<AbstractConfig*(Config&)> factory);

  void print(std::ostream& s) const;

  // Config relies on some state with global static lifetime. If other
  // threads are using the config, it's possible that the global state
  // is destroyed before the threads stop. By hanging onto this handle,
  // correct destruction order can be ensured.
  static std::shared_ptr<void> getStaticObjectsLifetimeHandle();

  bool getTSCTimestampFlag() const {
    return useTSCTimestamp_;
  }

  void setTSCTimestampFlag(bool flag) {
    useTSCTimestamp_ = flag;
  }

 private:
  // Member-wise copy; only used by clone().
  explicit Config(const Config& other) = default;

  AbstractConfig* cloneDerived(AbstractConfig& parent) const override {
    // Clone from AbstractConfig not supported
    assert(false);
    return nullptr;
  }

  uint8_t createDeviceMask(const std::string& val);

  // Adds valid activity types from the user defined string list in the
  // configuration file
  void setActivityTypes(const std::vector<std::string>& selected_activities);

  // Sets the default activity types to be traced
  void selectDefaultActivityTypes() {
    // If the user has not specified an activity list, add all types
    for (ActivityType t : defaultActivityTypes()) {
      selectedActivityTypes_.insert(t);
    }
  }

  int verboseLogLevel_;
  std::vector<std::string> verboseLogModules_;

  // Event profiler
  // These settings are also supported in on-demand mode
  std::chrono::milliseconds samplePeriod_;
  std::chrono::milliseconds reportPeriod_;
  int samplesPerReport_;
  std::set<std::string> eventNames_;
  std::set<std::string> metricNames_;

  // On-demand duration
  std::chrono::seconds eventProfilerOnDemandDuration_;
  // Last on-demand request
  std::chrono::time_point<std::chrono::system_clock>
      eventProfilerOnDemandTimestamp_;

  int eventProfilerMaxInstancesPerGpu_;

  // Monitor whether event profiler threads are stuck
  // at this frequency
  std::chrono::seconds eventProfilerHeartbeatMonitorPeriod_;

  // These settings can not be changed on-demand
  std::string eventLogFile_;
  std::vector<int> eventReportPercentiles_ = {5, 25, 50, 75, 95};
  // One bit per device; all bits set by default.
  uint8_t eventProfilerDeviceMask_ = ~0;
  std::chrono::milliseconds multiplexPeriod_;

  // Activity profiler
  bool activityProfilerEnabled_;
  std::set<ActivityType> selectedActivityTypes_;

  // The activity profiler settings are all on-demand
  std::string activitiesLogFile_;

  std::string activitiesLogUrl_;

  // Log activities to memory buffer
  bool activitiesLogToMemory_{false};

  int activitiesMaxGpuBufferSize_;
  std::chrono::seconds activitiesWarmupDuration_;
  int activitiesWarmupIterations_;
  bool activitiesCudaSyncWaitEvents_;

  // Enable Profiler Config Options
  // Temporarily disable shape collection until we re-roll out the feature for on-demand cases
  bool enableReportInputShapes_{false};
  bool enableProfileMemory_{false};
  bool enableWithStack_{false};
  bool enableWithFlops_{false};
  bool enableWithModules_{false};

  // Profile for specified iterations and duration
  std::chrono::milliseconds activitiesDuration_;
  int activitiesRunIterations_;

  // Below are not used
  // Use this net name for iteration count
  std::string activitiesExternalAPIIterationsTarget_;
  // Only profile nets that includes this in the name
  std::vector<std::string> activitiesExternalAPIFilter_;
  // Only profile nets with at least this many operators
  int activitiesExternalAPINetSizeThreshold_;
  // Only profile nets with at least this many GPU operators
  int activitiesExternalAPIGpuOpCountThreshold_;
  // Last activity profiler request
  std::chrono::time_point<std::chrono::system_clock>
      activitiesOnDemandTimestamp_;

  // ActivityProfilers are triggered by either:
  // Synchronized start timestamps
  std::chrono::time_point<std::chrono::system_clock> profileStartTime_;
  // Or start iterations.
  int profileStartIteration_;
  int profileStartIterationRoundUp_;

  // DEPRECATED
  std::chrono::time_point<std::chrono::system_clock> requestTimestamp_;

  // Enable profiling via SIGUSR2
  bool enableSigUsr2_;

  // Enable IPC Fabric instead of thrift communication
  bool enableIpcFabric_;
  std::chrono::seconds onDemandConfigUpdateIntervalSecs_;

  // Logger Metadata
  std::string requestTraceID_;
  std::string requestGroupTraceID_;

  // CUPTI Device Buffer
  size_t cuptiDeviceBufferSize_;
  size_t cuptiDeviceBufferPoolLimit_;

  // CUPTI Timestamp Format
  bool useTSCTimestamp_{true};
};

// NOTE(review): presumably the name of the environment variable that
// controls daemon usage - confirm at the use sites.
constexpr char kUseDaemonEnvVar[] = "KINETO_USE_DAEMON";
} // namespace libkineto

View File

@ -0,0 +1,151 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <fmt/format.h>
#include <string>
#include <thread>
#include <unordered_map>
#include <vector>
#include <sstream>
#include "ITraceActivity.h"
#include "ThreadUtil.h"
#include "TraceSpan.h"
namespace libkineto {
// Link type, used in GenericTraceActivity.flow.type
constexpr unsigned int kLinkFwdBwd = 1;
constexpr unsigned int kLinkAsyncCpuGpu = 2;
// @lint-ignore-every CLANGTIDY cppcoreguidelines-non-private-member-variables-in-classes
// @lint-ignore-every CLANGTIDY cppcoreguidelines-pro-type-member-init
class GenericTraceActivity : public ITraceActivity {
public:
GenericTraceActivity()
: activityType(ActivityType::ENUM_COUNT), traceSpan_(nullptr) {}
GenericTraceActivity(
const TraceSpan& trace, ActivityType type, const std::string& name)
: activityType(type), activityName(name), traceSpan_(&trace) {}
int64_t deviceId() const override {
return device;
}
int64_t resourceId() const override {
return resource;
}
int32_t getThreadId() const override {
return threadId;
}
int64_t timestamp() const override {
return startTime;
}
int64_t duration() const override {
return endTime - startTime;
}
int64_t correlationId() const override {
return id;
}
ActivityType type() const override {
return activityType;
}
const ITraceActivity* linkedActivity() const override {
return linked;
}
int flowType() const override {
return flow.type;
}
int flowId() const override {
return flow.id;
}
bool flowStart() const override {
return flow.start;
}
const std::string name() const override {
return activityName;
}
const TraceSpan* traceSpan() const override {
return traceSpan_;
}
void log(ActivityLogger& logger) const override;
// Encode client side metadata as a key/value
template <typename ValType>
void addMetadata(const std::string& key, const ValType& value) {
metadataMap_.emplace(key, std::make_pair(fmt::format("{}", value), false));
}
void addMetadataQuoted(const std::string& key, const std::string& value) {
metadataMap_.emplace(key, std::make_pair(value, true));
}
const std::string getMetadataValue(const std::string& key) const override {
if (auto it = metadataMap_.find(key); it != metadataMap_.end()) {
return it->second.first;
}
return "";
}
const std::string metadataJson() const override {
std::stringstream json;
bool first = true;
for (const auto& [key, val] : metadataMap_) {
if (!first) {
json << ", ";
}
val.second ? json << fmt::format("\"{}\": \"{}\"", key, val.first)
: json << fmt::format("\"{}\": {}", key, val.first);
first = false;
}
return json.str();
}
virtual ~GenericTraceActivity() override {}
int64_t startTime{0};
int64_t endTime{0};
int32_t id{0};
int32_t device{0};
int32_t resource{0};
int32_t threadId{0};
ActivityType activityType;
std::string activityName;
struct Flow {
Flow(): id(0), type(0), start(0) {}
// Ids must be unique within each type
uint32_t id : 27;
// Type will be used to connect flows between profilers, as
// well as look up flow information (name etc)
uint32_t type : 4;
uint32_t start : 1;
} flow;
const ITraceActivity* linked{nullptr};
private:
const TraceSpan* traceSpan_;
// Metadata map: { key: (value, quoted)}
std::unordered_map<std::string, std::pair<std::string, bool>> metadataMap_;
};
} // namespace libkineto

View File

@ -0,0 +1,165 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <memory>
#include <set>
#include <vector>
#include "Config.h"
#include "GenericTraceActivity.h"
/* This file includes an abstract base class for an activity profiler
* that can be implemented by multiple tracing agents in the application.
* The high level Kineto profiler can co-ordinate start and end of tracing
* and combine together events from multiple such activity profilers.
*/
namespace libkineto {
struct CpuTraceBuffer;

#ifdef _MSC_VER
// workaround for the predefined ERROR macro on Windows
#undef ERROR
#endif // _MSC_VER

// State reported by a profiler session (see IActivityProfilerSession).
enum class TraceStatus {
  READY = 0, // Accepting trace requests
  WARMUP = 1, // Performing trace warmup
  RECORDING = 2, // Actively collecting activities
  PROCESSING = 3, // Recording is complete, preparing results
  ERROR = 4, // One or more errors (and possibly also warnings) occurred.
  WARNING = 5, // One or more warnings occurred.
};
/* DeviceInfo:
 *   Describes one device row of a trace: process name, sort order, PID and
 *   device label. The trace viewer orders processes and gpu rows by the
 *   sortIndex field.
 */
struct DeviceInfo {
  // Stores copies of all four arguments in the fields of the same name.
  DeviceInfo(
      int64_t id,
      int64_t sortIndex,
      const std::string& name,
      const std::string& label)
      : id{id}, sortIndex{sortIndex}, name(name), label(label) {}

  int64_t id; // process id
  int64_t sortIndex; // position in trace view
  const std::string name; // process name
  const std::string label; // device label
};
/* ResourceInfo:
 *   Describes one resource that lives inside a device.
 */
struct ResourceInfo {
  // Note the argument order: the owning device id comes first, although
  // the `deviceId` field is declared after `id` and `sortIndex`.
  ResourceInfo(
      int64_t deviceId,
      int64_t id,
      int64_t sortIndex,
      const std::string& name)
      : id{id}, sortIndex{sortIndex}, deviceId{deviceId}, name(name) {}

  int64_t id; // resource id
  int64_t sortIndex; // position in trace view
  int64_t deviceId; // id of device which owns this resource (specified in DeviceInfo.id)
  const std::string name; // resource name
};
// Callback that maps an int32_t key to a trace activity pointer.
using getLinkedActivityCallback =
    std::function<const ITraceActivity*(int32_t)>;

/* IActivityProfilerSession:
 *   an opaque object that a high level profiler can use to
 *   start/stop a trace and retrieve its trace events.
 */
class IActivityProfilerSession {
 public:
  virtual ~IActivityProfilerSession() = default;

  // start the trace collection synchronously
  virtual void start() = 0;

  // stop the trace collection synchronously
  virtual void stop() = 0;

  // Current value of status_ (READY until a subclass changes it).
  TraceStatus status() {
    return status_;
  }

  // returns errors with this trace
  virtual std::vector<std::string> errors() = 0;

  // processes trace activities using logger
  virtual void processTrace(ActivityLogger& logger) = 0;

  // Extended variant; the base implementation ignores the callback and the
  // time range and forwards to processTrace(logger).
  virtual void processTrace(
      ActivityLogger& logger,
      getLinkedActivityCallback /*getLinkedActivity*/,
      int64_t /*startTime*/,
      int64_t /*endTime*/) {
    processTrace(logger);
  }

  // returns device info used in this trace, could be nullptr
  virtual std::unique_ptr<DeviceInfo> getDeviceInfo() = 0;

  // returns resource info used in this trace, could be empty
  virtual std::vector<ResourceInfo> getResourceInfos() = 0;

  // release ownership of the trace events and metadata
  virtual std::unique_ptr<CpuTraceBuffer> getTraceBuffer() = 0;

  // XXX define trace formats
  // virtual save(string name, TraceFormat format)

  // Correlation id hooks; the base implementations do nothing.
  virtual void pushCorrelationId(uint64_t /*id*/) {}
  virtual void popCorrelationId() {}
  virtual void pushUserCorrelationId(uint64_t /*id*/) {}
  virtual void popUserCorrelationId() {}

 protected:
  TraceStatus status_ = TraceStatus::READY;
};
/* Activity Profiler Plugins:
 *   These allow other frameworks to integrate into Kineto's primary
 *   activity profiler. The primary activity profiler handles timing of
 *   the trace collections and correlation of events, while the plugins
 *   can become a source of new trace activity types.
 */
class IActivityProfiler {
 public:
  virtual ~IActivityProfiler() = default;

  // name of profiler
  virtual const std::string& name() const = 0;

  // returns activity types this profiler supports
  virtual const std::set<ActivityType>& availableActivities() const = 0;

  // Calls prepare() on registered tracer providers passing in the relevant
  // activity types. Returns a profiler session handle
  virtual std::unique_ptr<IActivityProfilerSession> configure(
      const std::set<ActivityType>& activity_types,
      const Config& config) = 0;

  // asynchronous version of the above with future timestamp and duration.
  virtual std::unique_ptr<IActivityProfilerSession> configure(
      int64_t ts_ms,
      int64_t duration_ms,
      const std::set<ActivityType>& activity_types,
      const Config& config) = 0;
};
} // namespace libkineto

View File

@ -0,0 +1,60 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <string>
// Names of the libkineto stages, used when pushing logs to UST Logger.
constexpr char kWarmUpStage[]{"Warm Up"};
constexpr char kCollectionStage[]{"Collection"};
constexpr char kPostProcessingStage[]{"Post Processing"};
#if !USE_GOOGLE_LOG
#include <map>
#include <vector>
#include <stdint.h>
namespace libkineto {
// Output categories a log message can be tagged with. The numeric values are
// spelled out explicitly; ENUM_COUNT equals the number of categories that
// precede it and is what LoggerTypeCount below is derived from.
enum LoggerOutputType {
  VERBOSE = 0,
  INFO = 1,
  WARNING = 2,
  STAGE = 3,
  ERROR = 4,
  ENUM_COUNT = 5
};

// Conversions between LoggerOutputType and text (defined out of line).
const char* toString(LoggerOutputType t);
LoggerOutputType toLoggerOutputType(const std::string& str);

// ENUM_COUNT as a plain int.
constexpr int LoggerTypeCount =
    static_cast<int>(LoggerOutputType::ENUM_COUNT);
// Observer interface for logger output. Most members are pure virtual;
// setTraceID, setGroupTraceID and setTriggerOnDemand are optional hooks whose
// default implementations do nothing.
class ILoggerObserver {
 public:
  virtual ~ILoggerObserver() = default;

  // Receives one log message together with its output type.
  virtual void write(const std::string& message, LoggerOutputType ot) = 0;

  // Returns, by value, a map from output type to a list of strings.
  virtual const std::map<LoggerOutputType, std::vector<std::string>>
  extractCollectorMetadata() = 0;

  virtual void reset() = 0;
  virtual void addDevice(int64_t device) = 0;
  virtual void setTraceDurationMS(int64_t duration) = 0;
  virtual void addEventCount(int64_t count) = 0;
  virtual void setTraceID(const std::string& /*traceID*/) {}
  virtual void setGroupTraceID(const std::string& /*groupTraceID*/) {}
  virtual void addDestination(const std::string& dest) = 0;
  virtual void setTriggerOnDemand() {}
  virtual void addMetadata(
      const std::string& key,
      const std::string& value) = 0;
};
} // namespace libkineto
#endif // !USE_GOOGLE_LOG

View File

@ -0,0 +1,64 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <string>
#include "ActivityType.h"
namespace libkineto {
class ActivityLogger;
struct TraceSpan;
// Generic activity interface is borrowed from tensorboard protobuf format.
struct ITraceActivity {
  virtual ~ITraceActivity() = default;

  // A device is a physical or logical entity, e.g. CPU, GPU or process.
  virtual int64_t deviceId() const = 0;

  // A resource is something that lives on the device: a h/w thread,
  // functional units etc.
  virtual int64_t resourceId() const = 0;

  // Software thread id.
  virtual int32_t getThreadId() const = 0;

  // Start timestamp, in nanoseconds.
  virtual int64_t timestamp() const = 0;

  // Duration, in nanoseconds.
  virtual int64_t duration() const = 0;

  // Used to link up async activities.
  virtual int64_t correlationId() const = 0;

  // An activity can be part of a flow, identified by flow id and type.
  virtual int flowType() const = 0;
  virtual int flowId() const = 0;
  virtual bool flowStart() const = 0;

  virtual ActivityType type() const = 0;
  virtual const std::string name() const = 0;

  // Optional linked activity.
  virtual const ITraceActivity* linkedActivity() const = 0;

  // Optional containing trace object.
  virtual const TraceSpan* traceSpan() const = 0;

  // Logs this activity through the given logger.
  virtual void log(ActivityLogger& logger) const = 0;

  // Metadata formatted as json.
  // FIXME: Return iterator to dynamic type map here instead
  virtual const std::string metadataJson() const = 0;

  // Metadata value for the given key, in string format. This default
  // implementation ignores the key and returns an empty string.
  virtual const std::string getMetadataValue(
      const std::string& /*key*/) const {
    return std::string();
  }

  // Nanoseconds to microseconds. This conversion has to be the same
  // everywhere: plain integer division, no rounding.
  static int64_t nsToUs(int64_t ns) {
    constexpr int64_t kNsPerUs = 1000;
    return ns / kNsPerUs;
  }
};
} // namespace libkineto

View File

@ -0,0 +1,14 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
namespace libkineto {
// Getter/setter pair for libkineto's log severity level, expressed as an int.
// Only the declarations are visible in this header.
// NOTE(review): the meaning of specific integer values is not defined here -
// confirm against the implementation before relying on particular numbers.
int getLogSeverityLevel();
void setLogSeverityLevel(int level);
} // namespace libkineto

View File

@ -0,0 +1,30 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <cstdint>
#include <string>
#include <utility>
#include <vector>
namespace libkineto {
// Thread and process identification helpers. Only the declarations are
// visible in this header; remarks tagged NOTE(review) are assumptions drawn
// from the names and should be confirmed against the implementation.

// NOTE(review): presumably the OS-level id of the calling thread - confirm.
int32_t systemThreadId();
// NOTE(review): presumably a library-level id for the calling thread, as
// opposed to systemThreadId() - confirm.
int32_t threadId();
// NOTE(review): presumably names the calling thread and returns true on
// success - confirm.
bool setThreadName(const std::string& name);
std::string getThreadName();
int32_t processId();
// Name of the process with the given pid, as a string.
std::string processName(int32_t pid);
// Return a list of pids and process names for the current process
// and its parents.
std::vector<std::pair<int32_t, std::string>> pidCommandPairsOfAncestors();
} // namespace libkineto

View File

@ -0,0 +1,42 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <atomic>
#include <string>
#include <thread>
namespace libkineto {
struct TraceSpan {
  // Both constructors require a name; default construction is disabled.
  TraceSpan() = delete;

  // Span given by a start time, an end time and a name. opCount, iteration
  // and prefix keep their in-class defaults.
  TraceSpan(int64_t startTime, int64_t endTime, std::string name)
      : startTime{startTime}, endTime{endTime}, name{std::move(name)} {}

  // Span given by an op count, an iteration number, a name and a prefix.
  // startTime and endTime keep their in-class defaults.
  TraceSpan(int opCount, int it, std::string name, std::string prefix)
      : opCount{opCount},
        iteration{it},
        name{std::move(name)},
        prefix{std::move(prefix)} {}

  // FIXME: change to duration?
  int64_t startTime{0};
  int64_t endTime{0};
  int opCount{0};
  int iteration{-1};
  // The name identifies the timeline
  std::string name;
  // The prefix distinguishes trace spans that share a timeline
  std::string prefix;
};
} // namespace libkineto

View File

@ -0,0 +1,162 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
// Mediator for initialization and profiler control
#pragma once
#include <atomic>
#include <chrono>
#include <functional>
#include <memory>
#include <mutex>
#include <string>
#include <set>
#include <thread>
#include <vector>
#include <deque>
#include "ActivityProfilerInterface.h"
#include "ActivityType.h"
#include "ClientInterface.h"
#include "GenericTraceActivity.h"
#include "TraceSpan.h"
#include "IActivityProfiler.h"
#include "ActivityTraceInterface.h"
#include "ILoggerObserver.h"
#include "LoggingAPI.h"
#include "ThreadUtil.h"
// Functions declared with C linkage (no C++ name mangling). They are only
// declared here; their definitions live elsewhere.
extern "C" {
// LibkinetoApi::suppressLogMessages() below forwards to this function.
void suppressLibkinetoLogMessages();
int InitializeInjection(void);
// NOTE(review): presumably the library's main initialization entry point;
// the exact effect of cpuOnly / logOnError is not visible here - confirm.
void libkineto_init(bool cpuOnly, bool logOnError);
bool hasTestEnvVar();
}
namespace libkineto {
class Config;
class ConfigLoader;
// Bundles a TraceSpan, a GPU op counter and an owning deque of
// GenericTraceActivity objects.
struct CpuTraceBuffer {
  // Constructs a GenericTraceActivity from args (perfectly forwarded to its
  // constructor) and appends it to the back of activities.
  template <class... Args>
  void emplace_activity(Args&&... args) {
    activities.emplace_back(
        std::make_unique<GenericTraceActivity>(std::forward<Args>(args)...));
  }

  // Dereference helpers that turn an element of activities into a plain
  // reference. ref must not be null: it is dereferenced unconditionally.
  static GenericTraceActivity& toRef(
      std::unique_ptr<GenericTraceActivity>& ref) {
    return *ref;
  }

  static const GenericTraceActivity& toRef(
      const std::unique_ptr<GenericTraceActivity>& ref) {
    return *ref;
  }

  // Placeholder span; defaults to times 0/0 and the name "none".
  TraceSpan span{0, 0, "none"};
  // Explicitly zero-initialized so that a freshly constructed buffer never
  // exposes an indeterminate value. Callers that assign their own value are
  // unaffected.
  int gpuOpCount{0};
  std::deque<std::unique_ptr<GenericTraceActivity>> activities;
};
// Callable taking no arguments that returns an owning pointer to an
// IActivityProfiler. LibkinetoApi invokes these factories to obtain the child
// profilers it passes to addChildActivityProfiler().
using ChildActivityProfilerFactory =
std::function<std::unique_ptr<IActivityProfiler>()>;
// Mediator that holds the registered activity profiler, the registered
// client and the config loader, and wires child profiler factories into the
// activity profiler once it is initialized.
class LibkinetoApi {
 public:
  // Stores a reference to configLoader; the loader must outlive this object.
  explicit LibkinetoApi(ConfigLoader& configLoader)
      : configLoader_(configLoader) {}

  // Called by client that supports tracing API.
  // libkineto can still function without this.
  void registerClient(ClientInterface* client);

  // Called by libkineto on init. Takes ownership of the profiler and then
  // runs initClientIfRegistered().
  void registerProfiler(std::unique_ptr<ActivityProfilerInterface> profiler) {
    activityProfiler_ = std::move(profiler);
    initClientIfRegistered();
  }

  // Returns the registered profiler. Precondition: a profiler has been
  // registered (see isProfilerRegistered()); otherwise this dereferences a
  // null pointer.
  ActivityProfilerInterface& activityProfiler() {
    return *activityProfiler_;
  }

  // Returns the stored client pointer (nullptr by default).
  ClientInterface* client() {
    return client_;
  }

  // If a profiler is registered, initializes it (when it is not already
  // initialized) and then attaches the pending child profilers. The
  // std::once_flag is a function-local static, so this one-time step is
  // shared by all LibkinetoApi instances rather than being per object.
  void initProfilerIfRegistered() {
    static std::once_flag once;
    if (activityProfiler_) {
      std::call_once(once, [this] {
        if (!activityProfiler_->isInitialized()) {
          activityProfiler_->init();
          initChildActivityProfilers();
        }
      });
    }
  }

  // True when a profiler is registered and reports itself as initialized.
  bool isProfilerInitialized() const {
    return activityProfiler_ && activityProfiler_->isInitialized();
  }

  // True when a profiler has been registered.
  bool isProfilerRegistered() const {
    return activityProfiler_ != nullptr;
  }

  // Forwards to the C-linkage suppressLibkinetoLogMessages().
  void suppressLogMessages() {
    suppressLibkinetoLogMessages();
  }

  // Provides access to profiler configuration management
  ConfigLoader& configLoader() {
    return configLoader_;
  }

  // Registers a factory for a child activity profiler. If the profiler is
  // already initialized the factory is invoked right away and its product is
  // handed to the profiler; otherwise the factory is queued until
  // initChildActivityProfilers() runs.
  void registerProfilerFactory(ChildActivityProfilerFactory factory) {
    if (isProfilerInitialized()) {
      activityProfiler_->addChildActivityProfiler(factory());
    } else {
      // factory is a by-value parameter, so move it into the queue instead
      // of copying the std::function a second time.
      childProfilerFactories_.push_back(std::move(factory));
    }
  }

 private:
  // Invokes every queued factory, hands each resulting child profiler to the
  // activity profiler, and then empties the queue. Does nothing unless the
  // profiler is initialized.
  void initChildActivityProfilers() {
    if (!isProfilerInitialized()) {
      return;
    }
    for (const auto& factory : childProfilerFactories_) {
      activityProfiler_->addChildActivityProfiler(factory());
    }
    childProfilerFactories_.clear();
  }

  // Client is initialized once both it and libkineto have registered
  void initClientIfRegistered();

  ConfigLoader& configLoader_;
  std::unique_ptr<ActivityProfilerInterface> activityProfiler_{};
  ClientInterface* client_{};
  int32_t clientRegisterThread_{0};
  // Factories registered before the profiler was initialized.
  std::vector<ChildActivityProfilerFactory> childProfilerFactories_;
};
// Singleton accessor: returns a reference to a LibkinetoApi object. The
// definition is not in this header.
LibkinetoApi& api();
} // namespace libkineto

View File

@ -0,0 +1,79 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <fstream>
#include <map>
#include <ostream>
#include <thread>
#include <unordered_map>
// TODO(T90238193)
// @lint-ignore-every CLANGTIDY facebook-hte-RelativeInclude
#include "IActivityProfiler.h"
#include "GenericTraceActivity.h"
#include "ThreadUtil.h"
#include "TraceSpan.h"
namespace KINETO_NAMESPACE {
struct ActivityBuffers;
}
namespace libkineto {
using namespace KINETO_NAMESPACE;
// sortIndex uses this to place GPU tracks at the bottom of the trace
// timelines. The largest valid CPU PID is 4,194,304, so 5,000,000 is
// large enough to guarantee that GPU tracks sort after CPU ones.
constexpr int64_t kExceedMaxPid{5'000'000};
// Abstract sink for trace output: device, resource and overhead records,
// trace spans, activities, plus trace start / finalization. Only subclasses
// can construct it (the default constructor is protected).
class ActivityLogger {
 public:
  virtual ~ActivityLogger() = default;

  // Overhead record; carries nothing but an immutable name.
  struct OverheadInfo {
    explicit OverheadInfo(const std::string& name) : name{name} {}
    const std::string name;
  };

  virtual void handleDeviceInfo(const DeviceInfo& info, uint64_t time) = 0;

  virtual void handleResourceInfo(const ResourceInfo& info, int64_t time) = 0;

  virtual void handleOverheadInfo(const OverheadInfo& info, int64_t time) = 0;

  virtual void handleTraceSpan(const TraceSpan& span) = 0;

  virtual void handleActivity(const libkineto::ITraceActivity& activity) = 0;

  virtual void handleGenericActivity(
      const libkineto::GenericTraceActivity& activity) = 0;

  virtual void handleTraceStart(
      const std::unordered_map<std::string, std::string>& metadata) = 0;

  // Convenience overload: starts the trace with an empty metadata map.
  void handleTraceStart() {
    const std::unordered_map<std::string, std::string> emptyMetadata{};
    handleTraceStart(emptyMetadata);
  }

  // Takes ownership of buffers; metadata is passed by non-const reference.
  virtual void finalizeTrace(
      const Config& config,
      std::unique_ptr<ActivityBuffers> buffers,
      int64_t endTime,
      std::unordered_map<std::string, std::vector<std::string>>&
          metadata) = 0;

 protected:
  ActivityLogger() = default;
};
} // namespace libkineto

View File

@ -0,0 +1,22 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <chrono>
namespace libkineto {
// Converts a time point of any clock into the number of nanoseconds that
// have elapsed since that clock's epoch.
template <class ClockT>
inline int64_t timeSinceEpoch(const std::chrono::time_point<ClockT>& t) {
  using std::chrono::nanoseconds;
  const auto sinceEpoch = t.time_since_epoch();
  const nanoseconds asNanos =
      std::chrono::duration_cast<nanoseconds>(sinceEpoch);
  return asNanos.count();
}
} // namespace libkineto