I am done

2024-10-30 22:14:35 +01:00
parent 720dc28c09
commit 40e2a747cf
36901 changed files with 5011519 additions and 0 deletions

View File

@@ -0,0 +1,231 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""The TensorBoard Audio plugin."""
import urllib.parse
from werkzeug import wrappers
from tensorboard import errors
from tensorboard import plugin_util
from tensorboard.backend import http_util
from tensorboard.data import provider
from tensorboard.plugins import base_plugin
from tensorboard.plugins.audio import metadata
_DEFAULT_MIME_TYPE = "application/octet-stream"
_DEFAULT_DOWNSAMPLING = 10 # audio clips per time series
_MIME_TYPES = {
    metadata.Encoding.Value("WAV"): "audio/wav",
}

_ALLOWED_MIME_TYPES = frozenset(
    list(_MIME_TYPES.values()) + [_DEFAULT_MIME_TYPE]
)


class AudioPlugin(base_plugin.TBPlugin):
    """Audio Plugin for TensorBoard."""

    plugin_name = metadata.PLUGIN_NAME

    def __init__(self, context):
        """Instantiates AudioPlugin via TensorBoard core.

        Args:
          context: A base_plugin.TBContext instance.
        """
        self._data_provider = context.data_provider
        self._downsample_to = (context.sampling_hints or {}).get(
            self.plugin_name, _DEFAULT_DOWNSAMPLING
        )
        self._version_checker = plugin_util._MetadataVersionChecker(
            data_kind="audio",
            latest_known_version=0,
        )

    def get_plugin_apps(self):
        return {
            "/audio": self._serve_audio_metadata,
            "/individualAudio": self._serve_individual_audio,
            "/tags": self._serve_tags,
        }

    def is_active(self):
        return False  # `list_plugins` as called by TB core suffices

    def frontend_metadata(self):
        return base_plugin.FrontendMetadata(element_name="tf-audio-dashboard")

    def _index_impl(self, ctx, experiment):
        """Return information about the tags in each run.

        Result is a dictionary of the form

            {
              "runName1": {
                "tagName1": {
                  "displayName": "The first tag",
                  "description": "<p>Long ago there was just one tag...</p>",
                  "samples": 3
                },
                "tagName2": ...,
                ...
              },
              "runName2": ...,
              ...
            }

        For each tag, `samples` is the greatest number of audio clips that
        appear at any particular step. (It's not related to "samples of a
        waveform.") For example, if for tag `minibatch_input` there are
        five audio clips at step 0 and ten audio clips at step 1, then the
        dictionary for `"minibatch_input"` will contain `"samples": 10`.
        """
        mapping = self._data_provider.list_blob_sequences(
            ctx,
            experiment_id=experiment,
            plugin_name=metadata.PLUGIN_NAME,
        )
        result = {run: {} for run in mapping}
        for run, tag_to_time_series in mapping.items():
            for tag, time_series in tag_to_time_series.items():
                md = metadata.parse_plugin_metadata(time_series.plugin_content)
                if not self._version_checker.ok(md.version, run, tag):
                    continue
                description = plugin_util.markdown_to_safe_html(
                    time_series.description
                )
                result[run][tag] = {
                    "displayName": time_series.display_name,
                    "description": description,
                    "samples": time_series.max_length,
                }
        return result

    @wrappers.Request.application
    def _serve_audio_metadata(self, request):
        """Given a tag and a single run, serve a list of metadata for audio.

        Note that the actual audio data are not sent; instead, we respond
        with URLs to the audio. The frontend should treat these URLs as
        opaque and should not try to parse information about them or
        generate them itself, as the format may change.

        Args:
          request: A werkzeug.wrappers.Request object.

        Returns:
          A werkzeug.Response application.
        """
        ctx = plugin_util.context(request.environ)
        experiment = plugin_util.experiment_id(request.environ)
        tag = request.args.get("tag")
        run = request.args.get("run")
        sample = int(request.args.get("sample", 0))
        response = self._audio_response_for_run(
            ctx, experiment, run, tag, sample
        )
        return http_util.Respond(request, response, "application/json")

    def _audio_response_for_run(self, ctx, experiment, run, tag, sample):
        """Builds a JSON-serializable object with information about audio.

        Args:
          run: The name of the run.
          tag: The name of the tag the audio entries all belong to.
          sample: The zero-indexed clip index within each step's batch for
            which to retrieve information. For instance, setting `sample` to
            `2` will fetch information about only the third audio clip of
            each batch, and steps with fewer than three audio clips will be
            omitted from the results.

        Returns:
          A list of dictionaries containing the wall time, step, label,
          content type, and query string for each audio entry.
        """
        all_audio = self._data_provider.read_blob_sequences(
            ctx,
            experiment_id=experiment,
            plugin_name=metadata.PLUGIN_NAME,
            downsample=self._downsample_to,
            run_tag_filter=provider.RunTagFilter(runs=[run], tags=[tag]),
        )
        audio = all_audio.get(run, {}).get(tag, None)
        if audio is None:
            raise errors.NotFoundError(
                "No audio data for run=%r, tag=%r" % (run, tag)
            )
        content_type = self._get_mime_type(ctx, experiment, run, tag)
        response = []
        for datum in audio:
            if len(datum.values) <= sample:
                # This step has no clip at index `sample`; skip it.
                continue
            query = urllib.parse.urlencode(
                {
                    "blob_key": datum.values[sample].blob_key,
                    "content_type": content_type,
                }
            )
            response.append(
                {
                    "wall_time": datum.wall_time,
                    "label": "",
                    "step": datum.step,
                    "contentType": content_type,
                    "query": query,
                }
            )
        return response

    def _get_mime_type(self, ctx, experiment, run, tag):
        # TODO(@wchargin): Move this call from `/audio` (called many
        # times) to `/tags` (called few times) to reduce data provider
        # calls.
        mapping = self._data_provider.list_blob_sequences(
            ctx,
            experiment_id=experiment,
            plugin_name=metadata.PLUGIN_NAME,
        )
        time_series = mapping.get(run, {}).get(tag, None)
        if time_series is None:
            raise errors.NotFoundError(
                "No audio data for run=%r, tag=%r" % (run, tag)
            )
        parsed = metadata.parse_plugin_metadata(time_series.plugin_content)
        return _MIME_TYPES.get(parsed.encoding, _DEFAULT_MIME_TYPE)

    @wrappers.Request.application
    def _serve_individual_audio(self, request):
        """Serve encoded audio data."""
        ctx = plugin_util.context(request.environ)
        experiment = plugin_util.experiment_id(request.environ)
        mime_type = request.args["content_type"]
        if mime_type not in _ALLOWED_MIME_TYPES:
            raise errors.InvalidArgumentError(
                "Illegal mime type %r" % mime_type
            )
        blob_key = request.args["blob_key"]
        data = self._data_provider.read_blob(ctx, blob_key=blob_key)
        return http_util.Respond(request, data, mime_type)

    @wrappers.Request.application
    def _serve_tags(self, request):
        ctx = plugin_util.context(request.environ)
        experiment = plugin_util.experiment_id(request.environ)
        index = self._index_impl(ctx, experiment)
        return http_util.Respond(request, index, "application/json")
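
For orientation, here is a minimal client-side sketch (an editor's addition, not part of this commit) exercising the three routes registered in `get_plugin_apps`. It assumes a TensorBoard instance on `localhost:6006` and that TB core mounts plugin routes under `/data/plugin/audio`; the run and tag names are hypothetical.

    import json
    import urllib.parse
    import urllib.request

    BASE = "http://localhost:6006/data/plugin/audio"  # assumed mount point

    def fetch_json(url):
        with urllib.request.urlopen(url) as response:
            return json.load(response)

    # Discover runs and tags (the /tags route returns _index_impl's dictionary).
    index = fetch_json(BASE + "/tags")

    # List metadata for the first clip of each step of a hypothetical run/tag.
    params = urllib.parse.urlencode(
        {"run": "train", "tag": "minibatch_input", "sample": 0}
    )
    entries = fetch_json(BASE + "/audio?" + params)

    # Fetch the encoded audio itself; per the docstring above, the query
    # string is opaque and should be passed through unmodified.
    for entry in entries:
        with urllib.request.urlopen(BASE + "/individualAudio?" + entry["query"]) as f:
            wav_bytes = f.read()  # MIME type is entry["contentType"]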

View File

@@ -0,0 +1,70 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Internal information about the audio plugin."""
from tensorboard.compat.proto import summary_pb2
from tensorboard.plugins.audio import plugin_data_pb2
PLUGIN_NAME = "audio"
# The most recent value for the `version` field of the `AudioPluginData`
# proto.
PROTO_VERSION = 0
# Expose the `Encoding` enum constants.
Encoding = plugin_data_pb2.AudioPluginData.Encoding


def create_summary_metadata(
    display_name, description, encoding, *, converted_to_tensor=None
):
    """Create a `SummaryMetadata` proto for audio plugin data.

    Returns:
      A `SummaryMetadata` protobuf object.
    """
    content = plugin_data_pb2.AudioPluginData(
        version=PROTO_VERSION,
        encoding=encoding,
        converted_to_tensor=converted_to_tensor,
    )
    metadata = summary_pb2.SummaryMetadata(
        display_name=display_name,
        summary_description=description,
        plugin_data=summary_pb2.SummaryMetadata.PluginData(
            plugin_name=PLUGIN_NAME, content=content.SerializeToString()
        ),
    )
    return metadata


def parse_plugin_metadata(content):
    """Parse summary metadata to a Python object.

    Arguments:
      content: The `content` field of a `SummaryMetadata` proto
        corresponding to the audio plugin.

    Returns:
      An `AudioPluginData` protobuf object.
    """
    if not isinstance(content, bytes):
        raise TypeError("Content type must be bytes")
    result = plugin_data_pb2.AudioPluginData.FromString(content)
    if result.version == 0:
        return result
    # No other versions known at this time, so no migrations to do.
    return result
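
A quick round-trip sketch (editor's addition, not part of this file): metadata built by `create_summary_metadata` can be recovered with `parse_plugin_metadata` via the `plugin_data.content` field; the display name and description here are hypothetical.

    md = create_summary_metadata(
        display_name="waveform",
        description="Example clip",
        encoding=Encoding.Value("WAV"),
    )
    parsed = parse_plugin_metadata(md.plugin_data.content)
    assert parsed.version == PROTO_VERSION
    assert parsed.encoding == Encoding.Value("WAV")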

View File

@@ -0,0 +1,37 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: tensorboard/plugins/audio/plugin_data.proto
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n+tensorboard/plugins/audio/plugin_data.proto\x12\x0btensorboard\"\x9a\x01\n\x0f\x41udioPluginData\x12\x0f\n\x07version\x18\x01 \x01(\x05\x12\x37\n\x08\x65ncoding\x18\x02 \x01(\x0e\x32%.tensorboard.AudioPluginData.Encoding\x12\x1b\n\x13\x63onverted_to_tensor\x18\x03 \x01(\x08\" \n\x08\x45ncoding\x12\x0b\n\x07UNKNOWN\x10\x00\x12\x07\n\x03WAV\x10\x0b\x62\x06proto3')
_AUDIOPLUGINDATA = DESCRIPTOR.message_types_by_name['AudioPluginData']
_AUDIOPLUGINDATA_ENCODING = _AUDIOPLUGINDATA.enum_types_by_name['Encoding']
AudioPluginData = _reflection.GeneratedProtocolMessageType('AudioPluginData', (_message.Message,), {
  'DESCRIPTOR' : _AUDIOPLUGINDATA,
  '__module__' : 'tensorboard.plugins.audio.plugin_data_pb2'
  # @@protoc_insertion_point(class_scope:tensorboard.AudioPluginData)
  })
_sym_db.RegisterMessage(AudioPluginData)

if _descriptor._USE_C_DESCRIPTORS == False:

  DESCRIPTOR._options = None
  _AUDIOPLUGINDATA._serialized_start=61
  _AUDIOPLUGINDATA._serialized_end=215
  _AUDIOPLUGINDATA_ENCODING._serialized_start=183
  _AUDIOPLUGINDATA_ENCODING._serialized_end=215
# @@protoc_insertion_point(module_scope)
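
Generated modules like this are normally used only indirectly, but a small sketch (editor's addition) of constructing and serializing the message may help; the field names and the WAV value of 11 follow from the serialized descriptor above.

    msg = AudioPluginData(
        version=0,
        encoding=AudioPluginData.Encoding.Value("WAV"),
        converted_to_tensor=False,
    )
    assert AudioPluginData.FromString(msg.SerializeToString()).encoding == 11  # WAV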

View File

@@ -0,0 +1,232 @@
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Audio summaries and TensorFlow operations to create them.
An audio summary stores a rank-2 string tensor of shape `[k, 2]`, where
`k` is the number of audio clips recorded in the summary. Each row of
the tensor is a pair `[encoded_audio, label]`, where `encoded_audio` is
a binary string whose encoding is specified in the summary metadata, and
`label` is a UTF-8 encoded Markdown string describing the audio clip.
NOTE: This module is in beta, and its API is subject to change, but the
data that it stores to disk will be supported forever.
"""
import functools
import warnings
import numpy as np
from tensorboard.util import encoder as encoder_util
from tensorboard.plugins.audio import metadata
from tensorboard.plugins.audio import summary_v2
# Export V2 versions.
audio = summary_v2.audio

_LABELS_WARNING = (
    "Labels on audio summaries are deprecated and will be removed. "
    "See <https://github.com/tensorflow/tensorboard/issues/3513>."
)


def op(
    name,
    audio,
    sample_rate,
    labels=None,
    max_outputs=3,
    encoding=None,
    display_name=None,
    description=None,
    collections=None,
):
"""Create a legacy audio summary op for use in a TensorFlow graph.
Arguments:
name: A unique name for the generated summary node.
audio: A `Tensor` representing audio data with shape `[k, t, c]`,
where `k` is the number of audio clips, `t` is the number of
frames, and `c` is the number of channels. Elements should be
floating-point values in `[-1.0, 1.0]`. Any of the dimensions may
be statically unknown (i.e., `None`).
sample_rate: An `int` or rank-0 `int32` `Tensor` that represents the
sample rate, in Hz. Must be positive.
labels: Deprecated. Do not set.
max_outputs: Optional `int` or rank-0 integer `Tensor`. At most this
many audio clips will be emitted at each step. When more than
`max_outputs` many clips are provided, the first `max_outputs`
many clips will be used and the rest silently discarded.
encoding: A constant `str` (not string tensor) indicating the
desired encoding. You can choose any format you like, as long as
it's "wav". Please see the "API compatibility note" below.
display_name: Optional name for this summary in TensorBoard, as a
constant `str`. Defaults to `name`.
description: Optional long-form description for this summary, as a
constant `str`. Markdown is supported. Defaults to empty.
collections: Optional list of graph collections keys. The new
summary op is added to these collections. Defaults to
`[Graph Keys.SUMMARIES]`.
Returns:
A TensorFlow summary op.
API compatibility note: The default value of the `encoding`
argument is _not_ guaranteed to remain unchanged across TensorBoard
versions. In the future, we will by default encode as FLAC instead of
as WAV. If the specific format is important to you, please provide a
file format explicitly.
"""
    if labels is not None:
        warnings.warn(_LABELS_WARNING)
    # TODO(nickfelt): remove on-demand imports once dep situation is fixed.
    import tensorflow.compat.v1 as tf

    if display_name is None:
        display_name = name
    if encoding is None:
        encoding = "wav"
    if encoding == "wav":
        encoding = metadata.Encoding.Value("WAV")
        encoder = functools.partial(
            tf.audio.encode_wav, sample_rate=sample_rate
        )
    else:
        raise ValueError("Unknown encoding: %r" % encoding)
    with tf.name_scope(name), tf.control_dependencies(
        [tf.assert_rank(audio, 3)]
    ):
        limited_audio = audio[:max_outputs]
        encoded_audio = tf.map_fn(
            encoder, limited_audio, dtype=tf.string, name="encode_each_audio"
        )
        if labels is None:
            limited_labels = tf.tile([""], tf.shape(input=limited_audio)[:1])
        else:
            limited_labels = labels[:max_outputs]
        tensor = tf.transpose(a=tf.stack([encoded_audio, limited_labels]))
        summary_metadata = metadata.create_summary_metadata(
            display_name=display_name,
            description=description,
            encoding=encoding,
        )
        return tf.summary.tensor_summary(
            name="audio_summary",
            tensor=tensor,
            collections=collections,
            summary_metadata=summary_metadata,
        )
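

# Editor's note: an illustrative graph-mode usage sketch for `op` (not part
# of the original module); the logdir and placeholder shape are assumptions.
#
#     import numpy as np
#     import tensorflow.compat.v1 as tf
#     from tensorboard.plugins.audio import summary as audio_summary
#
#     tf.disable_eager_execution()
#     audio_ph = tf.placeholder(tf.float32, shape=[None, None, 1])
#     summ = audio_summary.op("waveform", audio_ph, sample_rate=44100)
#     with tf.Session() as sess:
#         writer = tf.summary.FileWriter("/tmp/logdir")
#         clips = np.zeros([2, 44100, 1], dtype=np.float32)  # two silent clips
#         writer.add_summary(sess.run(summ, {audio_ph: clips}), global_step=0)
#         writer.close()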


def pb(
    name,
    audio,
    sample_rate,
    labels=None,
    max_outputs=3,
    encoding=None,
    display_name=None,
    description=None,
):
"""Create a legacy audio summary protobuf.
This behaves as if you were to create an `op` with the same arguments
(wrapped with constant tensors where appropriate) and then execute
that summary op in a TensorFlow session.
Arguments:
name: A unique name for the generated summary node.
audio: An `np.array` representing audio data with shape `[k, t, c]`,
where `k` is the number of audio clips, `t` is the number of
frames, and `c` is the number of channels. Elements should be
floating-point values in `[-1.0, 1.0]`.
sample_rate: An `int` that represents the sample rate, in Hz.
Must be positive.
labels: Deprecated. Do not set.
max_outputs: Optional `int`. At most this many audio clips will be
emitted. When more than `max_outputs` many clips are provided, the
first `max_outputs` many clips will be used and the rest silently
discarded.
encoding: A constant `str` indicating the desired encoding. You
can choose any format you like, as long as it's "wav". Please see
the "API compatibility note" below.
display_name: Optional name for this summary in TensorBoard, as a
`str`. Defaults to `name`.
description: Optional long-form description for this summary, as a
`str`. Markdown is supported. Defaults to empty.
Returns:
A `tf.Summary` protobuf object.
API compatibility note: The default value of the `encoding`
argument is _not_ guaranteed to remain unchanged across TensorBoard
versions. In the future, we will by default encode as FLAC instead of
as WAV. If the specific format is important to you, please provide a
file format explicitly.
"""
    if labels is not None:
        warnings.warn(_LABELS_WARNING)
    # TODO(nickfelt): remove on-demand imports once dep situation is fixed.
    import tensorflow.compat.v1 as tf

    audio = np.array(audio)
    if audio.ndim != 3:
        raise ValueError("Shape %r must have rank 3" % (audio.shape,))
    if encoding is None:
        encoding = "wav"
    if encoding == "wav":
        encoding = metadata.Encoding.Value("WAV")
        encoder = functools.partial(
            encoder_util.encode_wav, samples_per_second=sample_rate
        )
    else:
        raise ValueError("Unknown encoding: %r" % encoding)
    limited_audio = audio[:max_outputs]
    if labels is None:
        limited_labels = [b""] * len(limited_audio)
    else:
        limited_labels = [
            tf.compat.as_bytes(label) for label in labels[:max_outputs]
        ]
    encoded_audio = [encoder(a) for a in limited_audio]
    content = np.array([encoded_audio, limited_labels]).transpose()
    tensor = tf.make_tensor_proto(content, dtype=tf.string)
    if display_name is None:
        display_name = name
    summary_metadata = metadata.create_summary_metadata(
        display_name=display_name, description=description, encoding=encoding
    )
    tf_summary_metadata = tf.SummaryMetadata.FromString(
        summary_metadata.SerializeToString()
    )
    summary = tf.Summary()
    summary.value.add(
        tag="%s/audio_summary" % name,
        metadata=tf_summary_metadata,
        tensor=tensor,
    )
    return summary
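
Because `pb` runs eagerly over NumPy data, it pairs naturally with a `FileWriter`. A minimal sketch (editor's addition, not part of this commit), assuming a hypothetical logdir:

    import numpy as np
    import tensorflow.compat.v1 as tf
    from tensorboard.plugins.audio import summary as audio_summary

    clips = np.zeros([1, 16000, 1], dtype=np.float32)  # one second of silence at 16 kHz
    summ = audio_summary.pb("waveform", clips, sample_rate=16000)
    writer = tf.summary.FileWriter("/tmp/logdir")
    writer.add_summary(summ, global_step=0)
    writer.close()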

View File

@@ -0,0 +1,125 @@
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Audio summaries and TensorFlow operations to create them, V2 versions.
An audio summary stores a rank-2 string tensor of shape `[k, 2]`, where
`k` is the number of audio clips recorded in the summary. Each row of
the tensor is a pair `[encoded_audio, label]`, where `encoded_audio` is
a binary string whose encoding is specified in the summary metadata, and
`label` is a UTF-8 encoded Markdown string describing the audio clip.
"""
import functools
from tensorboard.compat import tf2 as tf
from tensorboard.plugins.audio import metadata
from tensorboard.util import lazy_tensor_creator


def audio(
    name,
    data,
    sample_rate,
    step=None,
    max_outputs=3,
    encoding=None,
    description=None,
):
"""Write an audio summary.
Arguments:
name: A name for this summary. The summary tag used for TensorBoard will
be this name prefixed by any active name scopes.
data: A `Tensor` representing audio data with shape `[k, t, c]`,
where `k` is the number of audio clips, `t` is the number of
frames, and `c` is the number of channels. Elements should be
floating-point values in `[-1.0, 1.0]`. Any of the dimensions may
be statically unknown (i.e., `None`).
sample_rate: An `int` or rank-0 `int32` `Tensor` that represents the
sample rate, in Hz. Must be positive.
step: Explicit `int64`-castable monotonic step value for this summary. If
omitted, this defaults to `tf.summary.experimental.get_step()`, which must
not be None.
max_outputs: Optional `int` or rank-0 integer `Tensor`. At most this
many audio clips will be emitted at each step. When more than
`max_outputs` many clips are provided, the first `max_outputs`
many clips will be used and the rest silently discarded.
encoding: Optional constant `str` for the desired encoding. Only "wav"
is currently supported, but this is not guaranteed to remain the
default, so if you want "wav" in particular, set this explicitly.
description: Optional long-form description for this summary, as a
constant `str`. Markdown is supported. Defaults to empty.
Returns:
True on success, or false if no summary was emitted because no default
summary writer was available.
Raises:
ValueError: if a default writer exists, but no step was provided and
`tf.summary.experimental.get_step()` is None.
"""
    audio_ops = getattr(tf, "audio", None)
    if audio_ops is None:
        # Fallback for older versions of TF without tf.audio.
        from tensorflow.python.ops import gen_audio_ops as audio_ops
    if encoding is None:
        encoding = "wav"
    if encoding != "wav":
        raise ValueError("Unknown encoding: %r" % encoding)
    summary_metadata = metadata.create_summary_metadata(
        display_name=None,
        description=description,
        encoding=metadata.Encoding.Value("WAV"),
    )
    inputs = [data, sample_rate, max_outputs, step]
    # TODO(https://github.com/tensorflow/tensorboard/issues/2109): remove fallback
    summary_scope = (
        getattr(tf.summary.experimental, "summary_scope", None)
        or tf.summary.summary_scope
    )
    with summary_scope(name, "audio_summary", values=inputs) as (tag, _):
        # Defer audio encoding preprocessing by passing it as a callable
        # to write(), wrapped in a LazyTensorCreator for backwards
        # compatibility, so that we only do this work when summaries are
        # actually written.
        @lazy_tensor_creator.LazyTensorCreator
        def lazy_tensor():
            tf.debugging.assert_rank(data, 3)
            tf.debugging.assert_non_negative(max_outputs)
            limited_audio = data[:max_outputs]
            encode_fn = functools.partial(
                audio_ops.encode_wav, sample_rate=sample_rate
            )
            encoded_audio = tf.map_fn(
                encode_fn,
                limited_audio,
                dtype=tf.string,
                name="encode_each_audio",
            )
            # Workaround for map_fn returning float dtype for an empty
            # elems input.
            encoded_audio = tf.cond(
                tf.shape(input=encoded_audio)[0] > 0,
                lambda: encoded_audio,
                lambda: tf.constant([], tf.string),
            )
            limited_labels = tf.tile([""], tf.shape(input=limited_audio)[:1])
            return tf.transpose(a=tf.stack([encoded_audio, limited_labels]))

        # To ensure that audio encoding logic is only executed when
        # summaries are written, we pass callable to `tensor` parameter.
        return tf.summary.write(
            tag=tag, tensor=lazy_tensor, step=step, metadata=summary_metadata
        )
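
A minimal TF2 usage sketch (editor's addition, not part of this commit); `audio` here is the function defined above, re-exported as `summary_v2.audio`, and the logdir is an assumption:

    import tensorflow as tf
    from tensorboard.plugins.audio import summary_v2

    writer = tf.summary.create_file_writer("/tmp/logdir")
    waveform = tf.zeros([2, 44100, 1])  # two one-second clips of silence
    with writer.as_default():
        summary_v2.audio("waveform", waveform, sample_rate=44100, step=0)
    writer.flush()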