# 不规则张量

## 设置

``````!pip install -q tf_nightly
import math
import tensorflow as tf
``````

## 概述

• 可变长度特征，例如电影的演员名单。
• 成批的可变长度顺序输入，例如句子或视频剪辑。
• 分层输入，例如细分为节、段落、句子和单词的文本文档。
• 结构化输入中的各个字段，例如协议缓冲区。

### 不规则张量的功能

``````digits = tf.ragged.constant([[3, 1, 4, 1], [], [5, 9, 2], [6], []])
words = tf.ragged.constant([["So", "long"], ["thanks", "for", "all", "the", "fish"]])
print(tf.reduce_mean(digits, axis=1))
print(tf.concat([digits, [[5, 3]]], axis=0))
print(tf.tile(digits, [1, 2]))
print(tf.strings.substr(words, 0, 2))
print(tf.map_fn(tf.math.square, digits))
``````
```<tf.RaggedTensor [[6, 4, 7, 4], [], [8, 12, 5], [9], []]>
tf.Tensor([2.25              nan 5.33333333 6.                nan], shape=(5,), dtype=float64)
<tf.RaggedTensor [[3, 1, 4, 1], [], [5, 9, 2], [6], [], [5, 3]]>
<tf.RaggedTensor [[3, 1, 4, 1, 3, 1, 4, 1], [], [5, 9, 2, 5, 9, 2], [6, 6], []]>
<tf.RaggedTensor [[b'So', b'lo'], [b'th', b'fo', b'al', b'th', b'fi']]>
<tf.RaggedTensor [[9, 1, 16, 1], [], [25, 81, 4], [36], []]>
```

``````print(digits[0])       # First row
``````
```tf.Tensor([3 1 4 1], shape=(4,), dtype=int32)
```
``````print(digits[:, :2])   # First two values in each row.
``````
```<tf.RaggedTensor [[3, 1], [], [5, 9], [6], []]>
```
``````print(digits[:, -2:])  # Last two values in each row.
``````
```<tf.RaggedTensor [[4, 1], [], [9, 2], [6], []]>
```

``````print(digits + 3)
``````
```<tf.RaggedTensor [[6, 4, 7, 4], [], [8, 12, 5], [9], []]>
```
``````print(digits + tf.ragged.constant([[1, 2, 3, 4], [], [5, 6, 7], [8], []]))
``````
```<tf.RaggedTensor [[4, 3, 7, 5], [], [10, 15, 9], [14], []]>
```

``````times_two_plus_one = lambda x: x * 2 + 1
print(tf.ragged.map_flat_values(times_two_plus_one, digits))
``````
```<tf.RaggedTensor [[7, 3, 9, 3], [], [11, 19, 5], [13], []]>
```

``````digits.to_list()
``````
```[[3, 1, 4, 1], [], [5, 9, 2], [6], []]
```
``````digits.numpy()
``````
```array([array([3, 1, 4, 1], dtype=int32), array([], dtype=int32),
array([5, 9, 2], dtype=int32), array([6], dtype=int32),
array([], dtype=int32)], dtype=object)
```

### 构造不规则张量

``````sentences = tf.ragged.constant([
["Let's", "build", "some", "ragged", "tensors", "!"],
["We", "can", "use", "tf.ragged.constant", "."]])
print(sentences)
``````
```<tf.RaggedTensor [[b"Let's", b'build', b'some', b'ragged', b'tensors', b'!'],
[b'We', b'can', b'use', b'tf.ragged.constant', b'.']]>
```
``````paragraphs = tf.ragged.constant([
[['I', 'have', 'a', 'cat'], ['His', 'name', 'is', 'Mat']],
[['Do', 'you', 'want', 'to', 'come', 'visit'], ["I'm", 'free', 'tomorrow']],
])
print(paragraphs)
``````
```<tf.RaggedTensor [[[b'I', b'have', b'a', b'cat'], [b'His', b'name', b'is', b'Mat']],
[[b'Do', b'you', b'want', b'to', b'come', b'visit'],
[b"I'm", b'free', b'tomorrow']]]>
```

#### `tf.RaggedTensor.from_value_rowids`

``````print(tf.RaggedTensor.from_value_rowids(
values=[3, 1, 4, 1, 5, 9, 2],
value_rowids=[0, 0, 0, 0, 2, 2, 3]))
``````
```<tf.RaggedTensor [[3, 1, 4, 1], [], [5, 9], [2]]>
```

#### `tf.RaggedTensor.from_row_lengths`

``````print(tf.RaggedTensor.from_row_lengths(
values=[3, 1, 4, 1, 5, 9, 2],
row_lengths=[4, 0, 2, 1]))
``````
```<tf.RaggedTensor [[3, 1, 4, 1], [], [5, 9], [2]]>
```

#### `tf.RaggedTensor.from_row_splits`

``````print(tf.RaggedTensor.from_row_splits(
values=[3, 1, 4, 1, 5, 9, 2],
row_splits=[0, 4, 4, 6, 7]))
``````
```<tf.RaggedTensor [[3, 1, 4, 1], [], [5, 9], [2]]>
```

### 可以在不规则张量中存储什么

``````print(tf.ragged.constant([["Hi"], ["How", "are", "you"]]))  # ok: type=string, rank=2
``````
```<tf.RaggedTensor [[b'Hi'], [b'How', b'are', b'you']]>
```
``````print(tf.ragged.constant([[[1, 2], [3]], [[4, 5]]]))        # ok: type=int32, rank=3
``````
```<tf.RaggedTensor [[[1, 2], [3]], [[4, 5]]]>
```
``````try:
tf.ragged.constant([["one", "two"], [3, 4]])              # bad: multiple types
except ValueError as exception:
print(exception)
``````
```Can't convert Python sequence with mixed types to Tensor.
```
``````try:
tf.ragged.constant(["A", ["B", "C"]])                     # bad: multiple nesting depths
except ValueError as exception:
print(exception)
``````
```all scalar values must have the same nesting depth
```

## 示例用例

``````queries = tf.ragged.constant([['Who', 'is', 'Dan', 'Smith'],
['Pause'],
['Will', 'it', 'rain', 'later', 'today']])

# Create an embedding table.
num_buckets = 1024
embedding_size = 4
embedding_table = tf.Variable(
tf.random.truncated_normal([num_buckets, embedding_size],
stddev=1.0 / math.sqrt(embedding_size)))

# Look up the embedding for each word.
word_buckets = tf.strings.to_hash_bucket_fast(queries, num_buckets)
word_embeddings = tf.nn.embedding_lookup(embedding_table, word_buckets)     # ①

# Add markers to the beginning and end of each sentence.
marker = tf.fill([queries.nrows(), 1], '#')
padded = tf.concat([marker, queries, marker], axis=1)                       # ②

# Build word bigrams & look up embeddings.

bigram_buckets = tf.strings.to_hash_bucket_fast(bigrams, num_buckets)
bigram_embeddings = tf.nn.embedding_lookup(embedding_table, bigram_buckets) # ④

# Find the average embedding for each sentence
all_embeddings = tf.concat([word_embeddings, bigram_embeddings], axis=1)    # ⑤
avg_embedding = tf.reduce_mean(all_embeddings, axis=1)                      # ⑥
print(avg_embedding)
``````
```tf.Tensor(
[[-0.06720775 -0.04016568  0.09679101 -0.0036542 ]
[ 0.2561753  -0.15689333 -0.05115475 -0.2354176 ]
[-0.07111183 -0.15319577 -0.11507701  0.06048655]], shape=(3, 4), dtype=float32)
```

## 不规则维度和均匀维度

与 `tf.Tensor` 一样，不规则张量的秩是其总维数（包括不规则维度和均匀维度）。潜在的不规则张量是一个值，这个值可能是 `tf.Tensor` 或 `tf.RaggedTensor`。

`RaggedTensor.shape` 特性返回不规则张量的 `tf.TensorShape`，其中不规则维度的大小为 `None`。

``````tf.ragged.constant([["Hi"], ["How", "are", "you"]]).shape
``````
```TensorShape([2, None])
```

``````print(tf.ragged.constant([["Hi"], ["How", "are", "you"]]).bounding_shape())
``````
```tf.Tensor([2 3], shape=(2,), dtype=int64)
```

## 不规则张量和稀疏张量对比

• 对稀疏张量或密集张量应用某一运算应当始终获得相同结果。
• 对不规则张量或稀疏张量应用某一运算可能获得不同结果。

``````ragged_x = tf.ragged.constant([["John"], ["a", "big", "dog"], ["my", "cat"]])
ragged_y = tf.ragged.constant([["fell", "asleep"], ["barked"], ["is", "fuzzy"]])
print(tf.concat([ragged_x, ragged_y], axis=1))
``````
```<tf.RaggedTensor [[b'John', b'fell', b'asleep'], [b'a', b'big', b'dog', b'barked'],
[b'my', b'cat', b'is', b'fuzzy']]>
```

``````sparse_x = ragged_x.to_sparse()
sparse_y = ragged_y.to_sparse()
sparse_result = tf.sparse.concat(sp_inputs=[sparse_x, sparse_y], axis=1)
print(tf.sparse.to_dense(sparse_result, ''))
``````
```tf.Tensor(
[[b'John' b'' b'' b'fell' b'asleep']
[b'a' b'big' b'dog' b'barked' b'']
[b'my' b'cat' b'' b'is' b'fuzzy']], shape=(3, 5), dtype=string)
```

## TensorFlow API

### Keras

tf.keras 是 TensorFlow 的高级 API，用于构建和训练深度学习模型。通过在 `tf.keras.Input` 或 `tf.keras.layers.InputLayer` 上设置 `ragged=True`，不规则张量可以作为输入传送到 Keras 模型。不规则张量还可以在 Keras 层之间传递，并由 Keras 模型返回。以下示例显示了一个使用不规则张量训练的小 LSTM 模型。

``````# Task: predict whether each sentence is a question or not.
sentences = tf.constant(
['What makes you think she is a witch?',
'She turned me into a newt.',
'A newt?',
'Well, I got better.'])
is_question = tf.constant([True, False, True, False])

# Preprocess the input strings.
# Splitting on spaces yields one variable-length row of words per sentence;
# hashing maps each word to an integer id while keeping that ragged structure.
hash_buckets = 1000
words = tf.strings.split(sentences, ' ')
hashed_words = tf.strings.to_hash_bucket_fast(words, hash_buckets)

# Build the Keras model.
# `ragged=True` on the Input layer lets the model consume `hashed_words`
# directly, without padding the sentences to a common length.
keras_model = tf.keras.Sequential([
tf.keras.layers.Input(shape=[None], dtype=tf.int64, ragged=True),
tf.keras.layers.Embedding(hash_buckets, 16),
tf.keras.layers.LSTM(32, use_bias=False),
tf.keras.layers.Dense(32),
tf.keras.layers.Activation(tf.nn.relu),
tf.keras.layers.Dense(1)
])

keras_model.compile(loss='binary_crossentropy', optimizer='rmsprop')
keras_model.fit(hashed_words, is_question, epochs=5)
print(keras_model.predict(hashed_words))
``````
```WARNING:tensorflow:Layer lstm will not use cuDNN kernels since it doesn't meet the criteria. It will use a generic GPU kernel as fallback when running on GPU.
Epoch 1/5
/tmpfs/src/tf_docs_env/lib/python3.9/site-packages/tensorflow/python/framework/indexed_slices.py:444: UserWarning: Converting sparse IndexedSlices(IndexedSlices(indices=Tensor("gradient_tape/sequential/lstm/RaggedToTensor/boolean_mask_1/GatherV2:0", shape=(None,), dtype=int32), values=Tensor("gradient_tape/sequential/lstm/RaggedToTensor/boolean_mask/GatherV2:0", shape=(None, 16), dtype=float32), dense_shape=Tensor("gradient_tape/sequential/lstm/RaggedToTensor/Shape:0", shape=(2,), dtype=int32))) to a dense Tensor of unknown shape. This may consume a large amount of memory.
warnings.warn(
1/1 [==============================] - 2s 2s/step - loss: 7.7144
Epoch 2/5
1/1 [==============================] - 0s 16ms/step - loss: 7.7125
Epoch 3/5
1/1 [==============================] - 0s 15ms/step - loss: 7.7125
Epoch 4/5
1/1 [==============================] - 0s 14ms/step - loss: 7.7125
Epoch 5/5
1/1 [==============================] - 0s 14ms/step - loss: 7.7125
1/1 [==============================] - 0s 186ms/step
[[-0.00731867]
[-0.00921252]
[-0.01391566]
[-0.00607282]]
```

### tf.Example

tf.Example 是 TensorFlow 数据的标准 protobuf 编码。使用 `tf.Example` 编码的数据往往包括可变长度特征。例如，以下代码定义了一批具有不同特征长度的四条 `tf.Example` 消息：

``````import google.protobuf.text_format as pbtext

def build_tf_example(s):
return pbtext.Merge(s, tf.train.Example()).SerializeToString()

example_batch = [
build_tf_example(r'''
features {
feature {key: "colors" value {bytes_list {value: ["red", "blue"]} } }
feature {key: "lengths" value {int64_list {value: [7]} } } }'''),
build_tf_example(r'''
features {
feature {key: "colors" value {bytes_list {value: ["orange"]} } }
feature {key: "lengths" value {int64_list {value: []} } } }'''),
build_tf_example(r'''
features {
feature {key: "colors" value {bytes_list {value: ["black", "yellow"]} } }
feature {key: "lengths" value {int64_list {value: [1, 3]} } } }'''),
build_tf_example(r'''
features {
feature {key: "colors" value {bytes_list {value: ["green"]} } }
feature {key: "lengths" value {int64_list {value: [3, 5, 2]} } } }''')]
``````

``````feature_specification = {
'colors': tf.io.RaggedFeature(tf.string),
'lengths': tf.io.RaggedFeature(tf.int64),
}
feature_tensors = tf.io.parse_example(example_batch, feature_specification)
for name, value in feature_tensors.items():
print("{}={}".format(name, value))
``````
```colors=<tf.RaggedTensor [[b'red', b'blue'], [b'orange'], [b'black', b'yellow'], [b'green']]>
lengths=<tf.RaggedTensor [[7], [], [1, 3], [3, 5, 2]]>
```

`tf.io.RaggedFeature` 还可用于读取具有多个不规则维度的特征。有关详细信息，请参阅 API 文档

### 数据集

tf.data 是一个 API，可用于通过简单的可重用代码块构建复杂的输入流水线。它的核心数据结构是 `tf.data.Dataset`，表示一系列元素，每个元素包含一个或多个分量。

``````# Helper function used to print datasets in the examples below.
def print_dictionary_dataset(dataset):
for i, element in enumerate(dataset):
print("Element {}:".format(i))
for (feature_name, feature_value) in element.items():
print('{:>14} = {}'.format(feature_name, feature_value))
``````

#### 使用不规则张量构建数据集

``````dataset = tf.data.Dataset.from_tensor_slices(feature_tensors)
print_dictionary_dataset(dataset)
``````
```Element 0:
colors = [b'red' b'blue']
lengths = [7]
Element 1:
colors = [b'orange']
lengths = []
Element 2:
colors = [b'black' b'yellow']
lengths = [1 3]
Element 3:
colors = [b'green']
lengths = [3 5 2]
```

#### 批处理和取消批处理具有不规则张量的数据集

``````batched_dataset = dataset.batch(2)
print_dictionary_dataset(batched_dataset)
``````
```Element 0:
colors = <tf.RaggedTensor [[b'red', b'blue'], [b'orange']]>
lengths = <tf.RaggedTensor [[7], []]>
Element 1:
colors = <tf.RaggedTensor [[b'black', b'yellow'], [b'green']]>
lengths = <tf.RaggedTensor [[1, 3], [3, 5, 2]]>
```

``````unbatched_dataset = batched_dataset.unbatch()
print_dictionary_dataset(unbatched_dataset)
``````
```Element 0:
colors = [b'red' b'blue']
lengths = [7]
Element 1:
colors = [b'orange']
lengths = []
Element 2:
colors = [b'black' b'yellow']
lengths = [1 3]
Element 3:
colors = [b'green']
lengths = [3 5 2]
```

#### 对具有可变长度非不规则张量的数据集进行批处理

``````non_ragged_dataset = tf.data.Dataset.from_tensor_slices([1, 5, 3, 2, 8])
non_ragged_dataset = non_ragged_dataset.map(tf.range)
batched_non_ragged_dataset = non_ragged_dataset.apply(
tf.data.experimental.dense_to_ragged_batch(2))
for element in batched_non_ragged_dataset:
print(element)
``````
```<tf.RaggedTensor [[0], [0, 1, 2, 3, 4]]>
<tf.RaggedTensor [[0, 1, 2], [0, 1]]>
<tf.RaggedTensor [[0, 1, 2, 3, 4, 5, 6, 7]]>
```

#### 转换具有不规则张量的数据集

``````def transform_lengths(features):
return {
'mean_length': tf.math.reduce_mean(features['lengths']),
'length_ranges': tf.ragged.range(features['lengths'])}
transformed_dataset = dataset.map(transform_lengths)
print_dictionary_dataset(transformed_dataset)
``````
```Element 0:
mean_length = 7
length_ranges = <tf.RaggedTensor [[0, 1, 2, 3, 4, 5, 6]]>
Element 1:
mean_length = 0
length_ranges = <tf.RaggedTensor []>
Element 2:
mean_length = 2
length_ranges = <tf.RaggedTensor [[0], [0, 1, 2]]>
Element 3:
mean_length = 3
length_ranges = <tf.RaggedTensor [[0, 1, 2], [0, 1, 2, 3, 4], [0, 1]]>
```

### tf.function

tf.function 是预计算 Python 函数的 TensorFlow 计算图的装饰器，它可以大幅改善 TensorFlow 代码的性能。不规则张量能够透明地与 `@tf.function` 装饰的函数一起使用。例如，以下函数对不规则张量和非不规则张量均有效：

``````@tf.function
def make_palindrome(x, axis):
return tf.concat([x, tf.reverse(x, [axis])], axis)
``````
``````make_palindrome(tf.constant([[1, 2], [3, 4], [5, 6]]), axis=1)
``````
```<tf.Tensor: shape=(3, 4), dtype=int32, numpy=
array([[1, 2, 2, 1],
[3, 4, 4, 3],
[5, 6, 6, 5]], dtype=int32)>
```
``````make_palindrome(tf.ragged.constant([[1, 2], [3], [4, 5, 6]]), axis=1)
``````
```2022-06-03 18:47:23.945175: W tensorflow/core/grappler/optimizers/loop_optimizer.cc:907] Skipping loop optimization for Merge node with control input: RaggedConcat/assert_equal_1/Assert/AssertGuard/branch_executed/_9
<tf.RaggedTensor [[1, 2, 2, 1], [3, 3], [4, 5, 6, 6, 5, 4]]>
```

``````@tf.function(
input_signature=[tf.RaggedTensorSpec(shape=[None, None], dtype=tf.int32)])
def max_and_min(rt):
return (tf.math.reduce_max(rt, axis=-1), tf.math.reduce_min(rt, axis=-1))

max_and_min(tf.ragged.constant([[1, 2], [3], [4, 5, 6]]))
``````
```(<tf.Tensor: shape=(3,), dtype=int32, numpy=array([2, 3, 6], dtype=int32)>,
<tf.Tensor: shape=(3,), dtype=int32, numpy=array([1, 3, 4], dtype=int32)>)
```

#### 具体函数

``````# Preferred way to use ragged tensors with concrete functions (TF 2.3+):
try:
@tf.function
def increment(x):
return x + 1

rt = tf.ragged.constant([[1, 2], [3], [4, 5, 6]])
cf = increment.get_concrete_function(rt)
print(cf(rt))
except Exception as e:
print(f"Not supported before TF 2.3: {type(e)}: {e}")
``````
```<tf.RaggedTensor [[2, 3], [4], [5, 6, 7]]>
```

### SavedModel

SavedModel 是序列化 TensorFlow 程序，包括权重和计算。它可以通过 Keras 模型或自定义模型构建。在任何一种情况下，不规则张量都可以透明地与 SavedModel 定义的函数和方法一起使用。

#### 示例：保存 Keras 模型

``````import tempfile

keras_module_path = tempfile.mkdtemp()
tf.saved_model.save(keras_model, keras_module_path)
imported_model(hashed_words)
``````
```WARNING:absl:Function `_wrapped_model` contains input name(s) args_0 with unsupported characters which will be renamed to args_0_1 in the SavedModel.
WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.trackable_utils has been moved to tensorflow.python.trackable.trackable_utils. The old module will be deleted in version 2.11.
WARNING:tensorflow:Please fix your imports. Module tensorflow.python.training.tracking.trackable_utils has been moved to tensorflow.python.trackable.trackable_utils. The old module will be deleted in version 2.11.
INFO:tensorflow:Assets written to: /tmpfs/tmp/tmp2sjv1ua1/assets
INFO:tensorflow:Assets written to: /tmpfs/tmp/tmp2sjv1ua1/assets
<tf.Tensor: shape=(4, 1), dtype=float32, numpy=
array([[-0.00731867],
[-0.00921252],
[-0.01391566],
[-0.00607282]], dtype=float32)>
```

#### 示例：保存自定义模型

``````class CustomModule(tf.Module):
def __init__(self, variable_value):
super(CustomModule, self).__init__()
self.v = tf.Variable(variable_value)

@tf.function
def grow(self, x):
return x * self.v

module = CustomModule(100.0)

# Before saving a custom model, we must ensure that concrete functions are
# built for each input signature that we will need.
module.grow.get_concrete_function(tf.RaggedTensorSpec(shape=[None, None],
dtype=tf.float32))

custom_module_path = tempfile.mkdtemp()
tf.saved_model.save(module, custom_module_path)
imported_model.grow(tf.ragged.constant([[1.0, 4.0, 3.0], [2.0]]))
``````
```INFO:tensorflow:Assets written to: /tmpfs/tmp/tmpc6cvl8ec/assets
INFO:tensorflow:Assets written to: /tmpfs/tmp/tmpc6cvl8ec/assets
<tf.RaggedTensor [[100.0, 400.0, 300.0], [200.0]]>
```

## 重载运算符

`RaggedTensor` 类会重载标准 Python 算术和比较运算符，使其易于执行基本的逐元素数学：

``````x = tf.ragged.constant([[1, 2], [3], [4, 5, 6]])
y = tf.ragged.constant([[1, 1], [2], [3, 3, 3]])
print(x + y)
``````
```<tf.RaggedTensor [[2, 3], [5], [7, 8, 9]]>
```

``````x = tf.ragged.constant([[1, 2], [3], [4, 5, 6]])
print(x + 3)
``````
```<tf.RaggedTensor [[4, 5], [6], [7, 8, 9]]>
```

## 索引

### 索引示例：二维不规则张量

``````queries = tf.ragged.constant(
[['Who', 'is', 'George', 'Washington'],
['What', 'is', 'the', 'weather', 'tomorrow'],
['Goodnight']])
``````
``````print(queries[1])                   # A single query
``````
```tf.Tensor([b'What' b'is' b'the' b'weather' b'tomorrow'], shape=(5,), dtype=string)
```
``````print(queries[1, 2])                # A single word
``````
```tf.Tensor(b'the', shape=(), dtype=string)
```
``````print(queries[1:])                  # Everything but the first row
``````
```<tf.RaggedTensor [[b'What', b'is', b'the', b'weather', b'tomorrow'], [b'Goodnight']]>
```
``````print(queries[:, :3])               # The first 3 words of each query
``````
```<tf.RaggedTensor [[b'Who', b'is', b'George'], [b'What', b'is', b'the'], [b'Goodnight']]>
```
``````print(queries[:, -2:])              # The last 2 words of each query
``````
```<tf.RaggedTensor [[b'George', b'Washington'], [b'weather', b'tomorrow'], [b'Goodnight']]>
```

### 索引示例：三维不规则张量

``````rt = tf.ragged.constant([[[1, 2, 3], [4]],
[[5], [], [6]],
[[7]],
[[8, 9], [10]]])
``````
``````print(rt[1])                        # Second row (2-D RaggedTensor)
``````
```<tf.RaggedTensor [[5], [], [6]]>
```
``````print(rt[3, 0])                     # First element of fourth row (1-D Tensor)
``````
```tf.Tensor([8 9], shape=(2,), dtype=int32)
```
``````print(rt[:, 1:3])                   # Items 1-3 of each row (3-D RaggedTensor)
``````
```<tf.RaggedTensor [[[4]], [[], [6]], [], [[10]]]>
```
``````print(rt[:, -1:])                   # Last item of each row (3-D RaggedTensor)
``````
```<tf.RaggedTensor [[[4]],

[[6]],

[[7]],

[[10]]]>
```

`RaggedTensor` 支持多维索引和切片，但有一个限制：不允许索引一个不规则维度。这种情况是有问题的，因为指示的值可能在某些行中存在，而在其他行中不存在。这种情况下，我们不知道是应该 (1) 引发 `IndexError`；(2) 使用默认值；还是 (3) 跳过该值并返回一个行数比开始时少的张量。根据 Python 的指导原则（“当面对不明确的情况时，不要尝试去猜测”），我们目前不允许此运算。

## 张量类型转换

`RaggedTensor` 类定义了可用于在 `RaggedTensor`、`tf.Tensor` 和 `tf.SparseTensor` 之间转换的方法：

``````ragged_sentences = tf.ragged.constant([
['Hi'], ['Welcome', 'to', 'the', 'fair'], ['Have', 'fun']])
``````
``````# RaggedTensor -> Tensor
print(ragged_sentences.to_tensor(default_value='', shape=[None, 10]))
``````
```tf.Tensor(
[[b'Hi' b'' b'' b'' b'' b'' b'' b'' b'' b'']
[b'Welcome' b'to' b'the' b'fair' b'' b'' b'' b'' b'' b'']
[b'Have' b'fun' b'' b'' b'' b'' b'' b'' b'' b'']], shape=(3, 10), dtype=string)
```
``````# Tensor -> RaggedTensor
x = [[1, 3, -1, -1], [2, -1, -1, -1], [4, 5, 8, 9]]
``````
```<tf.RaggedTensor [[1, 3], [2], [4, 5, 8, 9]]>
```
``````#RaggedTensor -> SparseTensor
print(ragged_sentences.to_sparse())
``````
```SparseTensor(indices=tf.Tensor(
[[0 0]
[1 0]
[1 1]
[1 2]
[1 3]
[2 0]
[2 1]], shape=(7, 2), dtype=int64), values=tf.Tensor([b'Hi' b'Welcome' b'to' b'the' b'fair' b'Have' b'fun'], shape=(7,), dtype=string), dense_shape=tf.Tensor([3 4], shape=(2,), dtype=int64))
```
``````# SparseTensor -> RaggedTensor
st = tf.SparseTensor(indices=[[0, 0], [2, 0], [2, 1]],
values=['a', 'b', 'c'],
dense_shape=[3, 3])
print(tf.RaggedTensor.from_sparse(st))
``````
```<tf.RaggedTensor [[b'a'], [], [b'b', b'c']]>
```

## 评估不规则张量

1. 使用 `tf.RaggedTensor.to_list()` 将不规则张量转换为嵌套 Python 列表。
2. 使用 `tf.RaggedTensor.numpy()` 将不规则张量转换为 numpy 数组，数组的值是嵌套的 numpy 数组。
3. 使用 `tf.RaggedTensor.values` 和 `tf.RaggedTensor.row_splits` 属性，或 `tf.RaggedTensor.row_lengths()` 和 `tf.RaggedTensor.value_rowids()` 之类的行分区方法，将不规则张量分解成其分量。
4. 使用 Python 索引从不规则张量中选择值。
``````rt = tf.ragged.constant([[1, 2], [3, 4, 5], [6], [], [7]])
print("python list:", rt.to_list())
print("numpy array:", rt.numpy())
print("values:", rt.values.numpy())
print("splits:", rt.row_splits.numpy())
print("indexed value:", rt[1].numpy())
``````
```python list: [[1, 2], [3, 4, 5], [6], [], [7]]
numpy array: [array([1, 2], dtype=int32) array([3, 4, 5], dtype=int32)
array([6], dtype=int32) array([], dtype=int32) array([7], dtype=int32)]
values: [1 2 3 4 5 6 7]
splits: [0 2 5 6 6 7]
indexed value: [3 4 5]
```

## 广播

1. 如果 `x` 和 `y` 没有相同的维数，则增加外层维度（使用大小 1），直至它们具有相同的维数。

2. 对于 `x` 和 `y` 的大小不同的每一个维度：

• 如果 `x` 或 `y` 在 `d` 维中的大小为 `1`，则跨 `d` 维重复其值以匹配其他输入的大小。

• 否则，引发异常（`x` 和 `y` 非广播兼容）。

### 广播示例

``````# x       (2D ragged):  2 x (num_rows)
# y       (scalar)
# result  (2D ragged):  2 x (num_rows)
x = tf.ragged.constant([[1, 2], [3]])
y = 3
print(x + y)
``````
```<tf.RaggedTensor [[4, 5], [6]]>
```
``````# x         (2d ragged):  3 x (num_rows)
# y         (2d tensor):  3 x          1
# Result    (2d ragged):  3 x (num_rows)
x = tf.ragged.constant(
[[10, 87, 12],
[19, 53],
[12, 32]])
y = [[1000], [2000], [3000]]
print(x + y)
``````
```<tf.RaggedTensor [[1010, 1087, 1012], [2019, 2053], [3012, 3032]]>
```
``````# x      (3d ragged):  2 x (r1) x 2
# y      (2d ragged):         1 x 1
# Result (3d ragged):  2 x (r1) x 2
x = tf.ragged.constant(
[[[1, 2], [3, 4], [5, 6]],
[[7, 8]]],
ragged_rank=1)
y = tf.constant([[10]])
print(x + y)
``````
```<tf.RaggedTensor [[[11, 12],
[13, 14],
[15, 16]], [[17, 18]]]>
```
``````# x      (3d ragged):  2 x (r1) x (r2) x 1
# y      (1d tensor):                    3
# Result (3d ragged):  2 x (r1) x (r2) x 3
x = tf.ragged.constant(
[
[
[[1], [2]],
[],
[[3]],
[[4]],
],
[
[[5], [6]],
[[7]]
]
],
ragged_rank=2)
y = tf.constant([10, 20, 30])
print(x + y)
``````
```<tf.RaggedTensor [[[[11, 21, 31],
[12, 22, 32]], [], [[13, 23, 33]], [[14, 24, 34]]],
[[[15, 25, 35],
[16, 26, 36]], [[17, 27, 37]]]]>
```

``````# x      (2d ragged): 3 x (r1)
# y      (2d tensor): 3 x    4  # trailing dimensions do not match
x = tf.ragged.constant([[1, 2], [3, 4, 5, 6], [7]])
y = tf.constant([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
try:
x + y
except tf.errors.InvalidArgumentError as exception:
print(exception)
``````
```Condition x == y did not hold.
Indices of first 3 different values:
[[1]
[2]
[3]]
Corresponding x values:
[ 4  8 12]
Corresponding y values:
[2 6 7]
First 3 elements of x:
[0 4 8]
First 3 elements of y:
[0 2 6]
```
``````# x      (2d ragged): 3 x (r1)
# y      (2d ragged): 3 x (r2)  # ragged dimensions do not match.
x = tf.ragged.constant([[1, 2, 3], [4], [5, 6]])
y = tf.ragged.constant([[10, 20], [30, 40], [50]])
try:
x + y
except tf.errors.InvalidArgumentError as exception:
print(exception)
``````
```Condition x == y did not hold.
Indices of first 2 different values:
[[1]
[3]]
Corresponding x values:
[3 6]
Corresponding y values:
[2 5]
First 3 elements of x:
[0 3 4]
First 3 elements of y:
[0 2 4]
```
``````# x      (3d ragged): 3 x (r1) x 2
# y      (3d ragged): 3 x (r1) x 3  # trailing dimensions do not match
x = tf.ragged.constant([[[1, 2], [3, 4], [5, 6]],
[[7, 8], [9, 10]]])
y = tf.ragged.constant([[[1, 2, 0], [3, 4, 0], [5, 6, 0]],
[[7, 8, 0], [9, 10, 0]]])
try:
x + y
except tf.errors.InvalidArgumentError as exception:
print(exception)
``````
```Condition x == y did not hold.
Indices of first 3 different values:
[[1]
[2]
[3]]
Corresponding x values:
[2 4 6]
Corresponding y values:
[3 6 9]
First 3 elements of x:
[0 2 4]
First 3 elements of y:
[0 3 6]
```

## RaggedTensor 编码

• 一个 `values` 张量，它将可变长度行连接成扁平列表。
• 一个 `row_partition`，它指示如何将这些扁平值分成各行。

• `row_splits` 是一个整型向量，用于指定行之间的拆分点。
• `value_rowids` 是一个整型向量，用于指定每个值的行索引。
• `row_lengths` 是一个整型向量，用于指定每一行的长度。
• `uniform_row_length` 是一个整型标量，用于指定所有行的单个长度。

``````rt = tf.RaggedTensor.from_row_splits(
values=[3, 1, 4, 1, 5, 9, 2],
row_splits=[0, 4, 4, 6, 7])
print(rt)
``````
```<tf.RaggedTensor [[3, 1, 4, 1], [], [5, 9], [2]]>
```

• 高效索引：`row_splits` 编码可以实现不规则张量的恒定时间索引和切片。

• 高效连接：`row_lengths` 编码在连接不规则张量时更有效，因为当两个张量连接在一起时，行长度不会改变。

• 较小的编码大小：`value_rowids` 编码在存储有大量空行的不规则张量时更有效，因为张量的大小只取决于值的总数。另一方面，`row_splits` 和 `row_lengths` 编码在存储具有较长行的不规则张量时更有效，因为它们每行只需要一个标量值。

• 兼容性：`value_rowids` 方案与 `tf.math.segment_sum` 等运算使用的分段格式相匹配。`row_limits` 方案与 `tf.sequence_mask` 等运算使用的格式相匹配。

• 均匀维度：如下文所述，`uniform_row_length` 编码用于对具有均匀维度的不规则张量进行编码。

### 多个不规则维度

``````rt = tf.RaggedTensor.from_row_splits(
values=tf.RaggedTensor.from_row_splits(
values=[10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
row_splits=[0, 3, 3, 5, 9, 10]),
row_splits=[0, 1, 1, 5])
print(rt)
print("Shape: {}".format(rt.shape))
print("Number of partitioned dimensions: {}".format(rt.ragged_rank))
``````
```<tf.RaggedTensor [[[10, 11, 12]], [], [[], [13, 14], [15, 16, 17, 18], [19]]]>
Shape: (3, None, None)
Number of partitioned dimensions: 2
```

``````rt = tf.RaggedTensor.from_nested_row_splits(
flat_values=[10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
nested_row_splits=([0, 1, 1, 5], [0, 3, 3, 5, 9, 10]))
print(rt)
``````
```<tf.RaggedTensor [[[10, 11, 12]], [], [[], [13, 14], [15, 16, 17, 18], [19]]]>
```

### 不规则秩和扁平值

``````# shape = [batch, (paragraph), (sentence), (word)]
conversations = tf.ragged.constant(
[[[["I", "like", "ragged", "tensors."]],
[["Oh", "yeah?"], ["What", "can", "you", "use", "them", "for?"]],
[["Processing", "variable", "length", "data!"]]],
[[["I", "like", "cheese."], ["Do", "you?"]],
[["Yes."], ["I", "do."]]]])
conversations.shape
``````
```TensorShape([2, None, None, None])
```
``````assert conversations.ragged_rank == len(conversations.nested_row_splits)
conversations.ragged_rank  # Number of partitioned dimensions.
``````
```3
```
``````conversations.flat_values.numpy()
``````
```array([b'I', b'like', b'ragged', b'tensors.', b'Oh', b'yeah?', b'What',
b'can', b'you', b'use', b'them', b'for?', b'Processing',
b'variable', b'length', b'data!', b'I', b'like', b'cheese.', b'Do',
b'you?', b'Yes.', b'I', b'do.'], dtype=object)
```

### 均匀内层维度

``````rt = tf.RaggedTensor.from_row_splits(
values=[[1, 3], [0, 0], [1, 3], [5, 3], [3, 3], [1, 2]],
row_splits=[0, 3, 4, 6])
print(rt)
print("Shape: {}".format(rt.shape))
print("Number of partitioned dimensions: {}".format(rt.ragged_rank))
print("Flat values shape: {}".format(rt.flat_values.shape))
print("Flat values:\n{}".format(rt.flat_values))
``````
```<tf.RaggedTensor [[[1, 3],
[0, 0],
[1, 3]], [[5, 3]], [[3, 3],
[1, 2]]]>
Shape: (3, None, 2)
Number of partitioned dimensions: 1
Flat values shape: (6, 2)
Flat values:
[[1 3]
[0 0]
[1 3]
[5 3]
[3 3]
[1 2]]
```

### 均匀非内层维度

``````rt = tf.RaggedTensor.from_uniform_row_length(
values=tf.RaggedTensor.from_row_splits(
values=[10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
row_splits=[0, 3, 5, 9, 10]),
uniform_row_length=2)
print(rt)
print("Shape: {}".format(rt.shape))
print("Number of partitioned dimensions: {}".format(rt.ragged_rank))
``````
```<tf.RaggedTensor [[[10, 11, 12], [13, 14]],
[[15, 16, 17, 18], [19]]]>
Shape: (2, 2, None)
Number of partitioned dimensions: 2
```
[]
[]