หน้านี้ได้รับการแปลโดย Cloud Translation API

การเรียนรู้ภาษาเชิงลึกที่คำนึงถึงความไม่แน่นอนด้วย BERT-SNGP

ดูบน TensorFlow.org

ใน บทช่วยสอน SNGP คุณได้เรียนรู้วิธีสร้างแบบจำลอง SNGP บนโครงข่ายเรซิดวลเชิงลึก (deep residual network) เพื่อปรับปรุงความสามารถในการวัดปริมาณความไม่แน่นอน ในบทช่วยสอนนี้ คุณจะนำ SNGP ไปใช้กับงานการทำความเข้าใจภาษาธรรมชาติ (NLU) โดยสร้างต่อยอดบนตัวเข้ารหัส BERT เชิงลึก เพื่อปรับปรุงความสามารถของโมเดล NLU เชิงลึกในการตรวจหาข้อความค้นหาที่อยู่นอกขอบเขต (out-of-scope)

โดยเฉพาะอย่างยิ่ง คุณจะ:

สร้าง BERT-SNGP ซึ่งเป็นแบบจำลอง BERT ที่เสริมด้วย SNGP
โหลดชุดข้อมูลการตรวจจับเจตนา นอกขอบเขต (OOS) ของ CLINC
ฝึกโมเดล BERT-SNGP
ประเมินประสิทธิภาพของโมเดล BERT-SNGP ในการสอบเทียบความไม่แน่นอนและการตรวจจับนอกโดเมน

นอกเหนือจาก CLINC OOS แล้ว โมเดล SNGP ยังถูกนำไปใช้กับชุดข้อมูลขนาดใหญ่ เช่น การตรวจจับความเป็นพิษของ Jigsaw และชุดข้อมูลภาพ เช่น CIFAR-100 และ ImageNet สำหรับผลลัพธ์การเปรียบเทียบ (benchmark) ของ SNGP และวิธีการประมาณความไม่แน่นอนอื่นๆ รวมถึงการนำไปใช้งานคุณภาพสูงพร้อมสคริปต์การฝึก/การประเมินแบบ end-to-end คุณสามารถดูได้ที่เกณฑ์มาตรฐาน Uncertainty Baselines

ติดตั้ง

# Uninstall any existing tensorflow / tf-text packages, then install the
# nightly builds of TensorFlow Text, TensorFlow and the TF Model Garden.
pip uninstall -y tensorflow tf-text

pip install -U tensorflow-text-nightly

pip install -U tf-nightly

pip install -U tf-models-nightly

import matplotlib.pyplot as plt

import sklearn.metrics
import sklearn.calibration

import tensorflow_hub as hub
import tensorflow_datasets as tfds

import numpy as np
import tensorflow as tf

import official.nlp.modeling.layers as layers
import official.nlp.optimization as optimization

/tmpfs/src/tf_docs_env/lib/python3.7/site-packages/tensorflow_addons/utils/ensure_tf_install.py:43: UserWarning: You are currently using a nightly version of TensorFlow (2.9.0-dev20220203). 
TensorFlow Addons offers no support for the nightly versions of TensorFlow. Some things might work, some other might not. 
If you encounter a bug, do not file an issue on GitHub.
  UserWarning,

บทช่วยสอนนี้ต้องใช้ GPU เพื่อให้ทำงานได้อย่างมีประสิทธิภาพ ตรวจสอบว่า GPU พร้อมใช้งานหรือไม่

tf.__version__

'2.9.0-dev20220203'

# List the GPU devices visible to TensorFlow; the bare expression on the next
# line echoes the list when run as a notebook cell.
gpus = tf.config.list_physical_devices('GPU')
gpus

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

# Fail fast when no GPU is visible. An explicit raise is used instead of
# `assert` so the check still runs when Python is started with -O (which
# strips assert statements).
if not gpus:
  raise RuntimeError("""
  No GPU(s) found! This tutorial will take many hours to run without a GPU.

  You may hit this error if the installed tensorflow package is not
  compatible with the CUDA and CUDNN versions.""")

ขั้นแรก ให้สร้างตัวแยกประเภท BERT มาตรฐานตามบทช่วยสอน การจัดประเภทข้อความด้วย BERT เราจะใช้ตัวเข้ารหัส BERT-base และ ClassificationHead ในตัวเป็นตัวแยกประเภท

รุ่นมาตรฐาน BERT

# TF-Hub handles loaded by BertClassifier: the text preprocessing model and
# the matching BERT encoder (uncased English, L-12 / H-768 / A-12).
PREPROCESS_HANDLE = 'https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3'
MODEL_HANDLE = 'https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3'

class BertClassifier(tf.keras.Model):
  """Text classifier: TF-Hub BERT encoder followed by a classification head.

  The model takes raw strings, runs them through the preprocessing layer and
  the BERT encoder, and feeds the encoder's 'sequence_output' to the head
  built by `make_classification_head`. Subclasses can override that factory
  method to swap in a different head.
  """

  def __init__(self,
               num_classes=150, inner_dim=768, dropout_rate=0.1,
               **classifier_kwargs):
    """Builds the hub layers, the encoder sub-model and the head.

    Args:
      num_classes: Passed to the classification head as `num_classes`.
      inner_dim: Passed to the classification head as `inner_dim`.
      dropout_rate: Passed to the classification head as `dropout_rate`.
      **classifier_kwargs: Extra keyword arguments forwarded unchanged to the
        classification head constructor.
    """
    super().__init__()
    # Must be stored before the head is built: the factory method reads it.
    self.classifier_kwargs = classifier_kwargs

    # TF-Hub layers: text preprocessing and the trainable BERT encoder.
    self.bert_preprocessor = hub.KerasLayer(
        PREPROCESS_HANDLE, name='preprocessing')
    self.bert_hidden_layer = hub.KerasLayer(
        MODEL_HANDLE, trainable=True, name='bert_encoder')

    # Sub-models used by `call`.
    self.bert_encoder = self.make_bert_encoder()
    self.classifier = self.make_classification_head(
        num_classes, inner_dim, dropout_rate)

  def make_bert_encoder(self):
    """Returns a Keras model mapping a string input to the BERT outputs."""
    raw_text = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text')
    bert_outputs = self.bert_hidden_layer(self.bert_preprocessor(raw_text))
    return tf.keras.Model(raw_text, bert_outputs)

  def make_classification_head(self, num_classes, inner_dim, dropout_rate):
    """Returns the `ClassificationHead` layer used as the classifier."""
    head = layers.ClassificationHead(
        num_classes=num_classes,
        inner_dim=inner_dim,
        dropout_rate=dropout_rate,
        **self.classifier_kwargs)
    return head

  def call(self, inputs, **kwargs):
    """Encodes `inputs` and returns the classification head's output.

    Args:
      inputs: Input passed to the BERT encoder sub-model.
      **kwargs: Forwarded to the classification head only.
    """
    sequence_output = self.bert_encoder(inputs)['sequence_output']
    return self.classifier(sequence_output, **kwargs)

สร้างโมเดล SNGP

ในการสร้างโมเดล BERT-SNGP คุณเพียงแค่แทนที่ ClassificationHead ด้วย GaussianProcessClassificationHead ในตัว การทำ spectral normalization ถูกรวมไว้ในหัวจำแนกประเภท (classification head) นี้เรียบร้อยแล้ว เช่นเดียวกับใน บทช่วยสอน SNGP ให้เพิ่ม callback สำหรับรีเซ็ตความแปรปรวนร่วมให้กับโมเดล เพื่อให้โมเดลรีเซ็ตตัวประมาณค่าความแปรปรวนร่วมโดยอัตโนมัติเมื่อเริ่มต้นแต่ละยุค (epoch) ใหม่ และหลีกเลี่ยงการนับข้อมูลเดียวกันซ้ำสองครั้ง

class ResetCovarianceCallback(tf.keras.callbacks.Callback):
  """Keras callback that resets the model classifier's covariance matrix."""

  def on_epoch_begin(self, epoch, logs=None):
    """Resets the covariance matrix at the beginning of an epoch.

    Nothing is reset while `epoch` is 0 or less; for every later epoch the
    attached model's `classifier.reset_covariance_matrix()` is called.
    """
    if epoch <= 0:
      return
    self.model.classifier.reset_covariance_matrix()

class SNGPBertClassifier(BertClassifier):
  """BertClassifier whose head is a `GaussianProcessClassificationHead`.

  `fit` additionally installs a `ResetCovarianceCallback` so the head's
  covariance matrix is reset at the start of every epoch after the first.
  """

  def make_classification_head(self, num_classes, inner_dim, dropout_rate):
    """Returns the Gaussian-process classification head.

    `gp_cov_momentum=-1` and `temperature=30.` are fixed here; any extra
    keyword arguments given to the constructor are forwarded as well.
    """
    return layers.GaussianProcessClassificationHead(
        num_classes=num_classes, 
        inner_dim=inner_dim,
        dropout_rate=dropout_rate,
        gp_cov_momentum=-1,
        temperature=30.,
        **self.classifier_kwargs)

  def fit(self, *args, **kwargs):
    """Adds ResetCovarianceCallback to model callbacks, then trains.

    Callbacks supplied by the caller (via the `callbacks` keyword) are kept
    and run before the reset callback. `callbacks=None` -- the Keras default
    -- is treated the same as passing no callbacks.

    Args:
      *args: Positional arguments forwarded to `tf.keras.Model.fit`.
      **kwargs: Keyword arguments forwarded to `tf.keras.Model.fit`.

    Returns:
      Whatever the parent `fit` returns.
    """
    # `or []` covers both a missing key and an explicit callbacks=None;
    # list(None) would raise TypeError.
    user_callbacks = kwargs.get('callbacks') or []
    kwargs['callbacks'] = [*user_callbacks, ResetCovarianceCallback()]

    return super().fit(*args, **kwargs)

โหลดชุดข้อมูล CLINC OOS

ตอนนี้โหลดชุดข้อมูลการตรวจหาเจตนา CLINC OOS ชุดข้อมูลนี้ประกอบด้วยข้อความค้นหาแบบคำพูดของผู้ใช้ 15,000 รายการ ซึ่งรวบรวมจากคลาสเจตนา 150 คลาส และยังมีประโยคนอกโดเมน (OOD) อีก 1,000 ประโยคที่ไม่อยู่ในคลาสที่รู้จักใดๆ

# Load the three CLINC OOS splits together with the dataset metadata.
# batch_size=-1 is requested so each split can be indexed by feature name
# directly in the cells that follow.
clinc_splits, ds_info = tfds.load(
    'clinc_oos',
    split=['train', 'test', 'test_oos'],
    with_info=True,
    batch_size=-1)
clinc_train, clinc_test, clinc_test_oos = clinc_splits

สร้างข้อมูลฝึกและข้อมูลทดสอบ

# Training inputs (query text) and targets (intent label) from the train split.
train_examples = clinc_train['text']
train_labels = clinc_train['intent']

# Makes the in-domain (IND) evaluation data.
# Stored as an (inputs, targets) pair; it is later passed to `fit` as
# `validation_data`.
ind_eval_data = (clinc_test['text'], clinc_test['intent'])

สร้างชุดข้อมูลการประเมิน OOD สำหรับสิ่งนี้ ให้รวมข้อมูลการทดสอบในโดเมน clinc_test และข้อมูลนอกโดเมน clinc_test_oos เราจะกำหนดป้ายกำกับ 0 ให้กับตัวอย่างในโดเมน และป้ายกำกับ 1 ให้กับตัวอย่างนอกโดเมน

# Split sizes as recorded in the dataset metadata.
test_data_size = ds_info.splits['test'].num_examples
oos_data_size = ds_info.splits['test_oos'].num_examples

# In-domain test queries come first, out-of-domain queries second.
oos_texts = tf.concat([clinc_test['text'], clinc_test_oos['text']], axis=0)
# Matching binary labels: 0 for every in-domain example, 1 for every
# out-of-domain example, in the same order as `oos_texts`.
oos_labels = tf.concat(
    [tf.zeros([test_data_size], dtype=tf.int32),
     tf.ones([oos_data_size], dtype=tf.int32)],
    axis=0)

# One dataset element per query, holding its text and its OOD label.
ood_eval_dataset = tf.data.Dataset.from_tensor_slices(
    {"text": oos_texts, "label": oos_labels})

ฝึกฝนและประเมินผล

ขั้นแรกให้ตั้งค่าการกำหนดค่าการฝึกขั้นพื้นฐาน

# Basic training configuration: number of epochs, and the batch sizes used
# for training and for evaluation.
TRAIN_EPOCHS = 3
TRAIN_BATCH_SIZE = 32
EVAL_BATCH_SIZE = 256

def bert_optimizer(learning_rate, 
                   batch_size=TRAIN_BATCH_SIZE, epochs=TRAIN_EPOCHS, 
                   warmup_rate=0.1):
  """Creates an AdamWeightDecay optimizer with learning rate schedule.

  The schedule warms up to `learning_rate` over the first
  `warmup_rate` fraction of the training steps and follows a polynomial
  decay to 0.0 that ends at the last training step.

  Args:
    learning_rate: Peak learning rate (initial rate of the decay schedule).
    batch_size: Training batch size, used to derive the steps per epoch.
    epochs: Number of training epochs.
    warmup_rate: Fraction of the total training steps spent warming up.
      A value that yields zero warmup steps disables the warmup.

  Returns:
    An `optimization.AdamWeightDecay` optimizer driven by the schedule.
  """
  train_data_size = ds_info.splits['train'].num_examples

  # Ceiling division: the last, partially filled batch of an epoch is still a
  # training step, so flooring would end the decay before training finishes.
  steps_per_epoch = (train_data_size + batch_size - 1) // batch_size
  num_train_steps = steps_per_epoch * epochs
  num_warmup_steps = int(warmup_rate * num_train_steps)  

  # Creates learning schedule.
  lr_schedule = tf.keras.optimizers.schedules.PolynomialDecay(
      initial_learning_rate=learning_rate,
      decay_steps=num_train_steps,
      end_learning_rate=0.0)  

  # Apply the warmup that `warmup_rate` asks for; previously the warmup step
  # count was computed but never used.
  if num_warmup_steps > 0:
    lr_schedule = optimization.WarmUp(
        initial_learning_rate=learning_rate,
        decay_schedule_fn=lr_schedule,
        warmup_steps=num_warmup_steps)

  return optimization.AdamWeightDecay(
      learning_rate=lr_schedule,
      weight_decay_rate=0.01,
      epsilon=1e-6,
      exclude_from_weight_decay=['LayerNorm', 'layer_norm', 'bias'])

# Optimizer, loss and metric handed to `compile` below. The loss is built
# with from_logits=True.
optimizer = bert_optimizer(learning_rate=1e-4)
loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
metrics = tf.metrics.SparseCategoricalAccuracy()

# Keyword arguments shared by the `fit` call; validation runs on the
# in-domain test data.
fit_configs = {
    'batch_size': TRAIN_BATCH_SIZE,
    'epochs': TRAIN_EPOCHS,
    'validation_batch_size': EVAL_BATCH_SIZE,
    'validation_data': ind_eval_data,
}

# Build, compile and train the BERT-SNGP model.
sngp_model = SNGPBertClassifier()
sngp_model.compile(optimizer=optimizer, loss=loss, metrics=metrics)
sngp_model.fit(train_examples, train_labels, **fit_configs)

Epoch 1/3
469/469 [==============================] - 219s 427ms/step - loss: 1.0725 - sparse_categorical_accuracy: 0.7870 - val_loss: 0.4358 - val_sparse_categorical_accuracy: 0.9380
Epoch 2/3
469/469 [==============================] - 198s 422ms/step - loss: 0.0885 - sparse_categorical_accuracy: 0.9797 - val_loss: 0.2424 - val_sparse_categorical_accuracy: 0.9518
Epoch 3/3
469/469 [==============================] - 199s 424ms/step - loss: 0.0259 - sparse_categorical_accuracy: 0.9951 - val_loss: 0.1927 - val_sparse_categorical_accuracy: 0.9642
<keras.callbacks.History at 0x7fe24c0a7090>

ประเมินประสิทธิภาพของ OOD

ประเมินว่าโมเดลสามารถตรวจจับข้อความค้นหานอกโดเมนที่ไม่คุ้นเคยได้ดีเพียงใด สำหรับการประเมินอย่างเข้มงวด ให้ใช้ชุดข้อมูลการประเมิน OOD ood_eval_dataset ที่สร้างไว้ก่อนหน้านี้

def oos_predict(model, ood_eval_dataset, **model_kwargs):
  """Returns the model's top-class probability and the label per example.

  The dataset is processed in batches of `EVAL_BATCH_SIZE`. For each batch
  the model output is turned into probabilities with a softmax over the last
  axis, and the largest probability of each example is kept.

  Args:
    model: Callable applied to a batch of texts; its output is treated as
      logits.
    ood_eval_dataset: Unbatched dataset whose elements have "text" and
      "label" entries.
    **model_kwargs: Extra keyword arguments forwarded to `model`.

  Returns:
    A `(probs, labels)` pair, each concatenated over all batches along
    axis 0.
  """
  confidence_batches = []
  label_batches = []

  for batch in ood_eval_dataset.batch(EVAL_BATCH_SIZE):
    logits = model(batch["text"], **model_kwargs)
    class_probs = tf.nn.softmax(logits, axis=-1)

    # Keep only the probability of the most likely class.
    confidence_batches.append(tf.reduce_max(class_probs, axis=-1))
    label_batches.append(batch["label"])

  return (tf.concat(confidence_batches, axis=0),
          tf.concat(label_batches, axis=0))

คำนวณความน่าจะเป็น OOD เป็น \(1 - p(x)\) โดยที่ \(p(x)=\max_k \mathrm{softmax}(\mathrm{logit}(x))_k\) คือความน่าจะเป็นเชิงทำนายของคลาสที่โมเดลมั่นใจที่สุด

# Top-class probability and OOD label for every evaluation example.
sngp_probs, ood_labels = oos_predict(sngp_model, ood_eval_dataset)

# OOD score: one minus the top-class probability.
ood_probs = 1 - sngp_probs

ตอนนี้ประเมินว่าคะแนนความไม่แน่นอนของโมเดล ood_probs ทำนายป้ายกำกับนอกโดเมนได้ดีเพียงใด ขั้นแรกให้คำนวณพื้นที่ใต้เส้นโค้ง precision-recall (AUPRC) สำหรับความน่าจะเป็น OOD เทียบกับความแม่นยำในการตรวจจับ OOD

# Precision-recall curve of the OOD score against the OOD labels; the
# thresholds returned as the third value are not needed.
precision, recall, _ = sklearn.metrics.precision_recall_curve(ood_labels, ood_probs)

# Area under the curve, with recall on the x-axis and precision on the y-axis.
auprc = sklearn.metrics.auc(recall, precision)
print(f'SNGP AUPRC: {auprc:.4f}')

SNGP AUPRC: 0.9039

ซึ่งตรงกับประสิทธิภาพของ SNGP ที่รายงานที่เกณฑ์มาตรฐาน CLINC OOS ภายใต้ Uncertainty Baselines

ต่อไป ตรวจสอบคุณภาพของแบบจำลองใน การสอบเทียบความไม่แน่นอน กล่าวคือ ความน่าจะเป็นเชิงคาดการณ์ของแบบจำลองนั้นสอดคล้องกับความแม่นยำในการทำนายหรือไม่ แบบจำลองที่สอบเทียบมาอย่างดีถือว่าน่าเชื่อถือ เนื่องจากตัวอย่างเช่น ความน่าจะเป็นที่คาดการณ์ได้ \(p(x)=0.8\) หมายความว่าแบบจำลองนั้นถูกต้อง 80% ของเวลาทั้งหมด

# Calibration curve of the OOD score against the OOD labels, computed over
# 10 quantile bins.
prob_true, prob_pred = sklearn.calibration.calibration_curve(
    ood_labels, ood_probs, n_bins=10, strategy='quantile')

# Model curve: predicted values on the x-axis, observed values on the y-axis.
plt.plot(prob_pred, prob_true)

# Dashed black diagonal y = x, drawn as the reference line.
plt.plot([0., 1.], [0., 1.], c='k', linestyle="--")
plt.xlabel('Predictive Probability')
plt.ylabel('Predictive Accuracy')
plt.title('Calibration Plots, SNGP')

plt.show()

png

แหล่งข้อมูลและการอ่านเพิ่มเติม

ดู บทช่วยสอน SNGP สำหรับคำแนะนำโดยละเอียดเกี่ยวกับการใช้ SNGP ตั้งแต่เริ่มต้น
ดู Uncertainty Baselines สำหรับการนำแบบจำลอง SNGP ไปใช้ (และวิธีการประมาณความไม่แน่นอนอื่นๆ อีกมากมาย) กับชุดข้อมูลเกณฑ์มาตรฐานที่หลากหลาย (เช่น CIFAR, ImageNet, การตรวจจับความเป็นพิษของ Jigsaw เป็นต้น)
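เพื่อความเข้าใจที่ลึกซึ้งยิ่งขึ้นเกี่ยวกับวิธี SNGP ให้ดูบทความ Simple and Principled Uncertainty Estimation with Deterministic Deep Learning via Distance Awareness (การประมาณความไม่แน่นอนอย่างง่ายและมีหลักการด้วยการเรียนรู้เชิงลึกเชิงกำหนดผ่านการตระหนักถึงระยะทาง)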