Migration Guide

Migrating from v2.x to v3.x

The v3 Sentence Transformers release refactored the training of SentenceTransformer embedding models, replacing SentenceTransformer.fit with SentenceTransformerTrainer and SentenceTransformerTrainingArguments. This update softly deprecates SentenceTransformer.fit, meaning it still works, but it is recommended to switch to the new v3.x training format. Behind the scenes, this method now uses the new trainer.

Warning

If you do not have code that uses SentenceTransformer.fit, then you do not need to make any changes to your code to update from v2.x to v3.x.

If you do, your code still works, but it is recommended to switch to the new v3.x training format, as it allows more training arguments and functionality. See the Training Overview for more details.

Old and new training flow

v2.x

v3.x (recommended)

from sentence_transformers import SentenceTransformer, InputExample, losses
from torch.utils.data import DataLoader

# 1. Define the model. Either from scratch or by loading a pre-trained model
model = SentenceTransformer("microsoft/mpnet-base")

# 2. Define your train examples. You need more than just two examples...
train_examples = [
    InputExample(texts=[
        "A person on a horse jumps over a broken down airplane.",
        "A person is outdoors, on a horse.",
        "A person is at a diner, ordering an omelette.",
    ]),
    InputExample(texts=[
        "Children smiling and waving at camera",
        "There are children present",
        "The kids are frowning",
    ]),
]
train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=16)

# 3. Define a loss function
train_loss = losses.MultipleNegativesRankingLoss(model)

# 4. Finetune the model
model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    epochs=1,
    warmup_steps=100,
)

# 5. Save the trained model
model.save_pretrained("models/mpnet-base-all-nli")
from datasets import load_dataset
from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer
from sentence_transformers.losses import MultipleNegativesRankingLoss

# 1. Define the model. Either from scratch or by loading a pre-trained model
model = SentenceTransformer("microsoft/mpnet-base")

# 2. Load a dataset to finetune on
dataset = load_dataset("sentence-transformers/all-nli", "triplet")
train_dataset = dataset["train"].select(range(10_000))
eval_dataset = dataset["dev"].select(range(1_000))

# 3. Define a loss function
loss = MultipleNegativesRankingLoss(model)

# 4. Create a trainer & train
trainer = SentenceTransformerTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    loss=loss,
)
trainer.train()

# 5. Save the trained model
model.save_pretrained("models/mpnet-base-all-nli")
# model.push_to_hub("mpnet-base-all-nli")

Migration for specific parameters of SentenceTransformer.fit

SentenceTransformer.fit(train_objectives)

v2.x

v3.x (recommended)

from sentence_transformers import SentenceTransformer, InputExample, losses
from torch.utils.data import DataLoader

# Define a training dataloader
train_examples = [
    InputExample(texts=[
        "A person on a horse jumps over a broken down airplane.",
        "A person is outdoors, on a horse.",
        "A person is at a diner, ordering an omelette.",
    ]),
    InputExample(texts=[
        "Children smiling and waving at camera",
        "There are children present",
        "The kids are frowning",
    ]),
]
train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=16)

# Define a loss function
train_loss = losses.MultipleNegativesRankingLoss(model)

# Finetune the model
model.fit(train_objectives=[(train_dataloader, train_loss)])
from datasets import Dataset
from sentence_transformers import SentenceTransformer, SentenceTransformerTrainer
from sentence_transformers.losses import MultipleNegativesRankingLoss

# Define a training dataset
train_examples = [
    {
        "anchor": "A person on a horse jumps over a broken down airplane.",
        "positive": "A person is outdoors, on a horse.",
        "negative": "A person is at a diner, ordering an omelette.",
    },
    {
        "anchor": "Children smiling and waving at camera",
        "positive": "There are children present",
        "negative": "The kids are frowning",
    },
]
train_dataset = Dataset.from_list(train_examples)

# Define a loss function
loss = MultipleNegativesRankingLoss(model)

# Finetune the model
trainer = SentenceTransformerTrainer(
    model=model,
    train_dataset=train_dataset,
    loss=loss,
)
trainer.train()
SentenceTransformer.fit(evaluator)

v2.x

v3.x (recommended)

...

# Load an evaluator
evaluator = NanoBEIREvaluator()

# Finetune with an evaluator
model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    evaluator=evaluator,
)
# Load an evaluator
evaluator = NanoBEIREvaluator()

# Finetune with an evaluator
trainer = SentenceTransformerTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    loss=loss,
    evaluator=evaluator,
)
trainer.train()
SentenceTransformer.fit(epochs)

v2.x

v3.x (recommended)

...

# Finetune the model
model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    epochs=1,
)
...

# Prepare the Training Arguments
args = SentenceTransformerTrainingArguments(
    num_train_epochs=1,
)

# Finetune the model
trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=loss,
)
trainer.train()
SentenceTransformer.fit(steps_per_epoch)

v2.x

v3.x (recommended)

...

# Finetune the model
model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    steps_per_epoch=1000,
)
...

# Prepare the Training Arguments
args = SentenceTransformerTrainingArguments(
    max_steps=1000, # Note: max_steps is across all epochs, not per epoch
)

# Finetune the model
trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=loss,
)
trainer.train()
SentenceTransformer.fit(scheduler)

v2.x

v3.x (recommended)

...

# Finetune the model
model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    scheduler="WarmupLinear",
)
...

# Prepare the Training Arguments
args = SentenceTransformerTrainingArguments(
    # See https://hugging-face.cn/docs/transformers/main_classes/optimizer_schedules#transformers.SchedulerType
    lr_scheduler_type="linear"
)

# Finetune the model
trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=loss,
)
trainer.train()
SentenceTransformer.fit(warmup_steps)

v2.x

v3.x (recommended)

...

# Finetune the model
model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    warmup_steps=1000,
)
...

# Prepare the Training Arguments
args = SentenceTransformerTrainingArguments(
    warmup_steps=1000,
)

# Finetune the model
trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=loss,
)
trainer.train()
SentenceTransformer.fit(optimizer_class, optimizer_params)

v2.x

v3.x (recommended)

...

# Finetune the model
model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    optimizer_class=torch.optim.AdamW,
    optimizer_params={"eps": 1e-7},
)
...

# Prepare the Training Arguments
args = SentenceTransformerTrainingArguments(
    # See https://github.com/huggingface/transformers/blob/main/src/transformers/training_args.py
    optim="adamw_torch",
    optim_args={"eps": 1e-7},
)

# Finetune the model
trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=loss,
)
trainer.train()
SentenceTransformer.fit(weight_decay)

v2.x

v3.x (recommended)

...

# Finetune the model
model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    weight_decay=0.02,
)
...

# Prepare the Training Arguments
args = SentenceTransformerTrainingArguments(
    weight_decay=0.02,
)

# Finetune the model
trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=loss,
)
trainer.train()
SentenceTransformer.fit(evaluation_steps)

v2.x

v3.x (recommended)

...

# Finetune the model
model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    evaluator=evaluator,
    evaluation_steps=1000,
)
...

# Prepare the Training Arguments
args = SentenceTransformerTrainingArguments(
    eval_strategy="steps",
    eval_steps=1000,
)

# Finetune the model
# Note: You need an eval_dataset and/or evaluator to evaluate
trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    loss=loss,
    evaluator=evaluator,
)
trainer.train()
SentenceTransformer.fit(output_path, save_best_model)

v2.x

v3.x (recommended)

...

# Finetune the model
model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    evaluator=evaluator,
    output_path="my/path",
    save_best_model=True,
)
...

# Prepare the Training Arguments
args = SentenceTransformerTrainingArguments(
    load_best_model_at_end=True,
    metric_for_best_model="all_nli_cosine_accuracy", # E.g. `evaluator.primary_metric`
)

# Finetune the model
trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=loss,
)
trainer.train()

# Save the best model at my output path
model.save_pretrained("my/path")
SentenceTransformer.fit(max_grad_norm)

v2.x

v3.x (recommended)

...

# Finetune the model
model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    max_grad_norm=1,
)
...

# Prepare the Training Arguments
args = SentenceTransformerTrainingArguments(
    max_grad_norm=1,
)

# Finetune the model
trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=loss,
)
trainer.train()
SentenceTransformer.fit(use_amp)

v2.x

v3.x (recommended)

...

# Finetune the model
model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    use_amp=True,
)
...

# Prepare the Training Arguments
args = SentenceTransformerTrainingArguments(
    fp16=True,
    bf16=False, # If your GPU supports it, you can also use bf16 instead
)

# Finetune the model
trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=loss,
)
trainer.train()
SentenceTransformer.fit(callback)

v2.x

v3.x (recommended)

...

def printer_callback(score, epoch, steps):
    print(f"Score: {score:.4f} at epoch {epoch:d}, step {steps:d}")

# Finetune the model
model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    callback=printer_callback,
)
from transformers import TrainerCallback

...

class PrinterCallback(TrainerCallback):
    # Subclass any method from https://hugging-face.cn/docs/transformers/main_classes/callback#transformers.TrainerCallback
    def on_evaluate(self, args, state, control, metrics=None, **kwargs):
        print(f"Metrics: {metrics} at epoch {state.epoch:d}, step {state.global_step:d}")

printer_callback = PrinterCallback()

# Finetune the model
trainer = SentenceTransformerTrainer(
    model=model,
    train_dataset=train_dataset,
    loss=loss,
    callbacks=[printer_callback],
)
trainer.train()
SentenceTransformer.fit(show_progress_bar)

v2.x

v3.x (recommended)

...

# Finetune the model
model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    show_progress_bar=True,
)
...

# Prepare the Training Arguments
args = SentenceTransformerTrainingArguments(
    disable_tqdm=False,
)

# Finetune the model
trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=loss,
)
trainer.train()
SentenceTransformer.fit(checkpoint_path, checkpoint_save_steps, checkpoint_save_total_limit)

v2.x

v3.x (recommended)

...

# Finetune the model
model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    checkpoint_path="checkpoints",
    checkpoint_save_steps=5000,
    checkpoint_save_total_limit=2,
)
...

# Prepare the Training Arguments
args = SentenceTransformerTrainingArguments(
    eval_strategy="steps",
    eval_steps=5000,
    save_strategy="steps",
    save_steps=5000,
    save_total_limit=2,
)

# Finetune the model
# Note: You need an eval_dataset and/or evaluator to checkpoint
trainer = SentenceTransformerTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    loss=loss,
)
trainer.train()

Migrating custom datasets and dataloaders used in SentenceTransformer.fit

v2.x

v3.x (recommended)

ParallelSentencesDataset

Manually create a Dataset and add a label column for the embeddings. Alternatively, consider loading one of our pre-provided parallel sentences datasets.

SentenceLabelDataset

Load or create a Dataset and use SentenceTransformerTrainingArguments(batch_sampler=BatchSamplers.GROUP_BY_LABEL) (uses the GroupByLabelBatchSampler). Recommended for BatchTripletLosses.

DenoisingAutoEncoderDataset

Manually add a column with noisy text to a Dataset containing the original text, e.g. using Dataset.map (see the sketch below).

NoDuplicatesDataLoader

Load or create a Dataset and use SentenceTransformerTrainingArguments(batch_sampler=BatchSamplers.NO_DUPLICATES) (uses the NoDuplicatesBatchSampler). Recommended for MultipleNegativesRankingLoss.
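
For example, the DenoisingAutoEncoderDataset replacement can be approximated with Dataset.map. The snippet below is a minimal sketch: the add_noise function (simple random word deletion) and the "text"/"noisy" column names are illustrative assumptions, and the column names and order must match the loss you pair the dataset with.

import random

from datasets import Dataset

def add_noise(batch):
    # Drop roughly 40% of the words in each text as a simple form of noise
    noisy = []
    for text in batch["text"]:
        words = text.split()
        kept = [word for word in words if random.random() < 0.6] or words
        noisy.append(" ".join(kept))
    return {"noisy": noisy}

train_dataset = Dataset.from_list([
    {"text": "A person on a horse jumps over a broken down airplane."},
    {"text": "Children smiling and waving at camera"},
])
train_dataset = train_dataset.map(add_noise, batched=True)
# The dataset now has the columns ["text", "noisy"]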

Migrating from v3.x to v4.x

The v4 Sentence Transformers release refactored the training of CrossEncoder reranker/pair classification models, replacing CrossEncoder.fit with CrossEncoderTrainer and CrossEncoderTrainingArguments. As with v3 and SentenceTransformer models, this update softly deprecates CrossEncoder.fit, meaning it still works, but it is recommended to switch to the new v4.x training format. Behind the scenes, this method now uses the new trainer.

Warning

If you do not have code that uses CrossEncoder.fit, then you do not need to make any changes to your code to update from v3.x to v4.x.

If you do, your code still works, but it is recommended to switch to the new v4.x training format, as it allows more training arguments and functionality. See the Training Overview for more details.

Old and new training flow

v3.x

v4.x (recommended)

from sentence_transformers import CrossEncoder, InputExample
from torch.utils.data import DataLoader

# 1. Define the model. Either from scratch or by loading a pre-trained model
model = CrossEncoder("microsoft/mpnet-base")

# 2. Define your train examples. You need more than just two examples...
train_examples = [
    InputExample(texts=["What are pandas?", "The giant panda ..."], label=1),
    InputExample(texts=["What's a panda?", "Mount Vesuvius is a ..."], label=0),
]
train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=16)

# 3. Finetune the model
model.fit(train_dataloader=train_dataloader, epochs=1, warmup_steps=100)
from datasets import load_dataset
from sentence_transformers import CrossEncoder, CrossEncoderTrainer
from sentence_transformers.cross_encoder.losses import BinaryCrossEntropyLoss

# 1. Define the model. Either from scratch or by loading a pre-trained model
model = CrossEncoder("microsoft/mpnet-base")

# 2. Load a dataset to finetune on, convert to required format
dataset = load_dataset("sentence-transformers/hotpotqa", "triplet", split="train")

def triplet_to_labeled_pair(batch):
    anchors = batch["anchor"]
    positives = batch["positive"]
    negatives = batch["negative"]
    return {
        "sentence_A": anchors * 2,
        "sentence_B": positives + negatives,
        "labels": [1] * len(positives) + [0] * len(negatives),
    }

dataset = dataset.map(triplet_to_labeled_pair, batched=True, remove_columns=dataset.column_names)
train_dataset = dataset.select(range(10_000))
eval_dataset = dataset.select(range(10_000, 11_000))

# 3. Define a loss function
loss = BinaryCrossEntropyLoss(model)

# 4. Create a trainer & train
trainer = CrossEncoderTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    loss=loss,
)
trainer.train()

# 5. Save the trained model
model.save_pretrained("models/mpnet-base-hotpotqa")
# model.push_to_hub("mpnet-base-hotpotqa")

Migrating parameters of CrossEncoder initialization and methods

v3.x

v4.x (recommended)

CrossEncoder(model_name=...)

Renamed to CrossEncoder(model_name_or_path=...)

CrossEncoder(automodel_args=...)

Renamed to CrossEncoder(model_kwargs=...)

CrossEncoder(tokenizer_args=...)

Renamed to CrossEncoder(tokenizer_kwargs=...)

CrossEncoder(config_args=...)

Renamed to CrossEncoder(config_kwargs=...)

CrossEncoder(cache_dir=...)

Renamed to CrossEncoder(cache_folder=...)

CrossEncoder(default_activation_function=...)

Renamed to CrossEncoder(activation_fn=...)

CrossEncoder(classifier_dropout=...)

Use CrossEncoder(config_kwargs={"classifier_dropout": ...}) instead

CrossEncoder.predict(activation_fct=...)

Renamed to CrossEncoder.predict(activation_fn=...)

CrossEncoder.rank(activation_fct=...)

Renamed to CrossEncoder.rank(activation_fn=...)

CrossEncoder.predict(num_workers=...)

Fully deprecated, no longer has any effect.

CrossEncoder.rank(num_workers=...)

Fully deprecated, no longer has any effect.

Note

The old keyword arguments still work, but they will emit a warning recommending that you use the new names instead; the example below shows the mapping.
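
For example, an initialization written with the old keyword arguments maps onto the new names as follows. This is a minimal sketch: the model name and the concrete values are placeholders, not required settings.

import torch

from sentence_transformers import CrossEncoder

# v3.x style (still works, but warns about the renamed keyword arguments):
# model = CrossEncoder(
#     model_name="microsoft/mpnet-base",
#     automodel_args={"torch_dtype": torch.float16},
#     tokenizer_args={"model_max_length": 512},
#     default_activation_function=torch.nn.Sigmoid(),
# )

# v4.x style using the new keyword arguments:
model = CrossEncoder(
    model_name_or_path="microsoft/mpnet-base",
    model_kwargs={"torch_dtype": torch.float16},
    tokenizer_kwargs={"model_max_length": 512},
    activation_fn=torch.nn.Sigmoid(),
)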

Migration for specific parameters of CrossEncoder.fit

CrossEncoder.fit(train_dataloader)

v3.x

v4.x (recommended)

from sentence_transformers import CrossEncoder, InputExample
from torch.utils.data import DataLoader

# 1. Define the model. Either from scratch or by loading a pre-trained model
model = CrossEncoder("microsoft/mpnet-base")

# 2. Define your train examples. You need more than just two examples...
train_examples = [
    InputExample(texts=["What are pandas?", "The giant panda ..."], label=1),
    InputExample(texts=["What's a panda?", "Mount Vesuvius is a ..."], label=0),
]
train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=16)

# 3. Finetune the model
model.fit(train_dataloader=train_dataloader)
from datasets import Dataset
from sentence_transformers import CrossEncoder, CrossEncoderTrainer
from sentence_transformers.cross_encoder.losses import BinaryCrossEntropyLoss

# Define a training dataset
train_examples = [
    {
        "sentence_1": "A person on a horse jumps over a broken down airplane.",
        "sentence_2": "A person is outdoors, on a horse.",
        "label": 1,
    },
    {
        "sentence_1": "Children smiling and waving at camera",
        "sentence_2": "The kids are frowning",
        "label": 0,
    },
]
train_dataset = Dataset.from_list(train_examples)

# Define a loss function
loss = BinaryCrossEntropyLoss(model)

# Finetune the model
trainer = CrossEncoderTrainer(
    model=model,
    train_dataset=train_dataset,
    loss=loss,
)
trainer.train()
CrossEncoder.fit(loss_fct)

v3.x

v4.x (recommended)

...

# Finetune the model
model.fit(
    train_dataloader=train_dataloader,
    loss_fct=torch.nn.MSELoss(),
)
from sentence_transformers.cross_encoder.losses import MSELoss

...

# Prepare the loss function
# See all valid losses in https://sbert.net.cn/docs/cross_encoder/loss_overview.html
loss = MSELoss(model)

# Finetune the model
trainer = CrossEncoderTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=loss,
)
trainer.train()
CrossEncoder.fit(evaluator)

v3.x

v4.x (recommended)

...

# Load an evaluator
evaluator = CrossEncoderNanoBEIREvaluator()

# Finetune with an evaluator
model.fit(
    train_dataloader=train_dataloader,
    evaluator=evaluator,
)
# Load an evaluator
evaluator = CrossEncoderNanoBEIREvaluator()

# Finetune with an evaluator
trainer = CrossEncoderTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    loss=loss,
    evaluator=evaluator,
)
trainer.train()
CrossEncoder.fit(epochs)

v3.x

v4.x (recommended)

...

# Finetune the model
model.fit(
    train_dataloader=train_dataloader,
    epochs=1,
)
...

# Prepare the Training Arguments
args = CrossEncoderTrainingArguments(
    num_train_epochs=1,
)

# Finetune the model
trainer = CrossEncoderTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=loss,
)
trainer.train()
CrossEncoder.fit(activation_fct)

v3.x

v4.x (recommended)

...

# Finetune the model
model.fit(
    train_dataloader=train_dataloader,
    activation_fct=torch.nn.Sigmoid(),
)
...

# Prepare the loss function
loss = MSELoss(model, activation_fn=torch.nn.Sigmoid())

# Finetune the model
trainer = CrossEncoderTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=loss,
)
trainer.train()
CrossEncoder.fit(scheduler)

v3.x

v4.x (recommended)

...

# Finetune the model
model.fit(
    train_dataloader=train_dataloader,
    scheduler="WarmupLinear",
)
...

# Prepare the Training Arguments
args = CrossEncoderTrainingArguments(
    # See https://hugging-face.cn/docs/transformers/main_classes/optimizer_schedules#transformers.SchedulerType
    lr_scheduler_type="linear"
)

# Finetune the model
trainer = CrossEncoderTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=loss,
)
trainer.train()
CrossEncoder.fit(warmup_steps)

v3.x

v4.x (recommended)

...

# Finetune the model
model.fit(
    train_dataloader=train_dataloader,
    warmup_steps=1000,
)
...

# Prepare the Training Arguments
args = CrossEncoderTrainingArguments(
    warmup_steps=1000,
)

# Finetune the model
trainer = CrossEncoderTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=loss,
)
trainer.train()
CrossEncoder.fit(optimizer_class, optimizer_params)

v3.x

v4.x (recommended)

...

# Finetune the model
model.fit(
    train_dataloader=train_dataloader,
    optimizer_class=torch.optim.AdamW,
    optimizer_params={"eps": 1e-7},
)
...

# Prepare the Training Arguments
args = CrossEncoderTrainingArguments(
    # See https://github.com/huggingface/transformers/blob/main/src/transformers/training_args.py
    optim="adamw_torch",
    optim_args={"eps": 1e-7},
)

# Finetune the model
trainer = CrossEncoderTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=loss,
)
trainer.train()
CrossEncoder.fit(weight_decay)

v3.x

v4.x (recommended)

...

# Finetune the model
model.fit(
    train_dataloader=train_dataloader,
    weight_decay=0.02,
)
...

# Prepare the Training Arguments
args = CrossEncoderTrainingArguments(
    weight_decay=0.02,
)

# Finetune the model
trainer = CrossEncoderTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=loss,
)
trainer.train()
CrossEncoder.fit(evaluation_steps)

v3.x

v4.x (recommended)

...

# Finetune the model
model.fit(
    train_dataloader=train_dataloader,
    evaluator=evaluator,
    evaluation_steps=1000,
)
...

# Prepare the Training Arguments
args = CrossEncoderTrainingArguments(
    eval_strategy="steps",
    eval_steps=1000,
)

# Finetune the model
# Note: You need an eval_dataset and/or evaluator to evaluate
trainer = CrossEncoderTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    loss=loss,
    evaluator=evaluator,
)
trainer.train()
CrossEncoder.fit(output_path, save_best_model)

v3.x

v4.x (recommended)

...

# Finetune the model
model.fit(
    train_dataloader=train_dataloader,
    evaluator=evaluator,
    output_path="my/path",
    save_best_model=True,
)
...

# Prepare the Training Arguments
args = CrossEncoderTrainingArguments(
    load_best_model_at_end=True,
    metric_for_best_model="hotpotqa_ndcg@10", # E.g. `evaluator.primary_metric`
)

# Finetune the model
trainer = CrossEncoderTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=loss,
)
trainer.train()

# Save the best model at my output path
model.save_pretrained("my/path")
CrossEncoder.fit(max_grad_norm)

v3.x

v4.x (recommended)

...

# Finetune the model
model.fit(
    train_dataloader=train_dataloader,
    max_grad_norm=1,
)
...

# Prepare the Training Arguments
args = CrossEncoderTrainingArguments(
    max_grad_norm=1,
)

# Finetune the model
trainer = CrossEncoderTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=loss,
)
trainer.train()
CrossEncoder.fit(use_amp)

v3.x

v4.x (recommended)

...

# Finetune the model
model.fit(
    train_dataloader=train_dataloader,
    use_amp=True,
)
...

# Prepare the Training Arguments
args = CrossEncoderTrainingArguments(
    fp16=True,
    bf16=False, # If your GPU supports it, you can also use bf16 instead
)

# Finetune the model
trainer = CrossEncoderTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=loss,
)
trainer.train()
CrossEncoder.fit(callback)

v3.x

v4.x (recommended)

...

def printer_callback(score, epoch, steps):
    print(f"Score: {score:.4f} at epoch {epoch:d}, step {steps:d}")

# Finetune the model
model.fit(
    train_dataloader=train_dataloader,
    callback=printer_callback,
)
from transformers import TrainerCallback

...

class PrinterCallback(TrainerCallback):
    # Subclass any method from https://hugging-face.cn/docs/transformers/main_classes/callback#transformers.TrainerCallback
    def on_evaluate(self, args, state, control, metrics=None, **kwargs):
        print(f"Metrics: {metrics} at epoch {state.epoch:d}, step {state.global_step:d}")

printer_callback = PrinterCallback()

# Finetune the model
trainer = CrossEncoderTrainer(
    model=model,
    train_dataset=train_dataset,
    loss=loss,
    callbacks=[printer_callback],
)
trainer.train()
CrossEncoder.fit(show_progress_bar)

v3.x

v4.x (recommended)

...

# Finetune the model
model.fit(
    train_dataloader=train_dataloader,
    show_progress_bar=True,
)
...

# Prepare the Training Arguments
args = CrossEncoderTrainingArguments(
    disable_tqdm=False,
)

# Finetune the model
trainer = CrossEncoderTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    loss=loss,
)
trainer.train()

Note

The old CrossEncoder.fit method still works; it has simply been softly deprecated. It now uses the new CrossEncoderTrainer behind the scenes.

Migrating CrossEncoder evaluators

v3.x

v4.x (recommended)

CEBinaryAccuracyEvaluator

Use CrossEncoderClassificationEvaluator, an evaluator with the same inputs and outputs.

CEBinaryClassificationEvaluator

Use CrossEncoderClassificationEvaluator, an evaluator with the same inputs and outputs.

CECorrelationEvaluator

Use CrossEncoderCorrelationEvaluator; this evaluator has been renamed.

CEF1Evaluator

Use CrossEncoderClassificationEvaluator, an evaluator with the same inputs and outputs.

CESoftmaxAccuracyEvaluator

Use CrossEncoderClassificationEvaluator, an evaluator with the same inputs and outputs.

CERerankingEvaluator

Use CrossEncoderRerankingEvaluator; this evaluator has been renamed.

Note

The old evaluators still work; they will simply warn you to update to the new evaluators, as in the example below.
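
For example, a script that previously constructed a CEBinaryClassificationEvaluator can switch to CrossEncoderClassificationEvaluator roughly as follows. This is a minimal sketch: the sentence pairs, labels, and evaluator name are placeholders, and it assumes the evaluator accepts sentence_pairs and labels keyword arguments.

from sentence_transformers import CrossEncoder
from sentence_transformers.cross_encoder.evaluation import CrossEncoderClassificationEvaluator

model = CrossEncoder("microsoft/mpnet-base")

# Pairs of (query, passage) with binary relevance labels
sentence_pairs = [
    ["What are pandas?", "The giant panda ..."],
    ["What's a panda?", "Mount Vesuvius is a ..."],
]
labels = [1, 0]

evaluator = CrossEncoderClassificationEvaluator(
    sentence_pairs=sentence_pairs,
    labels=labels,
    name="panda-dev",
)
results = evaluator(model)
print(results)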