# Source code for torchtune.data._instruct_templates
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
from abc import ABC, abstractmethod
from typing import Any, Dict, Mapping, Optional
class InstructTemplate(ABC):
    """
    Interface for instruction templates. Each template should include the template
    prompt with placeholders for the data inputs.

    Subclasses override ``template`` and implement the ``format`` classmethod.
    Because ``format`` is abstract, a subclass that does not override it cannot
    be instantiated.
    """

    # Prompt text containing the placeholders; overridden by each subclass.
    template = ""

    # ``classmethod`` must be the outermost decorator so that the abstract
    # marker set by ``abstractmethod`` is visible to ``ABC``.
    @classmethod
    @abstractmethod
    def format(
        cls, sample: Mapping[str, Any], column_map: Optional[Dict[str, str]] = None
    ) -> str:
        """
        Format the prompt template with the given arguments.

        Args:
            sample (Mapping[str, Any]): a single data sample with various fields
            column_map (Optional[Dict[str, str]]): a mapping from the expected
                placeholder names in the template to the column names in the sample.
                If None, assume these are identical. Note: if the sample output is not named
                as "output" in the dataset, you always need to map it to "output" in column_map.

        Returns:
            The formatted prompt
        """
class AlpacaInstructTemplate(InstructTemplate):
    """
    Prompt template for Alpaca-style datasets. Template prompt changes slightly depending
    on if there's an instruction + input or just an instruction.
    """

    # Two variants keyed by whether the sample carries a non-empty input:
    # "prompt_input" uses {instruction} and {input}; "prompt_no_input" uses
    # only {instruction}.
    template = {
        "prompt_input": (
            "Below is an instruction that describes a task, paired with an input that provides further context. "
            "Write a response that appropriately completes the request.\n\n"
            "### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n"
        ),
        "prompt_no_input": (
            "Below is an instruction that describes a task. "
            "Write a response that appropriately completes the request.\n\n"
            "### Instruction:\n{instruction}\n\n### Response:\n"
        ),
    }

    @classmethod
    def format(
        cls, sample: Mapping[str, Any], column_map: Optional[Dict[str, str]] = None
    ) -> str:
        """
        Generate prompt from instruction and input.

        The "prompt_input" variant is used when the input column is present in
        the sample and its value is truthy; otherwise the "prompt_no_input"
        variant is used and the input column is ignored.

        Args:
            sample (Mapping[str, Any]): a single data sample with instruction
                and, optionally, input
            column_map (Optional[Dict[str, str]]): a mapping from the expected
                placeholder names in the template ("instruction", "input") to the
                column names in the sample. If None, assume these are identical.

        Returns:
            The formatted prompt

        Raises:
            KeyError: if the instruction column is missing from ``sample``
        """
        column_map = column_map or {}
        key_input = column_map.get("input", "input")
        key_instruction = column_map.get("instruction", "instruction")

        # A missing, empty or otherwise falsy input selects the shorter prompt.
        has_input = key_input in sample and bool(sample[key_input])
        if not has_input:
            return cls.template["prompt_no_input"].format(
                instruction=sample[key_instruction]
            )

        return cls.template["prompt_input"].format(
            instruction=sample[key_instruction], input=sample[key_input]
        )
class GrammarErrorCorrectionTemplate(InstructTemplate):
    """
    Prompt template for grammar correction datasets.
    """

    # Single placeholder: {sentence}.
    template = "Correct this to standard English: {sentence}\n---\nCorrected: "

    @classmethod
    def format(
        cls, sample: Mapping[str, Any], column_map: Optional[Dict[str, str]] = None
    ) -> str:
        """
        Generate prompt from sentence.

        Args:
            sample (Mapping[str, Any]): a single data sample with sentence
            column_map (Optional[Dict[str, str]]): a mapping from the expected
                placeholder names in the template ("sentence") to the column names
                in the sample. If None, assume these are identical.

        Returns:
            The formatted prompt

        Raises:
            KeyError: if the sentence column is missing from ``sample``
        """
        column_map = column_map or {}
        key_sentence = column_map.get("sentence", "sentence")
        return cls.template.format(sentence=sample[key_sentence])
class SummarizeTemplate(InstructTemplate):
    """
    Prompt template to format datasets for summarization tasks.
    """

    # Single placeholder: {dialogue}.
    template = "Summarize this dialogue:\n{dialogue}\n---\nSummary:\n"

    @classmethod
    def format(
        cls, sample: Mapping[str, Any], column_map: Optional[Dict[str, str]] = None
    ) -> str:
        """
        Generate prompt from dialogue.

        Args:
            sample (Mapping[str, Any]): a single data sample with dialogue
            column_map (Optional[Dict[str, str]]): a mapping from the expected
                placeholder names in the template ("dialogue") to the column names
                in the sample. If None, assume these are identical.

        Returns:
            The formatted prompt

        Raises:
            KeyError: if the dialogue column is missing from ``sample``
        """
        column_map = column_map or {}
        key_dialogue = column_map.get("dialogue", "dialogue")
        return cls.template.format(dialogue=sample[key_dialogue])
class StackExchangedPairedTemplate(InstructTemplate):
    """
    Prompt template for preference datasets similar to StackExchangedPaired.
    """

    # Single placeholder: {question}. It is filled from the sample's prompt
    # column (see ``format``), not from a column named "question".
    template = "Question: {question}\n\nAnswer: "

    @classmethod
    def format(
        cls, sample: Mapping[str, Any], column_map: Optional[Dict[str, str]] = None
    ) -> str:
        """
        Generate prompt from the question stored in the sample's prompt column.

        Args:
            sample (Mapping[str, Any]): a single data sample with prompt
            column_map (Optional[Dict[str, str]]): a mapping from the expected
                column name "prompt" to the actual column name in the sample.
                Note that the key looked up is "prompt", not the template
                placeholder name "question". If None, the column is assumed to
                be named "prompt".

        Returns:
            The formatted prompt

        Raises:
            KeyError: if the prompt column is missing from ``sample``
        """
        column_map = column_map or {}
        key_prompt = column_map.get("prompt", "prompt")
        return cls.template.format(question=sample[key_prompt])