Shortcuts

Source code for torchtune.data._instruct_templates

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

from abc import ABC, abstractmethod
from typing import Any, Dict, Mapping, Optional


class InstructTemplate(ABC):
    """
    Interface for instruction templates.

    Each template should include the template prompt with placeholders for
    the data inputs. Subclasses override ``template`` and implement
    :meth:`format`.
    """

    # Prompt text containing ``{placeholder}`` fields; the base class leaves
    # it empty and concrete templates supply their own value.
    template = ""

    @classmethod
    @abstractmethod
    def format(
        cls, sample: Mapping[str, Any], column_map: Optional[Dict[str, str]] = None
    ) -> str:
        """
        Format the prompt template with the given arguments.

        Args:
            sample (Mapping[str, Any]): a single data sample with various fields
            column_map (Optional[Dict[str, str]]): a mapping from the expected
                placeholder names in the template to the column names in the
                sample. If None, assume these are identical. Note: if the
                sample output is not named as "output" in the dataset, you
                always need to map it to "output" in column_map.

        Returns:
            The formatted prompt
        """
        # Abstract: the base implementation intentionally does nothing.
        pass
class AlpacaInstructTemplate(InstructTemplate):
    """
    Prompt template for Alpaca-style datasets.

    The prompt wording changes slightly depending on whether the sample
    provides an instruction plus an input, or just an instruction.
    """

    # Two variants keyed by whether the sample carries a non-empty input.
    template = {
        "prompt_input": (
            "Below is an instruction that describes a task, paired with an input that provides further context. "
            "Write a response that appropriately completes the request.\n\n"
            "### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:\n"
        ),
        "prompt_no_input": (
            "Below is an instruction that describes a task. "
            "Write a response that appropriately completes the request.\n\n"
            "### Instruction:\n{instruction}\n\n### Response:\n"
        ),
    }

    @classmethod
    def format(
        cls, sample: Mapping[str, Any], column_map: Optional[Dict[str, str]] = None
    ) -> str:
        """
        Generate prompt from instruction and (optional) input.

        Args:
            sample (Mapping[str, Any]): a single data sample with an
                instruction column and, optionally, an input column
            column_map (Optional[Dict[str, str]]): a mapping from the expected
                placeholder names in the template ("instruction", "input") to
                the column names in the sample. If None, assume these are
                identical.

        Returns:
            The formatted prompt. The "prompt_input" variant is used only when
            the input column is present in the sample and its value is truthy;
            otherwise the "prompt_no_input" variant is used.
        """
        names = column_map or {}
        instruction_col = names.get("instruction", "instruction")
        input_col = names.get("input", "input")

        # A missing or empty input falls back to the instruction-only prompt.
        if not (input_col in sample and sample[input_col]):
            return cls.template["prompt_no_input"].format(
                instruction=sample[instruction_col]
            )

        return cls.template["prompt_input"].format(
            instruction=sample[instruction_col], input=sample[input_col]
        )
class GrammarErrorCorrectionTemplate(InstructTemplate):
    """
    Prompt template for grammar correction datasets.
    """

    template = "Correct this to standard English: {sentence}\n---\nCorrected: "

    @classmethod
    def format(
        cls, sample: Mapping[str, Any], column_map: Optional[Dict[str, str]] = None
    ) -> str:
        """
        Generate prompt from sentence.

        Args:
            sample (Mapping[str, Any]): a single data sample with a sentence
                column
            column_map (Optional[Dict[str, str]]): a mapping from the expected
                placeholder name in the template ("sentence") to the column
                name in the sample. If None, assume these are identical.

        Returns:
            The formatted prompt
        """
        names = column_map or {}
        # Resolve which sample column feeds the ``{sentence}`` placeholder.
        sentence_col = names.get("sentence", "sentence")
        return cls.template.format(sentence=sample[sentence_col])
class SummarizeTemplate(InstructTemplate):
    """
    Prompt template to format datasets for summarization tasks.
    """

    template = "Summarize this dialogue:\n{dialogue}\n---\nSummary:\n"

    @classmethod
    def format(
        cls, sample: Mapping[str, Any], column_map: Optional[Dict[str, str]] = None
    ) -> str:
        """
        Generate prompt from dialogue.

        Args:
            sample (Mapping[str, Any]): a single data sample with a dialogue
                column
            column_map (Optional[Dict[str, str]]): a mapping from the expected
                placeholder name in the template ("dialogue") to the column
                name in the sample. If None, assume these are identical.

        Returns:
            The formatted prompt
        """
        names = column_map or {}
        # Resolve which sample column feeds the ``{dialogue}`` placeholder.
        dialogue_col = names.get("dialogue", "dialogue")
        return cls.template.format(dialogue=sample[dialogue_col])
class StackExchangedPairedTemplate(InstructTemplate):
    """
    Prompt template for preference datasets similar to StackExchangedPaired.
    """

    template = "Question: {question}\n\nAnswer: "

    @classmethod
    def format(
        cls, sample: Mapping[str, Any], column_map: Optional[Dict[str, str]] = None
    ) -> str:
        """
        Generate prompt from the sample's question text.

        Args:
            sample (Mapping[str, Any]): a single data sample with a prompt
                column holding the question
            column_map (Optional[Dict[str, str]]): a mapping from expected
                names to the column names in the sample. The key consulted
                here is "prompt" (not the template placeholder name
                "question"). If None, the sample column is assumed to be
                named "prompt".

        Returns:
            The formatted prompt
        """
        names = column_map or {}
        # The lookup key is "prompt", while the template field it fills is
        # ``{question}``.
        prompt_col = names.get("prompt", "prompt")
        return cls.template.format(question=sample[prompt_col])

Docs

Access comprehensive developer documentation for PyTorch

View Docs

Tutorials

Get in-depth tutorials for beginners and advanced developers

View Tutorials

Resources

Find development resources and get your questions answered

View Resources