Source code for torchrl.envs.llm.datasets.ifeval

# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from __future__ import annotations

from typing import Any, Callable, Literal, TYPE_CHECKING

import torch
from tensordict import NonTensorData, NonTensorStack, TensorClass, TensorDict
from torchrl.data import Composite, NonTensor, Unbounded
from torchrl.envs import StepCounter
from torchrl.envs.llm.chat import DatasetChatEnv
from torchrl.envs.llm.reward.ifeval import IfEvalScorer

if TYPE_CHECKING:
    import transformers


class IFEvalData(TensorClass["nocast"]):
    """A tensorclass for IFEval dta."""

    key: torch.Tensor
    instruction_id_list: list[str]
    kwargs: list[dict]
    query: str

    # Responses and additional fields
    response: str | None = None
    tokens: torch.Tensor | None = None
    tokens_response: torch.Tensor | None = None
    logits: torch.Tensor | None = None
    reward: torch.Tensor | None = None

    @classmethod
    def default_spec(
        cls, shape: torch.Size, device: torch.device | None = None
    ) -> Composite:
        return Composite(
            key=Unbounded(shape=shape, dtype=torch.int64, device=device),
            instruction_id_list=NonTensor(
                shape=shape,
                device=device,
                feature_dims=0,
                example_data=["punctuation:no_comma"],
            ),
            kwargs=NonTensor(
                shape=shape,
                device=device,
                feature_dims=0,
                example_data={
                    "num_highlights": None,
                    "relation": None,
                    "num_placeholders": None,
                },
            ),
            query=NonTensor(
                shape=shape,
                device=device,
                example_data="Plan a 2 week Europe trip and visit London, Paris, and Rome. Answer in all caps. The response must contain at least 8 placeholders (i.e., [restaurant]).",
            ),
            shape=shape,
            step_mdp_static=True,
            data_cls=cls,
        )
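

# Illustrative sketch (not part of the module): ``default_spec`` builds the
# Composite primer that ``IFEvalEnv`` passes to its parent class to seed reset
# data. Assuming torchrl's ``Composite.zero()`` honors ``data_cls``, it should
# yield an ``IFEvalData``-shaped placeholder batch:
#
#     >>> spec = IFEvalData.default_spec(torch.Size([2]))
#     >>> placeholder = spec.zero()  # zeroed tensors, example non-tensor data
#     >>> placeholder.batch_size     # torch.Size([2])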


def _collate_fn(batch):
    batch = torch.stack([TensorDict.from_any(_batch) for _batch in batch])
    batch.rename_key_("prompt", "query")
    # each element of instruction_id_list and kwargs should be a plain list,
    # not a nested NonTensorStack
    instruction_id_list = batch["instruction_id_list"]
    # instruction_id_list should be a list of lists
    instruction_id_list = NonTensorStack(
        *[
            NonTensorData([item] if not isinstance(item, list) else item)
            for item in instruction_id_list
        ]
    )
    kwargs = batch["kwargs"]
    kwargs = NonTensorStack(
        *[
            NonTensorData([item] if not isinstance(item, list) else item)
            for item in kwargs
        ]
    )
    batch.set("instruction_id_list", instruction_id_list)
    batch.set("kwargs", kwargs)
    # we don't need a tensorclass here
    return batch
    # return IFEvalData.from_tensordict(batch)
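

# Illustrative sketch (assumed raw-sample layout, mirroring google/IFeval rows):
# each sample is a dict with "key", "prompt", "instruction_id_list" and "kwargs";
# collation renames "prompt" to "query" and stacks the two list-valued fields
# element-wise so that each batch entry keeps a plain list:
#
#     >>> samples = [{
#     ...     "key": 0,
#     ...     "prompt": "Answer in all caps.",
#     ...     "instruction_id_list": ["change_case:english_capital"],
#     ...     "kwargs": [{}],
#     ... }]
#     >>> td = _collate_fn(samples)
#     >>> td["query"]  # renamed from "prompt"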


class IFEvalEnv(DatasetChatEnv):
    r"""A chat environment based on the IFEval dataset.

    Keyword Args:
        dataset (str, optional): The name of the dataset. Defaults to `"google/IFeval"`.
        shuffle (bool, optional): Whether to shuffle the dataset. Defaults to `True`.
        num_envs (int, optional): The number of environments to create. Defaults to `1`.
        repeats (int | None, optional): The number of times to repeat each sample from the dataset
            (mainly for Monte-Carlo based value estimation). If `None`, the dataset is not repeated.
            Defaults to `None`.
        batch_size_dl (int, optional): The batch size for data loading. Defaults to `1`.
        seed (int | None, optional): The random seed for reproducibility. If `None`, a random seed
            is used. Defaults to `None`.
        group_repeats (bool, optional): Whether to group repeated samples together. Defaults to `False`.
        tokenizer (transformers.AutoTokenizer | None, optional): The tokenizer to use for text
            processing. Defaults to `None`.

            .. note:: It is recommended to pass a tokenizer to the environment. This is an easy way
                to ensure that the template applied to the chat history is consistent with the format
                required by the model.

        device (torch.device | None, optional): The device to use for computations. Defaults to `None`.
        template_kwargs (dict[str, Any] | None, optional): Additional keyword arguments for the
            template. Defaults to `None`.
        apply_template (bool | None, optional): Whether to apply the template to the text.
            Defaults to `False`.
        compute_reward (bool, optional): Whether to compute rewards. Defaults to `True`.
        collate_fn (Callable | None, optional): A custom collate function for data loading. If `None`,
            a default collate function is used. Defaults to `None`.
        max_steps (int, optional): The maximum number of steps allowed in an episode. Defaults to `1`.
        input_mode (Literal["history", "text", "tokens"], optional): The mode of input to use.
            Defaults to `"history"`.

    Examples:
        >>> import transformers
        >>> from pprint import pprint
        >>> from torchrl.envs.llm.datasets import IFEvalEnv
        >>> from tensordict import set_list_to_stack
        >>> set_list_to_stack(True).set()
        >>>
        >>> tokenizer = transformers.AutoTokenizer.from_pretrained("Qwen/Qwen2.5-3B")
        >>> env = IFEvalEnv(tokenizer=tokenizer, apply_template=True)
        >>> r = env.reset()
        >>> print(r)
        LazyStackedTensorDict(
            fields={
                done: Tensor(shape=torch.Size([1, 1]), device=cpu, dtype=torch.bool, is_shared=False),
                history: History(
                    content=NonTensorStack(
                        [['A conversation between User and Assistant.\nYou...,
                        batch_size=torch.Size([1, 2]),
                        device=None),
                    role=NonTensorStack(
                        [['system', 'user']],
                        batch_size=torch.Size([1, 2]),
                        device=None),
                    batch_size=torch.Size([1, 2]),
                    device=None,
                    is_shared=False),
                instruction_id_list: NonTensorStack(
                    [['detectable_content:number_placeholders']],
                    batch_size=torch.Size([1, 1]),
                    device=None),
                key: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.int64, is_shared=False),
                kwargs: NonTensorStack(
                    [[{'num_highlights': None, 'relation': None, 'num_...,
                    batch_size=torch.Size([1, 1]),
                    device=None),
                step_count: Tensor(shape=torch.Size([1, 1]), device=cpu, dtype=torch.int64, is_shared=False),
                terminated: Tensor(shape=torch.Size([1, 1]), device=cpu, dtype=torch.bool, is_shared=False),
                text: NonTensorStack(
                    ['<|im_start|>system\nA conversation between User ...,
                    batch_size=torch.Size([1]),
                    device=None),
                truncated: Tensor(shape=torch.Size([1, 1]), device=cpu, dtype=torch.bool, is_shared=False)},
            exclusive_fields={
            },
            batch_size=torch.Size([1]),
            device=None,
            is_shared=False,
            stack_dim=0)
        >>> # Print content of conversation so far
        >>> pprint(r["history", "content"])
        [['A conversation between User and Assistant.\n'
          'You are tasked with responding to user queries in a very specific format. \n'
          'When given a task or question, first think through the problem and provide '
          'your thought process between <think> and </think> tags.\n'
          'Then, give your final answer or response between <answer> and </answer> '
          'tags.\n'
          'You will be assessed by the content of the answer block only, so make sure '
          'it contains all the required information, and only that.',
          'Plan a 2 week Europe trip and visit London, Paris, and Rome. Answer in all '
          'caps. The response must contain at least 8 placeholders (i.e., '
          '[restaurant]).']]
        >>> # Action space: the environment expects an action with key "text_response" containing a (list of) strings
        >>> print(env.action_spec)
        Composite(
            text_response: NonTensor(
                shape=torch.Size([1]),
                space=None,
                device=None,
                dtype=None,
                domain=None,
                example_data=a string),
            device=None,
            shape=torch.Size([1]))
    """

    SYSTEM_PROMPT = """You are a helpful AI assistant that follows instructions extremely well.

IMPORTANT: You must respond in a specific format for every task:

1. First, think through the problem step by step and write your reasoning between <think> and </think> tags
2. Then, provide your final answer between <answer> and </answer> tags

CRITICAL RULES:
- ALWAYS use <think>...</think> and <answer>...</answer> tags exactly as shown
- Do NOT use <thought>, <reasoning>, or any other tag variations
- Your <answer> section will be evaluated, so make it complete and accurate
- Follow ALL specific requirements in the user's request (formatting, content, etc.)
- If the user asks for placeholders like [restaurant], include them exactly as requested
- Pay attention to capitalization, punctuation, and other formatting requirements

Example format:
<think>
I need to analyze what the user is asking for...
[Your reasoning here]
</think>
<answer>
[Your final answer here, following all user requirements]
</answer>"""

    def __init__(
        self,
        *,
        dataset: str = "google/IFeval",
        shuffle: bool = True,
        num_envs: int = 1,
        repeats: int | None = None,
        batch_size_dl: int = 1,
        seed: int | None = None,
        group_repeats: bool = False,
        tokenizer: transformers.AutoTokenizer | None = None,  # noqa
        device: torch.device | None = None,
        template_kwargs: dict[str, Any] | None = None,
        apply_template: bool | None = False,
        compute_reward: bool = True,
        collate_fn: Callable | None = None,
        max_steps: int = 1,
        input_mode: Literal["history", "text", "tokens"] = "history",
    ):
        if collate_fn is None:
            collate_fn = _collate_fn
        super().__init__(
            dataset=dataset,
            shuffle=shuffle,
            num_envs=num_envs,
            repeats=repeats,
            batch_size_dl=batch_size_dl,
            seed=seed,
            group_repeats=group_repeats,
            tokenizer=tokenizer,
            device=device,
            template_kwargs=template_kwargs,
            apply_template=apply_template,
            collate_fn=collate_fn,
            input_mode=input_mode,
            data_key="query",
            primers=IFEvalData.default_spec((num_envs,), device),
        )
        if max_steps:
            self.append_transform(StepCounter(max_steps=max_steps))
        if compute_reward:
            self.append_transform(IfEvalScorer())
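

# Minimal end-to-end sketch (the response text is a hypothetical placeholder and
# the reward location is an assumption based on torchrl's step convention):
# reset, write an answer under the "text_response" action key, then step so that
# IfEvalScorer can attach a reward when compute_reward=True:
#
#     >>> env = IFEvalEnv(apply_template=False)
#     >>> td = env.reset()
#     >>> td["text_response"] = ["<think>plan...</think><answer>[PLACEHOLDER]</answer>"]
#     >>> td = env.step(td)
#     >>> td["next", "reward"]  # score produced by IfEvalScorer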
