Source code for torchrl.envs.llm.datasets.ifeval
# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from __future__ import annotations

from typing import Any, Callable

import torch
from tensordict import TensorClass, TensorDict

from torchrl.envs import StepCounter
from torchrl.envs.llm.chat import DatasetChatEnv
from torchrl.envs.llm.reward.ifeval import IfEvalScorer


class IFEvalData(TensorClass["nocast"]):
"""A tensorclass for IFEval dta."""
key: torch.Tensor
instruction_id_list: str
kwargs: list[dict]
text: str
    # Responses and additional fields
response: str | None = None
tokens: torch.Tensor | None = None
tokens_response: torch.Tensor | None = None
logits: torch.Tensor | None = None
reward: torch.Tensor | None = None


def _collate_fn(batch):
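    # Stack the raw dataset samples into a single tensordict, rename the
    # dataset's "prompt" column to "text", and wrap the result in IFEvalData.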
batch = torch.stack([TensorDict.from_any(_batch) for _batch in batch])
batch.rename_key_("prompt", "text")
return IFEvalData.from_tensordict(batch)


class IFEvalEnv(DatasetChatEnv):
r"""A chat environment based on the IFEval dataset.
Keyword Args:
dataset (str, optional): The name of the dataset. Defaults to `"google/IFeval"`.
shuffle (bool, optional): Whether to shuffle the dataset. Defaults to `True`.
num_envs (int, optional): The number of environments to create. Defaults to `1`.
repeats (int | None, optional): The number of times to repeat each sample from the dataset (mainly for Monte-Carlo
based value estimation). If `None`, the dataset is not repeated. Defaults to `None`.
batch_size_dl (int, optional): The batch size for data loading. Defaults to `1`.
seed (int | None, optional): The random seed for reproducibility. If `None`, a random seed is used. Defaults to `None`.
group_repeats (bool, optional): Whether to group repeated samples together. Defaults to `False`.
tokenizer (transformers.AutoTokenizer | None, optional): The tokenizer to use for text processing. Defaults to `None`.

            .. note:: It is recommended to pass a tokenizer to the environment. This is an easy way to ensure that the
                template applied to the chat history is consistent with the format required by the model.

        device (torch.device | None, optional): The device to use for computations. Defaults to `None`.
template_kwargs (dict[str, Any] | None, optional): Additional keyword arguments for the template. Defaults to `None`.
apply_template (bool | None, optional): Whether to apply the template to the text. Defaults to `False`.
compute_reward (bool, optional): Whether to compute rewards. Defaults to `True`.
collate_fn (Callable | None, optional): A custom collate function for data loading. If `None`, a default
collate function is used. Defaults to `None`.
max_steps (int, optional): The maximum number of steps allowed in an episode. Defaults to `1`.

    Examples:
>>> import transformers
>>> from pprint import pprint
>>> from torchrl.envs.llm.datasets import IFEvalEnv
>>> from tensordict import set_list_to_stack
>>> set_list_to_stack(True).set()
>>>
>>> tokenizer = transformers.AutoTokenizer.from_pretrained("Qwen/Qwen2.5-3B")
>>> env = IFEvalEnv(tokenizer=tokenizer, apply_template=True)
>>> r = env.reset()
>>> print(r)
LazyStackedTensorDict(
fields={
done: Tensor(shape=torch.Size([1, 1]), device=cpu, dtype=torch.bool, is_shared=False),
history: History(
content=NonTensorStack(
[['A conversation between User and Assistant.\nYou...,
batch_size=torch.Size([1, 2]),
device=None),
role=NonTensorStack(
[['system', 'user']],
batch_size=torch.Size([1, 2]),
device=None),
batch_size=torch.Size([1, 2]),
device=None,
is_shared=False),
instruction_id_list: NonTensorStack(
[['detectable_content:number_placeholders']],
batch_size=torch.Size([1, 1]),
device=None),
key: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.int64, is_shared=False),
kwargs: NonTensorStack(
[[{'num_highlights': None, 'relation': None, 'num_...,
batch_size=torch.Size([1, 1]),
device=None),
step_count: Tensor(shape=torch.Size([1, 1]), device=cpu, dtype=torch.int64, is_shared=False),
terminated: Tensor(shape=torch.Size([1, 1]), device=cpu, dtype=torch.bool, is_shared=False),
text: NonTensorStack(
['<|im_start|>system\nA conversation between User ...,
batch_size=torch.Size([1]),
device=None),
truncated: Tensor(shape=torch.Size([1, 1]), device=cpu, dtype=torch.bool, is_shared=False)},
exclusive_fields={
},
batch_size=torch.Size([1]),
device=None,
is_shared=False,
stack_dim=0)
>>> # Print content of conversation so far
>>> pprint(r["history", "content"])
[['A conversation between User and Assistant.\n'
'You are tasked with responding to user queries in a very specific format. \n'
'When given a task or question, first think through the problem and provide '
'your thought process between <think> and </think> tags.\n'
'Then, give your final answer or response between <answer> and </answer> '
'tags.\n'
'You will be assessed by the content of the answer block only, so make sure '
'it contains all the required information, and only that.',
'Plan a 2 week Europe trip and visit London, Paris, and Rome. Answer in all '
'caps. The response must contain at least 8 placeholders (i.e., '
'[restaurant]).']]
        >>> # Action space: the environment expects an action with key "text_response" containing a string (or list of strings)
>>> print(env.action_spec)
Composite(
text_response: NonTensor(
shape=torch.Size([1]),
space=None,
device=None,
dtype=None,
domain=None,
example_data=a string),
device=None,
shape=torch.Size([1]))
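        >>> # A minimal (hypothetical) step: the response string below is
        >>> # illustrative only; any string following the prompt format works.
        >>> r["text_response"] = ["<think>Outline the trip.</think><answer>DAY 1: FLY TO [city] ...</answer>"]
        >>> s = env.step(r)
        >>> # With compute_reward=True (the default), IfEvalScorer writes the
        >>> # score into the "next" tensordict under the "reward" key.
        >>> reward = s["next", "reward"]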
"""

    SYSTEM_PROMPT = """A conversation between User and Assistant.
You are tasked with responding to user queries in a very specific format.
When given a task or question, first think through the problem and provide your thought process between <think> and </think> tags.
Then, give your final answer or response between <answer> and </answer> tags.
You will be assessed by the content of the answer block only, so make sure it contains all the required information, and only that."""

    def __init__(
self,
*,
dataset: str = "google/IFeval",
shuffle: bool = True,
num_envs: int = 1,
repeats: int | None = None,
batch_size_dl: int = 1,
seed: int | None = None,
group_repeats: bool = False,
tokenizer: transformers.AutoTokenizer | None = None, # noqa
device: torch.device | None = None,
template_kwargs: dict[str, Any] | None = None,
apply_template: bool | None = False,
compute_reward: bool = True,
collate_fn: Callable | None = None,
max_steps: int = 1,
):
if collate_fn is None:
collate_fn = _collate_fn
super().__init__(
dataset=dataset,
shuffle=shuffle,
num_envs=num_envs,
repeats=repeats,
batch_size_dl=batch_size_dl,
seed=seed,
group_repeats=group_repeats,
tokenizer=tokenizer,
device=device,
template_kwargs=template_kwargs,
apply_template=apply_template,
collate_fn=collate_fn,
)
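        # Optional transforms: StepCounter caps the episode length, and
        # IfEvalScorer computes the IFEval reward from the generated answer.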
if max_steps:
self.append_transform(StepCounter(max_steps=max_steps))
if compute_reward:
self.append_transform(IfEvalScorer())