Source code for torchrl.envs.llm.datasets.ifeval

# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from __future__ import annotations

from typing import Any, Callable

import torch
from tensordict import TensorClass, TensorDict
from torchrl.envs import StepCounter

from torchrl.envs.llm.chat import DatasetChatEnv

from torchrl.envs.llm.reward.ifeval import IfEvalScorer


class IFEvalData(TensorClass["nocast"]):
    """A tensorclass for IFEval dta."""

    key: torch.Tensor
    instruction_id_list: str
    kwargs: list[dict]
    text: str
    # Responses and additional fields
    response: str | None = None
    tokens: torch.Tensor | None = None
    tokens_response: torch.Tensor | None = None
    logits: torch.Tensor | None = None
    reward: torch.Tensor | None = None
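
# A hedged sketch of how a single IFEvalData sample could be built by hand.
# The field values below are hypothetical; real samples are produced from the
# google/IFeval dataset by the collate function defined below.
#
#     >>> sample = IFEvalData(
#     ...     key=torch.tensor(0),
#     ...     instruction_id_list=["detectable_content:number_placeholders"],
#     ...     kwargs=[{"num_placeholders": 8}],
#     ...     text="Plan a 2 week Europe trip. Use at least 8 placeholders.",
#     ...     batch_size=[],
#     ... )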


def _collate_fn(batch):
    batch = torch.stack([TensorDict.from_any(_batch) for _batch in batch])
    batch.rename_key_("prompt", "text")
    return IFEvalData.from_tensordict(batch)
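
# A hedged sketch of what `_collate_fn` consumes: a list of raw dataset rows
# (dicts following the google/IFeval schema, with a "prompt" field that the
# function renames to "text"). The row below is hypothetical:
#
#     >>> raw_batch = [
#     ...     {
#     ...         "key": 0,
#     ...         "prompt": "Write a haiku about Rome. Answer in all caps.",
#     ...         "instruction_id_list": ["change_case:english_capital"],
#     ...         "kwargs": [{}],
#     ...     },
#     ... ]
#     >>> data = _collate_fn(raw_batch)  # stacks the rows into an IFEvalData batch
#     >>> data.text  # "prompt" has been renamed to "text"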


class IFEvalEnv(DatasetChatEnv):
    r"""A chat environment based on the IFEval dataset.

    Keyword Args:
        dataset (str, optional): The name of the dataset. Defaults to `"google/IFeval"`.
        shuffle (bool, optional): Whether to shuffle the dataset. Defaults to `True`.
        num_envs (int, optional): The number of environments to create. Defaults to `1`.
        repeats (int | None, optional): The number of times to repeat each sample from the dataset
            (mainly for Monte-Carlo based value estimation). If `None`, the dataset is not repeated.
            Defaults to `None`.
        batch_size_dl (int, optional): The batch size for data loading. Defaults to `1`.
        seed (int | None, optional): The random seed for reproducibility. If `None`, a random seed
            is used. Defaults to `None`.
        group_repeats (bool, optional): Whether to group repeated samples together. Defaults to `False`.
        tokenizer (transformers.AutoTokenizer | None, optional): The tokenizer to use for text
            processing. Defaults to `None`.

            .. note:: It is recommended to pass a tokenizer to the environment. This is an easy way
                to ensure that the template applied to the chat history is consistent with the format
                required by the model.

        device (torch.device | None, optional): The device to use for computations. Defaults to `None`.
        template_kwargs (dict[str, Any] | None, optional): Additional keyword arguments for the
            template. Defaults to `None`.
        apply_template (bool | None, optional): Whether to apply the template to the text.
            Defaults to `False`.
        compute_reward (bool, optional): Whether to compute rewards. Defaults to `True`.
        collate_fn (Callable | None, optional): A custom collate function for data loading. If `None`,
            a default collate function is used. Defaults to `None`.
        max_steps (int, optional): The maximum number of steps allowed in an episode. Defaults to `1`.

    Examples:
        >>> import transformers
        >>> from pprint import pprint
        >>> from torchrl.envs.llm.datasets import IFEvalEnv
        >>> from tensordict import set_list_to_stack
        >>> set_list_to_stack(True).set()
        >>>
        >>> tokenizer = transformers.AutoTokenizer.from_pretrained("Qwen/Qwen2.5-3B")
        >>> env = IFEvalEnv(tokenizer=tokenizer, apply_template=True)
        >>> r = env.reset()
        >>> print(r)
        LazyStackedTensorDict(
            fields={
                done: Tensor(shape=torch.Size([1, 1]), device=cpu, dtype=torch.bool, is_shared=False),
                history: History(
                    content=NonTensorStack(
                        [['A conversation between User and Assistant.\nYou...,
                        batch_size=torch.Size([1, 2]),
                        device=None),
                    role=NonTensorStack(
                        [['system', 'user']],
                        batch_size=torch.Size([1, 2]),
                        device=None),
                    batch_size=torch.Size([1, 2]),
                    device=None,
                    is_shared=False),
                instruction_id_list: NonTensorStack(
                    [['detectable_content:number_placeholders']],
                    batch_size=torch.Size([1, 1]),
                    device=None),
                key: Tensor(shape=torch.Size([1]), device=cpu, dtype=torch.int64, is_shared=False),
                kwargs: NonTensorStack(
                    [[{'num_highlights': None, 'relation': None, 'num_...,
                    batch_size=torch.Size([1, 1]),
                    device=None),
                step_count: Tensor(shape=torch.Size([1, 1]), device=cpu, dtype=torch.int64, is_shared=False),
                terminated: Tensor(shape=torch.Size([1, 1]), device=cpu, dtype=torch.bool, is_shared=False),
                text: NonTensorStack(
                    ['<|im_start|>system\nA conversation between User ...,
                    batch_size=torch.Size([1]),
                    device=None),
                truncated: Tensor(shape=torch.Size([1, 1]), device=cpu, dtype=torch.bool, is_shared=False)},
            exclusive_fields={
            },
            batch_size=torch.Size([1]),
            device=None,
            is_shared=False,
            stack_dim=0)
        >>> # Print the content of the conversation so far
        >>> pprint(r["history", "content"])
        [['A conversation between User and Assistant.\n'
          'You are tasked with responding to user queries in a very specific format. \n'
          'When given a task or question, first think through the problem and provide '
          'your thought process between <think> and </think> tags.\n'
          'Then, give your final answer or response between <answer> and </answer> '
          'tags.\n'
          'You will be assessed by the content of the answer block only, so make sure '
          'it contains all the required information, and only that.',
          'Plan a 2 week Europe trip and visit London, Paris, and Rome. Answer in all '
          'caps. The response must contain at least 8 placeholders (i.e., '
          '[restaurant]).']]
        >>> # Action space: the environment expects an action with key "text_response" containing a (list of) strings
        >>> print(env.action_spec)
        Composite(
            text_response: NonTensor(
                shape=torch.Size([1]),
                space=None,
                device=None,
                dtype=None,
                domain=None,
                example_data=a string),
            device=None,
            shape=torch.Size([1]))
    """

    SYSTEM_PROMPT = """A conversation between User and Assistant.
You are tasked with responding to user queries in a very specific format.
When given a task or question, first think through the problem and provide your thought process between <think> and </think> tags.
Then, give your final answer or response between <answer> and </answer> tags.
You will be assessed by the content of the answer block only, so make sure it contains all the required information, and only that."""

    def __init__(
        self,
        *,
        dataset: str = "google/IFeval",
        shuffle: bool = True,
        num_envs: int = 1,
        repeats: int | None = None,
        batch_size_dl: int = 1,
        seed: int | None = None,
        group_repeats: bool = False,
        tokenizer: transformers.AutoTokenizer | None = None,  # noqa
        device: torch.device | None = None,
        template_kwargs: dict[str, Any] | None = None,
        apply_template: bool | None = False,
        compute_reward: bool = True,
        collate_fn: Callable | None = None,
        max_steps: int = 1,
    ):
        if collate_fn is None:
            collate_fn = _collate_fn
        super().__init__(
            dataset=dataset,
            shuffle=shuffle,
            num_envs=num_envs,
            repeats=repeats,
            batch_size_dl=batch_size_dl,
            seed=seed,
            group_repeats=group_repeats,
            tokenizer=tokenizer,
            device=device,
            template_kwargs=template_kwargs,
            apply_template=apply_template,
            collate_fn=collate_fn,
        )
        if max_steps:
            self.append_transform(StepCounter(max_steps=max_steps))
        if compute_reward:
            self.append_transform(IfEvalScorer())
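
# A minimal usage sketch of one reset/step cycle, assuming the "text_response"
# action key shown in the class docstring; the response string below is a
# dummy placeholder:
#
#     >>> from tensordict import set_list_to_stack
#     >>> set_list_to_stack(True).set()
#     >>> env = IFEvalEnv(apply_template=False)
#     >>> td = env.reset()
#     >>> td["text_response"] = [
#     ...     "<think>plan the trip</think><answer>[restaurant] [hotel] ...</answer>"
#     ... ]
#     >>> td = env.step(td)
#     >>> # With compute_reward=True (the default), IfEvalScorer writes the
#     >>> # reward into the "next" entry.
#     >>> print(td["next", "reward"])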
