LLM Utils

build_LLM_prompt

Util function to build the LLM prompt from input text data

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `input_ds` | `Dataset` | Input dataset containing text | required |
| `ds_column_mapping` | `dict` | Dictionary mapping prompt entities to dataset column names | required |
| `prompt_template_prefix` | `Union[str, None]` | Text instruction to prepend to each transformed input text sample | `''` |
| `answer_start_token` | `str` | Token appended to the prompt to indicate the start of the answer | `''` |
| `llm_prompt_col_name` | `str` | Name of the column for the built LLM prompts | `'llm_prompt'` |

Returns:

| Type | Description |
| --- | --- |
| `Dataset` | Dataset with the built LLM prompts added under `llm_prompt_col_name` |

Source code in dqc/llm_utils/inference.py
def build_LLM_prompt(
    input_ds: Dataset,
    ds_column_mapping: dict,
    prompt_template_prefix: str = "",
    answer_start_token: str = "",
    llm_prompt_col_name: str = "llm_prompt",
) -> Dataset:
    """Util function to build the LLM prompt from input text data

    Args:
        input_ds (Dataset): Input dataset containing text
        ds_column_mapping (dict): Dictionary mapping prompt entities to dataset column names.
        prompt_template_prefix (Union[str, None], optional): Text instruction to prepend to each transformed input text sample. Defaults to "".
        answer_start_token (str, optional): Token to append to the prompt to indicate start of the answer. Defaults to ""
        llm_prompt_col_name (str, optional): Name of the column for the built LLM prompts. Defaults to 'llm_prompt'
    Returns:
        Dataset: Dataset with the built LLM prompts.
    """
    if isinstance(input_ds, pd.DataFrame):
        input_ds = Dataset.from_pandas(input_ds)

    def _helper(
        example: datasets.formatting.formatting.LazyBatch,
        prompt_template_prefix: str,
        ds_column_mapping: dict,
        llm_prompt_col_name: str,
    ) -> dict:
        llm_prompt = prompt_template_prefix
        for entity_name, col_name in ds_column_mapping.items():
            if col_name:
                entity_value = example[col_name]
                if isinstance(entity_value, list):
                    entity_value = "|| ".join(map(str, entity_value))
                else:
                    entity_value = str(entity_value)
                llm_prompt += f"[{entity_name}]{entity_value}[/{entity_name}]"

        if answer_start_token:
            llm_prompt += answer_start_token

        return {llm_prompt_col_name: llm_prompt}

    input_ds = input_ds.map(
        _helper,
        fn_kwargs={
            "prompt_template_prefix": prompt_template_prefix,
            "ds_column_mapping": ds_column_mapping,
            "llm_prompt_col_name": llm_prompt_col_name,
        },
    )
    return input_ds
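
For illustration, the sketch below builds prompts from a small pandas DataFrame. The column name `text`, the entity name `review`, the instruction text and the import path (inferred from the source location above) are assumptions for the example rather than requirements of the function.

```python
import pandas as pd

from dqc.llm_utils import build_LLM_prompt  # assumed import path, based on the source location above

# Toy review data; the column name "text" is illustrative
df = pd.DataFrame({"text": ["The battery lasts two days.", "The screen cracked on arrival."]})

prompted_ds = build_LLM_prompt(
    df,
    ds_column_mapping={"review": "text"},  # each value is wrapped as [review]...[/review]
    prompt_template_prefix="Classify the sentiment of the review. ",
    answer_start_token="[label]",
)

print(prompted_ds["llm_prompt"][0])
# Classify the sentiment of the review. [review]The battery lasts two days.[/review][label]
```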

infer_LLM

Util function to run LLM inference

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `model` | `AutoModelForCausalLM` | LLM artifact | required |
| `tokenizer` | `AutoTokenizer` | LLM tokenizer object | required |
| `input_ds` | `Dataset` | Input dataset containing text prompts | required |
| `llm_prompt_col_name` | `str` | Name of the column containing text prompts | `'llm_prompt'` |
| `llm_response_raw_col_name` | `str` | Name of the column containing the raw prediction | `'llm_response'` |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| `dataset` | `Dataset` | Dataset with generated predictions |

Source code in dqc/llm_utils/inference.py
def infer_LLM(
    model: AutoModelForCausalLM,
    tokenizer: AutoTokenizer,
    input_ds: Dataset,
    llm_prompt_col_name: str = "llm_prompt",
    llm_response_raw_col_name: str = "llm_response",
    **options,
) -> Dataset:
    """
    Util function to run LLM inference

    Args:
        model (AutoModelForCausalLM): LLM artifact.
        tokenizer (AutoTokenizer): LLM tokenizer object
        input_ds (Dataset): Input dataset containing text prompts.
        llm_prompt_col_name (str, optional): Name of the column containing text prompts. Defaults to 'llm_prompt'.
        llm_response_raw_col_name (str, optional): Name of the column containing prediction. Defaults to 'llm_response'.

    Returns:
        dataset: Dataset with generated predictions.
    """
    if options["random_state"]:
        _set_seed(options["random_state"])
        del options["random_state"]

    text_generator = pipeline(
        "text-generation", model=model, tokenizer=tokenizer, truncation=False, **options
    )
    text_generator.tokenizer.pad_token_id = model.config.eos_token_id

    batch_size = options.get("batch_size", 8)

    input_ds = input_ds.map(
        _generate_predictions,
        fn_kwargs={
            "generator": text_generator,
            "llm_prompt_col_name": llm_prompt_col_name,
            "llm_response_raw_col_name": llm_response_raw_col_name,
            **options,
        },
        batched=True,
        batch_size=batch_size,
    )

    return input_ds
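
A minimal usage sketch, assuming a causal LM checkpoint from the Hugging Face Hub (the model name below is a placeholder) and the `prompted_ds` dataset from the previous example. As the source above shows, `random_state` and `batch_size` are consumed by `infer_LLM`, and remaining keyword options are forwarded to the underlying `text-generation` pipeline.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

from dqc.llm_utils import infer_LLM  # assumed import path, based on the source location above

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # placeholder checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

ds_with_responses = infer_LLM(
    model,
    tokenizer,
    prompted_ds,      # Dataset with an 'llm_prompt' column, e.g. the output of build_LLM_prompt
    batch_size=4,     # read by infer_LLM and forwarded to the text-generation pipeline
    random_state=42,  # used to seed generation, then dropped from the pipeline options
)

# Raw generations are stored in the 'llm_response' column
print(ds_with_responses["llm_response"][0])
```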

run_LLM

Run end-to-end LLM inference (from pre-processing the input data to post-processing the predictions) and return the validation data with the generated predictions

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `val_data` | `Union[DataFrame, Dataset]` | Validation data with labels | required |
| `model` | `AutoModelForCausalLM` | LLM artifact | required |
| `tokenizer` | `AutoTokenizer` | LLM tokenizer object | required |
| `ds_column_mapping` | `dict` | Dictionary mapping prompt entities to dataset column names | required |
| `prompt_template_prefix` | `Union[str, None]` | Text instruction to prepend to each transformed input text sample | `''` |
| `llm_prompt_col_name` | `str` | Name of the column with the built LLM prompts | `'llm_prompt'` |
| `llm_response_raw_col_name` | `str` | Name of the column containing the raw prediction | `'llm_response'` |
| `llm_response_cleaned_col_name` | `str` | Name of the column containing the final post-processed result | `'llm_response_cleaned'` |
| `answer_start_token` | `str` | Token that indicates the start of answer generation | `''` |
| `answer_end_token` | `str` | Token that indicates the end of answer generation | `''` |

Returns:

| Type | Description |
| --- | --- |
| `Dataset` | Validation dataset with the raw and post-processed LLM predictions |
Source code in dqc/llm_utils/inference.py
def run_LLM(
    val_data: Union[pd.DataFrame, Dataset],
    model: AutoModelForCausalLM,
    tokenizer: AutoTokenizer,
    ds_column_mapping: dict,
    prompt_template_prefix: Union[str, None] = "",
    llm_prompt_col_name: str = "llm_prompt",
    llm_response_raw_col_name: str = "llm_response",
    llm_response_cleaned_col_name: str = "llm_response_cleaned",
    answer_start_token: str = "",
    answer_end_token: str = "",
    **options,
) -> Dataset:
    """Run end-to-end LLM inference (from pre-processing the input data to post-processing the predictions) and return the validation data with the generated predictions

    Args:
        val_data (Union[pd.DataFrame, Dataset]): Validation data with labels
        model (AutoModelForCausalLM): LLM artifact.
        tokenizer (AutoTokenizer): LLM tokenizer object
        ds_column_mapping (dict): Dictionary mapping prompt entities to dataset column names.
        prompt_template_prefix (Union[str, None], optional): Text instruction to prepend to each transformed input text sample. Defaults to "".
        llm_prompt_col_name (str, optional): Name of the column with the built LLM prompts. Defaults to 'llm_prompt'
        llm_response_raw_col_name (str, optional): Name of the column containing prediction. Defaults to 'llm_response'.
        llm_response_cleaned_col_name (str, optional): Name of the column containing the final post processed result. Defaults to 'llm_response_cleaned'
        answer_start_token (str, optional): Token that indicates the start of answer generation. Defaults to ''
        answer_end_token (str, optional): Token that indicates the end of answer generation. Defaults to ''

    Returns:
        Dataset: Validation dataset with the raw and post-processed LLM predictions.
    """

    val_ds = build_LLM_prompt(
        val_data,
        ds_column_mapping=ds_column_mapping,
        prompt_template_prefix=prompt_template_prefix,
        answer_start_token=answer_start_token,
        llm_prompt_col_name=llm_prompt_col_name,
    )

    val_ds_with_pred = infer_LLM(
        model,
        tokenizer,
        val_ds,
        llm_prompt_col_name=llm_prompt_col_name,
        llm_response_raw_col_name=llm_response_raw_col_name,
        **options,
    )

    val_ds_with_pred = val_ds_with_pred.map(
        _postprocess,
        fn_kwargs={
            "llm_prompt_col_name": llm_prompt_col_name,
            "llm_response_raw_col_name": llm_response_raw_col_name,
            "llm_response_cleaned_col_name": llm_response_cleaned_col_name,
            "answer_end_token": answer_end_token,
        },
    )

    return val_ds_with_pred
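
An end-to-end sketch on a small labelled frame. The data, prompt wording, answer tokens and checkpoint are illustrative assumptions; the keyword options are forwarded to `infer_LLM` as shown in the source above.

```python
import pandas as pd
from transformers import AutoModelForCausalLM, AutoTokenizer

from dqc.llm_utils import run_LLM  # assumed import path, based on the source location above

# Toy labelled validation data; column names and label values are illustrative
val_df = pd.DataFrame(
    {
        "text": ["Great value for money.", "Stopped working after a week."],
        "label": ["positive", "negative"],
    }
)

model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # placeholder checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

val_ds_with_pred = run_LLM(
    val_df,
    model,
    tokenizer,
    ds_column_mapping={"review": "text"},
    prompt_template_prefix="Classify the sentiment of the review as positive or negative. ",
    answer_start_token="[label]",
    answer_end_token="[/label]",
    random_state=42,
)

# The returned dataset carries the 'llm_prompt', 'llm_response' and 'llm_response_cleaned' columns
print(val_ds_with_pred["llm_response_cleaned"][:2])
```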

compute_selfensembling_confidence_score

Util function to compute confidence score of a given target text using LLM generated reference texts.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `example` | `LazyRow` | A row of data from a dataset containing the target and reference texts | required |
| `target_column` | `str` | Name of the column containing the target text for estimation of the confidence score | required |
| `reference_column_list` | `List[str]` | Names of the columns containing the reference texts to be compared with the target text | required |
| `scoring_method` | `Union[Callable[[str, str], float], str]` | A function or the string 'exact_match' to compute the confidence score | `'exact_match'` |
| `case_sensitive` | `bool` | `True` if string comparisons need to be case aware, else `False` | `False` |

Raises: ValueError: If scoring_method is neither 'exact_match' nor a valid callable function

Returns:

| Type | Description |
| --- | --- |
| `dict` | Dictionary with a single key `'confidence_score'` holding a score between 0 and 1 that quantifies the confidence in the target text |

Source code in dqc/llm_utils/compute_confidence_score.py
def compute_selfensembling_confidence_score(
    example: datasets.formatting.formatting.LazyRow,
    target_column: str,
    reference_column_list: List[str],
    scoring_method: Union[Callable[[str, str], float], str] = "exact_match",
    case_sensitive: bool = False,
    **options,
) -> dict:
    """Util function to compute confidence score of a given target text using LLM generated reference texts.

    Args:
        example (datasets.formatting.formatting.LazyRow): A row of data from a dataset containing the target and reference texts.
        target_column (str): Name of the column containing the target text for estimation of confidence score.
        reference_column_list (List[str]): Names of the columns containing the reference texts to be compared with the target text.
        scoring_method (Union[Callable[[str, str], float], str], optional): A function or the string 'exact_match' to compute the confidence score. Defaults to 'exact_match'.
        case_sensitive (bool, optional): `True` if string comparisons need to be case aware. Else `False`. Defaults to `False`
    Raises:
        ValueError: If `scoring_method` is neither 'exact_match' nor a valid callable function

    Returns:
        dict: Dictionary with a single key 'confidence_score' holding a score between 0 and 1 that quantifies the confidence in the target text
    """
    if not callable(scoring_method) and scoring_method != "exact_match":
        raise ValueError(
            "Parameter `scoring_method` must be 'exact_match' or a valid callable that measures string similarity"
        )

    reference_text_list = [example[col] for col in reference_column_list]
    target_text = example[target_column]

    if not case_sensitive:
        target_text = target_text.lower()
        reference_text_list = [text.lower() for text in reference_text_list]

    if scoring_method == "exact_match":
        score = _compute_exact_match_score(target_text, reference_text_list)
    else:
        score = _compute_custom_match_score(
            target_text, reference_text_list, scoring_method
        )

    return {"confidence_score": score}