GuardRails

Guardrails is an open-source Python package for specifying structure and type, and for validating and correcting the outputs of large language models (LLMs).

Guardrails has a built-in validation and retry mechanism that you can hook into to check whether DeepEval metrics pass on a generated output. The example below registers a custom Guardrails validator that wraps DeepEval's FactualConsistencyMetric.

from typing import Dict

from guardrails.validators import (
    FailResult,
    PassResult,
    register_validator,
    ValidationResult,
)

from deepeval.metrics.factual_consistency import FactualConsistencyMetric
from deepeval.test_case import LLMTestCase

# Register a new validator under the name "factual-consistency".
# Guardrails will use it to check the factual consistency of generated text.
@register_validator(name="factual-consistency", data_type="string")
def factual_consistency(value: str, metadata: Dict) -> ValidationResult:
    # 'output' is the generated text and 'context' is the text the LLM is
    # supposed to reason over and derive its conclusions from.
    output = metadata.get("output")
    context = metadata.get("context")

    # Use DeepEval's FactualConsistencyMetric to score the output against the context.
    metric = FactualConsistencyMetric()
    test_case = LLMTestCase(query="This is an example input", output=output, context=context)
    score = metric.measure(test_case)

    # If the score is greater than or equal to the metric's minimum score, the check passes.
    if score >= metric.minimum_score:
        return PassResult(metadata=metadata)

    # Otherwise, return a FailResult with an appropriate error message.
    return FailResult(
        error_message=f"Factual consistency score {score} is less than {metric.minimum_score}."
    )
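
Once registered, the validator can be referenced by its name in a RAIL spec (or any other Guard configuration) and supplied with the metadata it expects at call time. The sketch below is one possible way to wire it up; the field name ("summary"), the prompt, the document and output strings, and the assumption that Guard.parse forwards a metadata argument to validators are illustrative rather than prescriptive.

import guardrails as gd

# A minimal sketch: reference the validator by its registered name via the
# 'format' attribute, and ask Guardrails to reask the LLM when it fails.
rail_spec = """
<rail version="0.1">
<output>
    <string
        name="summary"
        description="A factually consistent summary of the document."
        format="factual-consistency"
        on-fail-factual-consistency="reask"
    />
</output>
<prompt>
Summarize the following document:

{{document}}

@complete_json_suffix
</prompt>
</rail>
"""

guard = gd.Guard.from_rail_string(rail_spec)

# Because the validator reads both 'output' and 'context' from metadata, it is
# shown here re-validating an output that has already been generated. The
# metadata keyword on Guard.parse is assumed to be forwarded to validators.
document = "France's capital city is Paris."
llm_output = '{"summary": "Paris is the capital of France."}'

validated = guard.parse(
    llm_output,
    metadata={
        "output": "Paris is the capital of France.",
        "context": document,
    },
)

Whenever the factual consistency score falls below the metric's minimum score, the FailResult triggers Guardrails' on-fail behavior (here, a reask), which is how Guardrails' retry mechanism and DeepEval's metrics fit together.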