rename folder to prompt_evaluations and update README
@@ -0,0 +1,8 @@
|
||||
To get started, set your `ANTHROPIC_API_KEY` environment variable.
|
||||
|
||||
Then run:
|
||||
```
|
||||
promptfoo eval
|
||||
```
|
||||
|
||||
Afterwards, you can view the results by running `promptfoo view`.
|
||||
@@ -0,0 +1,13 @@
|
||||
animal_statement,__expected
|
||||
"The animal is a human.","2"
|
||||
"The animal is a snake.","0"
|
||||
"The fox lost a leg, but then magically grew back the leg he lost and a mysterious extra leg on top of that.","5"
|
||||
"The animal is a dog.","4"
|
||||
"The animal is a cat with two extra legs.","6"
|
||||
"The animal is an elephant.","4"
|
||||
"The animal is a bird.","2"
|
||||
"The animal is a fish.","0"
|
||||
"The animal is a spider with two extra legs.","10"
|
||||
"The animal is an octopus.","8"
|
||||
"The animal is an octopus that lost two legs and then regrew three legs.","9"
|
||||
"The animal is a two-headed, eight-legged mythical creature.","8"
|
||||
|
|
After Width: | Height: | Size: 87 KiB |
|
After Width: | Height: | Size: 402 KiB |
|
After Width: | Height: | Size: 276 KiB |
|
After Width: | Height: | Size: 69 KiB |
|
After Width: | Height: | Size: 551 KiB |
|
After Width: | Height: | Size: 553 KiB |
|
After Width: | Height: | Size: 580 KiB |
|
After Width: | Height: | Size: 560 KiB |
|
After Width: | Height: | Size: 666 KiB |
|
After Width: | Height: | Size: 446 KiB |
|
After Width: | Height: | Size: 88 KiB |
6667
prompt_evaluations/05_prompt_foo_code_graded_animals/package-lock.json
generated
Normal file
@@ -0,0 +1,5 @@
|
||||
{
|
||||
"dependencies": {
|
||||
"promptfoo": "^0.78.0"
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,18 @@
|
||||
description: "Animal Legs Eval"
|
||||
|
||||
prompts:
|
||||
- prompts.py:simple_prompt
|
||||
- prompts.py:better_prompt
|
||||
- prompts.py:chain_of_thought_prompt
|
||||
|
||||
providers:
|
||||
- anthropic:messages:claude-3-haiku-20240307
|
||||
- anthropic:messages:claude-3-5-sonnet-20240620
|
||||
|
||||
tests: animal_legs_tests.csv
|
||||
|
||||
defaultTest:
|
||||
options:
|
||||
transform: file://transform.py
|
||||
|
||||
|
||||
@@ -0,0 +1,26 @@
|
||||
def simple_prompt(animal_statement):
    """Build the baseline leg-counting prompt for one animal statement.

    The statement is interpolated inside ``<animal_statement>`` tags and the
    model is asked to respond with a number.

    Args:
        animal_statement: Text describing the animal.

    Returns:
        The complete prompt as a single string.
    """
    prompt_lines = [
        "You will be provided a statement about an animal and your job is to determine how many legs that animal has.",
        "",
        "Here is the animal statement.",
        f"<animal_statement>{animal_statement}</animal_statement>",
        "",
        "How many legs does the animal have? Please respond with a number",
    ]
    return "\n".join(prompt_lines)
|
||||
|
||||
def better_prompt(animal_statement):
    """Build a leg-counting prompt that constrains the reply to a bare number.

    The statement is interpolated inside ``<animal_statement>`` tags and the
    model is told to answer with only the number, written in digits.

    Args:
        animal_statement: Text describing the animal.

    Returns:
        The complete prompt as a single string.
    """
    prompt_lines = [
        "You will be provided a statement about an animal and your job is to determine how many legs that animal has.",
        "",
        "Here is the animal statement.",
        f"<animal_statement>{animal_statement}</animal_statement>",
        "",
        # Leg counts can exceed 9 (a spider with two extra legs has 10), so
        # ask for a number in digits rather than "a single digit".
        "How many legs does the animal have? Please respond with only the number written in digits, like 2 or 10",
    ]
    return "\n".join(prompt_lines)
|
||||
|
||||
def chain_of_thought_prompt(animal_statement):
    """Build a leg-counting prompt that asks for reasoning before the answer.

    The statement is interpolated inside ``<animal_statement>`` tags. The
    model is told to reason inside ``<thinking>`` tags and then give only the
    integer leg count inside ``<answer>`` tags.

    Args:
        animal_statement: Text describing the animal.

    Returns:
        The complete prompt as a single string.
    """
    prompt_lines = [
        "You will be provided a statement about an animal and your job is to determine how many legs that animal has.",
        "",
        "Here is the animal statement.",
        f"<animal_statement>{animal_statement}</animal_statement>",
        "",
        "How many legs does the animal have?",
        "Start by reasoning about the numbers of legs the animal has, thinking step by step inside of <thinking> tags.",
        "Then, output your final answer inside of <answer> tags.",
        "Inside the <answer> tags return just the number of legs as an integer and nothing else.",
    ]
    return "\n".join(prompt_lines)
|
||||
@@ -0,0 +1,9 @@
|
||||
def get_transform(output, context):
    """Reduce a model response to the text inside its ``<answer>`` tags.

    Responses without an ``<answer>`` tag (for example, replies that are
    already a bare number) are returned unchanged, so the same transform can
    be applied to every prompt.

    Args:
        output: The raw model response.
        context: Extra information supplied by the caller; not used here.

    Returns:
        The stripped text between the first ``<answer>`` tag and the next
        ``</answer>`` tag, or ``output`` unchanged when it is not a string or
        contains no ``<answer>`` tag.
    """
    # Key on the tag that is actually parsed: a reply that has <answer> but
    # skipped <thinking> must still be reduced to the bare answer.
    if not isinstance(output, str) or "<answer>" not in output:
        return output

    # The guard above guarantees index 1 exists, so no exception handling is
    # needed. A missing closing tag yields everything after the opening tag.
    after_open_tag = output.split("<answer>")[1]
    return after_open_tag.split("</answer>")[0].strip()
|
||||
|
||||