rename folder to prompt_evaluations and update README

2024-09-04 17:06:50 -06:00
parent 8174d9d587
commit c2dd47ec9b
94 changed files with 8 additions and 11 deletions
--- a/prompt_evaluations/05_prompt_foo_code_graded_animals/README.md
+++ b/prompt_evaluations/05_prompt_foo_code_graded_animals/README.md
@@ -0,0 +1,8 @@
+To get started, set your ANTHROPIC_API_KEY environment variable.
+
+Then run:
+```
+promptfoo eval
+```
+
+Afterwards, you can view the results by running `promptfoo view`.
--- a/prompt_evaluations/05_prompt_foo_code_graded_animals/animal_legs_tests.csv
+++ b/prompt_evaluations/05_prompt_foo_code_graded_animals/animal_legs_tests.csv
@@ -0,0 +1,13 @@
+animal_statement,__expected
+"The animal is a human.","2"
+"The animal is a snake.","0"
+"The fox lost a leg, but then magically grew back the leg he lost and a mysterious extra leg on top of that.","5"
+"The animal is a dog.","4"
+"The animal is a cat with two extra legs.","6"
+"The animal is an elephant.","4"
+"The animal is a bird.","2"
+"The animal is a fish.","0"
+"The animal is a spider with two extra legs.","10"
+"The animal is an octopus.","8"
+"The animal is an octopus that lost two legs and then regrew three legs.","9"
+"The animal is a two-headed, eight-legged mythical creature.","8"
--- a/prompt_evaluations/05_prompt_foo_code_graded_animals/images/details.png
+++ b/prompt_evaluations/05_prompt_foo_code_graded_animals/images/details.png
--- a/prompt_evaluations/05_prompt_foo_code_graded_animals/images/eval_output.png
+++ b/prompt_evaluations/05_prompt_foo_code_graded_animals/images/eval_output.png
--- a/prompt_evaluations/05_prompt_foo_code_graded_animals/images/eval_output1.png
+++ b/prompt_evaluations/05_prompt_foo_code_graded_animals/images/eval_output1.png
--- a/prompt_evaluations/05_prompt_foo_code_graded_animals/images/eval_results1.png
+++ b/prompt_evaluations/05_prompt_foo_code_graded_animals/images/eval_results1.png
--- a/prompt_evaluations/05_prompt_foo_code_graded_animals/images/eval_view.png
+++ b/prompt_evaluations/05_prompt_foo_code_graded_animals/images/eval_view.png
--- a/prompt_evaluations/05_prompt_foo_code_graded_animals/images/eval_view1.png
+++ b/prompt_evaluations/05_prompt_foo_code_graded_animals/images/eval_view1.png
--- a/prompt_evaluations/05_prompt_foo_code_graded_animals/images/final_view.png
+++ b/prompt_evaluations/05_prompt_foo_code_graded_animals/images/final_view.png
--- a/prompt_evaluations/05_prompt_foo_code_graded_animals/images/multi_model_eval_view.png
+++ b/prompt_evaluations/05_prompt_foo_code_graded_animals/images/multi_model_eval_view.png
--- a/prompt_evaluations/05_prompt_foo_code_graded_animals/images/prompt_foo.png
+++ b/prompt_evaluations/05_prompt_foo_code_graded_animals/images/prompt_foo.png
--- a/prompt_evaluations/05_prompt_foo_code_graded_animals/images/three_prompt_eval.png
+++ b/prompt_evaluations/05_prompt_foo_code_graded_animals/images/three_prompt_eval.png
--- a/prompt_evaluations/05_prompt_foo_code_graded_animals/images/toolbar.png
+++ b/prompt_evaluations/05_prompt_foo_code_graded_animals/images/toolbar.png
--- a/prompt_evaluations/05_prompt_foo_code_graded_animals/lesson.ipynb
+++ b/prompt_evaluations/05_prompt_foo_code_graded_animals/lesson.ipynb
--- a/prompt_evaluations/05_prompt_foo_code_graded_animals/package-lock.json
+++ b/prompt_evaluations/05_prompt_foo_code_graded_animals/package-lock.json
--- a/prompt_evaluations/05_prompt_foo_code_graded_animals/package.json
+++ b/prompt_evaluations/05_prompt_foo_code_graded_animals/package.json
@@ -0,0 +1,5 @@
+{
+  "dependencies": {
+    "promptfoo": "^0.78.0"
+  }
+}
--- a/prompt_evaluations/05_prompt_foo_code_graded_animals/promptfooconfig.yaml
+++ b/prompt_evaluations/05_prompt_foo_code_graded_animals/promptfooconfig.yaml
@@ -0,0 +1,18 @@
+description: "Animal Legs Eval"
+
+prompts:
+  - prompts.py:simple_prompt
+  - prompts.py:better_prompt
+  - prompts.py:chain_of_thought_prompt
+  
+providers:
+  - anthropic:messages:claude-3-haiku-20240307
+  - anthropic:messages:claude-3-5-sonnet-20240620
+
+tests: animal_legs_tests.csv
+
+defaultTest:
+  options:
+    transform: file://transform.py
+
+
--- a/prompt_evaluations/05_prompt_foo_code_graded_animals/prompts.py
+++ b/prompt_evaluations/05_prompt_foo_code_graded_animals/prompts.py
@@ -0,0 +1,26 @@
+def simple_prompt(animal_statement):
+    """Build the baseline leg-counting prompt for one animal statement.
+
+    The statement is wrapped in <animal_statement> tags and the model is asked
+    to respond with a number. Every line after the first carries a four-space
+    indent (and the two separator lines consist of only that indent); this
+    whitespace is part of the returned prompt text.
+
+    Args:
+        animal_statement: Text describing the animal; interpolated as-is.
+
+    Returns:
+        The complete prompt string.
+    """
+    pad = "    "
+    opening = (
+        "You will be provided a statement about an animal and your job is "
+        "to determine how many legs that animal has."
+    )
+    tagged_statement = f"<animal_statement>{animal_statement}</animal_statement>"
+    prompt_lines = (
+        opening,
+        pad,
+        pad + "Here is the animal statement.",
+        pad + tagged_statement,
+        pad,
+        pad + "How many legs does the animal have? Please respond with a number",
+    )
+    return "\n".join(prompt_lines)
+
+def better_prompt(animal_statement):
+    """Build a leg-counting prompt that constrains the reply to a bare number.
+
+    Same layout as the baseline prompt, but the closing instruction asks for
+    only a single integer. It deliberately says "integer" rather than "digit":
+    the accompanying test data contains an expected answer of 10, which a
+    single-digit instruction would rule out.
+
+    Args:
+        animal_statement: Text describing the animal; interpolated as-is.
+
+    Returns:
+        The complete prompt string. Lines after the first keep the four-space
+        indent used by the other prompts in this module.
+    """
+    pad = "    "
+    opening = (
+        "You will be provided a statement about an animal and your job is "
+        "to determine how many legs that animal has."
+    )
+    closing = (
+        "How many legs does the animal have? "
+        "Please only respond with a single integer like 2 or 10"
+    )
+    prompt_lines = (
+        opening,
+        pad,
+        pad + "Here is the animal statement.",
+        pad + f"<animal_statement>{animal_statement}</animal_statement>",
+        pad,
+        pad + closing,
+    )
+    return "\n".join(prompt_lines)
+
+def chain_of_thought_prompt(animal_statement):
+    """Build a leg-counting prompt that asks for reasoning before the answer.
+
+    The model is told to reason inside <thinking> tags and then give only the
+    integer leg count inside <answer> tags. Every line after the first carries
+    a four-space indent, and three of the instruction lines end in trailing
+    spaces; all of that whitespace is part of the returned prompt text.
+
+    Args:
+        animal_statement: Text describing the animal; interpolated as-is.
+
+    Returns:
+        The complete prompt string.
+    """
+    pad = "    "
+    opening = (
+        "You will be provided a statement about an animal and your job is "
+        "to determine how many legs that animal has."
+    )
+    reasoning_step = (
+        "Start by reasoning about the numbers of legs the animal has, "
+        "thinking step by step inside of <thinking> tags.  "
+    )
+    answer_format = (
+        "Inside the <answer> tags return just the number of legs "
+        "as an integer and nothing else."
+    )
+    prompt_lines = [
+        opening,
+        pad,
+        pad + "Here is the animal statement.",
+        pad + f"<animal_statement>{animal_statement}</animal_statement>",
+        pad,
+        pad + "How many legs does the animal have? ",
+        pad + reasoning_step,
+        pad + "Then, output your final answer inside of <answer> tags. ",
+        pad + answer_format,
+    ]
+    return "\n".join(prompt_lines)
--- a/prompt_evaluations/05_prompt_foo_code_graded_animals/transform.py
+++ b/prompt_evaluations/05_prompt_foo_code_graded_animals/transform.py
@@ -0,0 +1,9 @@
+def get_transform(output, context):
+    """Reduce a model response to the text inside its <answer> tags.
+
+    Referenced from promptfooconfig.yaml as the default test transform, so it
+    runs on the output of every prompt. Responses without an <answer> tag
+    (for example a bare number) pass through unchanged.
+
+    Extraction keys off the <answer> tag itself rather than the presence of
+    <thinking>, so a response that skips its reasoning block but still tags
+    the final answer is reduced to that answer.
+
+    Args:
+        output: The raw model output.
+        context: Supplied by the caller; not used here.
+
+    Returns:
+        The stripped text between the first <answer> tag and the following
+        </answer> tag (or the end of the text if the closing tag is missing).
+        If there is no <answer> tag, or the output is not a string, the
+        output is returned unchanged.
+    """
+    # Non-text outputs cannot carry tags; hand them back untouched instead
+    # of raising on the substring search.
+    if not isinstance(output, str):
+        return output
+
+    _, open_tag, after_open = output.partition("<answer>")
+    if not open_tag:
+        # Nothing to extract; not an error.
+        return output
+
+    answer, _, _ = after_open.partition("</answer>")
+    return answer.strip()
+