rename folder to prompt_evaluations and update README
This commit is contained in:
@@ -0,0 +1,8 @@
|
||||
To get started, set your `ANTHROPIC_API_KEY` environment variable.
|
||||
|
||||
Then run:
|
||||
```
|
||||
promptfoo eval
|
||||
```
|
||||
|
||||
Afterwards, you can view the results by running `promptfoo view`.
|
||||
17
prompt_evaluations/07_prompt_foo_custom_graders/count.py
Normal file
17
prompt_evaluations/07_prompt_foo_custom_graders/count.py
Normal file
@@ -0,0 +1,17 @@
|
||||
import re
|
||||
|
||||
def get_assert(output, context):
    """Grade an output by counting whole-word mentions of the test's topic.

    The match is case-insensitive, and a mention counts only when it is not
    embedded in a longer word (``sheep`` does not match ``sheepdog``).

    Args:
        output: The text response to grade.
        context: Mapping with a ``"vars"`` entry holding ``"topic"`` (the
            word or phrase to count) and ``"count"`` (the required number of
            mentions; converted with ``int``).

    Returns:
        A dict with ``"pass"`` (True when the counts match exactly),
        ``"score"`` (1 on pass, otherwise 0) and ``"reason"`` (a message
        stating the expected and actual counts).

    Raises:
        KeyError: If ``"vars"``, ``"topic"`` or ``"count"`` is missing.
        ValueError: If ``"count"`` cannot be converted to an integer.
    """
    test_vars = context["vars"]
    topic = test_vars["topic"]
    goal_count = int(test_vars["count"])

    # Lookarounds rather than `\b` or a leading whitespace group:
    #  * a mention directly after a quote, bracket or dash still counts;
    #  * topics that begin or end with a non-word character (e.g. "c++")
    #    can match, which `\b` next to such a character never allows.
    # str() guards against a numeric topic value, which re.escape rejects.
    pattern = rf"(?<!\w){re.escape(str(topic))}(?!\w)"

    # IGNORECASE folds case on both sides; lower-casing only the output
    # made any topic containing a capital letter impossible to match.
    actual_count = len(re.findall(pattern, output, flags=re.IGNORECASE))

    pass_result = goal_count == actual_count

    return {
        "pass": pass_result,
        "score": 1 if pass_result else 0,
        "reason": f"Expected {topic} to appear {goal_count} times. Actual: {actual_count}",
    }
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 466 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 578 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 408 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 126 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 325 KiB |
Binary file not shown.
|
After Width: | Height: | Size: 363 KiB |
330
prompt_evaluations/07_prompt_foo_custom_graders/lesson.ipynb
Normal file
330
prompt_evaluations/07_prompt_foo_custom_graders/lesson.ipynb
Normal file
File diff suppressed because one or more lines are too long
@@ -0,0 +1,28 @@
|
||||
description: Count mentions
|
||||
prompts:
|
||||
- >-
|
||||
Write a short paragraph about {{topic}}. Make sure you mention {{topic}} exactly {{count}} times, no more or fewer. Only use lower case letters in your output.
|
||||
providers:
|
||||
- anthropic:messages:claude-3-haiku-20240307
|
||||
- anthropic:messages:claude-3-5-sonnet-20240620
|
||||
defaultTest:
|
||||
assert:
|
||||
- type: python
|
||||
value: file://count.py
|
||||
tests:
|
||||
- vars:
|
||||
topic: sheep
|
||||
count: 3
|
||||
- vars:
|
||||
topic: fowl
|
||||
count: 2
|
||||
- vars:
|
||||
topic: gallows
|
||||
count: 4
|
||||
- vars:
|
||||
topic: tweezers
|
||||
count: 7
|
||||
- vars:
|
||||
topic: jeans
|
||||
count: 6
|
||||
|
||||
Reference in New Issue
Block a user