Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions bin/aws_claude_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/usr/bin/env python
"""Smoke-test script: send one hard-coded prompt to Anthropic Claude v2 via AWS Bedrock.

Requires AWS credentials with bedrock-runtime access in the environment.
"""

import boto3
import json


def main():
    """Invoke Claude v2 through Bedrock and print the generated completion."""
    bedrock = boto3.client(service_name='bedrock-runtime')

    # Claude v1/v2 text-completion models require the "\n\nHuman: ...\n\nAssistant:" framing.
    body = json.dumps({
        "prompt": "\n\nHuman:explain black holes to 8th graders\n\nAssistant:",
        "max_tokens_to_sample": 300,
        "temperature": 0.1,
        "top_p": 0.9,
    })

    model_id = 'anthropic.claude-v2'
    accept = 'application/json'
    content_type = 'application/json'

    response = bedrock.invoke_model(
        body=body, modelId=model_id, accept=accept, contentType=content_type
    )

    # The response body is a JSON stream; the generated text is under "completion".
    response_body = json.loads(response.get('body').read())
    print(response_body.get('completion'))


if __name__ == "__main__":
    # Guarded so importing this module never triggers a network call.
    main()
9 changes: 8 additions & 1 deletion lib/assessment/config.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
# The four rubric evidence levels an LLM response is allowed to assign.
VALID_LABELS = ["Extensive Evidence", "Convincing Evidence", "Limited Evidence", "No Evidence"]
# do not include gpt-4, so that we always know what version of the model we are using.
# NOTE: the old single-line SUPPORTED_MODELS assignment was dead code (immediately
# overwritten by the list below) and has been removed.
SUPPORTED_MODELS = [
    'anthropic.claude-v2',
    'gpt-4-0314',
    'gpt-4-32k-0314',
    'gpt-4-0613',
    'gpt-4-32k-0613',
    'gpt-4-1106-preview'
]
DEFAULT_MODEL = 'gpt-4-0613'
LESSONS = {
# "U3-2022-L10" : "1ROCbvHb3yWGVoQqzKAjwdaF0dSRPUjy_",
Expand Down
40 changes: 40 additions & 0 deletions lib/assessment/label.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import time
import requests
import logging
import boto3

from typing import List, Dict, Any
from lib.assessment.config import VALID_LABELS
Expand Down Expand Up @@ -48,6 +49,44 @@ def statically_label_student_work(self, rubric, student_code, student_id, exampl
return None

def ai_label_student_work(self, prompt, rubric, student_code, student_id, examples=[], num_responses=0, temperature=0.0, llm_model=""):
    """Dispatch labeling of one student's work to the backend matching llm_model.

    Model names starting with "gpt" are routed to OpenAI; names starting with
    "anthropic" are routed to AWS Bedrock. All other arguments are forwarded
    unchanged to the selected backend.

    Raises:
        ValueError: if llm_model matches neither known prefix. (ValueError is a
        subclass of Exception, so existing `except Exception` callers still work.)
    """
    if llm_model.startswith("gpt"):
        return self.openai_label_student_work(prompt, rubric, student_code, student_id, examples=examples, num_responses=num_responses, temperature=temperature, llm_model=llm_model)
    elif llm_model.startswith("anthropic"):
        return self.anthropic_label_student_work(prompt, rubric, student_code, student_id, examples=examples, num_responses=num_responses, temperature=temperature, llm_model=llm_model)
    else:
        # Fail loudly on unrecognized models rather than silently picking a default.
        raise ValueError(f"Unknown model: {llm_model}")

def anthropic_label_student_work(self, prompt, rubric, student_code, student_id, examples=None, num_responses=0, temperature=0.0, llm_model=""):
    """Label one student's work by invoking an Anthropic model on AWS Bedrock.

    Builds a Human/Assistant-framed prompt, calls the Bedrock invoke_model API,
    parses the completion, and validates it into TSV rubric data.

    Args:
        prompt: base grading instructions.
        rubric: rubric text appended to the prompt.
        student_code: the code being assessed.
        student_id: identifier used when validating the TSV response.
        examples: optional few-shot examples; default fixed from a mutable `[]`.
        num_responses: unused on this path (Bedrock returns a single completion).
        temperature: sampling temperature forwarded to the model.
        llm_model: Bedrock model id, e.g. 'anthropic.claude-v2'.

    Returns:
        dict with 'metadata' (agent name and raw request body) and 'data'
        (validated TSV rows).

    Raises:
        Re-raises validation errors from get_tsv_data_if_valid (reraise=True).
    """
    # Avoid the shared-mutable-default pitfall.
    examples = [] if examples is None else examples
    bedrock = boto3.client(service_name='bedrock-runtime')

    anthropic_prompt = self.compute_anthropic_prompt(prompt, rubric, student_code, examples=examples)
    body = json.dumps({
        "prompt": anthropic_prompt,
        "max_tokens_to_sample": 1024,
        "temperature": temperature,
        # "top_p": 0.9,
    })
    accept = 'application/json'
    content_type = 'application/json'
    response = bedrock.invoke_model(body=body, modelId=llm_model, accept=accept, contentType=content_type)

    # Bedrock returns a streaming body containing a JSON document; the model's
    # text lives under "completion" for the Claude text-completion API.
    response_body = json.loads(response.get('body').read())
    generation = response_body.get('completion')

    tsv_data = self.get_tsv_data_if_valid(generation, rubric, student_id, reraise=True)

    return {
        'metadata': {
            'agent': 'anthropic',
            'request': body,
        },
        'data': tsv_data,
    }

def compute_anthropic_prompt(self, prompt, rubric, student_code, examples=[]):
    """Assemble the Human/Assistant-framed prompt for Claude text-completion models.

    NOTE(review): `examples` is accepted for signature parity with the OpenAI
    path but is not currently folded into the prompt — confirm whether few-shot
    examples should be included here.
    """
    sections = [
        f"Human:\n{prompt}",
        f"Rubric:\n{rubric}",
        f"Student Code:\n{student_code}",
        "Assistant:\n",
    ]
    return "\n\n".join(sections)

def openai_label_student_work(self, prompt, rubric, student_code, student_id, examples=[], num_responses=0, temperature=0.0, llm_model=""):
# Determine the OpenAI URL and headers
api_url = 'https://api.openai.com/v1/chat/completions'
headers = {
Expand Down Expand Up @@ -178,6 +217,7 @@ def tsv_data_from_choices(self, info, rubric, student_id):
tsv_data = self.get_consensus_response(tsv_data_choices, student_id)
return tsv_data

# TODO: rename to compute_openai_messages
def compute_messages(self, prompt, rubric, student_code, examples=[]):
messages = [
{'role': 'system', 'content': f"{prompt}\n\nRubric:\n{rubric}"}
Expand Down