Step 1: Install Required Libraries

pip install pydantic requests

Step 2: Set Up API Configuration

import os
import requests
import json
import time

# Replace the placeholder values below with your own keys.
os.environ["OPENAI_API_KEY"] = "YOUR OPENAI API KEY"
BASE_URL = "https://orch.zenbase.ai/api"
API_KEY = "YOUR ZENBASE API KEY"

def api_call(method, endpoint, data=None, files=None):
    """Send an authenticated request to the Zenbase API and return the raw response."""
    url = f"{BASE_URL}/{endpoint}"
    headers = {"Authorization": f"Api-Key {API_KEY}"}
    # File uploads go out as multipart/form-data; everything else is sent as JSON.
    if not files:
        headers["Content-Type"] = "application/json"
        if data is not None:
            data = json.dumps(data)
    response = requests.request(method, url, headers=headers, data=data, files=files)
    return response
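
The calls below assume each request succeeds. While iterating, it helps to fail fast on HTTP errors with requests' built-in status check; for example (this assumes the functions/ endpoint also accepts GET for listing, which may differ in your API version):
response = api_call("GET", "functions/")
response.raise_for_status()  # raises requests.HTTPError on any 4xx/5xx status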

Step 3: Create and Configure Your Optimizer

First, create your function, dataset, and optimizer configuration as shown in the previous tutorials.
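If you're starting from this page rather than the earlier tutorials, the Pydantic models referenced below might look something like this (the field names are illustrative, not the exact schemas from those tutorials):
from pydantic import BaseModel

class TextualEntailmentInput(BaseModel):
    premise: str
    hypothesis: str

class TextualEntailmentOutput(BaseModel):
    label: str  # e.g. "entailment", "neutral", or "contradiction"

With the schemas in place, here's a quick example: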
# Create function
function_data = {
    "name": "Textual Entailment Analysis",
    "description": "Analyze the logical relationship between a premise and a hypothesis.",
    "prompt": "Your prompt here...",
    "input_schema": TextualEntailmentInput.model_json_schema(),
    "output_schema": TextualEntailmentOutput.model_json_schema(),
    "api_key": os.environ.get("OPENAI_API_KEY"),
    "model": "gpt-4o-mini"
}
function = api_call("POST", "functions/", function_data)
function_id = function.json()['id']

# Create optimizer configuration
optimizer_data = {
    "function": function_id,
    "magical_set": dataset_id,  # Your dataset ID here
    "parameters": {
        "shots": 2,
    },
    "api_key": os.environ.get("OPENAI_API_KEY"),
    "model": "gpt-4o-mini",
    "optimizer_type": "dynamic_fewshot"
}
optimizer = api_call("POST", "optimizer-configurations/", optimizer_data)
optimizer_id = optimizer.json()['id']

Step 4: Prepare Your Batch Input File

Create a JSON file containing an array of inputs you want to process. Each input should include an object_id for tracking results:
input_data = [
    {
        "inputs": {
            "premise": "The sun rises in the east.",
            "hypothesis": "The sun sets in the west."
        },
        "object_id": 1
    },
    {
        "inputs": {
            "premise": "All birds have feathers.",
            "hypothesis": "Penguins have feathers."
        },
        "object_id": 2
    },
    {
        "inputs": {
            "premise": "Water freezes at 0°C.",
            "hypothesis": "Ice melts above 0°C."
        },
        "object_id": 3
    }
]

# Save inputs to a file
inputs_path = "./batch_inputs.json"
with open(inputs_path, 'w') as f:
    json.dump(input_data, f, indent=2)
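
Optionally, validate each entry against the input model from Step 3 before uploading, so schema mistakes surface locally instead of inside the batch run (this assumes Pydantic v2's model_validate):
# Sanity check: every entry must match the input schema
for item in input_data:
    TextualEntailmentInput.model_validate(item["inputs"])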

Step 5: Submit the Batch Run

Submit your batch run request with the input file:
# Open the input file in binary mode and submit the batch run
with open(inputs_path, "rb") as f:
    batch_run = api_call(
        "POST",
        "batch-run/",
        data={"configuration": optimizer_id},
        files={"file": f},
    )
batch_run_id = batch_run.json()['id']
print(f"Created batch run with ID: {batch_run_id}")

Step 6: Monitor Batch Run Status

Check the status of your batch run:
while True:
    batch_run_status = api_call("GET", f"batch-run/{batch_run_id}/status").json()['status']
    print(f"Current status: {batch_run_status}")
    
    if batch_run_status == "COMPLETED":
        print("Batch run completed successfully.")
        break
    elif batch_run_status == "FAILED":
        print("Batch run failed.")
        break
        
    print("Batch run is still running. Waiting for 5 seconds before checking again...")
    time.sleep(5)
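
For unattended jobs, you may prefer a hard deadline to an open-ended loop. A minimal variant (the 10-minute limit is an arbitrary choice):
deadline = time.time() + 600  # give up after 10 minutes (arbitrary)
while time.time() < deadline:
    status = api_call("GET", f"batch-run/{batch_run_id}/status").json()["status"]
    if status in ("COMPLETED", "FAILED"):
        break
    time.sleep(5)
else:
    raise TimeoutError(f"Batch run {batch_run_id} did not finish before the deadline")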

Step 7: Retrieve Batch Results

Once the batch run is complete, retrieve the results:
# Get all results
final_results = []
results = api_call("GET", f"function-run-logs/?batch_run={batch_run_id}&page=1").json()
final_results.extend(results['results'])

# Handle pagination if needed (the arithmetic below assumes 10 results per page)
count = results['count']
total_pages = (count + 9) // 10  # ceiling division
for page in range(2, total_pages + 1):
    results = api_call("GET", f"function-run-logs/?batch_run={batch_run_id}&page={page}").json()
    final_results.extend(results['results'])

# Print results
for result in final_results:
    print(f"Object ID: {result['outputs']['object_id']}")
    print(f"Output: {result['outputs']['output']}\n")

Step 8: Get Results for Specific Inputs

You can also retrieve results for specific inputs using their object IDs:
# Get result for object_id = 1
result = api_call("GET", f"function-run-logs/?batch_run={batch_run_id}&page=1&object_id=1").json()
print(f"Result for object_id 1: {result['results'][0]['outputs']}")

The batch run feature lets you process many inputs in a single request, which is especially useful for analyzing large datasets in parallel. Results are stored and can be retrieved all at once or individually by object ID.