Loading...
Loading...
Compare original and translation side by side
<evaluation>
<qa_pair>
<question>Your question here</question>
<answer>Single verifiable answer</answer>
</qa_pair>
</evaluation><evaluation>
<qa_pair>
<question>Your question here</question>
<answer>Single verifiable answer</answer>
</qa_pair>
</evaluation>limitlimit<evaluation>
<qa_pair>
<question>Find the project created in Q2 2024 with the highest number of completed tasks. What is the project name?</question>
<answer>Website Redesign</answer>
</qa_pair>
<qa_pair>
<question>Search for issues labeled as "bug" that were closed in March 2024. Which user closed the most issues? Provide their username.</question>
<answer>sarah_dev</answer>
</qa_pair>
<qa_pair>
<question>Look for pull requests that modified files in the /api directory and were merged between January 1 and January 31, 2024. How many different contributors worked on these PRs?</question>
<answer>7</answer>
</qa_pair>
<qa_pair>
<question>Find the repository with the most stars that was created before 2023. What is the repository name?</question>
<answer>data-pipeline</answer>
</qa_pair>
</evaluation><evaluation>
<qa_pair>
<question>Find the project created in Q2 2024 with the highest number of completed tasks. What is the project name?</question>
<answer>Website Redesign</answer>
</qa_pair>
<qa_pair>
<question>Search for issues labeled as "bug" that were closed in March 2024. Which user closed the most issues? Provide their username.</question>
<answer>sarah_dev</answer>
</qa_pair>
<qa_pair>
<question>Look for pull requests that modified files in the /api directory and were merged between January 1 and January 31, 2024. How many different contributors worked on these PRs?</question>
<answer>7</answer>
</qa_pair>
<qa_pair>
<question>Find the repository with the most stars that was created before 2023. What is the repository name?</question>
<answer>data-pipeline</answer>
</qa_pair>
</evaluation><qa_pair>
<question>Find the repository that was archived in Q3 2023 and had previously been the most forked project in the organization. What was the primary programming language used in that repository?</question>
<answer>Python</answer>
</qa_pair><qa_pair>
<question>Locate the initiative focused on improving customer onboarding that was completed in late 2023. The project lead created a retrospective document after completion. What was the lead's role title at that time?</question>
<answer>Product Manager</answer>
</qa_pair><qa_pair>
<question>Among all bugs reported in January 2024 that were marked as critical priority, which assignee resolved the highest percentage of their assigned bugs within 48 hours? Provide the assignee's username.</question>
<answer>alex_eng</answer>
</qa_pair><qa_pair>
<question>Find the account that upgraded from the Starter to Enterprise plan in Q4 2023 and had the highest annual contract value. What industry does this account operate in?</question>
<answer>Healthcare</answer>
</qa_pair><qa_pair>
<question>Find the repository that was archived in Q3 2023 and had previously been the most forked project in the organization. What was the primary programming language used in that repository?</question>
<answer>Python</answer>
</qa_pair><qa_pair>
<question>Locate the initiative focused on improving customer onboarding that was completed in late 2023. The project lead created a retrospective document after completion. What was the lead's role title at that time?</question>
<answer>Product Manager</answer>
</qa_pair><qa_pair>
<question>Among all bugs reported in January 2024 that were marked as critical priority, which assignee resolved the highest percentage of their assigned bugs within 48 hours? Provide the assignee's username.</question>
<answer>alex_eng</answer>
</qa_pair><qa_pair>
<question>Find the account that upgraded from the Starter to Enterprise plan in Q4 2023 and had the highest annual contract value. What industry does this account operate in?</question>
<answer>Healthcare</answer>
</qa_pair><qa_pair>
<question>How many open issues are currently assigned to the engineering team?</question>
<answer>47</answer>
</qa_pair><qa_pair>
<question>Find the pull request with title "Add authentication feature" and tell me who created it.</question>
<answer>developer123</answer>
</qa_pair><qa_pair>
<question>List all the repositories that have Python as their primary language.</question>
<answer>repo1, repo2, repo3, data-pipeline, ml-tools</answer>
</qa_pair><qa_pair>
<question>How many open issues are currently assigned to the engineering team?</question>
<answer>47</answer>
</qa_pair><qa_pair>
<question>Find the pull request with title "Add authentication feature" and tell me who created it.</question>
<answer>developer123</answer>
</qa_pair><qa_pair>
<question>List all the repositories that have Python as their primary language.</question>
<answer>repo1, repo2, repo3, data-pipeline, ml-tools</answer>
</qa_pair><qa_pair><qa_pair>pip install -r scripts/requirements.txtpip install anthropic mcpexport ANTHROPIC_API_KEY=your_api_key_herepip install -r scripts/requirements.txtpip install anthropic mcpexport ANTHROPIC_API_KEY=your_api_key_here<qa_pair><evaluation>
<qa_pair>
<question>Find the project created in Q2 2024 with the highest number of completed tasks. What is the project name?</question>
<answer>Website Redesign</answer>
</qa_pair>
<qa_pair>
<question>Search for issues labeled as "bug" that were closed in March 2024. Which user closed the most issues? Provide their username.</question>
<answer>sarah_dev</answer>
</qa_pair>
</evaluation><qa_pair><evaluation>
<qa_pair>
<question>Find the project created in Q2 2024 with the highest number of completed tasks. What is the project name?</question>
<answer>Website Redesign</answer>
</qa_pair>
<qa_pair>
<question>Search for issues labeled as "bug" that were closed in March 2024. Which user closed the most issues? Provide their username.</question>
<answer>sarah_dev</answer>
</qa_pair>
</evaluation>scripts/evaluation.pyscripts/evaluation.pypython scripts/evaluation.py \
-t stdio \
-c python \
-a my_mcp_server.py \
evaluation.xmlpython scripts/evaluation.py \
-t stdio \
-c python \
-a my_mcp_server.py \
-e API_KEY=abc123 \
-e DEBUG=true \
evaluation.xmlpython scripts/evaluation.py \
-t stdio \
-c python \
-a my_mcp_server.py \
evaluation.xmlpython scripts/evaluation.py \
-t stdio \
-c python \
-a my_mcp_server.py \
-e API_KEY=abc123 \
-e DEBUG=true \
evaluation.xmlpython scripts/evaluation.py \
-t sse \
-u https://example.com/mcp \
-H "Authorization: Bearer token123" \
-H "X-Custom-Header: value" \
evaluation.xmlpython scripts/evaluation.py \
-t sse \
-u https://example.com/mcp \
-H "Authorization: Bearer token123" \
-H "X-Custom-Header: value" \
evaluation.xmlpython scripts/evaluation.py \
-t http \
-u https://example.com/mcp \
-H "Authorization: Bearer token123" \
evaluation.xmlpython scripts/evaluation.py \
-t http \
-u https://example.com/mcp \
-H "Authorization: Bearer token123" \
evaluation.xmlusage: evaluation.py [-h] [-t {stdio,sse,http}] [-m MODEL] [-c COMMAND]
[-a ARGS [ARGS ...]] [-e ENV [ENV ...]] [-u URL]
[-H HEADERS [HEADERS ...]] [-o OUTPUT]
eval_file
positional arguments:
eval_file Path to evaluation XML file
optional arguments:
-h, --help Show help message
-t, --transport Transport type: stdio, sse, or http (default: stdio)
-m, --model Claude model to use (default: claude-3-7-sonnet-20250219)
-o, --output Output file for report (default: print to stdout)
stdio options:
-c, --command Command to run MCP server (e.g., python, node)
-a, --args Arguments for the command (e.g., server.py)
-e, --env Environment variables in KEY=VALUE format
sse/http options:
-u, --url MCP server URL
-H, --header HTTP headers in 'Key: Value' formatusage: evaluation.py [-h] [-t {stdio,sse,http}] [-m MODEL] [-c COMMAND]
[-a ARGS [ARGS ...]] [-e ENV [ENV ...]] [-u URL]
[-H HEADERS [HEADERS ...]] [-o OUTPUT]
eval_file
positional arguments:
eval_file Path to evaluation XML file
optional arguments:
-h, --help Show help message
-t, --transport Transport type: stdio, sse, or http (default: stdio)
-m, --model Claude model to use (default: claude-3-7-sonnet-20250219)
-o, --output Output file for report (default: print to stdout)
stdio options:
-c, --command Command to run MCP server (e.g., python, node)
-a, --args Arguments for the command (e.g., server.py)
-e, --env Environment variables in KEY=VALUE format
sse/http options:
-u, --url MCP server URL
-H, --header HTTP headers in 'Key: Value' formatpython scripts/evaluation.py \
-t stdio \
-c python \
-a my_server.py \
-o evaluation_report.md \
evaluation.xmlpython scripts/evaluation.py \
-t stdio \
-c python \
-a my_server.py \
-o evaluation_report.md \
evaluation.xmlmy_evaluation.xml<evaluation>
<qa_pair>
<question>Find the user who created the most issues in January 2024. What is their username?</question>
<answer>alice_developer</answer>
</qa_pair>
<qa_pair>
<question>Among all pull requests merged in Q1 2024, which repository had the highest number? Provide the repository name.</question>
<answer>backend-api</answer>
</qa_pair>
<qa_pair>
<question>Find the project that was completed in December 2023 and had the longest duration from start to finish. How many days did it take?</question>
<answer>127</answer>
</qa_pair>
</evaluation>pip install -r scripts/requirements.txt
export ANTHROPIC_API_KEY=your_api_keypython scripts/evaluation.py \
-t stdio \
-c python \
-a github_mcp_server.py \
-e GITHUB_TOKEN=ghp_xxx \
-o github_eval_report.md \
my_evaluation.xmlgithub_eval_report.mdmy_evaluation.xml<evaluation>
<qa_pair>
<question>Find the user who created the most issues in January 2024. What is their username?</question>
<answer>alice_developer</answer>
</qa_pair>
<qa_pair>
<question>Among all pull requests merged in Q1 2024, which repository had the highest number? Provide the repository name.</question>
<answer>backend-api</answer>
</qa_pair>
<qa_pair>
<question>Find the project that was completed in December 2023 and had the longest duration from start to finish. How many days did it take?</question>
<answer>127</answer>
</qa_pair>
</evaluation>pip install -r scripts/requirements.txt
export ANTHROPIC_API_KEY=your_api_keypython scripts/evaluation.py \
-t stdio \
-c python \
-a github_mcp_server.py \
-e GITHUB_TOKEN=ghp_xxx \
-o github_eval_report.md \
my_evaluation.xmlgithub_eval_report.mdmcpmcp-vector-search mcpmcp-ticketer mcp --path <repo>mcp-browser mcpsetupinstalldoctor.mcp.jsontype: stdiocommandenv.mcp.json{
"mcpServers": {
"mcp-vector-search": {
"type": "stdio",
"command": "uv",
"args": ["run", "mcp-vector-search", "mcp"],
"env": {
"MCP_ENABLE_FILE_WATCHING": "true"
}
}
}
}MCP_TICKETER_ADAPTERKUZU_MEMORY_DBmcpmcp-vector-search mcpmcp-ticketer mcp --path <repo>mcp-browser mcpsetupinstalldoctor.mcp.jsontype: stdiocommandenv.mcp.json{
"mcpServers": {
"mcp-vector-search": {
"type": "stdio",
"command": "uv",
"args": ["run", "mcp-vector-search", "mcp"],
"env": {
"MCP_ENABLE_FILE_WATCHING": "true"
}
}
}
}MCP_TICKETER_ADAPTERKUZU_MEMORY_DBclaude-3-7-sonnet-20250219claude-3-7-sonnet-20250219