|
3 | 3 | using System; |
4 | 4 | using System.Collections.Generic; |
5 | 5 | using System.Linq; |
| 6 | +using System.Threading; |
6 | 7 | using System.Threading.Tasks; |
7 | 8 | using Microsoft.Extensions.AI; |
8 | 9 | using Microsoft.Extensions.AI.Evaluation; |
@@ -1415,4 +1416,165 @@ public void EvalItem_ConversationConstructor_NullSplitter_DefaultsToLastTurn() |
1415 | 1416 | Assert.Equal("Q2", item.Query); |
1416 | 1417 | Assert.Equal("A2", item.Response); |
1417 | 1418 | } |
| 1419 | + |
| 1420 | + // --------------------------------------------------------------- |
| 1421 | + // EvalItem.PerTurnItems edge case tests |
| 1422 | + // --------------------------------------------------------------- |
| 1423 | + |
[Fact]
public void PerTurnItems_EmptyConversation_ReturnsEmpty()
{
    // Arrange: a conversation with no messages at all.
    List<ChatMessage> emptyConversation = new();

    // Act
    var perTurnItems = EvalItem.PerTurnItems(emptyConversation);

    // Assert: nothing to split, so no items are produced.
    Assert.Empty(perTurnItems);
}
| 1430 | + |
[Fact]
public void PerTurnItems_NoUserMessages_ReturnsEmpty()
{
    // Arrange: one system message and one assistant message, no user message.
    List<ChatMessage> messagesWithoutUser = new()
    {
        new ChatMessage(ChatRole.System, "You are a helpful assistant."),
        new ChatMessage(ChatRole.Assistant, "Hello! How can I help?"),
    };

    // Act
    var perTurnItems = EvalItem.PerTurnItems(messagesWithoutUser);

    // Assert
    Assert.Empty(perTurnItems);
}
| 1443 | + |
[Fact]
public void PerTurnItems_SystemAndAssistantOnly_ReturnsEmpty()
{
    // Arrange: a system message followed by two consecutive assistant messages.
    List<ChatMessage> systemAndAssistantMessages = new()
    {
        new ChatMessage(ChatRole.System, "Be helpful"),
        new ChatMessage(ChatRole.Assistant, "First"),
        new ChatMessage(ChatRole.Assistant, "Second"),
    };

    // Act
    var perTurnItems = EvalItem.PerTurnItems(systemAndAssistantMessages);

    // Assert
    Assert.Empty(perTurnItems);
}
| 1457 | + |
| 1458 | + // --------------------------------------------------------------- |
| 1459 | + // MeaiEvaluatorAdapter tests |
| 1460 | + // --------------------------------------------------------------- |
| 1461 | + |
[Fact]
public async Task MeaiEvaluatorAdapter_PassesQueryMessagesAndResponse_ToEvaluatorAsync()
{
    // Arrange: wrap a recording evaluator so the arguments it receives can be inspected.
    var recordingEvaluator = new StubEvaluator();
    var chatConfiguration = new ChatConfiguration(new StubChatClient());
    var adapter = new MeaiEvaluatorAdapter(recordingEvaluator, chatConfiguration);

    List<ChatMessage> conversation = new()
    {
        new ChatMessage(ChatRole.User, "What is 2+2?"),
        new ChatMessage(ChatRole.Assistant, "4"),
    };
    List<EvalItem> items = new()
    {
        new EvalItem("What is 2+2?", "4", conversation),
    };

    // Act
    var results = await adapter.EvaluateAsync(items);

    // Assert: exactly one evaluator invocation was recorded.
    Assert.Single(recordingEvaluator.Calls);
    var recordedCall = recordingEvaluator.Calls[0];
    var receivedMessages = recordedCall.Messages;
    var receivedResponse = recordedCall.Response;

    // Only the query (user) message is expected here, not the full two-message conversation.
    Assert.Single(receivedMessages);
    var queryMessage = receivedMessages[0];
    Assert.Equal(ChatRole.User, queryMessage.Role);
    Assert.Equal("What is 2+2?", queryMessage.Text);

    // The last message of the response handed to the evaluator carries the assistant text.
    var finalResponseMessage = receivedResponse.Messages.Last();
    Assert.Equal("4", finalResponseMessage.Text);

    // The aggregated results expose the input item and name the evaluator as provider.
    Assert.NotNull(results.InputItems);
    Assert.Single(results.InputItems);
    Assert.Equal("StubEvaluator", results.ProviderName);
}
| 1499 | + |
[Fact]
public async Task MeaiEvaluatorAdapter_SyntheticResponse_WhenNoRawResponseAsync()
{
    // Arrange: the item is built from query/response text only (no conversation supplied).
    // When RawResponse is null, the adapter creates a synthetic ChatResponse.
    var stub = new StubEvaluator();
    var adapter = new MeaiEvaluatorAdapter(stub, new ChatConfiguration(new StubChatClient()));

    var items = new List<EvalItem>
    {
        new("query", "my response"),
    };

    // Act
    await adapter.EvaluateAsync(items);

    // Assert: require exactly one recorded call before inspecting it, so that a missing
    // (or duplicated) evaluator invocation fails as an assertion instead of surfacing as
    // an ArgumentOutOfRangeException from indexing an empty list.
    var (_, response, _) = Assert.Single(stub.Calls);

    var lastMessage = response.Messages.Last();
    Assert.Equal(ChatRole.Assistant, lastMessage.Role);
    Assert.Equal("my response", lastMessage.Text);
}
| 1518 | + |
[Fact]
public async Task MeaiEvaluatorAdapter_MultipleItems_AggregatesResultsAsync()
{
    // Arrange: two independent query/response items evaluated through one adapter.
    var recordingEvaluator = new StubEvaluator();
    var chatConfiguration = new ChatConfiguration(new StubChatClient());
    var adapter = new MeaiEvaluatorAdapter(recordingEvaluator, chatConfiguration);

    List<EvalItem> items = new()
    {
        new EvalItem("q1", "r1"),
        new EvalItem("q2", "r2"),
    };

    // Act
    var results = await adapter.EvaluateAsync(items);

    // Assert: one evaluator call per item, and both the item list and the total reflect that.
    Assert.Equal(2, recordingEvaluator.Calls.Count);
    Assert.Equal(2, results.Items.Count);
    Assert.Equal(2, results.Total);
}
| 1537 | + |
/// <summary>
/// Test double for <see cref="IEvaluator"/>: records the arguments of every
/// <see cref="EvaluateAsync"/> call in <see cref="Calls"/> and returns a result holding a
/// single <c>stub_check</c> boolean metric whose value is <see langword="true"/>.
/// </summary>
private sealed class StubEvaluator : IEvaluator
{
    private const string MetricName = "stub_check";

    /// <summary>Gets the recorded invocations, in call order.</summary>
    public List<(List<ChatMessage> Messages, ChatResponse Response, ChatConfiguration Config)> Calls { get; } = [];

    /// <inheritdoc/>
    public IReadOnlyCollection<string> EvaluationMetricNames { get; } = [MetricName];

    /// <inheritdoc/>
    public ValueTask<EvaluationResult> EvaluateAsync(
        IEnumerable<ChatMessage> messages,
        ChatResponse modelResponse,
        ChatConfiguration? chatConfiguration = null,
        IEnumerable<EvaluationContext>? additionalContext = null,
        CancellationToken cancellationToken = default)
    {
        // Copy the messages into a list so the recorded entry is an indexable snapshot.
        List<ChatMessage> receivedMessages = messages.ToList();
        this.Calls.Add((receivedMessages, modelResponse, chatConfiguration!));

        // Completes synchronously with the same fixed metric on every call.
        BooleanMetric metric = new(MetricName, true);
        return new ValueTask<EvaluationResult>(new EvaluationResult(metric));
    }
}
| 1557 | + |
/// <summary>
/// Bare-bones <see cref="IChatClient"/> used only to construct a <see cref="ChatConfiguration"/>.
/// Both response methods throw <see cref="NotImplementedException"/> if invoked.
/// </summary>
private sealed class StubChatClient : IChatClient
{
    /// <summary>Always throws <see cref="NotImplementedException"/>.</summary>
    public Task<ChatResponse> GetResponseAsync(IEnumerable<ChatMessage> messages, ChatOptions? options = null, CancellationToken cancellationToken = default) =>
        throw new NotImplementedException();

    /// <summary>Always throws <see cref="NotImplementedException"/>.</summary>
    public IAsyncEnumerable<ChatResponseUpdate> GetStreamingResponseAsync(IEnumerable<ChatMessage> messages, ChatOptions? options = null, CancellationToken cancellationToken = default) =>
        throw new NotImplementedException();

    /// <summary>Exposes no services; always returns <see langword="null"/>.</summary>
    public object? GetService(Type serviceType, object? serviceKey = null) => null;

    /// <summary>No-op: the stub holds nothing that needs releasing.</summary>
    public void Dispose()
    {
    }
}
1418 | 1580 | } |
0 commit comments