Skip to content

Commit 5c08471

Browse files
Merge pull request #605 from erikdarlingdata/feature/finops-test-seeder
Add FinOps test infrastructure — scoring extraction and scenario seeding
2 parents 87bad78 + ed82dd0 commit 5c08471

2 files changed

Lines changed: 444 additions & 58 deletions

File tree

Lite/Analysis/TestDataSeeder.cs

Lines changed: 368 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1397,4 +1397,372 @@ INSERT INTO database_size_stats
13971397
await cmd.ExecuteNonQueryAsync();
13981398
}
13991399
}
1400+
1401+
// ============================================
1402+
// FinOps Test Scenarios
1403+
// ============================================
1404+
1405+
/// <summary>
/// Scenario 1: an Enterprise Edition server with far more hardware than its workload uses.
/// Seeds 32 cores and 256GB of RAM alongside 8% average CPU and a 40GB buffer pool.
///
/// Expected recommendations:
///   - CPU right-sizing (P95 &lt; 30%, many cores)
///   - Memory right-sizing (buffer pool &lt; 50% of physical RAM)
/// </summary>
public async Task SeedOverProvisionedEnterpriseAsync()
{
    // All sizes are in MB. The physical memory figure is written to both the
    // memory stats and the server properties, so it is named once here.
    const int physicalMemoryMb = 262_144;  // 256GB
    const int bufferPoolSizeMb = 40_960;   // 40GB of the 256GB
    const int targetMemoryMb = 245_760;    // 240GB
    const int dataFileSizeMb = 51_200;     // 50GB — tiny for 256GB RAM

    // Reset first so only this scenario's rows exist for the test server.
    await ClearTestDataAsync();
    await SeedTestServerAsync();

    // 8 is the average CPU percentage from the scenario description.
    // NOTE(review): the meaning of the second argument (2) is not visible here — confirm against SeedCpuUtilizationAsync.
    await SeedCpuUtilizationAsync(8, 2);

    await SeedMemoryStatsAsync(
        totalPhysicalMb: physicalMemoryMb,
        bufferPoolMb: bufferPoolSizeMb,
        targetMb: targetMemoryMb);

    await SeedServerPropertiesAsync(
        cpuCount: 32,
        htRatio: 2,
        physicalMemMb: physicalMemoryMb,
        edition: "Enterprise Edition");

    await SeedFileSizeAsync(totalDataSizeMb: dataFileSizeMb);
}
1425+
1426+
/// <summary>
/// Scenario 2: idle databases with cost impact.
/// Size rows are seeded for three databases, but query activity is seeded for
/// only one of them ("ActiveDB"); the remaining two have no activity at all.
///
/// Expected recommendations:
///   - Dormant database detection (2 idle databases)
/// </summary>
public async Task SeedIdleDatabasesAsync()
{
    // The single database that receives query_stats rows.
    const string activeDatabaseName = "ActiveDB";

    await ClearTestDataAsync();
    await SeedTestServerAsync();

    // Sizes for all three databases first, then activity for just the active one.
    await SeedDatabaseSizesForIdleTestAsync();
    await SeedQueryStatsForDatabaseAsync(activeDatabaseName, executions: 5000, cpuMs: 100_000);
}
1442+
1443+
/// <summary>
/// Scenario 3: high-impact query skew, where a single query dominates CPU consumption.
///
/// Expected: HighImpactScorer.Score() returns query "AAAA" with dominant CpuShare.
/// </summary>
public async Task SeedHighImpactQuerySkewAsync()
{
    // Wipe previous scenario data and re-register the test server.
    await ClearTestDataAsync();
    await SeedTestServerAsync();

    // Five queries in total: one takes 80% of the CPU, the other four share the remaining 20%.
    await SeedQueryStatsForHighImpactAsync();
}
1456+
1457+
/// <summary>
/// Scenario 4: dev/test databases living on a production server.
/// Seeds database_size_stats rows for "staging_app", "dev_analytics" and
/// "test_warehouse", plus "ProductionDB" for contrast.
///
/// NOTE: The recommendation engine detects dev/test databases via a LIVE SQL query
/// (sys.databases WHERE name LIKE '%dev%'). That check won't fire against DuckDB test
/// data, so it fails silently here. The scenario documents the expected behavior;
/// use it to exercise the idle-database detection path instead.
/// </summary>
public async Task SeedDevTestDatabasesAsync()
{
    // Array order matters: the size helper derives each database's id and size from its position.
    string[] databaseNames =
    {
        "staging_app",
        "dev_analytics",
        "test_warehouse",
        "ProductionDB",
    };

    await ClearTestDataAsync();
    await SeedTestServerAsync();

    await SeedDatabaseSizesWithNamesAsync(databaseNames);
}
1473+
1474+
/// <summary>
/// Scenario 5: long-running maintenance jobs.
/// Populates running_jobs with a job flagged as running long five times within
/// the last seven days.
///
/// Expected recommendations:
///   - Maintenance window efficiency warning
/// </summary>
public async Task SeedLongRunningJobsAsync()
{
    // Reset, then re-create the test server row.
    await ClearTestDataAsync();
    await SeedTestServerAsync();

    // The only data this scenario needs is the job history itself.
    await SeedRunningJobsForMaintenanceTestAsync();
}
1488+
1489+
/// <summary>
/// Scenario 6: a clean FinOps server for which no recommendations are expected.
/// CPU sits at a healthy 50%, the buffer pool is 75% of physical RAM
/// (48GB of 64GB), and no idle databases are seeded.
///
/// Expected: empty or minimal recommendation list.
/// </summary>
public async Task SeedCleanFinOpsServerAsync()
{
    // All sizes are in MB. Physical memory is shared by two of the seed calls below.
    const int physicalMemoryMb = 65_536;   // 64GB
    const int bufferPoolSizeMb = 49_152;   // 48GB → 75% of physical
    const int targetMemoryMb = 57_344;     // 56GB
    const int dataFileSizeMb = 204_800;    // 200GB

    await ClearTestDataAsync();
    await SeedTestServerAsync();

    // 50 is the healthy average CPU percentage from the scenario description.
    // NOTE(review): the meaning of the second argument (5) is not visible here — confirm against SeedCpuUtilizationAsync.
    await SeedCpuUtilizationAsync(50, 5);

    await SeedMemoryStatsAsync(
        totalPhysicalMb: physicalMemoryMb,
        bufferPoolMb: bufferPoolSizeMb,
        targetMb: targetMemoryMb);

    await SeedServerPropertiesAsync(
        cpuCount: 8,
        htRatio: 2,
        physicalMemMb: physicalMemoryMb,
        edition: "Developer Edition");

    await SeedFileSizeAsync(totalDataSizeMb: dataFileSizeMb);
}
1507+
1508+
// ============================================
1509+
// FinOps Test Runner Methods
1510+
// ============================================
1511+
1512+
/// <summary>
/// Runs the FinOps recommendation engine against the seeded test server.
/// </summary>
/// <remarks>
/// Callers do not supply connection strings. This method always forwards two
/// empty strings to <c>GetRecommendationsAsync</c>, which is intended to skip the
/// engine's live SQL checks so only the seeded data is evaluated.
/// </remarks>
/// <param name="dataService">Data service whose <c>GetRecommendationsAsync</c> is invoked.</param>
/// <param name="monthlyCost">Monthly cost figure forwarded unchanged to the engine; defaults to 10000.</param>
/// <returns>The recommendation rows returned by the data service for the test server.</returns>
public async Task<List<PerformanceMonitorLite.Services.RecommendationRow>> RunFinOpsRecommendationsAsync(
    PerformanceMonitorLite.Services.LocalDataService dataService, decimal monthlyCost = 10000m)
{
    // Stands in for both connection-string arguments.
    // NOTE(review): presumably connectionString and utilityConnectionString, in that order — confirm against GetRecommendationsAsync.
    const string noLiveConnection = "";

    return await dataService.GetRecommendationsAsync(
        TestServerId, noLiveConnection, noLiveConnection, monthlyCost);
}
1521+
1522+
/// <summary>
/// Runs the High Impact scorer against the seeded test server.
/// </summary>
/// <param name="dataService">Data service whose <c>GetHighImpactQueriesAsync</c> is invoked.</param>
/// <param name="hoursBack">Look-back value forwarded unchanged to the data service; defaults to 24.</param>
/// <returns>The high-impact query rows returned by the data service for the test server.</returns>
public async Task<List<PerformanceMonitorLite.Services.HighImpactQueryRow>> RunHighImpactAnalysisAsync(
    PerformanceMonitorLite.Services.LocalDataService dataService, int hoursBack = 24)
    => await dataService.GetHighImpactQueriesAsync(TestServerId, hoursBack);
1530+
1531+
// ============================================
1532+
// FinOps Seed Helpers
1533+
// ============================================
1534+
1535+
/// <summary>
/// Inserts one database_size_stats row for each of the three databases used by
/// the idle-database scenario:
///   "ActiveDB"     (20GB)  — gets query_stats activity from a separate seed call;
///   "OldReportsDB" (50GB)  — no activity, should be detected as idle;
///   "ArchiveDB"    (100GB) — no activity, should be detected as idle.
/// Each row records a single 'ROWS' file (file_id 1) that is 80% used.
/// </summary>
internal async Task SeedDatabaseSizesForIdleTestAsync()
{
    // $1..$10 are bound positionally, in the order the values are added below.
    const string insertSql = @"
INSERT INTO database_size_stats
(collection_id, collection_time, server_id, server_name,
database_name, database_id, file_id, file_type_desc, file_name, physical_name,
total_size_mb, used_size_mb)
VALUES ($1, $2, $3, $4, $5, $6, 1, 'ROWS', $7, $8, $9, $10)";

    using var readLock = _duckDb.AcquireReadLock();
    using var connection = _duckDb.CreateConnection();
    await connection.OpenAsync();

    var seededDatabases = new (string Name, int DatabaseId, decimal TotalSizeMb)[]
    {
        ("ActiveDB", 10, 20_480),       // 20GB — active
        ("OldReportsDB", 11, 51_200),   // 50GB — idle
        ("ArchiveDB", 12, 102_400),     // 100GB — idle
    };

    foreach (var database in seededDatabases)
    {
        using var insert = connection.CreateCommand();
        insert.CommandText = insertSql;

        // Listed in placeholder order; _nextId is consumed once per row.
        object[] values =
        {
            _nextId--,
            TestPeriodEnd,
            TestServerId,
            TestServerName,
            database.Name,
            database.DatabaseId,
            $"{database.Name}.mdf",
            $"D:\\Data\\{database.Name}.mdf",
            database.TotalSizeMb,
            database.TotalSizeMb * 0.8m, // 80% used
        };

        foreach (var value in values)
        {
            insert.Parameters.Add(new DuckDBParameter { Value = value });
        }

        await insert.ExecuteNonQueryAsync();
    }
}
1577+
1578+
/// <summary>
/// Seeds query_stats with activity for a specific database, marking it as
/// "active" so that idle-database detection excludes it.
/// </summary>
/// <remarks>
/// The totals are spread over 16 collection points spaced 15 minutes apart,
/// starting at <c>TestPeriodStart</c>. Remainders from the division are handed
/// out one unit at a time to the earliest points, so the seeded rows sum exactly
/// to the requested totals. A small <paramref name="executions"/> value
/// (below 16) therefore still produces rows with non-zero executions.
/// </remarks>
/// <param name="databaseName">Value written to the database_name column of every row.</param>
/// <param name="executions">Total execution count to distribute across the collection points.</param>
/// <param name="cpuMs">Total CPU time in milliseconds; stored as microseconds in delta_worker_time.</param>
internal async Task SeedQueryStatsForDatabaseAsync(string databaseName, long executions, long cpuMs)
{
    // Multiple collection points so the activity falls within time-range queries.
    const int collectionPoints = 16;
    const int minutesBetweenPoints = 15;

    using var readLock = _duckDb.AcquireReadLock();
    using var connection = _duckDb.CreateConnection();
    await connection.OpenAsync();

    // delta_worker_time is stored in microseconds, so convert before splitting.
    var totalCpuMicroseconds = cpuMs * 1000;

    // Plain integer division would drop the remainder (5000 / 16 seeds only 4992)
    // and seed zero for any total below 16. Keep the remainder and spread it out.
    var baseExecutions = executions / collectionPoints;
    var extraExecutions = executions % collectionPoints;
    var baseCpuMicroseconds = totalCpuMicroseconds / collectionPoints;
    var extraCpuMicroseconds = totalCpuMicroseconds % collectionPoints;

    for (var i = 0; i < collectionPoints; i++)
    {
        // The first `extra` points each take one additional unit.
        var executionsAtPoint = baseExecutions + (i < extraExecutions ? 1 : 0);
        var cpuAtPoint = baseCpuMicroseconds + (i < extraCpuMicroseconds ? 1 : 0);

        using var cmd = connection.CreateCommand();
        cmd.CommandText = @"
INSERT INTO query_stats
(collection_id, collection_time, server_id, server_name,
database_name, query_hash, delta_execution_count,
delta_worker_time, delta_elapsed_time, delta_logical_reads)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)";

        var collectionTime = TestPeriodStart.AddMinutes(i * minutesBetweenPoints);
        cmd.Parameters.Add(new DuckDBParameter { Value = _nextId-- });
        cmd.Parameters.Add(new DuckDBParameter { Value = collectionTime });
        cmd.Parameters.Add(new DuckDBParameter { Value = TestServerId });
        cmd.Parameters.Add(new DuckDBParameter { Value = TestServerName });
        cmd.Parameters.Add(new DuckDBParameter { Value = databaseName });
        cmd.Parameters.Add(new DuckDBParameter { Value = $"0xACTIVE{i:D4}" });   // distinct hash per point
        cmd.Parameters.Add(new DuckDBParameter { Value = executionsAtPoint });
        cmd.Parameters.Add(new DuckDBParameter { Value = cpuAtPoint });
        cmd.Parameters.Add(new DuckDBParameter { Value = cpuAtPoint * 2 });      // elapsed = 2x CPU
        cmd.Parameters.Add(new DuckDBParameter { Value = executionsAtPoint * 500L }); // 500 logical reads per execution

        await cmd.ExecuteNonQueryAsync();
    }
}
1617+
1618+
/// <summary>
/// Seeds query_stats with five queries for high-impact skew testing, one of
/// which accounts for 80% of the total CPU:
///   AAAA — 800,000ms CPU, 10,000 executions (the monster)
///   BBBB —  50,000ms CPU,  5,000 executions
///   CCCC —  50,000ms CPU,  2,000 executions
///   DDDD —  50,000ms CPU,  1,000 executions
///   EEEE —  50,000ms CPU,    500 executions
/// Every row belongs to database "UserDB" and is stamped 30 minutes before
/// <c>TestPeriodEnd</c>.
/// </summary>
internal async Task SeedQueryStatsForHighImpactAsync()
{
    // $1..$13 are bound positionally, in the order the values are added below.
    const string insertSql = @"
INSERT INTO query_stats
(collection_id, collection_time, server_id, server_name,
database_name, query_hash, query_text,
delta_execution_count, delta_worker_time, delta_elapsed_time,
delta_logical_reads, delta_logical_writes, max_grant_kb)
VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13)";

    using var readLock = _duckDb.AcquireReadLock();
    using var connection = _duckDb.CreateConnection();
    await connection.OpenAsync();

    var workload = new (string Hash, long CpuMs, long Executions, long Reads, long Writes, long MemoryKb)[]
    {
        ("AAAA", 800_000, 10_000, 50_000_000, 1_000_000, 512_000), // The monster
        ("BBBB", 50_000, 5_000, 5_000_000, 100_000, 64_000),
        ("CCCC", 50_000, 2_000, 3_000_000, 50_000, 32_000),
        ("DDDD", 50_000, 1_000, 2_000_000, 25_000, 16_000),
        ("EEEE", 50_000, 500, 1_000_000, 10_000, 8_000),
    };

    foreach (var query in workload)
    {
        using var insert = connection.CreateCommand();
        insert.CommandText = insertSql;

        // Listed in placeholder order; _nextId is consumed once per row.
        object[] values =
        {
            _nextId--,
            TestPeriodEnd.AddMinutes(-30),                          // recent
            TestServerId,
            TestServerName,
            "UserDB",
            query.Hash,
            $"SELECT /* {query.Hash} */ * FROM dbo.SomeTable",
            query.Executions,
            query.CpuMs * 1000L,                                    // microseconds
            query.CpuMs * 2000L,                                    // elapsed ~2x CPU
            query.Reads,
            query.Writes,
            query.MemoryKb,
        };

        foreach (var value in values)
        {
            insert.Parameters.Add(new DuckDBParameter { Value = value });
        }

        await insert.ExecuteNonQueryAsync();
    }
}
1670+
1671+
/// <summary>
/// Inserts one database_size_stats row per supplied database name.
/// Used for dev/test detection testing and general size seeding.
/// </summary>
/// <remarks>
/// Position in the argument list determines the generated values: the database
/// at index <c>i</c> gets database_id <c>10 + i</c> and a total size of
/// 10GB + <c>i</c> × 5GB, of which 70% is recorded as used.
/// </remarks>
/// <param name="databaseNames">Names to seed, in the order that drives id and size assignment.</param>
internal async Task SeedDatabaseSizesWithNamesAsync(params string[] databaseNames)
{
    // $1..$10 are bound positionally, in the order the values are added below.
    const string insertSql = @"
INSERT INTO database_size_stats
(collection_id, collection_time, server_id, server_name,
database_name, database_id, file_id, file_type_desc, file_name, physical_name,
total_size_mb, used_size_mb)
VALUES ($1, $2, $3, $4, $5, $6, 1, 'ROWS', $7, $8, $9, $10)";

    using var readLock = _duckDb.AcquireReadLock();
    using var connection = _duckDb.CreateConnection();
    await connection.OpenAsync();

    var ordinal = 0;
    foreach (var databaseName in databaseNames)
    {
        var totalSizeMb = 10_240m + (ordinal * 5_120m); // 10GB, 15GB, 20GB, 25GB, ...

        using var insert = connection.CreateCommand();
        insert.CommandText = insertSql;

        // Listed in placeholder order; _nextId is consumed once per row.
        object[] values =
        {
            _nextId--,
            TestPeriodEnd,
            TestServerId,
            TestServerName,
            databaseName,
            10 + ordinal,                       // database_id
            $"{databaseName}.mdf",
            $"D:\\Data\\{databaseName}.mdf",
            totalSizeMb,
            totalSizeMb * 0.7m,                 // 70% used
        };

        foreach (var value in values)
        {
            insert.Parameters.Add(new DuckDBParameter { Value = value });
        }

        await insert.ExecuteNonQueryAsync();
        ordinal++;
    }
}
1708+
1709+
/// <summary>
/// Seeds running_jobs for maintenance window testing.
/// Creates a "Weekly Index Rebuild" job observed running long five times in the
/// last seven days, plus one normal "Stats Update" observation for contrast.
/// </summary>
/// <remarks>
/// The five long-running samples are anchored to a single reading of the clock:
/// the first is six days before "now" and each later one is 1.2 days after the
/// previous, so the spacing between samples is exact. Each sample's start_time,
/// current_duration_seconds and percent_of_average are derived from the same
/// constants so they cannot drift apart.
/// </remarks>
internal async Task SeedRunningJobsForMaintenanceTestAsync()
{
    const int longRunSampleCount = 5;
    const double daysBetweenSamples = 1.2;
    const long longRunDurationSeconds = 900L;     // 15 minutes observed
    const long historicalAverageSeconds = 300L;   // 5 minutes historical average
    const long historicalP95Seconds = 450L;

    using var readLock = _duckDb.AcquireReadLock();
    using var connection = _duckDb.CreateConnection();
    await connection.OpenAsync();

    // Read the clock once so every sample is offset from the same anchor.
    // The samples use wall-clock time because the recommendation engine queries a 7-day window.
    // NOTE(review): the "Stats Update" row below uses TestPeriodEnd instead — confirm the mismatch is intended.
    var firstSampleTime = DateTime.UtcNow.AddDays(-6);

    // 900s against a 300s average → 300%.
    var percentOfAverage = 100.0 * longRunDurationSeconds / historicalAverageSeconds;

    // "Weekly Index Rebuild" — ran long 5 times, all under one job id.
    var jobId = Guid.NewGuid().ToString();
    for (var sample = 0; sample < longRunSampleCount; sample++)
    {
        using var cmd = connection.CreateCommand();
        cmd.CommandText = @"
INSERT INTO running_jobs
(collection_time, server_id, server_name, job_name, job_id,
job_enabled, start_time, current_duration_seconds,
avg_duration_seconds, p95_duration_seconds, successful_run_count,
is_running_long, percent_of_average)
VALUES ($1, $2, $3, $4, $5, true, $6, $7, $8, $9, 50, true, $10)";

        var collectionTime = firstSampleTime.AddDays(sample * daysBetweenSamples);
        cmd.Parameters.Add(new DuckDBParameter { Value = collectionTime });
        cmd.Parameters.Add(new DuckDBParameter { Value = TestServerId });
        cmd.Parameters.Add(new DuckDBParameter { Value = TestServerName });
        cmd.Parameters.Add(new DuckDBParameter { Value = "Weekly Index Rebuild" });
        cmd.Parameters.Add(new DuckDBParameter { Value = jobId });
        // start_time is exactly the observed duration before the collection time.
        cmd.Parameters.Add(new DuckDBParameter { Value = collectionTime.AddSeconds(-longRunDurationSeconds) });
        cmd.Parameters.Add(new DuckDBParameter { Value = longRunDurationSeconds });    // current_duration_seconds
        cmd.Parameters.Add(new DuckDBParameter { Value = historicalAverageSeconds });  // avg_duration_seconds
        cmd.Parameters.Add(new DuckDBParameter { Value = historicalP95Seconds });      // p95_duration_seconds
        cmd.Parameters.Add(new DuckDBParameter { Value = percentOfAverage });          // percent_of_average

        await cmd.ExecuteNonQueryAsync();
    }

    // "Stats Update" — normal job, not running long. Its durations and flags are literals in the SQL.
    using var normalCmd = connection.CreateCommand();
    normalCmd.CommandText = @"
INSERT INTO running_jobs
(collection_time, server_id, server_name, job_name, job_id,
job_enabled, start_time, current_duration_seconds,
avg_duration_seconds, p95_duration_seconds, successful_run_count,
is_running_long, percent_of_average)
VALUES ($1, $2, $3, $4, $5, true, $6, 120, 100, 130, 200, false, 120.0)";

    normalCmd.Parameters.Add(new DuckDBParameter { Value = TestPeriodEnd.AddMinutes(-5) });
    normalCmd.Parameters.Add(new DuckDBParameter { Value = TestServerId });
    normalCmd.Parameters.Add(new DuckDBParameter { Value = TestServerName });
    normalCmd.Parameters.Add(new DuckDBParameter { Value = "Stats Update" });
    normalCmd.Parameters.Add(new DuckDBParameter { Value = Guid.NewGuid().ToString() });
    normalCmd.Parameters.Add(new DuckDBParameter { Value = TestPeriodEnd.AddMinutes(-7) }); // started 2 minutes (120s) before collection

    await normalCmd.ExecuteNonQueryAsync();
}
14001768
}

0 commit comments

Comments
 (0)