diff options
Diffstat (limited to 'Software/Visual_Studio_22/Tango.Portal.Chat.Web/Services')
4 files changed, 591 insertions, 0 deletions
diff --git a/Software/Visual_Studio_22/Tango.Portal.Chat.Web/Services/KqlGuard.cs b/Software/Visual_Studio_22/Tango.Portal.Chat.Web/Services/KqlGuard.cs new file mode 100644 index 000000000..6ce4e44c9 --- /dev/null +++ b/Software/Visual_Studio_22/Tango.Portal.Chat.Web/Services/KqlGuard.cs @@ -0,0 +1,45 @@ +using System.Text.RegularExpressions; + +namespace ChatADX.Web.Services +{ + public sealed class KqlGuard + { + private static readonly string[] Banned = new[] { + "externaldata", "evaluate", "cluster(", "database(", "ingest", "print", "datatable", "delete", "drop", "truncate", "update", "set", "declare", "let", "materializedview", "mv-merge", "alter", "create", "append", "ingestiontime()", ".show", ".set", ".clear", ".drop", ".alter" + }; + + public KqlValidationResult Validate(string kql) + { + var text = kql.ToLowerInvariant(); + + foreach (var token in Banned) + if (text.Contains(token)) + return KqlValidationResult.Fail($"Query uses banned token: {token}"); + + // Ensure only allowed tables are referenced (quick heuristic) + //var tableNames = new HashSet<string>(allowTables.Select(t => t.ToLowerInvariant())); + //var tableRefs = Regex.Matches(text, @"\b([A-Za-z_][A-Za-z0-9_]*)\bTable").Cast<Match>().Select(m => m.Value.ToLowerInvariant().Replace("table", "")); + + //foreach (var tr in tableRefs) + // if (!tableNames.Contains(tr)) + // return KqlValidationResult.Fail($"Query references non-allowlisted table: {tr}"); + + // Encourage summarize/top/take to avoid huge result sets + //if (!(text.Contains("summarize") || text.Contains("| take ") || text.Contains("| top "))) + // return KqlValidationResult.Fail("Query must include summarize/top/take to limit results."); + + return KqlValidationResult.Ok(); + } + } + + public readonly struct KqlValidationResult + { + public bool IsOk { get; } + public string? Error { get; } + + private KqlValidationResult(bool ok, string? error) { IsOk = ok; Error = error; } + + public static KqlValidationResult Ok() => new(true, null); + public static KqlValidationResult Fail(string error) => new(false, error); + } +} diff --git a/Software/Visual_Studio_22/Tango.Portal.Chat.Web/Services/KustoQueryService.cs b/Software/Visual_Studio_22/Tango.Portal.Chat.Web/Services/KustoQueryService.cs new file mode 100644 index 000000000..ecb89fcd6 --- /dev/null +++ b/Software/Visual_Studio_22/Tango.Portal.Chat.Web/Services/KustoQueryService.cs @@ -0,0 +1,54 @@ +using Azure.Core; +using Azure.Identity; +using Kusto.Data; +using Kusto.Data.Common; +using Kusto.Data.Net.Client; +using Microsoft.Extensions.Options; +using System.Data; + +namespace ChatADX.Web.Services +{ + public sealed class KustoQueryService + { + private readonly ICslQueryProvider _query; + private readonly string _database; + + public KustoQueryService(IOptions<AdxOptions> opts) + { + var options = opts.Value; + _database = options.Database; + + // Use DefaultAzureCredential: works locally (Azure CLI / Visual Studio), and in Azure (Managed Identity) + var cred = new ClientSecretCredential( + opts.Value.TenantId, + opts.Value.ClientId, + opts.Value.ClientSecret); + + var kcsb = new KustoConnectionStringBuilder(options.ClusterUri) + .WithAadAzureTokenCredentialsAuthentication(cred); + + _query = KustoClientFactory.CreateCslQueryProvider(kcsb); + } + + public async Task<DataTable> QueryAsync(string kql, IDictionary<string, string> parameters, CancellationToken ct = default) + { + var props = new ClientRequestProperties + { + ClientRequestId = $"chat_{Guid.NewGuid()}" + }; + + foreach (var kvp in parameters) + { + // Pass all as strings; let KQL cast via e.g., datetime({from}) if declared + props.SetParameter(kvp.Key, kvp.Value); + } + + props.SetOption("servertimeout", "00:00:12"); + + using var reader = await _query.ExecuteQueryAsync(_database, kql, props); + var table = new DataTable(); + table.Load(reader); + return table; + } + } +} diff --git a/Software/Visual_Studio_22/Tango.Portal.Chat.Web/Services/LlmClient.cs b/Software/Visual_Studio_22/Tango.Portal.Chat.Web/Services/LlmClient.cs new file mode 100644 index 000000000..fb2226fb8 --- /dev/null +++ b/Software/Visual_Studio_22/Tango.Portal.Chat.Web/Services/LlmClient.cs @@ -0,0 +1,460 @@ +using System.Net.Http.Headers; +using System.Text; +using System.Text.Json; +using System.Text.Json.Nodes; +using Microsoft.Extensions.Options; +using ChatADX.Web.Models; +using System.Text.RegularExpressions; + +namespace ChatADX.Web.Services +{ + public sealed class LlmClient + { + private readonly HttpClient _http; + private readonly LlmOptions _opt; + + public sealed class AssistantRunResult + { + public string Answer { get; set; } = ""; + public string ThreadId { get; set; } = ""; + } + + public enum AssistantType + { + Data, + Docs + } + + public LlmClient(HttpClient http, IOptions<LlmOptions> opt) + { + _http = http; + _opt = opt.Value; + } + + public async Task<ProposeKqlResult> ProposeKqlAsync(string question, string schemaJson, CancellationToken ct = default) + { + var system = string.Join("\n", new[] { + "You are a Kusto (KQL) assistant for Azure Data Explorer.", + "Use ONLY the tables/columns provided in the SCHEMA JSON that follows.", + "ALWAYS try to query for the least amount of data neccessary to answer the question.", + "Return a JSON object with fields: assistant, kql, parameters, parameterTypes (optional), assumptions, why.", + "When asked to query by months ago, convert number of months to days (e.g last to months = StartTime >= ago(60d))", + "When joining tables, this example for correct syntax: EventsTable | join kind=inner (EventTypesTable) on $left.EventTypeGuid == $right.GUID.", + "When querying MachinesTable you can fetch the machine's Organization and Site by joining the latest record from JobRunsTable by SerialNumber.", + "Output raw JSON ONLY (no code fences).", + @"Classify the user's question into exactly one of: + - ""data"": requires querying telemetry via KQL (numbers, trends, counts, rates, top-N, timelines). + - ""docs"": architectural/how-to/design/definitions/“what is/how do we” that do not require live data. place data or docs in the assistant field you return.", + "If the question is classified as 'docs', set kql to an empty string and parameters to an empty object.", + + + }); + + var schemaBlock = $"SCHEMA:\n{schemaJson}"; + var user = $"Question: {question}\n\n{schemaBlock}"; + + var payload = new + { + model = _opt.Model, + temperature = _opt.Temperature, + response_format = new { type = "json_object" }, + messages = new object[] { + new { role = "system", content = system }, + new { role = "user", content = user } + } + }; + + using var req = new HttpRequestMessage(HttpMethod.Post, _opt.Endpoint); + var json = JsonSerializer.Serialize(payload); + req.Content = new StringContent(json, Encoding.UTF8, "application/json"); + + if (_opt.IsAzure) req.Headers.Add("api-key", _opt.ApiKey); + else req.Headers.Authorization = new AuthenticationHeaderValue("Bearer", _opt.ApiKey); + + using var resp = await _http.SendAsync(req, ct); + resp.EnsureSuccessStatusCode(); + var body = await resp.Content.ReadAsStringAsync(ct); + + var root = JsonNode.Parse(body)!.AsObject(); + var content = root["choices"]![0]!["message"]!["content"]?.ToString() ?? "{}"; + content = StripCodeFences(content); + + var opts = new JsonSerializerOptions { PropertyNameCaseInsensitive = true }; + opts.Converters.Add(new FlexibleStringListConverter()); + try + { + var result = JsonSerializer.Deserialize<ProposeKqlResult>(content, opts); + if (result != null) return result; + } + catch (JsonException) + { + // fall back + } + + // Lenient mapping + var node = JsonNode.Parse(content) as JsonObject ?? new JsonObject(); + var kql = node["kql"]?.ToString() ?? string.Empty; + + var parameters = new Dictionary<string, string>(); + if (node["parameters"] is JsonObject pObj) + { + foreach (var kv in pObj) + parameters[kv.Key] = kv.Value?.ToString() ?? string.Empty; + } + + List<string>? assumptions = null; + if (node.TryGetPropertyValue("assumptions", out var aNode) && aNode is not null) + { + assumptions = new List<string>(); + if (aNode is JsonArray arr) + { + foreach (var el in arr) assumptions.Add(el?.ToString() ?? string.Empty); + } + else + { + assumptions.Add(aNode.ToString()); + } + } + + return new ProposeKqlResult + { + Kql = kql, + Parameters = parameters, + Assumptions = assumptions, + Why = node["why"]?.ToString() + }; + } + + public async Task<ProposeKqlResult> ProposeKqlAsync( + string question, string schemaJson, IEnumerable<ChatMessage>? history, CancellationToken ct = default) + { + var system = string.Join("\n", new[] { + "You are a Kusto (KQL) assistant for Azure Data Explorer.", + "Use ONLY the tables/columns provided in the SCHEMA JSON that follows.", + "ALWAYS follow upper case column names when specified by the schema.", + "ALWAYS try to query for the least amount of data neccessary to answer the question.", + "Return a JSON object with fields: assistant, kql, parameters, parameterTypes (optional), assumptions, why.", + "When asked to query by months ago, convert number of months to days (e.g last to months = StartTime >= ago(60d))", + "When joining tables, this example for correct syntax: EventsTable | join kind=inner (EventTypesTable) on $left.EventTypeGuid == $right.GUID.", + "If you are joining and want to project two columns with the same name append '1' to the end of the second table name. (e.g: SitesTable | join kind=inner (OrganizationsTable) on $left.ORGANIZATION_GUID == $right.GUID | project Site = NAME, Organization = NAME1).", + "When joining tables, make sure to also join by environment (e.g: SitesTable | join kind=inner (OrganizationsTable) on $left.ORGANIZATION_GUID == $right.GUID and $left.ENVIRONMENT == $right.ENVIRONMENT)).", + "Output raw JSON ONLY (no code fences).", + // 👇 tiny but important instruction for routing: + @"Use conversational context from prior turns. If the current question refers to a previous result (e.g., 'that', 'those', 'the same one'), + keep the same 'assistant' classification as the previous turn unless the user clearly asks for documentation.", + @"Classify into exactly one of: + - ""data"": requires querying telemetry via KQL (numbers, trends, counts, rates, top-N, timelines). + - ""docs"": architectural/how-to/design/definitions not requiring live data." + }); + + var messages = new List<object> { new { role = "system", content = system } }; + + if (history != null) + { + foreach (var m in history.TakeLast(6)) + messages.Add(new { role = m.Role, content = m.Content }); + } + + var schemaBlock = $"SCHEMA:\n{schemaJson}"; + messages.Add(new { role = "user", content = $"Question: {question}\n\n{schemaBlock}" }); + + var payload = new + { + model = _opt.Model, + temperature = _opt.Temperature, + response_format = new { type = "json_object" }, + messages = messages + }; + + using var req = new HttpRequestMessage(HttpMethod.Post, _opt.Endpoint); + req.Content = new StringContent(JsonSerializer.Serialize(payload), Encoding.UTF8, "application/json"); + if (_opt.IsAzure) req.Headers.Add("api-key", _opt.ApiKey); + else req.Headers.Authorization = new AuthenticationHeaderValue("Bearer", _opt.ApiKey); + + using var resp = await _http.SendAsync(req, ct); + resp.EnsureSuccessStatusCode(); + var body = await resp.Content.ReadAsStringAsync(ct); + + var root = JsonNode.Parse(body)!.AsObject(); + var content = root["choices"]![0]!["message"]!["content"]?.ToString() ?? "{}"; + content = StripCodeFences(content); // your existing helper + + var opts = new JsonSerializerOptions { PropertyNameCaseInsensitive = true }; + opts.Converters.Add(new FlexibleStringListConverter()); + var result = JsonSerializer.Deserialize<ProposeKqlResult>(content, opts) + ?? new ProposeKqlResult { Kql = "", Parameters = new() }; + return result; + } + + private static string StripCodeFences(string s) + { + if (string.IsNullOrWhiteSpace(s)) return s ?? string.Empty; + var t = s.Trim(); + + if (t.StartsWith("```")) + { + // Remove first line (the ```json or ``` block) + var firstNl = t.IndexOf('\n'); + if (firstNl >= 0 && firstNl + 1 < t.Length) + { + var inner = t.Substring(firstNl + 1); + + // Remove trailing fence + var fence = inner.LastIndexOf("```", StringComparison.Ordinal); + if (fence >= 0) + inner = inner.Substring(0, fence); + + return inner.Trim(); + } + } + return t; + } + + + public async Task<string> AnswerFromFactsAsync(string question, string factsJson, string kqlForDisplay, CancellationToken ct = default) + { + var system = string.Join("\n", new[] { + "You are a precise analyst.", + "Answer ONLY from the provided ADX facts. If insufficient, say so.", + "Be explicit about the time range and columns used.", + "Ink quantities are stored as nanoliters. Make them humanly readable by converting them to milliliters or liters depending on what makes more sense." + }); + + var user = $"Question: {question}\n\nFacts(JSON):\n{factsJson}\n\nKQL used:\n{kqlForDisplay}"; + + var payload = new + { + model = _opt.Model, + temperature = 0.2, + messages = new object[] { + new { role = "system", content = system }, + new { role = "user", content = user } + } + }; + + using var req = new HttpRequestMessage(HttpMethod.Post, _opt.Endpoint); + var json = JsonSerializer.Serialize(payload); + req.Content = new StringContent(json, Encoding.UTF8, "application/json"); + + if (_opt.IsAzure) req.Headers.Add("api-key", _opt.ApiKey); + else req.Headers.Authorization = new AuthenticationHeaderValue("Bearer", _opt.ApiKey); + + using var resp = await _http.SendAsync(req, ct); + resp.EnsureSuccessStatusCode(); + var body = await resp.Content.ReadAsStringAsync(ct); + + var root = JsonNode.Parse(body)!.AsObject(); + var content = root["choices"]![0]!["message"]!["content"]!.ToString(); + return content; + } + + // Add once in your class + private void AddOpenAIHeaders(HttpRequestMessage req) + { + // MUST be a standard OpenAI key (sk-...), not an Azure key + req.Headers.Authorization = + new System.Net.Http.Headers.AuthenticationHeaderValue("Bearer", _opt.ApiKey); + + // REQUIRED for Assistants v2 + req.Headers.Add("OpenAI-Beta", "assistants=v2"); + + // If your org enforces projects or you want to scope, uncomment: + // req.Headers.Add("OpenAI-Organization", "<org_xxx>"); + // req.Headers.Add("OpenAI-Project", "<proj_xxx>"); + } + + private static async Task<string> ReadBodyOrThrowAsync(HttpResponseMessage res, CancellationToken ct) + { + var body = await res.Content.ReadAsStringAsync(ct); + if (!res.IsSuccessStatusCode) + throw new HttpRequestException($"{(int)res.StatusCode} {res.ReasonPhrase}: {body}"); + return body; + } + + + public async Task<string> AnswerWithAssistantAsync( + AssistantType assistant, + string question, + string factsJson, + string kql, + CancellationToken ct = default) + { + // 1) Create a thread (empty is fine) + using var tReq = new HttpRequestMessage(HttpMethod.Post, "https://api.openai.com/v1/threads") + { + Content = new StringContent("{}", System.Text.Encoding.UTF8, "application/json") + }; + AddOpenAIHeaders(tReq); + var tBody = await ReadBodyOrThrowAsync(await _http.SendAsync(tReq, ct), ct); + var threadId = System.Text.Json.JsonDocument.Parse(tBody).RootElement.GetProperty("id").GetString(); + + // 2) Add a single user message with the three sections + + String messageText = String.Empty; + + if (assistant == AssistantType.Data) + { + messageText = $"Question:\n{question}\n\nFacts(JSON):\n{factsJson}\n\nKQL used:\n{kql}"; + } + else + { + messageText = $"Question:\n{question}"; + } + + using var mReq = new HttpRequestMessage(HttpMethod.Post, $"https://api.openai.com/v1/threads/{threadId}/messages") + { + // v2 expects "content" to be an array of content parts + Content = JsonContent.Create(new + { + role = "user", + content = new object[] { new { type = "text", text = messageText } } + }) + }; + AddOpenAIHeaders(mReq); + await ReadBodyOrThrowAsync(await _http.SendAsync(mReq, ct), ct); + + // 3) Create a run targeting your Assistant (must have File Search enabled & schema.json attached) + using var rReq = new HttpRequestMessage(HttpMethod.Post, $"https://api.openai.com/v1/threads/{threadId}/runs") + { + Content = JsonContent.Create(new + { + assistant_id = assistant == AssistantType.Data ? _opt.AnswererAssistantId : _opt.DocsAssistantId, + // You can override instructions here if you ever need to: + // instructions = "..." + }) + }; + AddOpenAIHeaders(rReq); + var rBody = await ReadBodyOrThrowAsync(await _http.SendAsync(rReq, ct), ct); + var runId = System.Text.Json.JsonDocument.Parse(rBody).RootElement.GetProperty("id").GetString(); + + // 4) Poll run until completed + while (true) + { + await Task.Delay(600, ct); + using var gReq = new HttpRequestMessage(HttpMethod.Get, $"https://api.openai.com/v1/threads/{threadId}/runs/{runId}"); + AddOpenAIHeaders(gReq); + var gBody = await ReadBodyOrThrowAsync(await _http.SendAsync(gReq, ct), ct); + var root = System.Text.Json.JsonDocument.Parse(gBody).RootElement; + var status = root.GetProperty("status").GetString(); + if (status == "completed") break; + if (status == "failed" || status == "cancelled" || status == "expired") + { + var lastError = root.TryGetProperty("last_error", out var le) ? le.ToString() : "unknown"; + throw new Exception($"Assistant run {status}: {lastError}"); + } + } + + // 5) Fetch messages and return the latest assistant text + using var lReq = new HttpRequestMessage(HttpMethod.Get, $"https://api.openai.com/v1/threads/{threadId}/messages"); + AddOpenAIHeaders(lReq); + var lBody = await ReadBodyOrThrowAsync(await _http.SendAsync(lReq, ct), ct); + using var doc = System.Text.Json.JsonDocument.Parse(lBody); + + // Messages are returned most-recent-first; take the first assistant message + foreach (var msg in doc.RootElement.GetProperty("data").EnumerateArray()) + { + var role = msg.GetProperty("role").GetString(); + if (role == "assistant") + { + foreach (var part in msg.GetProperty("content").EnumerateArray()) + { + if (part.GetProperty("type").GetString() == "text") + return part.GetProperty("text").GetProperty("value").GetString() ?? ""; + } + } + } + + return "(no assistant message found)"; + } + + public async Task<AssistantRunResult> AnswerWithAssistantAsync( + AssistantType assistant, + string question, + string factsJson, + string kql, + string? threadId, + CancellationToken ct = default) + { + // 1) Use existing thread or create a new one + if (string.IsNullOrEmpty(threadId)) + { + using var tReq = new HttpRequestMessage(HttpMethod.Post, "https://api.openai.com/v1/threads") + { Content = new StringContent("{}", Encoding.UTF8, "application/json") }; + AddOpenAIHeaders(tReq); + var tBody = await ReadBodyOrThrowAsync(await _http.SendAsync(tReq, ct), ct); + threadId = System.Text.Json.JsonDocument.Parse(tBody).RootElement.GetProperty("id").GetString(); + } + + // 2) Add the user message (same text you build today) + var messageText = assistant == AssistantType.Data + ? $"Question:\n{question}\n\nFacts(JSON):\n{factsJson}\n\nKQL used:\n{kql}" + : $"Question:\n{question}"; + + using (var mReq = new HttpRequestMessage(HttpMethod.Post, $"https://api.openai.com/v1/threads/{threadId}/messages") + { Content = JsonContent.Create(new { role = "user", content = new object[] { new { type = "text", text = messageText } } }) }) + { + AddOpenAIHeaders(mReq); + await ReadBodyOrThrowAsync(await _http.SendAsync(mReq, ct), ct); + } + + // 3) Run with the correct assistant id (unchanged logic) + using (var rReq = new HttpRequestMessage(HttpMethod.Post, $"https://api.openai.com/v1/threads/{threadId}/runs") + { Content = JsonContent.Create(new { assistant_id = assistant == AssistantType.Data ? _opt.AnswererAssistantId : _opt.DocsAssistantId }) }) + { + AddOpenAIHeaders(rReq); + var rBody = await ReadBodyOrThrowAsync(await _http.SendAsync(rReq, ct), ct); + var runId = System.Text.Json.JsonDocument.Parse(rBody).RootElement.GetProperty("id").GetString(); + + // Poll until completed (same as your existing loop) + while (true) + { + await Task.Delay(600, ct); + using var gReq = new HttpRequestMessage(HttpMethod.Get, $"https://api.openai.com/v1/threads/{threadId}/runs/{runId}"); + AddOpenAIHeaders(gReq); + var gBody = await ReadBodyOrThrowAsync(await _http.SendAsync(gReq, ct), ct); + var root = System.Text.Json.JsonDocument.Parse(gBody).RootElement; + var status = root.GetProperty("status").GetString(); + if (status == "completed") break; + if (status == "failed" || status == "cancelled" || status == "expired") + { + var lastError = root.TryGetProperty("last_error", out var le) ? le.ToString() : "unknown"; + throw new Exception($"Assistant run {status}: {lastError}"); + } + } + } + + // 4) Fetch the latest assistant text and return it with the thread id + using var lReq = new HttpRequestMessage(HttpMethod.Get, $"https://api.openai.com/v1/threads/{threadId}/messages"); + AddOpenAIHeaders(lReq); + var lBody = await ReadBodyOrThrowAsync(await _http.SendAsync(lReq, ct), ct); + using var doc = System.Text.Json.JsonDocument.Parse(lBody); + foreach (var msg in doc.RootElement.GetProperty("data").EnumerateArray()) + { + if (msg.GetProperty("role").GetString() == "assistant") + { + foreach (var part in msg.GetProperty("content").EnumerateArray()) + if (part.GetProperty("type").GetString() == "text") + { + var raw = part.GetProperty("text").GetProperty("value").GetString() ?? ""; + var cleaned = assistant == AssistantType.Docs ? StripCitations(raw) : raw; + return new AssistantRunResult + { + Answer = cleaned, + ThreadId = threadId! + }; + } + } + } + return new AssistantRunResult { Answer = "(no assistant message found)", ThreadId = threadId! }; + } + + private static string StripCitations(string s) + { + if (string.IsNullOrWhiteSpace(s)) return s ?? string.Empty; + // Remove any inline citation markers like: or + return Regex.Replace(s, @"\s*【[^】]*】", string.Empty); + } + + + } +} diff --git a/Software/Visual_Studio_22/Tango.Portal.Chat.Web/Services/SchemaRegistry.cs b/Software/Visual_Studio_22/Tango.Portal.Chat.Web/Services/SchemaRegistry.cs new file mode 100644 index 000000000..da6868bc5 --- /dev/null +++ b/Software/Visual_Studio_22/Tango.Portal.Chat.Web/Services/SchemaRegistry.cs @@ -0,0 +1,32 @@ +using System.Text.Json; + +namespace ChatADX.Web.Services +{ + public sealed class SchemaRegistry + { + private readonly IWebHostEnvironment _env; + private readonly ILogger<SchemaRegistry> _log; + private string? _cached; + + public SchemaRegistry(IWebHostEnvironment env, ILogger<SchemaRegistry> log) + { + _env = env; _log = log; + } + + public string GetSchemaJson() + { + if (!string.IsNullOrEmpty(_cached)) return _cached!; + var path = Path.Combine(_env.ContentRootPath, "Data", "schema.json"); + if (!File.Exists(path)) + { + _log.LogWarning("Schema file not found at {Path}. Returning empty schema.", path); + _cached = "{\"tables\":{}}"; + return _cached!; + } + _cached = File.ReadAllText(path); + // Basic sanity check + JsonDocument.Parse(_cached); + return _cached!; + } + } +} |
