From 2b176d5d106a3d312268ccf9939b92ba4bfc5b09 Mon Sep 17 00:00:00 2001
From: Cursor Agent
Date: Mon, 29 Dec 2025 18:58:30 +0000
Subject: [PATCH] feat: Resolve exact repo matches in search parser

This change allows the search parser to resolve exact repository names, including display names, when they are anchored with '^' and '$'. This improves search accuracy by directly mapping literal repository identifiers to their corresponding entries in the database. If the pattern is not an exact literal, it falls back to regex handling.

Co-authored-by: michael
---
Reviewer notes (text between the `---` line and the diffstat is not part of
the commit):
- Line breaks and indentation are reconstructed; the copy under review had
  them collapsed, so check context-line whitespace against parser.ts.
- Generic type arguments were missing from that copy. Added lines now spell
  them out (`Promise<string[] | undefined>`, `Promise<QueryIR | undefined>`,
  `Record<string, boolean>`); `QueryIR` is assumed to be the IR type that
  parser.ts already has in scope - confirm. Context lines still read
  `Promise` exactly as received and need their real type arguments restored
  before applying.
- Blob ids on the `index` lines are those of the original revision.
- Fixes over the original revision: the literal matcher now requires the
  closing `$` to end the pattern; hyphens typed as `-` or `\-` are accepted;
  unescaping is a single pass; an empty literal skips the lookup.

 .../web/src/features/search/parser.test.ts |  78 +++++++++++
 packages/web/src/features/search/parser.ts | 113 ++++++++++++++++++++++
 2 files changed, 191 insertions(+)
 create mode 100644 packages/web/src/features/search/parser.test.ts

diff --git a/packages/web/src/features/search/parser.test.ts b/packages/web/src/features/search/parser.test.ts
new file mode 100644
index 00000000..1798a463
--- /dev/null
+++ b/packages/web/src/features/search/parser.test.ts
@@ -0,0 +1,78 @@
+import { describe, expect, it, vi } from 'vitest';
+import type { PrismaClient } from '@sourcebot/db';
+import { parseQuerySyntaxIntoIR } from './parser';
+
+describe('parseQuerySyntaxIntoIR', () => {
+    it('resolves anchored repo display names to repo_set queries', async () => {
+        const findMany = vi.fn().mockResolvedValue([
+            { name: 'gerrit.example.com:29418/zximgw/rcsiap2001' },
+        ]);
+
+        const prisma = {
+            repo: {
+                findMany,
+            },
+        } as unknown as PrismaClient;
+
+        const query = await parseQuerySyntaxIntoIR({
+            query: 'repo:"^zximgw/rcsiap2001$"',
+            options: {},
+            prisma,
+        });
+
+        expect(findMany).toHaveBeenCalledWith({
+            where: {
+                orgId: expect.any(Number),
+                OR: [
+                    { name: 'zximgw/rcsiap2001' },
+                    { displayName: 'zximgw/rcsiap2001' },
+                ],
+            },
+            select: { name: true },
+        });
+
+        expect(query.repo_set).toBeDefined();
+        expect(query.repo_set?.set).toEqual({
+            'gerrit.example.com:29418/zximgw/rcsiap2001': true,
+        });
+    });
+
+    it('falls back to regex handling when pattern is not a literal string', async () => {
+        const findMany = vi.fn();
+        const prisma = {
+            repo: {
+                findMany,
+            },
+        } as unknown as PrismaClient;
+
+        const query = await parseQuerySyntaxIntoIR({
+            query: 'repo:^gerrit.*$',
+            options: {},
+            prisma,
+        });
+
+        expect(findMany).not.toHaveBeenCalled();
+        expect(query.repo?.regexp).toEqual('^gerrit.*$');
+    });
+
+    it('resolves anchored repo names that contain unescaped hyphens', async () => {
+        const findMany = vi.fn().mockResolvedValue([
+            { name: 'github.com/my-org/my-repo' },
+        ]);
+        const prisma = {
+            repo: {
+                findMany,
+            },
+        } as unknown as PrismaClient;
+
+        const query = await parseQuerySyntaxIntoIR({
+            query: 'repo:"^my-org/my-repo$"',
+            options: {},
+            prisma,
+        });
+
+        expect(query.repo_set?.set).toEqual({
+            'github.com/my-org/my-repo': true,
+        });
+    });
+});
diff --git a/packages/web/src/features/search/parser.ts b/packages/web/src/features/search/parser.ts
index e3e9d41a..c2bf5715 100644
--- a/packages/web/src/features/search/parser.ts
+++ b/packages/web/src/features/search/parser.ts
@@ -28,6 +28,7 @@ import { SINGLE_TENANT_ORG_ID } from '@/lib/constants';
 import { ServiceErrorException } from '@/lib/serviceError';
 import { StatusCodes } from 'http-status-codes';
 import { ErrorCode } from '@/lib/errorCodes';
+import escapeStringRegexp from 'escape-string-regexp';
 
 // Configure the parser to throw errors when encountering invalid syntax.
 const parser = _parser.configure({
@@ -95,6 +96,26 @@ export const parseQuerySyntaxIntoIR = async ({
 
                 return context.repos.map((repo) => repo.name);
             },
+            onResolveRepoExactMatch: async (literalRepoName: string) => {
+                const repos = await prisma.repo.findMany({
+                    where: {
+                        orgId: SINGLE_TENANT_ORG_ID,
+                        OR: [
+                            { name: literalRepoName },
+                            { displayName: literalRepoName },
+                        ],
+                    },
+                    select: {
+                        name: true,
+                    }
+                });
+
+                if (repos.length === 0) {
+                    return undefined;
+                }
+
+                return repos.map((repo) => repo.name);
+            },
         });
     } catch (error) {
         if (error instanceof SyntaxError) {
@@ -117,12 +138,14 @@ const transformTreeToIR = async ({
     isCaseSensitivityEnabled,
     isRegexEnabled,
     onExpandSearchContext,
+    onResolveRepoExactMatch,
 }: {
     tree: Tree;
     input: string;
     isCaseSensitivityEnabled: boolean;
     isRegexEnabled: boolean;
     onExpandSearchContext: (contextName: string) => Promise;
+    onResolveRepoExactMatch?: (literalRepoName: string) => Promise<string[] | undefined>;
 }): Promise => {
     const transformNode = async (node: SyntaxNode): Promise => {
         switch (node.type.id) {
@@ -239,6 +262,16 @@ const transformTreeToIR = async ({
                 };
 
             case RepoExpr:
+                if (onResolveRepoExactMatch) {
+                    const repoSet = await resolveRepoLiteralIfPossible({
+                        value,
+                        onResolveRepoExactMatch,
+                    });
+                    if (repoSet) {
+                        return repoSet;
+                    }
+                }
+
                 return {
                     repo: {
                         regexp: value
@@ -409,3 +442,80 @@ const getChildren = (node: SyntaxNode): SyntaxNode[] => {
     }
     return children;
 }
+
+/**
+ * Resolves an anchored, purely literal `repo:` pattern (e.g. `^org/repo$`)
+ * to a `repo_set` query naming the matching repositories.
+ *
+ * @param value - The raw repo pattern taken from the query.
+ * @param onResolveRepoExactMatch - Looks up repositories by the unescaped
+ * literal; resolves to their names, or `undefined` when none match.
+ * @returns A `repo_set` query, or `undefined` when `value` is not an anchored
+ * literal or nothing matched, so the caller can fall back to regex handling.
+ */
+const resolveRepoLiteralIfPossible = async ({
+    value,
+    onResolveRepoExactMatch,
+}: {
+    value: string;
+    onResolveRepoExactMatch: (literalRepoName: string) => Promise<string[] | undefined>;
+}): Promise<QueryIR | undefined> => {
+    // The pattern must end at the closing `$`; without the end anchor here,
+    // `^foo$bar` would be mistaken for the literal `foo`.
+    const literalMatch = value.match(/^\^(.*)\$$/);
+    if (!literalMatch) {
+        return undefined;
+    }
+
+    const innerPattern = literalMatch[1];
+    const unescaped = unescapeRegexLiteral(innerPattern);
+    if (unescaped.length === 0) {
+        return undefined;
+    }
+
+    // Round-trip check: the pattern is a pure literal only if re-escaping the
+    // unescaped text reproduces it. escapeStringRegexp writes `-` as `\x2d`,
+    // so hand-typed hyphens are normalised to that form before comparing.
+    if (escapeStringRegexp(unescaped) !== canonicalizeHyphenEscapes(innerPattern)) {
+        return undefined;
+    }
+
+    const repoNames = await onResolveRepoExactMatch(unescaped);
+    if (!repoNames || repoNames.length === 0) {
+        return undefined;
+    }
+
+    return {
+        repo_set: {
+            set: repoNames.reduce<Record<string, boolean>>((acc, name) => {
+                acc[name.trim()] = true;
+                return acc;
+            }, {})
+        },
+        query: "repo_set"
+    };
+}
+
+/**
+ * Rewrites a bare `-` or an escaped `\-` as `\x2d`, the spelling
+ * escapeStringRegexp uses for hyphens. Every other escape pair is consumed as
+ * a unit and left untouched, so the second backslash of `\\` is never read as
+ * the start of `\-`.
+ */
+const canonicalizeHyphenEscapes = (pattern: string) => {
+    return pattern.replace(/\\[\s\S]|-/g, (token) => {
+        return token === '-' || token === '\\-' ? '\\x2d' : token;
+    });
+}
+
+/**
+ * Turns an escaped regex literal back into plain text: `\xHH` becomes the
+ * character with that code and `\<metachar>` becomes the metacharacter.
+ * Done in a single pass so that an escaped backslash followed by `x41`
+ * (`\\x41`) yields `\x41` rather than being decoded as a hex escape.
+ */
+const unescapeRegexLiteral = (pattern: string) => {
+    return pattern.replace(/\\(?:x[0-9a-fA-F]{2}|[\\.^$|?*+()[\]{}-])/g, (escape) => {
+        const marker = escape.charAt(1);
+        return marker === 'x'
+            ? String.fromCharCode(parseInt(escape.slice(2), 16))
+            : marker;
+    });
+}