From f3d576e84dce0271f8d7743cc2b9ae6889e743b2 Mon Sep 17 00:00:00 2001 From: Joon Lee Date: Thu, 12 Feb 2026 00:52:39 +0000 Subject: [PATCH 1/4] Made separate list of return fields for dataset search --- Services/dataset.service.js | 7 +++++-- Utils/datasetFields.js | 10 +++++++++- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/Services/dataset.service.js b/Services/dataset.service.js index 4050175..ba877ba 100644 --- a/Services/dataset.service.js +++ b/Services/dataset.service.js @@ -6,7 +6,10 @@ const mysql = require('../Components/mysql'); const queryGenerator = require('./queryGenerator'); const cacheKeyGenerator = require('./cacheKeyGenerator'); const utils = require('../Utils'); -const { DATASET_RETURN_FIELDS } = require('../Utils/datasetFields.js'); +const { + DATASET_RETURN_FIELDS, + DATASET_SEARCH_RETURN_FIELDS +} = require('../Utils/datasetFields.js'); const FACET_FILTERS = [ 'dataset_source_repo', 'primary_disease', @@ -39,7 +42,7 @@ const search = async (searchText, filters, options) => { searchableText = utils.getSearchableText(sanitizedSearchText); } - query = queryGenerator.getSearchQueryV2(searchableText, filters, options, DATASET_RETURN_FIELDS); + query = queryGenerator.getSearchQueryV2(searchableText, filters, options, DATASET_SEARCH_RETURN_FIELDS); if (query == null) { return result; diff --git a/Utils/datasetFields.js b/Utils/datasetFields.js index 8537e33..603a6ca 100644 --- a/Utils/datasetFields.js +++ b/Utils/datasetFields.js @@ -4,7 +4,8 @@ const DATASET_SEARCH_FIELDS = [ // 'dataset_uuid', 'dataset_source_repo.search', 'dataset_title.search', - 'description.search', + // 'description.search', + 'description_anchorless.search', 'experimental_approaches.search', 'dataset_source_id.search', 'dataset_source_url.search', @@ -65,6 +66,12 @@ const DATASET_RETURN_FIELDS = [ 'related_diseases', 'related_terms', ]; +const DATASET_SEARCH_RETURN_FIELDS = [ + ...DATASET_RETURN_FIELDS.filter(str => ![ + 'description', + ].includes(str)), 
+ 'description_anchorless' +]; const datasetFields = { 'Dataset UUID': 'dataset_uuid', 'Dataset Title': 'dataset_title', @@ -98,5 +105,6 @@ module.exports = { DATASET_SEARCH_FIELDS, DATASET_HIGHLIGHT_FIELDS, DATASET_RETURN_FIELDS, + DATASET_SEARCH_RETURN_FIELDS, datasetFields, }; From 9c400342ad3d1270ab935d279ece46ea62affbfb Mon Sep 17 00:00:00 2001 From: Joon Lee Date: Thu, 12 Feb 2026 22:07:28 +0000 Subject: [PATCH 2/4] Created mapping from Opensearch properties to dataset search return fields --- Services/dataset.service.js | 66 ++++++++++++++++++++++--------------- Utils/datasetFields.js | 29 +++++++++++----- 2 files changed, 61 insertions(+), 34 deletions(-) diff --git a/Services/dataset.service.js b/Services/dataset.service.js index ba877ba..d5c51a6 100644 --- a/Services/dataset.service.js +++ b/Services/dataset.service.js @@ -8,7 +8,7 @@ const cacheKeyGenerator = require('./cacheKeyGenerator'); const utils = require('../Utils'); const { DATASET_RETURN_FIELDS, - DATASET_SEARCH_RETURN_FIELDS + DATASET_SEARCH_RETURN_MAPPING } = require('../Utils/datasetFields.js'); const FACET_FILTERS = [ 'dataset_source_repo', @@ -42,7 +42,7 @@ const search = async (searchText, filters, options) => { searchableText = utils.getSearchableText(sanitizedSearchText); } - query = queryGenerator.getSearchQueryV2(searchableText, filters, options, DATASET_SEARCH_RETURN_FIELDS); + query = queryGenerator.getSearchQueryV2(searchableText, filters, options, Object.keys(DATASET_SEARCH_RETURN_MAPPING)); if (query == null) { return result; @@ -74,32 +74,46 @@ const search = async (searchText, filters, options) => { } let datasets = searchResults.hits.hits.map((ds) => { - if (ds.inner_hits) { - const terms = Object.keys(ds.inner_hits); - const additionalHitsDict = {}; - if (terms.length > 0) { - terms.forEach((t) => { - ds.inner_hits[t].hits.hits.forEach((hit) => { - if (!additionalHitsDict[hit._nested.offset]) { - additionalHitsDict[hit._nested.offset] = {}; - 
additionalHitsDict[hit._nested.offset].source = hit._source; - additionalHitsDict[hit._nested.offset].highlight = []; - } - additionalHitsDict[hit._nested.offset].highlight = additionalHitsDict[hit._nested.offset].highlight.concat(hit.highlight['additional.attr_set.k']); - }); + // const content = ds._source; + // const highlight = ds.highlight; + + // Rename return fields and highlights according to mappings + const content = Object.keys(DATASET_SEARCH_RETURN_MAPPING).reduce((acc, key) => { + acc[DATASET_SEARCH_RETURN_MAPPING[key]] = ds._source[key]; + return acc; + }, {}); + const highlight = Object.keys(DATASET_SEARCH_RETURN_MAPPING).reduce((acc, key) => { + acc[DATASET_SEARCH_RETURN_MAPPING[key]] = (ds.highlight || {})[`${key}.search`]; // Hits without highlighted fragments carry no `highlight` object; guard so they do not throw + return acc; + }, {}); + + if (!ds.inner_hits) { + return {content: content, highlight: highlight}; + } + + const terms = Object.keys(ds.inner_hits); + const additionalHitsDict = {}; + if (terms.length > 0) { + terms.forEach((t) => { + ds.inner_hits[t].hits.hits.forEach((hit) => { + if (!additionalHitsDict[hit._nested.offset]) { // We currently don't use this code + additionalHitsDict[hit._nested.offset] = {}; + additionalHitsDict[hit._nested.offset].source = hit._source; + additionalHitsDict[hit._nested.offset].highlight = []; + } + additionalHitsDict[hit._nested.offset].highlight = additionalHitsDict[hit._nested.offset].highlight.concat(hit.highlight['additional.attr_set.k']); }); - } - const additionalHits = []; - for (let key in additionalHitsDict) { - const tmp = {}; - tmp.content = additionalHitsDict[key].source; - tmp.highlight = {}; - tmp.highlight['additional.attr_set.k'] = utils.consolidateHighlight(additionalHitsDict[key].highlight); - additionalHits.push(tmp); - } - return {content: ds._source, highlight: ds.highlight, additionalHits: additionalHits}; + }); + } + const additionalHits = []; + for (let key in additionalHitsDict) { // We currently don't use this code + const tmp = {}; + tmp.content = 
additionalHitsDict[key].source; + tmp.highlight = {}; + tmp.highlight['additional.attr_set.k'] = utils.consolidateHighlight(additionalHitsDict[key].highlight); + additionalHits.push(tmp); } - return {content: ds._source, highlight: ds.highlight}; + return {content: content, highlight: highlight, additionalHits: additionalHits}; }); result.total = searchResults.hits.total.value; result.data = datasets; diff --git a/Utils/datasetFields.js b/Utils/datasetFields.js index 603a6ca..1241ffc 100644 --- a/Utils/datasetFields.js +++ b/Utils/datasetFields.js @@ -1,5 +1,7 @@ +// Default sort field for dataset search const DATASET_DEFAULT_SORT_FIELD = 'dataset_title_sort'; -// Maps Dataset natural field names to property names + +// Dataset fields eligible for text search const DATASET_SEARCH_FIELDS = [ // 'dataset_uuid', 'dataset_source_repo.search', @@ -33,7 +35,11 @@ const DATASET_SEARCH_FIELDS = [ 'related_diseases.search', 'related_terms.search', ]; + +// Fields to highlight in dataset search results const DATASET_HIGHLIGHT_FIELDS = DATASET_SEARCH_FIELDS; + +// Fields to return in dataset search results const DATASET_RETURN_FIELDS = [ // 'dataset_uuid', 'dataset_source_repo', @@ -66,12 +72,19 @@ const DATASET_RETURN_FIELDS = [ 'related_diseases', 'related_terms', ]; -const DATASET_SEARCH_RETURN_FIELDS = [ - ...DATASET_RETURN_FIELDS.filter(str => ![ - 'description', - ].includes(str)), - 'description_anchorless' -]; + +// Opensearch properties mapped to dataset search return fields +const DATASET_SEARCH_RETURN_MAPPING = { + ...DATASET_RETURN_FIELDS.filter(field => ![ // Exclude some fields + 'description' + ].includes(field)).reduce((acc, str) => ({ // By default, Opensearch property has same name as return field + ...acc, + [str]: str, + }), {}), + 'description_anchorless': 'description', // Special fields are mapped here +}; + +// Map column names to properties for dataset CSV export const datasetFields = { 'Dataset UUID': 'dataset_uuid', 'Dataset Title': 'dataset_title', 
@@ -105,6 +118,6 @@ module.exports = { DATASET_SEARCH_FIELDS, DATASET_HIGHLIGHT_FIELDS, DATASET_RETURN_FIELDS, - DATASET_SEARCH_RETURN_FIELDS, + DATASET_SEARCH_RETURN_MAPPING, datasetFields, }; From 7a94f47c09acbe0d0f16a3d2387a0d3ea59ad203 Mon Sep 17 00:00:00 2001 From: Joon Lee Date: Fri, 13 Feb 2026 12:57:30 -0500 Subject: [PATCH 3/4] Isolated dataset search return mappings --- Utils/datasetFields.js | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Utils/datasetFields.js b/Utils/datasetFields.js index 1241ffc..f97a6d9 100644 --- a/Utils/datasetFields.js +++ b/Utils/datasetFields.js @@ -74,14 +74,16 @@ const DATASET_RETURN_FIELDS = [ ]; // Opensearch properties mapped to dataset search return fields +const DATASET_SEARCH_RETURN_MAPPING_EXCEPTIONS = { + 'description_anchorless': 'description', +} const DATASET_SEARCH_RETURN_MAPPING = { - ...DATASET_RETURN_FIELDS.filter(field => ![ // Exclude some fields - 'description' - ].includes(field)).reduce((acc, str) => ({ // By default, Opensearch property has same name as return field + ...DATASET_RETURN_FIELDS.filter(field => !Object.values(DATASET_SEARCH_RETURN_MAPPING_EXCEPTIONS).includes(field)) // Exclude some fields + .reduce((acc, str) => ({ // By default, Opensearch property has same name as return field ...acc, [str]: str, }), {}), - 'description_anchorless': 'description', // Special fields are mapped here + ...DATASET_SEARCH_RETURN_MAPPING_EXCEPTIONS, // Special fields are mapped here }; // Map column names to properties for dataset CSV export From 39c3e9f01261ebe99d886fca4a14b672c76eca95 Mon Sep 17 00:00:00 2001 From: Joon Lee Date: Fri, 13 Feb 2026 12:58:23 -0500 Subject: [PATCH 4/4] Cleanup --- Utils/datasetFields.js | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Utils/datasetFields.js b/Utils/datasetFields.js index f97a6d9..44993a6 100644 --- a/Utils/datasetFields.js +++ b/Utils/datasetFields.js @@ -73,10 +73,12 @@ const DATASET_RETURN_FIELDS = [ 
'related_terms', ]; -// Opensearch properties mapped to dataset search return fields +// Opensearch properties that need to be mapped to different return fields const DATASET_SEARCH_RETURN_MAPPING_EXCEPTIONS = { 'description_anchorless': 'description', -} +}; + +// Opensearch properties mapped to dataset search return fields const DATASET_SEARCH_RETURN_MAPPING = { ...DATASET_RETURN_FIELDS.filter(field => !Object.values(DATASET_SEARCH_RETURN_MAPPING_EXCEPTIONS).includes(field)) // Exclude some fields .reduce((acc, str) => ({ // By default, Opensearch property has same name as return field