diff --git a/Services/dataset.service.js b/Services/dataset.service.js index 4050175..d5c51a6 100644 --- a/Services/dataset.service.js +++ b/Services/dataset.service.js @@ -6,7 +6,10 @@ const mysql = require('../Components/mysql'); const queryGenerator = require('./queryGenerator'); const cacheKeyGenerator = require('./cacheKeyGenerator'); const utils = require('../Utils'); -const { DATASET_RETURN_FIELDS } = require('../Utils/datasetFields.js'); +const { + DATASET_RETURN_FIELDS, + DATASET_SEARCH_RETURN_MAPPING +} = require('../Utils/datasetFields.js'); const FACET_FILTERS = [ 'dataset_source_repo', 'primary_disease', @@ -39,7 +42,7 @@ const search = async (searchText, filters, options) => { searchableText = utils.getSearchableText(sanitizedSearchText); } - query = queryGenerator.getSearchQueryV2(searchableText, filters, options, DATASET_RETURN_FIELDS); + query = queryGenerator.getSearchQueryV2(searchableText, filters, options, Object.keys(DATASET_SEARCH_RETURN_MAPPING)); if (query == null) { return result; @@ -71,32 +74,46 @@ const search = async (searchText, filters, options) => { } let datasets = searchResults.hits.hits.map((ds) => { - if (ds.inner_hits) { - const terms = Object.keys(ds.inner_hits); - const additionalHitsDict = {}; - if (terms.length > 0) { - terms.forEach((t) => { - ds.inner_hits[t].hits.hits.forEach((hit) => { - if (!additionalHitsDict[hit._nested.offset]) { - additionalHitsDict[hit._nested.offset] = {}; - additionalHitsDict[hit._nested.offset].source = hit._source; - additionalHitsDict[hit._nested.offset].highlight = []; - } - additionalHitsDict[hit._nested.offset].highlight = additionalHitsDict[hit._nested.offset].highlight.concat(hit.highlight['additional.attr_set.k']); - }); + // const content = ds._source; + // const highlight = ds.highlight; + + // Rename return fields and highlights according to mappings + const content = Object.keys(DATASET_SEARCH_RETURN_MAPPING).reduce((acc, key) => { + acc[DATASET_SEARCH_RETURN_MAPPING[key]] = 
ds._source[key]; + return acc; + }, {}); + const highlight = Object.keys(DATASET_SEARCH_RETURN_MAPPING).reduce((acc, key) => { + acc[DATASET_SEARCH_RETURN_MAPPING[key]] = (ds.highlight || {})[`${key}.search`]; // ds.highlight can be missing (old code passed it through unchecked); avoid a TypeError + return acc; + }, {}); + + if (!ds.inner_hits) { + return {content: content, highlight: highlight}; + } + + const terms = Object.keys(ds.inner_hits); + const additionalHitsDict = {}; + if (terms.length > 0) { + terms.forEach((t) => { + ds.inner_hits[t].hits.hits.forEach((hit) => { + if (!additionalHitsDict[hit._nested.offset]) { // We currently don't use this code + additionalHitsDict[hit._nested.offset] = {}; + additionalHitsDict[hit._nested.offset].source = hit._source; + additionalHitsDict[hit._nested.offset].highlight = []; + } + additionalHitsDict[hit._nested.offset].highlight = additionalHitsDict[hit._nested.offset].highlight.concat(hit.highlight['additional.attr_set.k']); }); - } - const additionalHits = []; - for (let key in additionalHitsDict) { - const tmp = {}; - tmp.content = additionalHitsDict[key].source; - tmp.highlight = {}; - tmp.highlight['additional.attr_set.k'] = utils.consolidateHighlight(additionalHitsDict[key].highlight); - additionalHits.push(tmp); - } - return {content: ds._source, highlight: ds.highlight, additionalHits: additionalHits}; + }); + } + const additionalHits = []; + for (let key in additionalHitsDict) { // We currently don't use this code + const tmp = {}; + tmp.content = additionalHitsDict[key].source; + tmp.highlight = {}; + tmp.highlight['additional.attr_set.k'] = utils.consolidateHighlight(additionalHitsDict[key].highlight); + additionalHits.push(tmp); } - return {content: ds._source, highlight: ds.highlight}; + return {content: content, highlight: highlight, additionalHits: additionalHits}; }); result.total = searchResults.hits.total.value; result.data = datasets; diff --git a/Utils/datasetFields.js b/Utils/datasetFields.js index 8537e33..44993a6 100644 --- a/Utils/datasetFields.js +++ b/Utils/datasetFields.js @@ -1,10 
+1,13 @@ +// Default sort field for dataset search const DATASET_DEFAULT_SORT_FIELD = 'dataset_title_sort'; -// Maps Dataset natural field names to property names + +// Dataset fields eligible for text search const DATASET_SEARCH_FIELDS = [ // 'dataset_uuid', 'dataset_source_repo.search', 'dataset_title.search', - 'description.search', + // 'description.search', + 'description_anchorless.search', 'experimental_approaches.search', 'dataset_source_id.search', 'dataset_source_url.search', @@ -32,7 +35,11 @@ const DATASET_SEARCH_FIELDS = [ 'related_diseases.search', 'related_terms.search', ]; + +// Fields to highlight in dataset search results const DATASET_HIGHLIGHT_FIELDS = DATASET_SEARCH_FIELDS; + +// Fields to return in dataset search results const DATASET_RETURN_FIELDS = [ // 'dataset_uuid', 'dataset_source_repo', @@ -65,6 +72,23 @@ const DATASET_RETURN_FIELDS = [ 'related_diseases', 'related_terms', ]; + +// Opensearch properties that need to be mapped to different return fields +const DATASET_SEARCH_RETURN_MAPPING_EXCEPTIONS = { + 'description_anchorless': 'description', +}; + +// Opensearch properties mapped to dataset search return fields +const DATASET_SEARCH_RETURN_MAPPING = { + ...DATASET_RETURN_FIELDS.filter(field => !Object.values(DATASET_SEARCH_RETURN_MAPPING_EXCEPTIONS).includes(field)) // Exclude some fields + .reduce((acc, str) => ({ // By default, Opensearch property has same name as return field + ...acc, + [str]: str, + }), {}), + ...DATASET_SEARCH_RETURN_MAPPING_EXCEPTIONS, // Special fields are mapped here +}; + +// Map column names to properties for dataset CSV export const datasetFields = { 'Dataset UUID': 'dataset_uuid', 'Dataset Title': 'dataset_title', @@ -98,5 +122,6 @@ module.exports = { DATASET_SEARCH_FIELDS, DATASET_HIGHLIGHT_FIELDS, DATASET_RETURN_FIELDS, + DATASET_SEARCH_RETURN_MAPPING, datasetFields, };