From c69e36d379041af1e9d19e4ba18f6698db97bc54 Mon Sep 17 00:00:00 2001 From: Petrica Nanca Date: Wed, 26 Nov 2025 15:52:05 +0200 Subject: [PATCH 1/3] Add support to also use keywords from application/ld+json --- src/fpd/enrichment.ts | 39 ++++++++++++++++++++++++++++++-- test/spec/fpd/enrichment_spec.js | 28 +++++++++++++++++++++++ 2 files changed, 65 insertions(+), 2 deletions(-) diff --git a/src/fpd/enrichment.ts b/src/fpd/enrichment.ts index b8102caa6ac..069fd0ae9b5 100644 --- a/src/fpd/enrichment.ts +++ b/src/fpd/enrichment.ts @@ -160,12 +160,47 @@ const ENRICHMENTS = { } }; +/** + * Detect keywords also from json/ld if this is present + */ +const detectJsonLdKeywords = () => { + const scriptTags = Array.from(document.querySelectorAll('script[type="application/ld+json"]')); + + let keywords = []; + + for (const scriptTag of scriptTags) { + try { + const jsonData = JSON.parse(scriptTag.textContent); + const jsonObjects = Array.isArray(jsonData) ? jsonData : [jsonData]; + + for (const obj of jsonObjects) { + if (typeof obj.keywords === 'string') { + const parts = obj.keywords.split(',').map(k => k.trim()).filter(k => k.length > 0); + keywords.push(...parts); + } + } + } catch (error) { + // silent + } + } + return keywords.length > 0 ? 
keywords.join(',') : undefined; +}; + // Enrichment of properties common across dooh, app and site - will be dropped into whatever // section is appropriate function clientEnrichment(ortb2, ri) { const domain = parseDomain(ri.page, {noLeadingWww: true}); - const keywords = winFallback((win) => win.document.querySelector('meta[name=\'keywords\']')) - ?.content?.replace?.(/\s/g, ''); + const jsonLdKeywords = detectJsonLdKeywords(); + let keywords; + + if (jsonLdKeywords) { + keywords = jsonLdKeywords; + } else { + keywords = winFallback((win) => + win.document.querySelector('meta[name="keywords"]') + )?.content?.replace?.(/\s/g, ''); + } + return removeUndef({ domain, keywords, diff --git a/test/spec/fpd/enrichment_spec.js b/test/spec/fpd/enrichment_spec.js index 98f655e70e7..a821b1c5a62 100644 --- a/test/spec/fpd/enrichment_spec.js +++ b/test/spec/fpd/enrichment_spec.js @@ -99,6 +99,34 @@ describe('FPD enrichment', () => { }); }); + // ===== New test: prefer JSON-LD keywords over meta ===== + describe('json/ld keywords', () => { + let scriptTag; + beforeEach(() => { + // add a JSON-LD script that contains keywords + scriptTag = document.createElement('script'); + scriptTag.type = 'application/ld+json'; + scriptTag.textContent = JSON.stringify({ + '@context': 'https://schema.org', + '@type': 'WebPage', + keywords: 'json1, json2' + }); + document.head.appendChild(scriptTag); + }); + afterEach(() => { + document.head.removeChild(scriptTag); + }); + + testWindows(() => window, () => { + it('uses JSON-LD keywords when present', () => { + return fpd(ORTB2).then(ortb2 => { + // JSON-LD should be preferred; returned format is a comma-joined string (no spaces) + expect(ortb2[section].keywords).to.eql('json1,json2'); + }); + }); + }); + }); + it('should not set keywords if meta tag is not present', () => { return fpd(ORTB2).then(ortb2 => { expect(ortb2[section].hasOwnProperty('keywords')).to.be.false; From 8a8ef1dedd5e94eac7583cc9c5b76e618ca755fa Mon Sep 17 00:00:00 2001 
From: Petrica Nanca Date: Thu, 27 Nov 2025 11:33:48 +0200 Subject: [PATCH 2/3] Fix tests --- src/fpd/enrichment.ts | 8 ++-- test/spec/fpd/enrichment_spec.js | 67 ++++++++++++++++++-------------- 2 files changed, 42 insertions(+), 33 deletions(-) diff --git a/src/fpd/enrichment.ts b/src/fpd/enrichment.ts index 069fd0ae9b5..a886b89bae7 100644 --- a/src/fpd/enrichment.ts +++ b/src/fpd/enrichment.ts @@ -163,9 +163,9 @@ const ENRICHMENTS = { /** * Detect keywords also from json/ld if this is present */ -const detectJsonLdKeywords = () => { - const scriptTags = Array.from(document.querySelectorAll('script[type="application/ld+json"]')); - +const detectJsonLdKeywords = (win = window) => { + const doc = win.document; + const scriptTags = Array.from(doc.querySelectorAll('script[type="application/ld+json"]')); let keywords = []; for (const scriptTag of scriptTags) { @@ -190,7 +190,7 @@ const detectJsonLdKeywords = () => { // section is appropriate function clientEnrichment(ortb2, ri) { const domain = parseDomain(ri.page, {noLeadingWww: true}); - const jsonLdKeywords = detectJsonLdKeywords(); + const jsonLdKeywords = winFallback((win) => detectJsonLdKeywords(win)); let keywords; if (jsonLdKeywords) { diff --git a/test/spec/fpd/enrichment_spec.js b/test/spec/fpd/enrichment_spec.js index a821b1c5a62..ebbdff72074 100644 --- a/test/spec/fpd/enrichment_spec.js +++ b/test/spec/fpd/enrichment_spec.js @@ -34,7 +34,8 @@ describe('FPD enrichment', () => { language: '' }, document: { - querySelector: sinon.stub() + querySelector: sinon.stub(), + querySelectorAll: sinon.stub().callsFake((sel) => Array.from(document.querySelectorAll(sel))), }, screen: { width: 1, @@ -99,34 +100,6 @@ describe('FPD enrichment', () => { }); }); - // ===== New test: prefer JSON-LD keywords over meta ===== - describe('json/ld keywords', () => { - let scriptTag; - beforeEach(() => { - // add a JSON-LD script that contains keywords - scriptTag = document.createElement('script'); - scriptTag.type = 
'application/ld+json'; - scriptTag.textContent = JSON.stringify({ - '@context': 'https://schema.org', - '@type': 'WebPage', - keywords: 'json1, json2' - }); - document.head.appendChild(scriptTag); - }); - afterEach(() => { - document.head.removeChild(scriptTag); - }); - - testWindows(() => window, () => { - it('uses JSON-LD keywords when present', () => { - return fpd(ORTB2).then(ortb2 => { - // JSON-LD should be preferred; returned format is a comma-joined string (no spaces) - expect(ortb2[section].keywords).to.eql('json1,json2'); - }); - }); - }); - }); - it('should not set keywords if meta tag is not present', () => { return fpd(ORTB2).then(ortb2 => { expect(ortb2[section].hasOwnProperty('keywords')).to.be.false; @@ -135,6 +108,42 @@ describe('FPD enrichment', () => { }) }) + CLIENT_SECTIONS.forEach(section => { + const ORTB2 = {[section]: {ext: {}}}; + // ===== New test: prefer JSON-LD keywords over meta ===== + describe('json/ld keywords', () => { + let scriptTag; + let metaTag; + beforeEach(() => { + metaTag = document.createElement('meta'); + metaTag.name = 'keywords'; + metaTag.content = 'kw1, kw2'; + document.head.appendChild(metaTag); + // add a JSON-LD script that contains keywords + scriptTag = document.createElement('script'); + scriptTag.type = 'application/ld+json'; + scriptTag.textContent = JSON.stringify({ + '@context': 'https://schema.org', + '@type': 'WebPage', + keywords: 'json1, json2' + }); + document.head.appendChild(scriptTag); + }); + afterEach(() => { + document.head.removeChild(scriptTag); + document.head.removeChild(metaTag); + }); + + testWindows(() => window, () => { + it('uses JSON-LD keywords when present', () => { + return fpd(ORTB2).then(ortb2 => { + // JSON-LD should be preferred; returned format is a comma-joined string (no spaces) + expect(ortb2[section].keywords).to.eql('json1,json2'); + }); + }); + }); + }); + }); describe('site', () => { describe('when mixed with app/dooh', () => { beforeEach(() => { From 
feaad1b33923daf2ff3adf9f3048e3125382f172 Mon Sep 17 00:00:00 2001 From: Demetrio Girardi Date: Thu, 4 Dec 2025 10:57:59 -0800 Subject: [PATCH 3/3] add configuration options for keyword lookup, include both meta and json by default --- src/fpd/enrichment.ts | 89 +++++++++++++------- test/spec/fpd/enrichment_spec.js | 140 +++++++++++++++++++------------ 2 files changed, 144 insertions(+), 85 deletions(-) diff --git a/src/fpd/enrichment.ts b/src/fpd/enrichment.ts index a886b89bae7..1ccaaaf6692 100644 --- a/src/fpd/enrichment.ts +++ b/src/fpd/enrichment.ts @@ -1,7 +1,17 @@ import {hook} from '../hook.js'; import {getRefererInfo, parseDomain} from '../refererDetection.js'; import {findRootDomain} from './rootDomain.js'; -import {deepSetValue, deepAccess, getDefinedParams, getWinDimensions, getDocument, getWindowSelf, getWindowTop, mergeDeep} from '../utils.js'; +import { + deepSetValue, + deepAccess, + getDefinedParams, + getWinDimensions, + getDocument, + getWindowSelf, + getWindowTop, + mergeDeep, + memoize +} from '../utils.js'; import { getDNT } from '../../libraries/dnt/index.js'; import {config} from '../config.js'; import {getHighEntropySUA, getLowEntropySUA} from './sua.js'; @@ -31,6 +41,19 @@ export interface FirstPartyDataConfig { * https://developer.mozilla.org/en-US/docs/Web/API/NavigatorUAData#returning_high_entropy_values */ uaHints?: string[] + /** + * Control keyword enrichment - `site.keywords`, `dooh.keywords` and/or `app.keywords`. + */ + keywords?: { + /** + * If true (the default), look for keywords in a keyword meta tag (`<meta name="keywords">`) and add them to first party data + */ + meta?: boolean, + /** + * If true (the default), look for keywords in a JSON-LD tag (