Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 66 additions & 4 deletions src/fpd/enrichment.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,17 @@
import {hook} from '../hook.js';
import {getRefererInfo, parseDomain} from '../refererDetection.js';
import {findRootDomain} from './rootDomain.js';
import {deepSetValue, deepAccess, getDefinedParams, getWinDimensions, getDocument, getWindowSelf, getWindowTop, mergeDeep} from '../utils.js';
import {
deepSetValue,
deepAccess,
getDefinedParams,
getWinDimensions,
getDocument,
getWindowSelf,
getWindowTop,
mergeDeep,
memoize
} from '../utils.js';
import { getDNT } from '../../libraries/dnt/index.js';
import {config} from '../config.js';
import {getHighEntropySUA, getLowEntropySUA} from './sua.js';
Expand Down Expand Up @@ -31,6 +41,19 @@ export interface FirstPartyDataConfig {
* https://developer.mozilla.org/en-US/docs/Web/API/NavigatorUAData#returning_high_entropy_values
*/
uaHints?: string[]
/**
* Control keyword enrichment - `site.keywords`, `dooh.keywords` and/or `app.keywords`.
*/
keywords?: {
/**
* If true (the default), look for keywords in a keyword meta tag (<meta name="keywords">) and add them to first party data
*/
meta?: boolean,
/**
* If true (the default), look for keywords in a JSON-LD tag (<script type="application/ld+json">) and add them to first party data.
*/
json?: boolean
}
}

declare module '../config' {
Expand Down Expand Up @@ -160,15 +183,54 @@ const ENRICHMENTS = {
}
};

/**
 * Collect keywords declared in JSON-LD tags (`<script type="application/ld+json">`).
 *
 * Every matching script tag in the document of the window handed over by `winFallback`
 * is parsed as JSON. The top-level value may be a single object or an array of objects.
 * For each object, `keywords` is accepted either as a comma-separated string or as an
 * array of such strings; each keyword is trimmed and empty entries are dropped.
 *
 * Tags whose content is not valid JSON are ignored, as are entries that are not objects
 * (e.g. `null`) or that carry no usable `keywords`.
 *
 * The function is wrapped in `memoize`; the spec resets it with `getJsonLdKeywords.clear()`.
 *
 * @returns the keywords found, in document order; duplicates are NOT removed here.
 */
export const getJsonLdKeywords = memoize(() => {
  return winFallback((win) => {
    const scriptTags: any[] = Array.from(win.document.querySelectorAll('script[type="application/ld+json"]'));
    const keywords: string[] = [];

    // Split one comma-separated keyword string and append its non-empty, trimmed parts.
    const collect = (raw: unknown) => {
      if (typeof raw !== 'string') return;
      for (const part of raw.split(',')) {
        const keyword = part.trim();
        if (keyword.length > 0) keywords.push(keyword);
      }
    };

    for (const scriptTag of scriptTags) {
      let jsonData: unknown;
      try {
        jsonData = JSON.parse(scriptTag.textContent);
      } catch (error) {
        // best effort: a malformed tag must not prevent the others from being read
        continue;
      }
      const jsonObjects: any[] = Array.isArray(jsonData) ? jsonData : [jsonData];

      for (const obj of jsonObjects) {
        // `?.` keeps a null entry from aborting the remaining objects of this tag
        const declared = obj?.keywords;
        (Array.isArray(declared) ? declared : [declared]).forEach(collect);
      }
    }
    return keywords;
  })
});

/**
 * Read keywords from the first `<meta name="keywords">` tag of the document belonging to
 * the window handed over by `winFallback`.
 *
 * The tag's `content` is split on commas; each keyword is trimmed and empty entries
 * (from an empty `content`, a trailing comma, or `a,,b`) are dropped, matching what
 * `getJsonLdKeywords` does.
 *
 * The function is wrapped in `memoize`; the spec resets it with `getMetaTagKeywords.clear()`.
 *
 * @returns the keywords found, or `undefined` when the tag or its `content` is missing.
 */
export const getMetaTagKeywords = memoize(() => {
  return winFallback((win) => {
    return win.document.querySelector('meta[name="keywords"]')?.content
      ?.split(',')
      .map(k => k.trim())
      .filter(k => k.length > 0);
  })
});

// Enrichment of properties common across dooh, app and site - will be dropped into whatever
// section is appropriate
function clientEnrichment(ortb2, ri) {
const domain = parseDomain(ri.page, {noLeadingWww: true});
const keywords = winFallback((win) => win.document.querySelector('meta[name=\'keywords\']'))
?.content?.replace?.(/\s/g, '');
const keywords = new Set();
if (config.getConfig('firstPartyData.keywords.meta') ?? true) {
(getMetaTagKeywords() ?? []).forEach(key => keywords.add(key));
}
if (config.getConfig('firstPartyData.keywords.json') ?? true) {
(getJsonLdKeywords() ?? []).forEach(key => keywords.add(key));
}
return removeUndef({
domain,
keywords,
keywords: keywords.size > 0 ? Array.from(keywords.keys()).join(',') : undefined,
publisher: removeUndef({
domain: dep.findRootDomain(domain)
})
Expand Down
107 changes: 88 additions & 19 deletions test/spec/fpd/enrichment_spec.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import {dep, enrichFPD} from '../../../src/fpd/enrichment.js';
import {dep, enrichFPD, getJsonLdKeywords, getMetaTagKeywords} from '../../../src/fpd/enrichment.js';
import {hook} from '../../../src/hook.js';
import {expect} from 'chai/index.mjs';
import {config} from 'src/config.js';
Expand Down Expand Up @@ -34,7 +34,8 @@ describe('FPD enrichment', () => {
language: ''
},
document: {
querySelector: sinon.stub()
querySelector: sinon.stub(),
querySelectorAll: sinon.stub().callsFake((sel) => Array.from(document.querySelectorAll(sel))),
},
screen: {
width: 1,
Expand All @@ -60,15 +61,18 @@ describe('FPD enrichment', () => {

CLIENT_SECTIONS.forEach(section => {
describe(`${section}, when set`, () => {
const ORTB2 = {[section]: {ext: {}}}
let ortb2;
beforeEach(() => {
ortb2 = {[section]: {ext: {}}}
})

it('sets domain and publisher.domain', () => {
const refererInfo = {
page: 'www.example.com',
};
sandbox.stub(dep, 'getRefererInfo').callsFake(() => refererInfo);
sandbox.stub(dep, 'findRootDomain').callsFake((dom) => `publisher.${dom}`);
return fpd(ORTB2).then(ortb2 => {
return fpd(ortb2).then(ortb2 => {
sinon.assert.match(ortb2[section], {
domain: 'example.com',
publisher: {
Expand All @@ -77,31 +81,96 @@ describe('FPD enrichment', () => {
});
});
})

describe('keywords', () => {
let metaTag;
let tagsToRemove;
beforeEach(() => {
metaTag = document.createElement('meta');
metaTag.name = 'keywords';
metaTag.content = 'kw1, kw2';
document.head.appendChild(metaTag);
tagsToRemove = [];
getMetaTagKeywords.clear();
getJsonLdKeywords.clear();
});

afterEach(() => {
document.head.removeChild(metaTag);
});
tagsToRemove.forEach(tag => document.head.removeChild(tag));
})

/**
 * Test helper: append a `<meta name="keywords">` tag to `document.head` whose content is
 * the given keywords joined with commas, and register it in `tagsToRemove`.
 */
function addMetaKeywords(keywords = ['kw1', 'kw2']) {
  const keywordsTag = Object.assign(document.createElement('meta'), {
    name: 'keywords',
    content: keywords.join(',')
  });
  document.head.appendChild(keywordsTag);
  tagsToRemove.push(keywordsTag);
}

/**
 * Test helper: append a JSON-LD `<script type="application/ld+json">` tag to
 * `document.head` describing a schema.org WebPage whose `keywords` is the given list
 * joined with commas, and register it in `tagsToRemove`.
 */
function addJsonKeywords(keywords) {
  const ldScript = document.createElement('script');
  ldScript.type = 'application/ld+json';
  const payload = {
    '@context': 'https://schema.org',
    '@type': 'WebPage',
    keywords: keywords.join(',')
  };
  ldScript.textContent = JSON.stringify(payload);
  document.head.appendChild(ldScript);
  tagsToRemove.push(ldScript);
}

testWindows(() => window, () => {
it(`sets kewwords from meta tag`, () => {
return fpd(ORTB2).then(ortb2 => {
it('should not set keywords if meta and json tags are not present', () => {
return fpd(ortb2).then(ortb2 => {
expect(ortb2[section].hasOwnProperty('keywords')).to.be.false;
});
});

it(`sets keywords from meta tag`, () => {
addMetaKeywords(['kw1', 'kw2']);
return fpd(ortb2).then(ortb2 => {
expect(ortb2[section].keywords).to.eql('kw1,kw2');
});
});
});
});

it('should not set keywords if meta tag is not present', () => {
return fpd(ORTB2).then(ortb2 => {
expect(ortb2[section].hasOwnProperty('keywords')).to.be.false;
it('should not use meta tag if firstPartyData.keywords.meta is false', () => {
config.setConfig({
firstPartyData: {
keywords: {
meta: false
}
}
})
addMetaKeywords(['kw1', 'kw2']);
return fpd(ortb2).then(ortb2 => {
expect(ortb2[section].keywords).to.not.exist;
});
});

it('uses JSON-LD keywords when present', () => {
addJsonKeywords(['json1', 'json2']);
return fpd(ortb2).then(ortb2 => {
// only JSON-LD keywords are present here; the returned format is a comma-joined string (no spaces)
expect(ortb2[section].keywords).to.eql('json1,json2');
});
});

it('should not pick up JSON keywords if firstPartyData.keywords.json is false', () => {
config.setConfig({
firstPartyData: {
keywords: {
json: false
}
}
});
addJsonKeywords(['json1', 'json2']);
return fpd(ortb2).then(ortb2 => {
expect(ortb2[section].keywords).to.not.exist;
})
});

it('should avoid duplicates', () => {
addMetaKeywords(['kw1', ' kw2']);
addJsonKeywords(['kw2 ', 'kw3']);
return fpd(ortb2).then(ortb2 => {
expect(ortb2[section].keywords).to.eql('kw1,kw2,kw3');
})
})
});
});
})
Expand Down