Skip to content

Commit 879d3df

Browse files
perf: optimize expandedRefIdsForVar to better handle large dimensions (#800)
Fixes #797
1 parent 82066e6 commit 879d3df

3 files changed

Lines changed: 287 additions & 47 deletions

File tree

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
import { indexNamesForSubscript } from '../_shared/subscript.js'
2+
3+
/**
4+
* Given the array of LHS subscript/dimension IDs (already mapped to correspond to the
5+
* RHS positions) and a set of RHS variable instances, return the refIds of the RHS
6+
* instances whose subscript combinations overlap with the LHS combinations at every
7+
* position.
8+
*
9+
* Conceptually, this is equivalent to checking whether any combination in the LHS
10+
* cartesian product matches any combination in a given RHS instance's cartesian
11+
* product. But because positions in a cartesian product are independent, we only
12+
* need to check that each position has at least one index in common between the
13+
* LHS and RHS index sets. This reduces the complexity of the check from
14+
* O(product of dimension sizes) to O(sum of dimension sizes).
15+
*
16+
* For example, suppose DimA={A1,A2} and DimB={B1,B2}, the LHS accesses `[DimA,DimB]`,
17+
* and we want to check whether it overlaps with a RHS variable instance `_x[_dima,_b1]`.
18+
* The full cartesian products look like this:
19+
* LHS combos: { (A1,B1), (A1,B2), (A2,B1), (A2,B2) }
20+
* RHS combos: { (A1,B1), (A2,B1) }
21+
* The two sets share (A1,B1) and (A2,B1), so there is a match. But we don't need
22+
* to enumerate either set — we can check each position independently:
23+
* position 0: LHS {A1,A2} ∩ RHS {A1,A2} = {A1,A2} (non-empty)
24+
* position 1: LHS {B1,B2} ∩ RHS {B1} = {B1} (non-empty)
25+
* Every position has at least one index in common, so we know a full-combo match
26+
* must exist (pick any shared index at each position, e.g., (A2,B1), and it is in
27+
* both products). Conversely, if any position has an empty intersection, no full
28+
* combo can match — for example, if instead the LHS accessed `[A1,DimB]` (a specific
29+
* index at position 0) and the RHS instance were `_x[_a2,_dimb]`, position 0 would
30+
* give LHS {A1} ∩ RHS {A2} = ∅ and we could stop immediately.
31+
*
32+
* @param {string[]} mappedLhsSubIds The array of LHS subscript/dimension IDs at each
33+
* position, mapped to correspond to the RHS variable reference positions.
34+
* @param {{ subscripts: string[], refId: string }[]} rhsVarInstances The array of RHS
35+
* variable instances to filter.
36+
* @returns {string[]} A sorted array of refIds for the RHS instances whose subscripts
37+
* overlap with the LHS at every position.
38+
*/
39+
export function matchingRhsRefIds(mappedLhsSubIds, rhsVarInstances) {
40+
// Build a Set of LHS index names for each position for quick lookup
41+
const lhsIndexSets = mappedLhsSubIds.map(id => new Set(indexNamesForSubscript(id)))
42+
43+
// For each RHS variable instance, check if there is overlap at every subscript
44+
// position between the LHS and RHS index sets
45+
const rhsRefIds = []
46+
for (const rhsVarInstance of rhsVarInstances) {
47+
let matches = true
48+
for (let i = 0; i < rhsVarInstance.subscripts.length; i++) {
49+
const rhsIndices = indexNamesForSubscript(rhsVarInstance.subscripts[i])
50+
let hasOverlap = false
51+
for (const id of rhsIndices) {
52+
if (lhsIndexSets[i].has(id)) {
53+
hasOverlap = true
54+
break
55+
}
56+
}
57+
if (!hasOverlap) {
58+
matches = false
59+
break
60+
}
61+
}
62+
if (matches) {
63+
rhsRefIds.push(rhsVarInstance.refId)
64+
}
65+
}
66+
67+
// Return the sorted array of relevant refIds
68+
// TODO: Sorting is not essential here, but the legacy reader sorted so we will keep
69+
// that behavior now to avoid invalidating tests. Later we should remove this `sort`
70+
// call and update the tests accordingly.
71+
return rhsRefIds.sort()
72+
}
Lines changed: 187 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,187 @@
1+
import { describe, expect, it } from 'vitest'
2+
3+
import { resetSubscriptsAndDimensions } from '../_shared/subscript'
4+
5+
import Model from './model'
6+
import { matchingRhsRefIds } from './read-equations-expand'
7+
8+
import { parseInlineVensimModel } from '../_tests/test-support'
9+
10+
/**
11+
* Set up subscript/dimension state from an inline Vensim model text containing
12+
* only subscript range definitions. This is the minimal setup needed for
13+
* `indexNamesForSubscript` to work in the tests below.
14+
*/
15+
function setupSubscripts(modelText: string): void {
16+
// XXX: This is needed because subscripts are held in module-level storage
17+
resetSubscriptsAndDimensions()
18+
const parsedModel = parseInlineVensimModel(modelText)
19+
Model.read(parsedModel, /*spec=*/ {}, /*extData=*/ undefined, /*directData=*/ undefined, /*modelDir=*/ undefined, {
20+
stopAfterResolveSubscripts: true
21+
})
22+
}
23+
24+
/**
25+
* Build a minimal mock RHS variable instance with the given variable ID and subscripts.
26+
* The refId is synthesized from the two, e.g., `('_x', ['_a1', '_b1'])` produces
27+
* `'_x[_a1,_b1]'`.
28+
*/
29+
function rhsInstance(varId: string, subscripts: string[]): { subscripts: string[]; refId: string } {
30+
const refId = `${varId}[${subscripts.join(',')}]`
31+
return { subscripts, refId }
32+
}
33+
34+
describe('matchingRhsRefIds', () => {
35+
it('should return all instances when LHS and RHS are both apply-to-all on a single dimension', () => {
36+
setupSubscripts(`DimA: A1, A2, A3 ~~|`)
37+
38+
// The LHS references `_dima` at a single position; the RHS has two instances that
39+
// together cover all of `_dima`, so both should be returned.
40+
const result = matchingRhsRefIds(
41+
['_dima'],
42+
[rhsInstance('_x', ['_a1']), rhsInstance('_x', ['_a2']), rhsInstance('_x', ['_a3'])]
43+
)
44+
expect(result).toEqual(['_x[_a1]', '_x[_a2]', '_x[_a3]'])
45+
})
46+
47+
it('should return only the matching instance when the LHS references a specific index', () => {
48+
setupSubscripts(`DimA: A1, A2, A3 ~~|`)
49+
50+
// The LHS accesses only `_a2`, so only that RHS instance should be returned.
51+
const result = matchingRhsRefIds(
52+
['_a2'],
53+
[rhsInstance('_x', ['_a1']), rhsInstance('_x', ['_a2']), rhsInstance('_x', ['_a3'])]
54+
)
55+
expect(result).toEqual(['_x[_a2]'])
56+
})
57+
58+
it('should return an empty array when no RHS instance overlaps with the LHS', () => {
59+
setupSubscripts(`DimA: A1, A2, A3 ~~|`)
60+
61+
const result = matchingRhsRefIds(['_a1'], [rhsInstance('_x', ['_a2']), rhsInstance('_x', ['_a3'])])
62+
expect(result).toEqual([])
63+
})
64+
65+
it('should return an empty array when there are no RHS instances', () => {
66+
setupSubscripts(`DimA: A1, A2 ~~|`)
67+
68+
const result = matchingRhsRefIds(['_dima'], [])
69+
expect(result).toEqual([])
70+
})
71+
72+
it('should match at every subscript position (multi-dimensional)', () => {
73+
setupSubscripts(`
74+
DimA: A1, A2 ~~|
75+
DimB: B1, B2 ~~|
76+
DimC: C1, C2 ~~|
77+
`)
78+
79+
// The LHS is `_dima,_c2,_dimb` (mimicking a `y[DimA,DimB,DimC] :EXCEPT: [DimA,DimB,C1]`
80+
// situation where the LHS is separated and the separated instance covers only `_c2`).
81+
// Only the RHS instance at `_c2` should match.
82+
const result = matchingRhsRefIds(
83+
['_dima', '_c2', '_dimb'],
84+
[rhsInstance('_x', ['_dima', '_c1', '_dimb']), rhsInstance('_x', ['_dima', '_c2', '_dimb'])]
85+
)
86+
expect(result).toEqual(['_x[_dima,_c2,_dimb]'])
87+
})
88+
89+
it('should require overlap at every position (no single mismatch)', () => {
90+
setupSubscripts(`
91+
DimA: A1, A2 ~~|
92+
DimB: B1, B2 ~~|
93+
`)
94+
95+
// The LHS accesses `_a1,_b1`; the RHS instances have mismatches in at least one
96+
// position, so none should be returned.
97+
const result = matchingRhsRefIds(
98+
['_a1', '_b1'],
99+
[rhsInstance('_x', ['_a2', '_b1']), rhsInstance('_x', ['_a1', '_b2']), rhsInstance('_x', ['_a2', '_b2'])]
100+
)
101+
expect(result).toEqual([])
102+
})
103+
104+
it('should return multiple instances when a dimension position overlaps with each', () => {
105+
setupSubscripts(`
106+
DimA: A1, A2 ~~|
107+
DimB: B1, B2, B3 ~~|
108+
`)
109+
110+
// LHS is fully apply-to-all; RHS is separated on DimB, so all three instances match.
111+
const result = matchingRhsRefIds(
112+
['_dima', '_dimb'],
113+
[rhsInstance('_x', ['_dima', '_b1']), rhsInstance('_x', ['_dima', '_b2']), rhsInstance('_x', ['_dima', '_b3'])]
114+
)
115+
expect(result).toEqual(['_x[_dima,_b1]', '_x[_dima,_b2]', '_x[_dima,_b3]'])
116+
})
117+
118+
it('should handle subdimensions on the LHS', () => {
119+
setupSubscripts(`
120+
DimA: A1, A2, A3, A4 ~~|
121+
SubA: A2, A3 ~~|
122+
`)
123+
124+
// The LHS accesses the subdimension `_suba` (which covers only `_a2` and `_a3`),
125+
// so only the RHS instances at those indices should be returned.
126+
const result = matchingRhsRefIds(
127+
['_suba'],
128+
[rhsInstance('_x', ['_a1']), rhsInstance('_x', ['_a2']), rhsInstance('_x', ['_a3']), rhsInstance('_x', ['_a4'])]
129+
)
130+
expect(result).toEqual(['_x[_a2]', '_x[_a3]'])
131+
})
132+
133+
it('should handle subdimensions on the RHS', () => {
134+
setupSubscripts(`
135+
DimA: A1, A2, A3, A4 ~~|
136+
SubA: A2, A3 ~~|
137+
`)
138+
139+
// The LHS accesses `_a3` (a specific index); only the RHS instance whose subdimension
140+
// contains `_a3` should be returned.
141+
const result = matchingRhsRefIds(
142+
['_a3'],
143+
[rhsInstance('_x', ['_a1']), rhsInstance('_x', ['_suba']), rhsInstance('_x', ['_a4'])]
144+
)
145+
expect(result).toEqual(['_x[_suba]'])
146+
})
147+
148+
it('should return refIds in sorted order', () => {
149+
setupSubscripts(`DimA: A1, A2, A3 ~~|`)
150+
151+
// Provide RHS instances in an unsorted order; the result should be sorted.
152+
const result = matchingRhsRefIds(
153+
['_dima'],
154+
[rhsInstance('_x', ['_a3']), rhsInstance('_x', ['_a1']), rhsInstance('_x', ['_a2'])]
155+
)
156+
expect(result).toEqual(['_x[_a1]', '_x[_a2]', '_x[_a3]'])
157+
})
158+
159+
it('should efficiently handle large dimension sizes', () => {
160+
// Generate a model with three large dimensions, similar in spirit to the original
161+
// performance bug (36 × 56 × 22). The old implementation was O(product of sizes)
162+
// which would make this test slow; the new implementation is O(sum of sizes).
163+
const dimASize = 40
164+
const dimBSize = 60
165+
const dimCSize = 25
166+
const dimASubs = Array.from({ length: dimASize }, (_, i) => `A${i + 1}`)
167+
const dimBSubs = Array.from({ length: dimBSize }, (_, i) => `B${i + 1}`)
168+
const dimCSubs = Array.from({ length: dimCSize }, (_, i) => `C${i + 1}`)
169+
setupSubscripts(`
170+
DimA: ${dimASubs.join(', ')} ~~|
171+
DimB: ${dimBSubs.join(', ')} ~~|
172+
DimC: ${dimCSubs.join(', ')} ~~|
173+
`)
174+
175+
// One RHS instance covering the entire cartesian product, plus a decoy instance
176+
// that has a mismatch at a single position.
177+
const rhsVarInstances = [
178+
rhsInstance('_x', ['_dima', '_dimb', '_dimc']),
179+
rhsInstance('_x', ['_dima', '_dimb', '_c1'])
180+
]
181+
182+
// LHS accesses `_c2`, so only the first instance should match (the decoy has
183+
// only `_c1` at position 2).
184+
const result = matchingRhsRefIds(['_dima', '_dimb', '_c2'], rhsVarInstances)
185+
expect(result).toEqual(['_x[_dima,_dimb,_dimc]'])
186+
})
187+
})

packages/compile/src/model/read-equations.js

Lines changed: 28 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
import { parseVensimModel } from '@sdeverywhere/parse'
22

3-
import { canonicalName, cartesianProductOf, newDepreciationVarName, newFixedDelayVarName } from '../_shared/helpers.js'
3+
import { canonicalName, newDepreciationVarName, newFixedDelayVarName } from '../_shared/helpers.js'
44

5-
import { hasMapping, indexNamesForSubscript, isDimension, isIndex, sub } from '../_shared/subscript.js'
5+
import { hasMapping, isDimension, isIndex, sub } from '../_shared/subscript.js'
66

77
import Model from './model.js'
88
import { generateDelayVariables } from './read-equation-fn-delay.js'
@@ -11,6 +11,7 @@ import { generateNpvVariables } from './read-equation-fn-npv.js'
1111
import { generateSmoothVariables } from './read-equation-fn-smooth.js'
1212
import { generateTrendVariables } from './read-equation-fn-trend.js'
1313
import { generateLookup } from './read-equation-fn-with-lookup.js'
14+
import { matchingRhsRefIds } from './read-equations-expand.js'
1415
import { readVariables } from './read-variables.js'
1516

1617
class Context {
@@ -967,21 +968,26 @@ function expandedRefIdsForVar(lhsVariable, rhsBaseRefId, rhsSubIds) {
967968
// it must be non-apply-to-all. The goal now is to determine which instances (refIds) are
968969
// relevant for the given `lhsVariable` context.
969970
//
970-
// First, get all combinations of the LHS subscripts that map to the subscripts/dimensions
971-
// in the RHS variable reference. For example:
971+
// First, determine the set of LHS subscript indices accessed at each position of the RHS
972+
// variable reference. For example:
972973
// y[DimA,DimB,DimC] :EXCEPT: [DimA,DimB,C1] = x[DimA,DimC,DimB]
973974
// In this case the `DimC` on the RHS is only "accessed" by `C2` from the LHS, so we would
974-
// build an array of strings representing the possible subset of combinations, like this:
975-
// _a1,_c2,_b1
976-
// _a1,_c2,_b2
977-
// _a2,_c2,_b1
978-
// _a2,_c2,_b2
975+
// build a per-position set of accessed indices, like this:
976+
// position 0 (DimA on RHS): { _a1, _a2 }
977+
// position 1 (DimC on RHS): { _c2 }
978+
// position 2 (DimB on RHS): { _b1, _b2 }
979979
//
980-
// Then, for each RHS variable instance:
981-
// - get all combinations of RHS subscripts that can be accepted by that RHS instance
982-
// (build an array of strings, e.g., ['_a1,_c1,_b1', '_a1,_c1,_b1', ...])
983-
// - see if any of the LHS subscript combos match any of the RHS subscript combos; if
984-
// so, then add the RHS `refId` to the array of variables referenced by the LHS
980+
// Then, for each RHS variable instance, check whether every subscript position has at
981+
// least one index in common between the LHS set and the indices that the RHS instance
982+
// accepts at that position. If so, add the RHS `refId` to the array of variables
983+
// referenced by the LHS.
984+
//
985+
// Conceptually this is equivalent to checking whether any combination in the LHS
986+
// cartesian product matches any combination in the RHS cartesian product, but we can
987+
// avoid computing the cartesian products explicitly because positions in a cartesian
988+
// product are independent: if every position has at least one element in common, then
989+
// there exists a full combination that matches. This reduces the complexity from
990+
// O(product of dimension sizes) to O(sum of dimension sizes).
985991
//
986992
// In the following examples, suppose the referenced RHS variable is non-apply-to-all and
987993
// has two instances:
@@ -1012,43 +1018,18 @@ function expandedRefIdsForVar(lhsVariable, rhsBaseRefId, rhsSubIds) {
10121018
// _x[_dima,_c2,_dimb]
10131019
//
10141020

1015-
// Step 1: Get all combinations of the LHS subscripts that map to the subscripts/dimensions
1016-
// in the RHS variable reference. Here `rhsSubIds` is the array of parsed subscript/dimension
1017-
// IDs that appear in the RHS variable reference. We figure out which LHS subscripts/dimensions
1018-
// are relevant for the RHS subscripts/dimensions given the context of the LHS variable (which
1019-
// may have been separated/expanded).
1021+
// Step 1: Resolve the LHS subscript/dimension at each position of the RHS variable
1022+
// reference. Here `rhsSubIds` is the array of parsed subscript/dimension IDs that
1023+
// appear in the RHS variable reference. We figure out which LHS subscripts/dimensions
1024+
// are relevant for the RHS subscripts/dimensions given the context of the LHS variable
1025+
// (which may have been separated/expanded).
10201026
const lhsSubRefs = lhsVariable.parsedEqn.lhs.varDef.subscriptRefs
10211027
const lhsSubIds = lhsSubRefs?.map(subRef => subRef.subId) || []
10221028
const mappedLhsSubIds = rhsSubIds.map(rhsSubId => resolveRhsSubOrDim(lhsVariable, lhsSubIds, rhsSubId))
10231029

1024-
// Step 2: Build an array of mapped LHS subscript combos (one string of comma-separated
1025-
// subscript IDs for each combo)
1026-
const mappedLhsSubIdsPerPosition = mappedLhsSubIds.map(indexNamesForSubscript)
1027-
const mappedLhsCombos = cartesianProductOf(mappedLhsSubIdsPerPosition).map(combo => combo.join(','))
1028-
1029-
// Step 3: For each RHS variable instance, get all combinations of RHS subscripts that can
1030-
// be accepted by that particular RHS instance
1031-
const rhsRefIds = []
1032-
for (const rhsVarInstance of rhsVarInstances) {
1033-
// Build RHS subscript combos (one string of comma-separated subscript IDs for each combo)
1034-
const rhsVarInstanceSubIdsPerPosition = rhsVarInstance.subscripts.map(indexNamesForSubscript)
1035-
const rhsCombos = cartesianProductOf(rhsVarInstanceSubIdsPerPosition).map(combo => combo.join(','))
1036-
1037-
// See if any of the LHS subscript combos match any of the RHS subscript combos
1038-
for (const lhsCombo of mappedLhsCombos) {
1039-
if (rhsCombos.includes(lhsCombo)) {
1040-
// There was a match; add the refId and break out of the inner loop
1041-
rhsRefIds.push(rhsVarInstance.refId)
1042-
break
1043-
}
1044-
}
1045-
}
1046-
1047-
// Return the sorted array of relevant refIds
1048-
// TODO: Sorting is not essential here, but the legacy reader sorted so we will keep that
1049-
// behavior now to avoid invalidating tests. Later we should remove this `sort` call and
1050-
// update the tests accordingly.
1051-
return rhsRefIds.sort()
1030+
// Step 2: Find the RHS variable instances whose subscripts overlap with the LHS
1031+
// subscripts at every position
1032+
return matchingRhsRefIds(mappedLhsSubIds, rhsVarInstances)
10521033
}
10531034

10541035
/**

0 commit comments

Comments
 (0)