Skip to content

Commit 5538a4f

Browse files
fix: add new implementation of read and code gen phases based on parse package (#413)
Fixes #412
1 parent e18503d commit 5538a4f

32 files changed

Lines changed: 4365 additions & 188 deletions

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# Project files
22
models/**/build*
33
models/**/output*
4+
models/**/compare
45
app_spec.json
56
removals.txt
67
.DS_Store

packages/compile/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
"ci:build": "run-s lint prettier:check type-check test:ci"
1717
},
1818
"dependencies": {
19+
"@sdeverywhere/parse": "^0.1.0",
1920
"antlr4": "4.12.0",
2021
"antlr4-vensim": "0.6.2",
2122
"bufx": "^1.0.5",

packages/compile/src/_tests/test-support.ts

Lines changed: 40 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,25 @@
1+
import { readFileSync } from 'fs'
12
import { resolve } from 'path'
23
import { fileURLToPath } from 'url'
34

5+
import type { Model } from '@sdeverywhere/parse'
6+
47
import type { VensimModelParseTree } from '../parse/parser'
5-
import { parseModel } from '../parse/parser'
68
import { preprocessModel } from '../preprocess/preprocessor'
79
import { canonicalName } from '../_shared/helpers'
10+
import { parseModel } from '../parse-and-generate'
11+
12+
/**
 * A parsed model whose `root` is a `Model` as declared by the `@sdeverywhere/parse` package.
 */
export interface ParsedVensimModel {
13+
// Discriminant tag that distinguishes this member of the `ParsedModel` union.
kind: 'vensim'
14+
root: Model
15+
}
16+
17+
/**
 * A parsed model whose `parseTree` is a `VensimModelParseTree` as declared by `../parse/parser`.
 */
export interface LegacyParsedVensimModel {
18+
// Discriminant tag that distinguishes this member of the `ParsedModel` union.
kind: 'vensim-legacy'
19+
parseTree: VensimModelParseTree
20+
}
21+
22+
/** Either form of parsed model; check the `kind` property to tell the two apart. */
export type ParsedModel = ParsedVensimModel | LegacyParsedVensimModel
823

924
export type DimModelName = string
1025
export type DimCName = string
@@ -49,6 +64,7 @@ export type VariableType = 'const' | 'aux' | 'level' | 'initial' | 'lookup' | 'd
4964
export interface Variable {
5065
modelLHS: string // 'Target Capacity'
5166
modelFormula: string // 'ACTIVE INITIAL(Capacity*Utilization Adjustment,Initial Target Capacity)'
67+
origModelFormula?: string // 'IF THEN ELSE(cond, x, y)'
5268
varName: string // '_target_capacity'
5369
subscripts: string[] // TODO: sub type
5470
exceptSubscripts: string[] // TODO: This is only used during parsing, doesn't need to be exposed
@@ -140,14 +156,31 @@ export function sampleModelDir(modelName: string): string {
140156
return resolve(__dirname, '..', '..', '..', '..', 'models', modelName)
141157
}
142158

143-
export function parseVensimModel(modelName: string): VensimModelParseTree {
144-
const modelFile = resolve(sampleModelDir(modelName), `${modelName}.mdl`)
145-
const preprocessed = preprocessModel(modelFile, undefined, 'genc', false)
146-
return parseModel(preprocessed)
159+
/**
 * Parse the sample model with the given name.
 *
 * The model is expected at `<sample model dir>/<modelName>.mdl`.
 *
 * @param modelName The name of the sample model (also the base name of the `.mdl` file).
 * @returns The parsed model returned by `parseModel`.
 */
export function parseVensimModel(modelName: string): ParsedModel {
  const dir = sampleModelDir(modelName)
  const mdlPath = resolve(dir, `${modelName}.mdl`)

  // The new parser implicitly runs the preprocessor on the input model text, so when it is
  // selected we hand it the raw file content; otherwise we run the legacy preprocessor here.
  // (We should make this configurable so that the preprocess step in `parse-and-generate.js`
  // can be skipped when the input model text has already been run through a preprocessor.)
  const useNewParser = process.env.SDE_NONPUBLIC_USE_NEW_PARSE === '1'
  const mdlText: string = useNewParser
    ? readFileSync(mdlPath, 'utf8')
    : preprocessModel(mdlPath, undefined, 'genc', false)

  // The preprocessed definitions are currently sorted alphabetically for compatibility with
  // the legacy preprocessor. Once the legacy code is dropped, this step could be removed and
  // the tests updated to use the original order.
  const sortDefinitions = true
  return parseModel(mdlText, dir, sortDefinitions)
}
148177

149-
export function parseInlineVensimModel(mdl: string): VensimModelParseTree {
150-
return parseModel(mdl)
178+
/**
 * Parse model text that is provided inline by a test.
 *
 * @param mdlContent The model text to parse.
 * @param modelDir Optional model directory; passed through unchanged to `parseModel`.
 * @returns The parsed model returned by `parseModel`, called with sorting disabled.
 */
export function parseInlineVensimModel(mdlContent: string, modelDir?: string): ParsedModel {
179+
// For tests that parse inline model text, in the case of the legacy parser, don't run
180+
// the preprocess step, and in the case of the new parser (which implicitly runs the
181+
// preprocess step), don't sort the definitions. This makes it easier to do apples
182+
// to apples comparisons on the outputs from the two parser implementations.
183+
return parseModel(mdlContent, modelDir, /*sort=*/ false)
151184
}
152185

153186
function prettyVar(variable: Variable): string {

packages/compile/src/generate/code-gen.js

Lines changed: 27 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -4,14 +4,15 @@ import { asort, lines, strlist, abend, mapIndexed } from '../_shared/helpers.js'
44
import { sub, allDimensions, allMappings, subscriptFamilies } from '../_shared/subscript.js'
55
import Model from '../model/model.js'
66

7+
import { generateEquation } from './gen-equation.js'
78
import EquationGen from './equation-gen.js'
89
import ModelLHSReader from './model-lhs-reader.js'
910

10-
export function generateCode(parseTree, opts) {
11-
return codeGenerator(parseTree, opts).generate()
11+
/**
 * Generate code for the given parsed model.
 *
 * @param {*} parsedModel The parsed model; passed through unchanged to `codeGenerator`.
 * @param {*} opts The code generation options; passed through unchanged to `codeGenerator`.
 * @returns The value returned by the `generate()` method of the code generator.
 */
export function generateCode(parsedModel, opts) {
12+
// Build a code generator for this model and options, then run it immediately.
return codeGenerator(parsedModel, opts).generate()
1213
}
1314

14-
let codeGenerator = (parseTree, opts) => {
15+
let codeGenerator = (parsedModel, opts) => {
1516
const { spec, operation, extData, directData, modelDirname } = opts
1617
// Set to 'decl', 'init-lookups', 'eval', etc depending on the section being generated.
1718
let mode = ''
@@ -25,13 +26,19 @@ let codeGenerator = (parseTree, opts) => {
2526
outputAllVars = true
2627
}
2728
// Function to generate a section of the code
28-
let generateSection = R.map(v => new EquationGen(v, extData, directData, mode, modelDirname).generate())
29+
let generateSection = R.map(v => {
30+
if (parsedModel.kind === 'vensim-legacy') {
31+
return new EquationGen(v, extData, directData, mode, modelDirname).generate()
32+
} else {
33+
return generateEquation(v, mode, extData, directData, modelDirname)
34+
}
35+
})
2936
let section = R.pipe(generateSection, R.flatten, lines)
3037
function generate() {
3138
// Read variables and subscript ranges from the model parse tree.
3239
// This is the main entry point for code generation and is called just once.
3340
try {
34-
Model.read(parseTree, spec, extData, directData, modelDirname)
41+
Model.read(parsedModel, spec, extData, directData, modelDirname)
3542
// In list mode, print variables to the console instead of generating code.
3643
if (operation === 'printRefIdTest') {
3744
Model.printRefIdTest()
@@ -198,10 +205,21 @@ void ${name}${idx}() {
198205
}
199206
let funcCalls = R.pipe(mapIndexed(funcCall), lines)
200207

201-
// Break the vars into chunks of 30; this number was empirically
202-
// determined by looking at runtime performance and memory usage
203-
// of the En-ROADS model on various devices
204-
let chunks = R.splitEvery(30, vars)
208+
// Break the vars into chunks. The default value of 30 was empirically
209+
// determined by looking at runtime performance and memory usage of the
210+
// En-ROADS model on various devices.
211+
let chunkSize
212+
if (process.env.SDE_CODE_GEN_CHUNK_SIZE) {
213+
chunkSize = parseInt(process.env.SDE_CODE_GEN_CHUNK_SIZE)
214+
} else {
215+
chunkSize = 30
216+
}
217+
let chunks
218+
if (chunkSize > 0) {
219+
chunks = R.splitEvery(chunkSize, vars)
220+
} else {
221+
chunks = [vars]
222+
}
205223

206224
if (!preStep) {
207225
preStep = ''
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
import path from 'node:path'
2+
3+
import XLSX from 'xlsx'
4+
5+
import { cdbl, readCsv, readXlsx } from '../_shared/helpers.js'
6+
7+
/**
 * Return a `getCellValue` function that reads the CSV or XLS[X] content.
 *
 * @param {string} fileOrTag The filename (e.g., 'data.xlsx') or tag name (e.g., '?data').
 * @param {string} tabOrDelimiter The worksheet (tab) name when the source is a workbook, or
 * the delimiter when the source is a CSV file.
 * @param {'data' | 'constants'} dataKind The kind of `GET DIRECT ...` being used.
 * @param {Map<string, any>} directData The mapping of dataset name used in a `GET DIRECT DATA` call (e.g.,
 * `?data`) to the tabular data contained in the loaded data file.
 * @param {string} modelDir The path to the directory containing the model (used for resolving data files).
 * @returns A `getCellValue` function.
 * @throws {Error} If the tagged workbook, the worksheet, or the CSV file cannot be found or read.
 */
export function handleExcelOrCsvFile(fileOrTag, tabOrDelimiter, dataKind, directData, modelDir) {
  if (fileOrTag.startsWith('?')) {
    // The file is a tag for an Excel file with data in the directData map. Tolerate a missing
    // map so that the lookup falls through to the descriptive "workbook ... not found" error
    // raised by `handleExcelWorkbook` instead of failing with a bare TypeError here.
    const workbook = directData ? directData.get(fileOrTag) : undefined
    return handleExcelWorkbook(fileOrTag, workbook, tabOrDelimiter, dataKind, 'tagged')
  }

  // The file is a CSV or XLS[X] pathname. Read it now.
  const dataPathname = path.resolve(modelDir, fileOrTag)

  // Require the dot so that only a real `.csv` extension selects the CSV reader; a bare
  // `endsWith('csv')` would also match names such as `datacsv`.
  if (dataPathname.toLowerCase().endsWith('.csv')) {
    return handleCsvFile(fileOrTag, dataPathname, tabOrDelimiter, dataKind)
  }

  const workbook = readXlsx(dataPathname)
  return handleExcelWorkbook(fileOrTag, workbook, tabOrDelimiter, dataKind, 'file')
}
34+
35+
/**
 * Build a `getCellValue` function backed by one worksheet of a parsed XLS[X] workbook.
 *
 * @param {string} fileOrTag The filename (e.g., 'data.xlsx') or tag name (e.g., '?data').
 * @param {*} workbook The workbook data loaded from the file.
 * @param {string} tab The name of the tab within the workbook.
 * @param {'data' | 'constants'} dataKind The kind of `GET DIRECT ...` being used.
 * @param {'file' | 'tagged'} dataSource The reference kind, either 'file' or 'tagged'.
 * @returns A `getCellValue` function that takes a column index and a row index and returns
 * the result of `cdbl` applied to the cell's `v` property, or null when there is no such cell.
 * @throws {Error} If the workbook is missing or does not contain the requested worksheet.
 */
function handleExcelWorkbook(fileOrTag, workbook, tab, dataKind, dataSource) {
  if (!workbook) {
    throw new Error(`Direct ${dataKind} workbook ${dataSource} ${fileOrTag} not found`)
  }

  const worksheet = workbook.Sheets[tab]
  if (!worksheet) {
    throw new Error(`Direct ${dataKind} worksheet ${tab} in ${dataSource} ${fileOrTag} not found`)
  }

  return (c, r) => {
    // Worksheet cells are keyed by the encoded cell address for the column/row pair
    const address = XLSX.utils.encode_cell({ c, r })
    const cell = worksheet[address]
    if (cell == null) {
      return null
    }
    return cdbl(cell.v)
  }
}
60+
61+
/**
 * Build a `getCellValue` function backed by the rows read from a CSV file.
 *
 * @param {string} file The filename of the data file (used in the error message).
 * @param {string} dataPathname The full path to the data file.
 * @param {string} delimiter The delimiter for the tabular data.
 * @param {'data' | 'constants'} dataKind The kind of `GET DIRECT ...` being used.
 * @returns A `getCellValue` function that takes a column index and a row index and returns
 * the result of `cdbl` applied to that cell, or null when the row or cell is absent.
 * @throws {Error} If `readCsv` returns no data for the file.
 */
function handleCsvFile(file, dataPathname, delimiter, dataKind) {
  const rows = readCsv(dataPathname, delimiter)
  if (!rows) {
    throw new Error(`Direct ${dataKind} file ${file} could not be read`)
  }

  return (c, r) => {
    try {
      const row = rows[r]
      if (row == null) {
        return null
      }
      const cell = row[c]
      return cell != null ? cdbl(cell) : null
    } catch (error) {
      // Report the failure along with the file it came from, then fall back to the
      // default value (note that this is the string '0.0', not a number)
      console.error(`${error.message} in ${dataPathname}`)
      return '0.0'
    }
  }
}
Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
import { cartesianProductOf, cdbl } from '../_shared/helpers.js'
2+
import { isDimension, normalizeSubscripts, sub } from '../_shared/subscript.js'
3+
4+
/**
 * Generate code for a single element in a const list definition.
 *
 * @param {*} variable The `Variable` instance to process.
 * @param {*} parsedEqn The parsed equation.
 * @return {string} The generated C code for the variable: a single line that assigns the
 * matching value from the const list to the variable element.
 * @throws {Error} If the position of the element within the const list cannot be determined,
 * or if the const list has no value at that position.
 */
export function generateConstListElement(variable, parsedEqn) {
  // In the "read variables" phase, const lists are expanded into separated variable
  // definitions, so `variable` here will have `subscripts` that represent specific
  // subscript indices in normalized order (alphabetized by parent dimension/family
  // name).  However, we need to consult the LHS subscripts/dimensions, which will
  // be in the original order from the model equation.
  //
  // In the following example,
  // we have a 2D variable whose original dimensions are not in normal order:
  //   DimA: A1, A2 ~~|
  //   DimB: B1, B2, B3 ~~|
  //   x[DimB, DimA] = 1, 2; 3, 4; 5, 6; ~~|
  //
  // The variable `x` will have been separated into:
  //   x[B1,A1]
  //   x[B1,A2]
  //   x[B2,A1]
  //   ...
  //
  // Each one will refer to a single element from the original const list.  To determine
  // which element in the const list goes with which variable instance, we build an array
  // of all subscript combinations and then find the index of the one that matches the
  // combination used for the separated variable instance.
  const lhsSubRefs = variable.parsedEqn.lhs.varDef.subscriptRefs
  const lhsSubIds = lhsSubRefs.map(subRef => subRef.subId)
  const subIdArrays = lhsSubIds.map(subOrDimId => {
    if (isDimension(subOrDimId)) {
      // Use the full array of subscripts (indexes) for the dimension at this position
      return sub(subOrDimId).value
    } else {
      // This is a single subscript (index), so use an array with a single element
      return [subOrDimId]
    }
  })

  // Continuing with the above example, at this point we will have a 2D array:
  //   [
  //     [_b1,_b2,_b3],
  //     [_a1,_a2]
  //   ]
  // We expand these into the set of all combinations of subscripts in the original
  // order of the dimensions from the equation LHS.
  const origCombos = cartesianProductOf(subIdArrays)

  // Now we have the combinations in original order:
  //   [_b1,_a1]
  //   [_b1,_a2]
  //   [_b2,_a1]
  //   ...
  // But we need to put them into normalized order so that we can find the index of
  // `variable.subscripts` (which is already in normalized order).
  const normalizedCombos = origCombos.map(normalizeSubscripts)

  // Convert to strings to make matching easier.  Now we have the strings in normalized order:
  //   [_a1,_b1]
  //   [_a2,_b1]
  //   [_a1,_b2]
  //   ...
  const comboStrings = normalizedCombos.map(combo => combo.map(subId => `[${subId}]`).join(''))

  // Convert `variable.subscripts` into the same format so that we can do an array lookup,
  // for example if this separated variable instance is x[_a2,_b1], this will be:
  //   [_a2,_b1]
  const lhsComboString = variable.subscripts.map(subId => `[${subId}]`).join('')

  // Find the index of the combination that matches `variable.subscripts`
  const constIndex = comboStrings.indexOf(lhsComboString)
  if (constIndex < 0) {
    throw new Error(`Failed to determine index of const list element for ${variable.refId}`)
  }

  // Make sure the const list actually has a value at that position; without this check a
  // const list with fewer values than subscript combinations would surface as an opaque
  // TypeError when reading `.value` below
  const constElement = parsedEqn.rhs.constants[constIndex]
  if (constElement == null) {
    throw new Error(`Const list for ${variable.refId} has no value at index ${constIndex}`)
  }

  // Determine the LHS and RHS of the const assignment
  const lhsVarId = variable.varName
  const lhsIndicesString = variable.subscripts.map(subId => `[${sub(subId).value}]`).join('')
  const lhsRef = `${lhsVarId}${lhsIndicesString}`
  const rhsConstValue = cdbl(constElement.value)
  return ` ${lhsRef} = ${rhsConstValue};`
}

0 commit comments

Comments
 (0)