From 115da303178a047d65b9aa75c3a7f4ec3972bd3b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 1 Jul 2025 21:28:06 +0000 Subject: [PATCH 1/4] Initial plan From b51832765099455a3a13975e7be4b9e214e5a53a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 1 Jul 2025 21:56:53 +0000 Subject: [PATCH 2/4] =?UTF-8?q?Implement=20batch=20initialization=20fix=20?= =?UTF-8?q?for=20O(n=C2=B2)=20datatype=20performance=20issue?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/ast/datatype_decl_plugin.cpp | 50 ++++++++++++++++++++++++++++++++ src/ast/datatype_decl_plugin.h | 1 + src/cmd_context/cmd_context.cpp | 4 +++ 3 files changed, 55 insertions(+) diff --git a/src/ast/datatype_decl_plugin.cpp b/src/ast/datatype_decl_plugin.cpp index 33de8dd698c..2c0c7f0b54d 100644 --- a/src/ast/datatype_decl_plugin.cpp +++ b/src/ast/datatype_decl_plugin.cpp @@ -1121,6 +1121,56 @@ namespace datatype { return d; } + void util::batch_initialize_constructor_functions(sort * datatype) { + SASSERT(is_datatype(datatype)); + def const& dd = get_def(datatype); + + // Get all constructors for this datatype + ptr_vector const* constructors = get_datatype_constructors(datatype); + if (!constructors) return; + + // Process all constructors in a single pass to avoid O(n²) behavior + for (func_decl * con : *constructors) { + // Initialize recognizer if not already cached + if (!plugin().m_constructor2recognizer.contains(con)) { + symbol r; + for (constructor const* c : dd) { + if (c->name() == con->get_name()) { + r = c->recognizer(); + break; + } + } + parameter ps[2] = { parameter(con), parameter(r) }; + func_decl* d = m.mk_func_decl(fid(), OP_DT_RECOGNISER, 2, ps, 1, &datatype); + plugin().add_ast(con); + plugin().add_ast(d); + plugin().m_constructor2recognizer.insert(con, d); + } + + // Initialize accessors if not already cached + if (!plugin().m_constructor2accessors.contains(con)) { + ptr_vector* res = alloc(ptr_vector); + plugin().add_ast(con); + plugin().m_vectors.push_back(res); + plugin().m_constructor2accessors.insert(con, res); + + if (con->get_arity() > 0) { + // Find the constructor definition and create accessors + for (constructor const* c : dd) { + if (c->name() == con->get_name()) { + for (accessor const* a : *c) { + func_decl_ref fn = a->instantiate(datatype); + res->push_back(fn); + plugin().add_ast(fn); + } + break; + } + } + } + } + } + } + app* util::mk_is(func_decl * c, expr *f) { return m.mk_app(get_constructor_is(c), f); } diff --git a/src/ast/datatype_decl_plugin.h b/src/ast/datatype_decl_plugin.h index 4d91cbf4003..29c0d4364ca 100644 --- a/src/ast/datatype_decl_plugin.h +++ b/src/ast/datatype_decl_plugin.h @@ -373,6 +373,7 @@ namespace datatype { func_decl * get_constructor_recognizer(func_decl * constructor); func_decl * get_constructor_is(func_decl * constructor); ptr_vector const * get_constructor_accessors(func_decl * constructor); + void batch_initialize_constructor_functions(sort * datatype); func_decl * get_accessor_constructor(func_decl * accessor); func_decl * get_recognizer_constructor(func_decl * recognizer) const; func_decl * get_update_accessor(func_decl * update) const; diff --git a/src/cmd_context/cmd_context.cpp b/src/cmd_context/cmd_context.cpp index 4f093e74976..8c32d1c32e9 100644 --- a/src/cmd_context/cmd_context.cpp +++ b/src/cmd_context/cmd_context.cpp @@ -2471,6 +2471,10 @@ cmd_context::dt_eh::dt_eh(cmd_context & owner): void cmd_context::dt_eh::operator()(sort * dt, pdecl* pd) { TRACE(new_dt_eh, tout << "new datatype: "; m_owner.pm().display(tout, dt); tout << "\n";); + + // Batch initialize all constructor functions to avoid O(n²) behavior for large datatypes + m_dt_util.batch_initialize_constructor_functions(dt); + for (func_decl * c : *m_dt_util.get_datatype_constructors(dt)) { TRACE(new_dt_eh, tout << "new constructor: " << c->get_name() << "\n";); m_owner.insert(c); From dfdc31ae3b19765dd1232853ebb255658adbb06c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 2 Jul 2025 04:57:04 +0000 Subject: [PATCH 3/4] =?UTF-8?q?Fix=20the=20real=20O(n=C2=B2)=20bottleneck?= =?UTF-8?q?=20with=20lazy=20hash=20table=20for=20constructor=20name=20look?= =?UTF-8?q?ups?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/ast/datatype_decl_plugin.cpp | 77 +++++--------------------------- src/ast/datatype_decl_plugin.h | 14 +++++- src/cmd_context/cmd_context.cpp | 4 -- 3 files changed, 25 insertions(+), 70 deletions(-) diff --git a/src/ast/datatype_decl_plugin.cpp b/src/ast/datatype_decl_plugin.cpp index 2c0c7f0b54d..53882b15865 100644 --- a/src/ast/datatype_decl_plugin.cpp +++ b/src/ast/datatype_decl_plugin.cpp @@ -1076,14 +1076,13 @@ namespace datatype { sort * datatype = con->get_range(); def const& d = get_def(datatype); - for (constructor const* c : d) { - if (c->name() == con->get_name()) { - for (accessor const* a : *c) { - func_decl_ref fn = a->instantiate(datatype); - res->push_back(fn); - plugin().add_ast(fn); - } - break; + // Use O(1) lookup instead of O(n) linear search + constructor* c = d.get_constructor_by_name(con->get_name()); + if (c) { + for (accessor const* a : *c) { + func_decl_ref fn = a->instantiate(datatype); + res->push_back(fn); + plugin().add_ast(fn); } } return res; @@ -1105,13 +1104,11 @@ namespace datatype { sort * datatype = con->get_range(); def const& dd = get_def(datatype); symbol r; - // This should be fixed for perf. - // Option 1: hash-table in dd that maps to constructors instead of iterating over all constructors. - // initialize the hash-table lazily when dd is large. - // Option 2: initialize all calls to plugin() registration in a single pass. - for (constructor const* c : dd) - if (c->name() == con->get_name()) - r = c->recognizer(); + // Use O(1) lookup instead of O(n) linear search + constructor* c = dd.get_constructor_by_name(con->get_name()); + if (c) { + r = c->recognizer(); + } parameter ps[2] = { parameter(con), parameter(r) }; d = m.mk_func_decl(fid(), OP_DT_RECOGNISER, 2, ps, 1, &datatype); SASSERT(d); @@ -1121,56 +1118,6 @@ namespace datatype { return d; } - void util::batch_initialize_constructor_functions(sort * datatype) { - SASSERT(is_datatype(datatype)); - def const& dd = get_def(datatype); - - // Get all constructors for this datatype - ptr_vector const* constructors = get_datatype_constructors(datatype); - if (!constructors) return; - - // Process all constructors in a single pass to avoid O(n²) behavior - for (func_decl * con : *constructors) { - // Initialize recognizer if not already cached - if (!plugin().m_constructor2recognizer.contains(con)) { - symbol r; - for (constructor const* c : dd) { - if (c->name() == con->get_name()) { - r = c->recognizer(); - break; - } - } - parameter ps[2] = { parameter(con), parameter(r) }; - func_decl* d = m.mk_func_decl(fid(), OP_DT_RECOGNISER, 2, ps, 1, &datatype); - plugin().add_ast(con); - plugin().add_ast(d); - plugin().m_constructor2recognizer.insert(con, d); - } - - // Initialize accessors if not already cached - if (!plugin().m_constructor2accessors.contains(con)) { - ptr_vector* res = alloc(ptr_vector); - plugin().add_ast(con); - plugin().m_vectors.push_back(res); - plugin().m_constructor2accessors.insert(con, res); - - if (con->get_arity() > 0) { - // Find the constructor definition and create accessors - for (constructor const* c : dd) { - if (c->name() == con->get_name()) { - for (accessor const* a : *c) { - func_decl_ref fn = a->instantiate(datatype); - res->push_back(fn); - plugin().add_ast(fn); - } - break; - } - } - } - } - } - } - app* util::mk_is(func_decl * c, expr *f) { return m.mk_app(get_constructor_is(c), f); } diff --git a/src/ast/datatype_decl_plugin.h b/src/ast/datatype_decl_plugin.h index 29c0d4364ca..5f260f1151d 100644 --- a/src/ast/datatype_decl_plugin.h +++ b/src/ast/datatype_decl_plugin.h @@ -26,6 +26,7 @@ Revision History: #include "util/buffer.h" #include "util/symbol_table.h" #include "util/obj_hashtable.h" +#include "util/dictionary.h" enum sort_kind { @@ -164,6 +165,7 @@ namespace datatype { sort_ref_vector m_params; mutable sort_ref m_sort; ptr_vector m_constructors; + mutable dictionary m_name2constructor; public: def(ast_manager& m, util& u, symbol const& n, unsigned class_id, unsigned num_params, sort * const* params): m(m), @@ -195,6 +197,17 @@ namespace datatype { util& u() const { return m_util; } param_size::size* sort_size() { return m_sort_size; } void set_sort_size(param_size::size* p) { auto* q = m_sort_size; m_sort_size = p; if (p) p->inc_ref(); if (q) q->dec_ref(); m_sort = nullptr; } + constructor* get_constructor_by_name(symbol const& name) const { + // Lazy initialization of name-to-constructor map for O(1) lookups + if (m_name2constructor.empty() && !m_constructors.empty()) { + for (constructor* c : m_constructors) { + m_name2constructor.insert(c->name(), c); + } + } + constructor* result = nullptr; + m_name2constructor.find(name, result); + return result; + } def* translate(ast_translation& tr, util& u); }; @@ -373,7 +386,6 @@ namespace datatype { func_decl * get_constructor_recognizer(func_decl * constructor); func_decl * get_constructor_is(func_decl * constructor); ptr_vector const * get_constructor_accessors(func_decl * constructor); - void batch_initialize_constructor_functions(sort * datatype); func_decl * get_accessor_constructor(func_decl * accessor); func_decl * get_recognizer_constructor(func_decl * recognizer) const; func_decl * get_update_accessor(func_decl * update) const; diff --git a/src/cmd_context/cmd_context.cpp b/src/cmd_context/cmd_context.cpp index 8c32d1c32e9..4f093e74976 100644 --- a/src/cmd_context/cmd_context.cpp +++ b/src/cmd_context/cmd_context.cpp @@ -2471,10 +2471,6 @@ cmd_context::dt_eh::dt_eh(cmd_context & owner): void cmd_context::dt_eh::operator()(sort * dt, pdecl* pd) { TRACE(new_dt_eh, tout << "new datatype: "; m_owner.pm().display(tout, dt); tout << "\n";); - - // Batch initialize all constructor functions to avoid O(n²) behavior for large datatypes - m_dt_util.batch_initialize_constructor_functions(dt); - for (func_decl * c : *m_dt_util.get_datatype_constructors(dt)) { TRACE(new_dt_eh, tout << "new constructor: " << c->get_name() << "\n";); m_owner.insert(c); From 2788b8f49d5ced32278dac7f193f8fce2219bf24 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 2 Jul 2025 15:11:32 +0000 Subject: [PATCH 4/4] Optimize get_constructor_by_name: use func_decl* parameter, add linear search optimization for small datatypes, and ensure non-null postcondition Co-authored-by: NikolajBjorner <3085284+NikolajBjorner@users.noreply.github.com> --- src/ast/datatype_decl_plugin.cpp | 4 ++-- src/ast/datatype_decl_plugin.h | 26 ++++++++++++++++++++------ 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/ast/datatype_decl_plugin.cpp b/src/ast/datatype_decl_plugin.cpp index 53882b15865..3daceb88c2d 100644 --- a/src/ast/datatype_decl_plugin.cpp +++ b/src/ast/datatype_decl_plugin.cpp @@ -1077,7 +1077,7 @@ namespace datatype { sort * datatype = con->get_range(); def const& d = get_def(datatype); // Use O(1) lookup instead of O(n) linear search - constructor* c = d.get_constructor_by_name(con->get_name()); + constructor* c = d.get_constructor_by_name(con); if (c) { for (accessor const* a : *c) { func_decl_ref fn = a->instantiate(datatype); @@ -1105,7 +1105,7 @@ namespace datatype { def const& dd = get_def(datatype); symbol r; // Use O(1) lookup instead of O(n) linear search - constructor* c = dd.get_constructor_by_name(con->get_name()); + constructor* c = dd.get_constructor_by_name(con); if (c) { r = c->recognizer(); } diff --git a/src/ast/datatype_decl_plugin.h b/src/ast/datatype_decl_plugin.h index 5f260f1151d..7876f10c66d 100644 --- a/src/ast/datatype_decl_plugin.h +++ b/src/ast/datatype_decl_plugin.h @@ -197,15 +197,29 @@ namespace datatype { util& u() const { return m_util; } param_size::size* sort_size() { return m_sort_size; } void set_sort_size(param_size::size* p) { auto* q = m_sort_size; m_sort_size = p; if (p) p->inc_ref(); if (q) q->dec_ref(); m_sort = nullptr; } - constructor* get_constructor_by_name(symbol const& name) const { - // Lazy initialization of name-to-constructor map for O(1) lookups - if (m_name2constructor.empty() && !m_constructors.empty()) { + constructor* get_constructor_by_name(func_decl* con) const { + symbol const& name = con->get_name(); + constructor* result = nullptr; + + // For small datatypes (< 10 constructors), use linear search instead of hash table + if (m_constructors.size() < 10) { for (constructor* c : m_constructors) { - m_name2constructor.insert(c->name(), c); + if (c->name() == name) { + result = c; + break; + } + } + } else { + // Lazy initialization of name-to-constructor map for O(1) lookups + if (m_name2constructor.empty() && !m_constructors.empty()) { + for (constructor* c : m_constructors) { + m_name2constructor.insert(c->name(), c); + } } + m_name2constructor.find(name, result); } - constructor* result = nullptr; - m_name2constructor.find(name, result); + + SASSERT(result); // Post-condition: get_constructor_by_name returns a non-null result return result; } def* translate(ast_translation& tr, util& u);