@@ -1169,33 +1169,53 @@ namespace sls {
11691169 if (is_value (x))
11701170 return false ;
11711171
1172- vector<zstring> conts ;
1172+ vector<lookahead> lookaheads ;
11731173 expr_ref d_r (y, m);
11741174 seq_rewriter seqrw (m);
11751175 for (unsigned i = 0 ; i < s.length (); ++i) {
11761176 verbose_stream () << " Derivative " << s.extract (0 , i) << " : " << d_r << " \n " ;
11771177 if (seq.re .is_empty (d_r))
11781178 break ;
11791179 zstring prefix = s.extract (0 , i);
1180- choose (d_r, 2 , prefix, conts );
1180+ choose (d_r, 2 , prefix, lookaheads );
11811181 expr_ref ch (seq.str .mk_char (s[i]), m);
11821182 d_r = seqrw.mk_derivative (ch, d_r);
11831183 }
1184- if (!seq.re .is_empty (d_r))
1185- choose (d_r, 2 , s, conts);
1184+ unsigned current_min_length = UINT_MAX;
1185+ if (!seq.re .is_empty (d_r)) {
1186+ choose (d_r, 2 , s, lookaheads);
1187+ current_min_length = info.min_length ;
1188+ }
1189+
1190+ unsigned global_min_length = UINT_MAX;
1191+ for (auto & [str, min_length] : lookaheads)
1192+ global_min_length = std::max (min_length, global_min_length);
1193+
1194+ verbose_stream () << " repair in_re " << current_min_length << " "
1195+ << global_min_length << " " << mk_pp (e, m) << " " << s << " \n " ;
11861196
1187- verbose_stream () << " repair in_re " << mk_pp (e, m) << " " << s << " \n " ;
1188- for (auto & str : conts)
1189- verbose_stream () << " prefix " << str << " \n " ;
11901197
11911198 // TODO: do some length analysis to prune out short candidates when there are longer ones.
11921199 // TODO: when matching .*"bcd" with string ab, the extension abc is more interesting than aba.
11931200 if (ctx.is_true (e)) {
1194- for (auto & str : conts)
1195- m_str_updates.push_back ({ x, str, 1 });
1201+ for (auto & [str, min_length] : lookaheads) {
1202+ if (min_length == UINT_MAX && current_min_length < UINT_MAX)
1203+ continue ;
1204+ if (global_min_length < min_length)
1205+ continue ;
1206+ double score = 0.001 ;
1207+ if (min_length < UINT_MAX && s.length () < str.length ()) {
1208+ // reward small lengths
1209+ // penalize size differences (unless min_length decreases)
1210+ score = 1 << (current_min_length - min_length);
1211+ score /= ((double )abs ((int )s.length () - (int )str.length ()) + 1 );
1212+ }
1213+ verbose_stream () << " prefix " << score << " " << min_length << " : " << str << " \n " ;
1214+ m_str_updates.push_back ({ x, str, score });
1215+ }
11961216 }
11971217 else {
1198- for (auto & str : conts )
1218+ for (auto & [ str, min_length] : lookaheads )
11991219 m_str_updates.push_back ({ x, str + zstring (m_chars[ctx.rand (m_chars.size ())]), 1 });
12001220 }
12011221 return apply_update ();
@@ -1249,9 +1269,9 @@ namespace sls {
12491269 }
12501270 }
12511271
1252- void seq_plugin::choose (expr* r, unsigned k, zstring& prefix, vector<zstring >& result) {
1272+ void seq_plugin::choose (expr* r, unsigned k, zstring& prefix, vector<lookahead >& result) {
12531273 auto info = seq.re .get_info (r);
1254- result.push_back (prefix);
1274+ result.push_back ({ prefix, info. min_length } );
12551275 if (k == 0 )
12561276 return ;
12571277 unsigned_vector chars;
0 commit comments