Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
"test-markup": "mocha test/markup",
"test-detect": "mocha test/detect",
"test-browser": "mocha test/browser"
"test-parser": "mocha test/parser"
},
"engines": {
"node": "*"
Expand Down
9 changes: 1 addition & 8 deletions src/highlight.js
Original file line number Diff line number Diff line change
Expand Up @@ -498,20 +498,13 @@ const HLJS = function(hljs) {
// considered for a potential match
resumeScanAtSamePosition = false;
} else {
top.matcher.lastIndex = index;
top.matcher.considerAll();
}
top.matcher.lastIndex = index;

const match = top.matcher.exec(codeToHighlight);
// console.log("match", match[0], match.rule && match.rule.begin)

// if our failure to match was the result of a "resumed scan" then we
// need to advance one position and revert to full scanning before we
// decide there are truly no more matches at all to be had
if (!match && top.matcher.resumingScanAtSamePosition()) {
advanceOne();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

advanceOne() is no longer needed.
Otherwise it's all looks good!

continue;
}
if (!match) break;

const beforeMatch = codeToHighlight.substring(index, match.index);
Expand Down
53 changes: 48 additions & 5 deletions src/lib/mode_compiler.js
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ export function compileLanguage(language) {
}

resumingScanAtSamePosition() {
return this.regexIndex != 0;
return this.regexIndex !== 0;
}

considerAll() {
Expand All @@ -160,15 +160,58 @@ export function compileLanguage(language) {
exec(s) {
const m = this.getMatcher(this.regexIndex);
m.lastIndex = this.lastIndex;
const result = m.exec(s);
let result = m.exec(s);

// The following is because we have no easy way to say "resume scanning at the
// existing position but also skip the current rule ONLY". What happens is
// all prior rules are also skipped which can result in matching the wrong
// thing. Example of matching "booger":

// our matcher is [string, "booger", number]
//
// ....booger....

// if "booger" is ignored then we'd really need a regex to scan from the
// SAME position for only: [string, number] but ignoring "booger" (if it
// was the first match), a simple resume would scan ahead who knows how
// far looking only for "number", ignoring potential string matches (or
// future "booger" matches that might be valid.)

// So what we do: We execute two matchers, one resuming at the same
// position, but the second full matcher starting at the position after:

// /--- resume first regex match here (for [number])
// |/---- full match here for [string, "booger", number]
// vv
// ....booger....

// Which ever results in a match first is then used. So this 3-4 step
// process essentially allows us to say "match at this position, excluding
// a prior rule that was ignored".
//
// 1. Match "booger" first, ignore. Also proves that [string] does non match.
// 2. Resume matching for [number]
// 3. Match at index + 1 for [string, "booger", number]
// 4. If #2 and #3 result in matches, which came first?
if (this.resumingScanAtSamePosition()) {
if (result && result.index === this.lastIndex) {
// result is position +0 and therefore a valid
// "resume" match so result stays result
} else { // use the second matcher result
const m2 = this.getMatcher(0);
m2.lastIndex = this.lastIndex + 1;
result = m2.exec(s);
}
}

if (result) {
this.regexIndex += result.position + 1;
if (this.regexIndex === this.count) { // wrap-around
this.regexIndex = 0;
if (this.regexIndex === this.count) {
// wrap-around to considering all matches again
this.considerAll();
}
}

// this.regexIndex = 0;
return result;
}
}
Expand Down
5 changes: 2 additions & 3 deletions test/index.js
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
'use strict';

const hljs = require('../build');
const hljs = require('../build');
hljs.debugMode(); // tests run in debug mode so errors are raised

// Tests specific to the API exposed inside the hljs object.
// Right now, that only includes tests for several common regular expressions.
require('./api');

// Test weird bugs we've fixed over time
require("./parser")
require("./parser");

// Tests for auto detection of languages via `highlightAuto`.
require('./detect');
Expand All @@ -26,4 +26,3 @@ require('./markup');
// isn't actually used to test inside a browser but `jsdom` acts as a virtual
// browser inside of node.js and runs together with all the other tests.
require('./special');

2 changes: 2 additions & 0 deletions test/parser/index.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
'use strict';

const should = require('should');

describe('hljs', function() {
require('./look-ahead-end-matchers');
require('./resume-scan');
Expand Down
31 changes: 22 additions & 9 deletions test/parser/resume-scan.js
Original file line number Diff line number Diff line change
@@ -1,13 +1,26 @@
const hljs = require('../../build');
'use strict';

describe("bugs", function () {
const hljs = require('../../build');
hljs.debugMode(); // tests run in debug mode so errors are raised

describe("bugs", function() {
describe("resume scan when a match is ignored", () => {
it("should continue to highlight later matches", () => {
hljs.highlight('java', 'ImmutablePair.of(Stuff.class, "bar")').value
.should.equal(
'ImmutablePair.of(Stuff.class, <span class="hljs-string">&quot;bar&quot;</span>)'
)
})
})
})
const result = hljs.highlight('java', 'ImmutablePair.of(Stuff.class, "bar")');
result.value.should.equal(
'ImmutablePair.of(Stuff.class, <span class="hljs-string">&quot;bar&quot;</span>)'
);
});
// previously the match rule was resumed but it would scan ahead too far and ignore
// later matches that matched the PRIOR rules... this is because when we "skip" (ignore) a
// rule we really only want to skip searching for THAT rule at that same location, we
// do not want to stop searching for ALL the prior rules at that location...
it("BUT should not skip ahead too far", () => {
const result = hljs.highlight('java', 'ImmutablePair.of(Stuff.class, "bar");\n23');
result.value.should.equal(
'ImmutablePair.of(Stuff.class, <span class="hljs-string">&quot;bar&quot;</span>);\n' +
'<span class="hljs-number">23</span>'
);
});
});
});