-
-
Notifications
You must be signed in to change notification settings - Fork 16
support gitignore filter #9
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,198 @@ | ||||||||
| const std = @import("std"); | ||||||||
| const fs = std.fs; | ||||||||
| const mem = std.mem; | ||||||||
|
|
||||||||
| const testing = std.testing; | ||||||||
|
|
||||||||
// One element of a compiled ignore pattern. `match_iter` treats `.anything`
// as a wildcard over path components and compares `.exact` byte-for-byte
// against a single component. The `bool` payload of `.anything` is not read
// anywhere in the visible code -- NOTE(review): confirm its intended meaning.
| const State = union(enum) { anything: bool, exact: []const u8 }; | ||||||||
// Growable list of `State`s; stored as `IgnoreRule.state_machine`.
| const StateMachine = std.ArrayList(State); | ||||||||
// Iterator type for splitting on a byte sequence. Not referenced in the
// visible portion of the file -- NOTE(review): confirm it is still needed.
| const PathIter = mem.SplitIterator(u8, .sequence); | ||||||||
// Result type of `IgnoreRule.check`. The visible code returns `.Exclude` when
// the rule's `is_exclude` flag is set and `.Ignore` otherwise; `.None` is
// presumably "rule does not apply" -- verify against the rest of `check`.
| const CheckResult = enum { Ignore, Exclude, None }; | ||||||||
|
|
||||||||
/// Reports whether the path components in `paths` are matched by the
/// pattern `states`.
///
/// - `.exact` must equal the current component byte-for-byte.
/// - `.anything` (the `**` wildcard) stands for zero or more components,
///   following gitignore semantics: `a/**/b` matches `a/b`, and `**/b`
///   matches `b`.
///
/// An empty pattern matches only an empty path. An empty path is matched
/// only by a pattern that consists solely of `.anything` states.
fn match_iter(states: []const State, paths: []const []const u8) bool {
    if (states.len == 0) {
        return paths.len == 0;
    }

    if (paths.len == 0) {
        // Nothing left to consume: only wildcards may remain in the pattern.
        for (states) |s| {
            if (s != .anything) return false;
        }
        return true;
    }

    return switch (states[0]) {
        // The wildcard either matches nothing at this position, or swallows
        // one component and stays active for the rest of the path. Each
        // recursive call shrinks `states` or `paths`, so this terminates.
        .anything => match_iter(states[1..], paths) or
            match_iter(states, paths[1..]),
        .exact => |expect| std.mem.eql(u8, expect, paths[0]) and
            match_iter(states[1..], paths[1..]),
    };
}
|
|
||||||||
test "match iter" {
    // Each case is: pattern states, "/"-separated input path, expected result.
    inline for (.{
        .{ &[_]State{.{ .anything = true }}, "aaa", true },
        .{ &[_]State{.{ .anything = true }}, "b", true },
        .{ &[_]State{.{ .anything = true }}, "", true },
        .{ &[_]State{ .{ .anything = true }, .{ .exact = "b" } }, "a/a/b", true },
        .{ &[_]State{ .{ .anything = true }, .{ .exact = "b" } }, "a/a/b/c", false },
        .{ &[_]State{ .{ .anything = true }, .{ .exact = "b" } }, "a/b/a/b", true },
        .{ &[_]State{ .{ .anything = true }, .{ .exact = "b" }, .{ .anything = true } }, "a/a/b/c", true },
    }) |case| {
        const states = case.@"0";
        const input = case.@"1";
        const expected = case.@"2";

        var paths = std.ArrayList([]const u8).init(testing.allocator);
        defer paths.deinit();

        // Split the input into path components the same way the matcher's
        // callers do.
        var path_iter = mem.splitSequence(u8, input, "/");
        while (path_iter.next()) |component| {
            // Propagate allocation failure as a test error instead of panicking.
            try paths.append(component);
        }

        // `expectEqual` takes the expected value first; the reversed order
        // produced misleading "expected X, found Y" messages on failure.
        try testing.expectEqual(expected, match_iter(states, paths.items));
    }
}
|
|
||||||||
| const IgnoreRule = struct { | ||||||||
| is_dir: bool, | ||||||||
| is_exclude: bool, | ||||||||
| state_machine: StateMachine, | ||||||||
| dir: []const u8, | ||||||||
|
|
||||||||
| const Self = @This(); | ||||||||
|
|
||||||||
| fn init(allocator: std.mem.Allocator, dir: []const u8) Self { | ||||||||
| return .{ | ||||||||
| .is_dir = false, | ||||||||
| .is_exclude = false, | ||||||||
| .state_machine = StateMachine.init(allocator), | ||||||||
| .dir = dir, | ||||||||
| }; | ||||||||
| } | ||||||||
|
|
||||||||
| fn deinit(self: Self) void { | ||||||||
| self.state_machine.deinit(); | ||||||||
| } | ||||||||
|
|
||||||||
| fn pushState(self: *Self, state: State) !void { | ||||||||
| try self.state_machine.append(state); | ||||||||
| } | ||||||||
|
|
||||||||
| fn check(self: Self, path: []const u8, file_entry: fs.IterableDir.Entry) !CheckResult { | ||||||||
| if (self.is_dir and file_entry.kind != .directory) { | ||||||||
| return if (self.is_exclude) .Exclude else .Ignore; | ||||||||
| } | ||||||||
|
|
||||||||
| const remainings = mem.trimLeft(u8, path, self.dir); | ||||||||
| var path_iter = mem.splitSequence(u8, remainings, "/"); | ||||||||
| var paths = std.ArrayList([]const u8); | ||||||||
|
||||||||
| var paths = std.ArrayList([]const u8); | |
| var paths = std.ArrayList([]const u8).init(self.state_machine.allocator); | |
| defer paths.deinit(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There appears to be a critical issue in the parsing logic for rule components. The current approach using it.first() (line 151) to peek at the first component, potentially processing it in lines 153-154, and then iterating with while (it.next()) (line 157) can lead to the first component being processed twice. This is because it.first() does not advance the iterator.
For example, with a pattern like `"a/b"`:
- `input[start..end]` would be `"a/b"`.
- `first_item` (line 151) would be `"a"`.
- The `if` condition on line 152 would be true.
- Lines 153-154 would push `State{ .anything = true }` and then `State{ .exact = "a" }`.
- The `while (it.next())` loop on line 157 would then again process `"a"` as its first item, followed by `"b"`.

This would result in an incorrect state machine like `[any, a, a, b]` instead of the expected `[any, a, b]` for a pattern like `"a/b"` (which implies `**/a/b`).
This could lead to incorrect matching behavior. Consider refactoring this section to correctly handle the implicit ** for non-anchored patterns and then iterate through all components once.
A revised approach might involve:
- Checking if `input[start..end]` starts with `/`.
- If not (and the pattern is not empty), adding the initial `State{ .anything = true }`.
- Then using a single `while (it.next())` loop to process all components, making sure to skip any empty components that arise (e.g., from a leading `/` or from `//`).
// Corrected logic to avoid double-processing of the first component.
const current_pattern_slice = input[start..end];
// Zig spells logical conjunction `and`; `&&` is not a Zig operator.
if (current_pattern_slice.len > 0 and !std.mem.startsWith(u8, current_pattern_slice, "/")) {
    // Pattern is not anchored and not empty, so prepend 'anything' (like '**')
    try rule.pushState(State{ .anything = true });
}
// 'it' is already initialized from input[start..end] on line 150.
// Now, iterate through its components.
while (it.next()) |component| {
    if (component.len == 0) {
        // Skip empty components. These arise from:
        // - A leading slash (e.g., "/foo" -> "" then "foo"). The "" is skipped.
        // - Consecutive slashes (e.g., "foo//bar" -> "foo", "", "bar"). The "" is skipped.
        continue;
    }
    if (std.mem.eql(u8, "**", component)) {
        // Add 'anything' state, but avoid duplicates if the last state was also 'anything'.
        // Compare the union's tag: `!=` against an anonymous struct literal does not compile.
        const states = rule.state_machine.items;
        if (states.len == 0 or states[states.len - 1] != .anything) {
            try rule.pushState(State{ .anything = true });
        }
    } else {
        try rule.pushState(State{ .exact = component });
    }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The current logic for directory-specific rules seems to have an issue. If a rule is specific to directories (i.e., `self.is_dir` is true, often from a pattern ending in `/`), and the `file_entry` being checked is not a directory, this rule should simply not apply to the entry. In such a case, the function should return `.None`.

Currently, if `self.is_dir` is true and `file_entry.kind != .directory`, it returns `.Ignore` (or `.Exclude` if `self.is_exclude` is true). This could lead to incorrectly ignoring or excluding files.

For example, if the rule is `build/` (ignore the `build` directory) and we are checking a file named `other_file.txt`, this rule should not cause `other_file.txt` to be ignored. However, the current logic would return `.Ignore`.

Could you clarify if this behavior is intended, or if it should return `.None` when a directory-specific rule encounters a non-directory entry?