Skip to content

Commit 187f935

Browse files
dpebot JustinBeckwith
authored and committed
feat: add sorting and ordering of results (#147)
1 parent 30042b6 commit 187f935

5 files changed

Lines changed: 470 additions & 24 deletions

File tree

packages/google-privacy-dlp/protos/google/privacy/dlp/v2/dlp.proto

Lines changed: 139 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2018 Google LLC
1+
// Copyright 2018 Google LLC.
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -11,6 +11,7 @@
1111
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
14+
//
1415

1516
syntax = "proto3";
1617

@@ -400,6 +401,59 @@ service DlpService {
400401
}
401402
}
402403

404+
// List of infoTypes whose findings exclude other findings.
405+
message ExcludeInfoTypes {
406+
// The infoType list in an ExclusionRule drops a finding when it overlaps
407+
// with or is contained within a finding of an infoType from this list. For
408+
// example, for `InspectionRuleSet.info_types` containing "PHONE_NUMBER" and
409+
// `exclusion_rule` containing `exclude_info_types.info_types` with
410+
// "EMAIL_ADDRESS", the phone number findings are dropped if they overlap
411+
// with an EMAIL_ADDRESS finding.
412+
// As a result, "555-222-2222@example.org" generates only a single
413+
// finding, namely the email address.
414+
repeated InfoType info_types = 1;
415+
}
416+
417+
// The rule that specifies the conditions under which findings of infoTypes
418+
// specified in `InspectionRuleSet` are removed from results.
419+
message ExclusionRule {
420+
oneof type {
421+
// Dictionary which defines the rule.
422+
CustomInfoType.Dictionary dictionary = 1;
423+
424+
// Regular expression which defines the rule.
425+
CustomInfoType.Regex regex = 2;
426+
427+
// Set of infoTypes whose findings affect this rule.
428+
ExcludeInfoTypes exclude_info_types = 3;
429+
}
430+
431+
// How the rule is applied; see the MatchingType documentation for details.
432+
MatchingType matching_type = 4;
433+
}
434+
435+
// A single inspection rule to be applied to the infoTypes specified in
436+
// `InspectionRuleSet`.
437+
message InspectionRule {
438+
oneof type {
439+
// Hotword-based detection rule.
440+
CustomInfoType.DetectionRule.HotwordRule hotword_rule = 1;
441+
442+
// Exclusion rule that removes matching findings from results.
443+
ExclusionRule exclusion_rule = 2;
444+
}
445+
}
446+
447+
// Rule set that alters the behavior of a set of infoTypes under certain
448+
// circumstances, depending on the specific rules within the set.
449+
message InspectionRuleSet {
450+
// List of infoTypes this rule set is applied to.
451+
repeated InfoType info_types = 1;
452+
453+
// Set of rules to be applied to `info_types`. The rules are applied in order.
454+
repeated InspectionRule rules = 2;
455+
}
456+
403457
// Configuration description of the scanning process.
404458
// When used with redactContent only info_types and min_likelihood are currently
405459
// used.
@@ -468,6 +522,11 @@ message InspectConfig {
468522
// List of options defining data content to scan.
469523
// If empty, text, images, and other content will be included.
470524
repeated ContentOption content_options = 8;
525+
526+
// Set of rules to apply to the findings for this InspectConfig.
527+
// Exclusion rules contained in the set are executed at the end; other
528+
// rules are executed in the order they are specified for each info type.
529+
repeated InspectionRuleSet rule_set = 10;
471530
}
472531

473532
// Container for bytes to inspect or redact.
@@ -2335,6 +2394,21 @@ message ListInspectTemplatesRequest {
23352394
// Optional size of the page, can be limited by server. If zero server returns
23362395
// a page of max size 100.
23372396
int32 page_size = 3;
2397+
2398+
// Optional comma separated list of fields to order by,
2399+
// followed by `asc` or `desc` postfix. This list is case-insensitive,
2400+
// default sorting order is ascending, redundant space characters are
2401+
// insignificant.
2402+
//
2403+
// Example: `name asc,update_time, create_time desc`
2404+
//
2405+
// Supported fields are:
2406+
//
2407+
// - `create_time`: corresponds to time the template was created.
2408+
// - `update_time`: corresponds to time the template was last updated.
2409+
// - `name`: corresponds to template's name.
2410+
// - `display_name`: corresponds to template's display name.
2411+
string order_by = 4;
23382412
}
23392413

23402414
// Response message for ListInspectTemplates.
@@ -2433,9 +2507,11 @@ message ListJobTriggersRequest {
24332507
//
24342508
// Supported fields are:
24352509
//
2436-
// - `create_time`: corresponds to time the triggeredJob was created.
2437-
// - `update_time`: corresponds to time the triggeredJob was last updated.
2510+
// - `create_time`: corresponds to time the JobTrigger was created.
2511+
// - `update_time`: corresponds to time the JobTrigger was last updated.
24382512
// - `name`: corresponds to JobTrigger's name.
2513+
// - `display_name`: corresponds to JobTrigger's display name.
2514+
// - `status`: corresponds to JobTrigger's status.
24392515
string order_by = 4;
24402516
}
24412517

@@ -2646,6 +2722,21 @@ message ListDeidentifyTemplatesRequest {
26462722
// Optional size of the page, can be limited by server. If zero server returns
26472723
// a page of max size 100.
26482724
int32 page_size = 3;
2725+
2726+
// Optional comma separated list of fields to order by,
2727+
// followed by `asc` or `desc` postfix. This list is case-insensitive,
2728+
// default sorting order is ascending, redundant space characters are
2729+
// insignificant.
2730+
//
2731+
// Example: `name asc,update_time, create_time desc`
2732+
//
2733+
// Supported fields are:
2734+
//
2735+
// - `create_time`: corresponds to time the template was created.
2736+
// - `update_time`: corresponds to time the template was last updated.
2737+
// - `name`: corresponds to template's name.
2738+
// - `display_name`: corresponds to template's display name.
2739+
string order_by = 4;
26492740
}
26502741

26512742
// Response message for ListDeidentifyTemplates.
@@ -2798,6 +2889,22 @@ message ListStoredInfoTypesRequest {
27982889
// Optional size of the page, can be limited by server. If zero server returns
27992890
// a page of max size 100.
28002891
int32 page_size = 3;
2892+
2893+
// Optional comma separated list of fields to order by,
2894+
// followed by `asc` or `desc` postfix. This list is case-insensitive,
2895+
// default sorting order is ascending, redundant space characters are
2896+
// insignificant.
2897+
//
2898+
// Example: `name asc, display_name, create_time desc`
2899+
//
2900+
// Supported fields are:
2901+
//
2902+
// - `create_time`: corresponds to time the most recent version of the
2903+
// resource was created.
2904+
// - `state`: corresponds to the state of the resource.
2905+
// - `name`: corresponds to resource name.
2906+
// - `display_name`: corresponds to info type's display name.
2907+
string order_by = 4;
28012908
}
28022909

28032910
// Response message for ListStoredInfoTypes.
@@ -2830,6 +2937,35 @@ enum ContentOption {
28302937
CONTENT_IMAGE = 2;
28312938
}
28322939

2940+
// Type of match, applicable to each of the supported ways of matching:
2941+
// dictionary, regular expression, and intersection with findings of another
2942+
// info type.
2943+
enum MatchingType {
2944+
// Invalid.
2945+
MATCHING_TYPE_UNSPECIFIED = 0;
2946+
2947+
// Full match.
2948+
//
2949+
// - Dictionary: the joined dictionary matches cover the complete finding quote
2950+
// - Regex: the regex matches fill the finding quote from start to end
2951+
// - Exclude info type: completely inside an affecting info type's finding
2952+
MATCHING_TYPE_FULL_MATCH = 1;
2953+
2954+
// Partial match.
2955+
//
2956+
// - Dictionary: at least one of the tokens in the finding matches
2957+
// - Regex: a substring of the finding matches
2958+
// - Exclude info type: intersects with an affecting info type's finding
2959+
MATCHING_TYPE_PARTIAL_MATCH = 2;
2960+
2961+
// Inverse match.
2962+
//
2963+
// - Dictionary: no tokens in the finding match the dictionary
2964+
// - Regex: the finding doesn't match the regex
2965+
// - Exclude info type: no intersection with affecting info types' findings
2966+
MATCHING_TYPE_INVERSE_MATCH = 3;
2967+
}
2968+
28332969
// Parts of the APIs which use certain infoTypes.
28342970
enum InfoTypeSupportedBy {
28352971
ENUM_TYPE_UNSPECIFIED = 0;

packages/google-privacy-dlp/protos/google/privacy/dlp/v2/storage.proto

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2018 Google LLC
1+
// Copyright 2018 Google LLC.
22
//
33
// Licensed under the Apache License, Version 2.0 (the "License");
44
// you may not use this file except in compliance with the License.
@@ -11,6 +11,7 @@
1111
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
14+
//
1415

1516
syntax = "proto3";
1617

@@ -95,7 +96,9 @@ message CustomInfoType {
9596

9697
// Message defining a custom regular expression.
9798
message Regex {
98-
// Pattern defining the regular expression.
99+
// Pattern defining the regular expression. Its syntax
100+
// (https://github.com/google/re2/wiki/Syntax) can be found under the
101+
// google/re2 repository on GitHub.
99102
string pattern = 1;
100103
}
101104

@@ -170,8 +173,21 @@ message CustomInfoType {
170173
}
171174
}
172175

173-
// All CustomInfoTypes must have a name
174-
// that does not conflict with built-in InfoTypes or other CustomInfoTypes.
176+
enum ExclusionType {
177+
// Default: a finding of this custom info type is not excluded from results.
178+
EXCLUSION_TYPE_UNSPECIFIED = 0;
179+
180+
// A finding of this custom info type will be excluded from final results,
181+
// but can still affect rule execution.
182+
EXCLUSION_TYPE_EXCLUDE = 1;
183+
}
184+
185+
// CustomInfoType can either be a new infoType or an extension of a built-in
186+
// infoType, when the name matches one of the existing infoTypes and that
187+
// infoType is specified in the `InspectContent.info_types` field. Specifying
188+
// the latter adds findings to the ones detected by the system. If the built-in
189+
// info type is not specified in the `InspectContent.info_types` list, then the
190+
// name is treated as a custom info type.
175191
InfoType info_type = 1;
176192

177193
// Likelihood to return for this CustomInfoType. This base value can be
@@ -199,6 +215,10 @@ message CustomInfoType {
199215
// Rules are applied in order that they are specified. Not supported for the
200216
// `surrogate_type` CustomInfoType.
201217
repeated DetectionRule detection_rules = 7;
218+
219+
// If set to EXCLUSION_TYPE_EXCLUDE, this infoType will not cause a finding
220+
// to be returned. It can still be used for rule matching.
221+
ExclusionType exclusion_type = 8;
202222
}
203223

204224
// General identifier of a data field in a storage service.
@@ -237,13 +257,13 @@ message DatastoreOptions {
237257
KindExpression kind = 2;
238258
}
239259

240-
// Options defining a file or a set of files (path ending with *) within
241-
// a Google Cloud Storage bucket.
260+
// Options defining a file or a set of files within a Google Cloud Storage
261+
// bucket.
242262
message CloudStorageOptions {
243263
// Set of files to scan.
244264
message FileSet {
245-
// The url, in the format `gs://<bucket>/<path>`. Trailing wildcard in the
246-
// path is allowed.
265+
// The Cloud Storage url of the file(s) to scan, in the format
266+
// `gs://<bucket>/<path>`. Trailing wildcard in the path is allowed.
247267
string url = 1;
248268
}
249269

@@ -261,6 +281,7 @@ message CloudStorageOptions {
261281
RANDOM_START = 2;
262282
}
263283

284+
// The set of one or more files to scan.
264285
FileSet file_set = 1;
265286

266287
// Max number of bytes to scan from a file. If a scanned file's size is bigger

0 commit comments

Comments
 (0)