From 8634799cabfaa551c6ae77cbbbf6379e1496772f Mon Sep 17 00:00:00 2001 From: parmesant Date: Fri, 5 Sep 2025 13:58:09 +0530 Subject: [PATCH 1/3] Refactor date binning logic and SQL query formatting Signed-off-by: parmesant --- src/query/mod.rs | 41 ++++++++++++++++++++++++++--------------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/src/query/mod.rs b/src/query/mod.rs index 6c142c6a6..38bb8578a 100644 --- a/src/query/mod.rs +++ b/src/query/mod.rs @@ -298,8 +298,10 @@ impl Query { /// Record of counts for a given time bin. #[derive(Debug, Serialize, Clone, Deserialize)] pub struct CountsRecord { + #[serde(rename(deserialize = "_bin_start_time_", serialize = "start_time"))] /// Start time of the bin pub start_time: String, + #[serde(rename(deserialize = "_bin_end_time_", serialize = "end_time"))] /// End time of the bin pub end_time: String, /// Number of logs in the bin @@ -449,36 +451,45 @@ impl CountsRequest { let dur = time_range.end.signed_duration_since(time_range.start); - let date_bin = if dur.num_minutes() <= 60 * 10 { - // date_bin 1 minute + let table_name = &self.stream; + let start_time_col_name = "_bin_start_time_"; + let end_time_col_name = "_bin_end_time_"; + let date_bin = if dur.num_minutes() <= 60 * 1 { + // less than 1 hour = 1 min bin format!( - "CAST(DATE_BIN('1 minute', \"{}\".\"{time_column}\", TIMESTAMP '1970-01-01 00:00:00+00') AS TEXT) as start_time, DATE_BIN('1 minute', \"{time_column}\", TIMESTAMP '1970-01-01 00:00:00+00') + INTERVAL '1 minute' as end_time", - self.stream + "CAST(DATE_BIN('1m', \"{table_name}\".\"{time_column}\", TIMESTAMP '1970-01-01 00:00:00+00') AS TEXT) as {start_time_col_name}, DATE_BIN('1m', \"{table_name}\".\"{time_column}\", TIMESTAMP '1970-01-01 00:00:00+00') + INTERVAL '1m' as {end_time_col_name}" ) - } else if dur.num_minutes() > 60 * 10 && dur.num_minutes() < 60 * 240 { - // date_bin 1 hour + } else if dur.num_minutes() > 60 * 1 && dur.num_minutes() < 60 * 24 { + // between 1 hour and 1 day = 5 min bin format!( - "CAST(DATE_BIN('1 hour', \"{}\".\"{time_column}\", TIMESTAMP '1970-01-01 00:00:00+00') AS TEXT) as start_time, DATE_BIN('1 hour', \"{time_column}\", TIMESTAMP '1970-01-01 00:00:00+00') + INTERVAL '1 hour' as end_time", - self.stream + "CAST(DATE_BIN('5m', \"{table_name}\".\"{time_column}\", TIMESTAMP '1970-01-01 00:00:00+00') AS TEXT) as {start_time_col_name}, DATE_BIN('5m', \"{table_name}\".\"{time_column}\", TIMESTAMP '1970-01-01 00:00:00+00') + INTERVAL '5m' as {end_time_col_name}" + ) + } else if dur.num_minutes() > 60 * 24 && dur.num_minutes() < 60 * 24 * 3 { + // between 1 day and 3 day = 1 hour bin + format!( + "CAST(DATE_BIN('1h', \"{table_name}\".\"{time_column}\", TIMESTAMP '1970-01-01 00:00:00+00') AS TEXT) as {start_time_col_name}, DATE_BIN('1h', \"{table_name}\".\"{time_column}\", TIMESTAMP '1970-01-01 00:00:00+00') + INTERVAL '1h' as {end_time_col_name}" + ) + } else if dur.num_minutes() > 60 * 24 * 3 && dur.num_minutes() < 60 * 24 * 10 { + // between 3 day and 10 day = 4 hour bin + format!( + "CAST(DATE_BIN('4h', \"{table_name}\".\"{time_column}\", TIMESTAMP '1970-01-01 00:00:00+00') AS TEXT) as {start_time_col_name}, DATE_BIN('4h', \"{table_name}\".\"{time_column}\", TIMESTAMP '1970-01-01 00:00:00+00') + INTERVAL '4h' as {end_time_col_name}" ) } else { - // date_bin 1 day + // 1 day format!( - "CAST(DATE_BIN('1 day', \"{}\".\"{time_column}\", TIMESTAMP '1970-01-01 00:00:00+00') AS TEXT) as start_time, DATE_BIN('1 day', \"{time_column}\", TIMESTAMP '1970-01-01 00:00:00+00') + INTERVAL '1 day' as end_time", - self.stream + "CAST(DATE_BIN('1d', \"{table_name}\".\"{time_column}\", TIMESTAMP '1970-01-01 00:00:00+00') AS TEXT) as {start_time_col_name}, DATE_BIN('1d', \"{table_name}\".\"{time_column}\", TIMESTAMP '1970-01-01 00:00:00+00') + INTERVAL '1d' as {end_time_col_name}" ) }; let query = if let Some(conditions) = &count_conditions.conditions { let f = get_filter_string(conditions).map_err(QueryError::CustomError)?; format!( - "SELECT {date_bin}, COUNT(*) as count FROM \"{}\" WHERE {} GROUP BY end_time,start_time ORDER BY end_time", - self.stream, f + "SELECT {date_bin}, COUNT(*) as count FROM \"{table_name}\" WHERE {} GROUP BY {end_time_col_name},{start_time_col_name} ORDER BY {end_time_col_name}", + f ) } else { format!( - "SELECT {date_bin}, COUNT(*) as count FROM \"{}\" GROUP BY end_time,start_time ORDER BY end_time", - self.stream + "SELECT {date_bin}, COUNT(*) as count FROM \"{table_name}\" GROUP BY {end_time_col_name},{start_time_col_name} ORDER BY {end_time_col_name}", ) }; Ok(query) From 312778c82c92fe8df5567ed8c7abc944d6e34ab8 Mon Sep 17 00:00:00 2001 From: anant Date: Mon, 8 Sep 2025 15:26:48 +0530 Subject: [PATCH 2/3] Remove `numBins` from counts request - removes parameter `numBins` (bins are being calculated by the backend) - Changes to binning logic --- src/handlers/http/query.rs | 1 - src/prism/logstream/mod.rs | 1 - src/query/mod.rs | 42 +++++++++++++++++++++++--------------- 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/src/handlers/http/query.rs b/src/handlers/http/query.rs index 82dd98753..b10da52b9 100644 --- a/src/handlers/http/query.rs +++ b/src/handlers/http/query.rs @@ -167,7 +167,6 @@ async fn handle_count_query( stream: table_name.to_string(), start_time: query_request.start_time.clone(), end_time: query_request.end_time.clone(), - num_bins: 1, conditions: None, }; let count_records = counts_req.get_bin_density().await?; diff --git a/src/prism/logstream/mod.rs b/src/prism/logstream/mod.rs index 8703f71d4..dbf854b9d 100644 --- a/src/prism/logstream/mod.rs +++ b/src/prism/logstream/mod.rs @@ -351,7 +351,6 @@ impl PrismDatasetRequest { stream: stream.to_owned(), start_time: "1h".to_owned(), end_time: "now".to_owned(), - num_bins: 10, conditions: None, }; diff --git a/src/query/mod.rs b/src/query/mod.rs index 0d3e28636..2f40c60d9 100644 --- a/src/query/mod.rs +++ b/src/query/mod.rs @@ -332,8 +332,6 @@ pub struct CountsRequest { pub start_time: String, /// Excluded end time for counts query pub end_time: String, - /// Number of bins to divide the time range into - pub num_bins: u64, /// Conditions pub conditions: Option, } @@ -402,9 +400,24 @@ impl CountsRequest { .signed_duration_since(time_range.start) .num_minutes() as u64; + // create number of bins based on total minutes + let num_bins = if total_minutes <= 60 * 5 { + // till 5 hours, 1 bin = 1 min + total_minutes + } else if total_minutes <= 60 * 24 { + // till 1 day, 1 bin = 5 min + total_minutes.div_ceil(5) + } else if total_minutes <= 60 * 24 * 10 { + // till 10 days, 1 bin = 1 hour + total_minutes.div_ceil(60) + } else { + // > 10 days, 1 bin = 1 day + total_minutes.div_ceil(1440) + }; + // divide minutes by num bins to get minutes per bin - let quotient = total_minutes / self.num_bins; - let remainder = total_minutes % self.num_bins; + let quotient = total_minutes / num_bins; + let remainder = total_minutes % num_bins; let have_remainder = remainder > 0; // now create multiple bounds [startTime, endTime) @@ -414,9 +427,9 @@ impl CountsRequest { let mut start = time_range.start; let loop_end = if have_remainder { - self.num_bins + num_bins } else { - self.num_bins - 1 + num_bins - 1 }; // Create bins for all but the last date @@ -454,26 +467,21 @@ impl CountsRequest { let table_name = &self.stream; let start_time_col_name = "_bin_start_time_"; let end_time_col_name = "_bin_end_time_"; - let date_bin = if dur.num_minutes() <= 60 { - // less than 1 hour = 1 min bin + let date_bin = if dur.num_minutes() <= 60 * 5 { + // less than 5 hour = 1 min bin format!( "CAST(DATE_BIN('1m', \"{table_name}\".\"{time_column}\", TIMESTAMP '1970-01-01 00:00:00+00') AS TEXT) as {start_time_col_name}, DATE_BIN('1m', \"{table_name}\".\"{time_column}\", TIMESTAMP '1970-01-01 00:00:00+00') + INTERVAL '1m' as {end_time_col_name}" ) - } else if dur.num_minutes() > 60 && dur.num_minutes() < 60 * 24 { - // between 1 hour and 1 day = 5 min bin + } else if dur.num_minutes() <= 60 * 24 { + // 1 day = 5 min bin format!( "CAST(DATE_BIN('5m', \"{table_name}\".\"{time_column}\", TIMESTAMP '1970-01-01 00:00:00+00') AS TEXT) as {start_time_col_name}, DATE_BIN('5m', \"{table_name}\".\"{time_column}\", TIMESTAMP '1970-01-01 00:00:00+00') + INTERVAL '5m' as {end_time_col_name}" ) - } else if dur.num_minutes() > 60 * 24 && dur.num_minutes() < 60 * 24 * 3 { - // between 1 day and 3 day = 1 hour bin + } else if dur.num_minutes() < 60 * 24 * 10 { + // 10 days = 1 hour bin format!( "CAST(DATE_BIN('1h', \"{table_name}\".\"{time_column}\", TIMESTAMP '1970-01-01 00:00:00+00') AS TEXT) as {start_time_col_name}, DATE_BIN('1h', \"{table_name}\".\"{time_column}\", TIMESTAMP '1970-01-01 00:00:00+00') + INTERVAL '1h' as {end_time_col_name}" ) - } else if dur.num_minutes() > 60 * 24 * 3 && dur.num_minutes() < 60 * 24 * 10 { - // between 3 day and 10 day = 4 hour bin - format!( - "CAST(DATE_BIN('4h', \"{table_name}\".\"{time_column}\", TIMESTAMP '1970-01-01 00:00:00+00') AS TEXT) as {start_time_col_name}, DATE_BIN('4h', \"{table_name}\".\"{time_column}\", TIMESTAMP '1970-01-01 00:00:00+00') + INTERVAL '4h' as {end_time_col_name}" - ) } else { // 1 day format!( From d4737459d1612197188062d91cf5772dd939b49d Mon Sep 17 00:00:00 2001 From: anant Date: Mon, 8 Sep 2025 16:55:38 +0530 Subject: [PATCH 3/3] make `numBins` optional instead --- src/handlers/http/query.rs | 1 + src/prism/logstream/mod.rs | 1 + src/query/mod.rs | 30 ++++++++++++++++++------------ 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/src/handlers/http/query.rs b/src/handlers/http/query.rs index b10da52b9..2049a110c 100644 --- a/src/handlers/http/query.rs +++ b/src/handlers/http/query.rs @@ -167,6 +167,7 @@ async fn handle_count_query( stream: table_name.to_string(), start_time: query_request.start_time.clone(), end_time: query_request.end_time.clone(), + num_bins: Some(1), conditions: None, }; let count_records = counts_req.get_bin_density().await?; diff --git a/src/prism/logstream/mod.rs b/src/prism/logstream/mod.rs index dbf854b9d..70bd9f7a4 100644 --- a/src/prism/logstream/mod.rs +++ b/src/prism/logstream/mod.rs @@ -351,6 +351,7 @@ impl PrismDatasetRequest { stream: stream.to_owned(), start_time: "1h".to_owned(), end_time: "now".to_owned(), + num_bins: Some(10), conditions: None, }; diff --git a/src/query/mod.rs b/src/query/mod.rs index 2f40c60d9..670bedf5e 100644 --- a/src/query/mod.rs +++ b/src/query/mod.rs @@ -332,6 +332,8 @@ pub struct CountsRequest { pub start_time: String, /// Excluded end time for counts query pub end_time: String, + /// optional number of bins to divide the time range into + pub num_bins: Option, /// Conditions pub conditions: Option, } @@ -400,19 +402,23 @@ impl CountsRequest { .signed_duration_since(time_range.start) .num_minutes() as u64; - // create number of bins based on total minutes - let num_bins = if total_minutes <= 60 * 5 { - // till 5 hours, 1 bin = 1 min - total_minutes - } else if total_minutes <= 60 * 24 { - // till 1 day, 1 bin = 5 min - total_minutes.div_ceil(5) - } else if total_minutes <= 60 * 24 * 10 { - // till 10 days, 1 bin = 1 hour - total_minutes.div_ceil(60) + let num_bins = if let Some(num_bins) = self.num_bins { + num_bins } else { - // > 10 days, 1 bin = 1 day - total_minutes.div_ceil(1440) + // create number of bins based on total minutes + if total_minutes <= 60 * 5 { + // till 5 hours, 1 bin = 1 min + total_minutes + } else if total_minutes <= 60 * 24 { + // till 1 day, 1 bin = 5 min + total_minutes.div_ceil(5) + } else if total_minutes <= 60 * 24 * 10 { + // till 10 days, 1 bin = 1 hour + total_minutes.div_ceil(60) + } else { + // > 10 days, 1 bin = 1 day + total_minutes.div_ceil(1440) + } }; // divide minutes by num bins to get minutes per bin