Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pom-dependency-tree.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ai.elimu:webapp:war:2.6.25-SNAPSHOT
ai.elimu:webapp:war:2.6.27-SNAPSHOT
+- ai.elimu:model:jar:model-2.0.97:compile
| \- com.google.code.gson:gson:jar:2.13.0:compile
| \- com.google.errorprone:error_prone_annotations:jar:2.37.0:compile
Expand Down
15 changes: 15 additions & 0 deletions src/main/java/ai/elimu/dao/WordAssessmentEventDao.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package ai.elimu.dao;

import java.util.Calendar;
import java.util.List;

import org.springframework.dao.DataAccessException;

import ai.elimu.entity.analytics.WordAssessmentEvent;

public interface WordAssessmentEventDao extends GenericDao<WordAssessmentEvent> {

WordAssessmentEvent read(Calendar timestamp, String androidId, String packageName) throws DataAccessException;

List<WordAssessmentEvent> readAll(String androidId) throws DataAccessException;
}
43 changes: 43 additions & 0 deletions src/main/java/ai/elimu/dao/jpa/WordAssessmentEventDaoJpa.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
package ai.elimu.dao.jpa;

import java.util.Calendar;
import java.util.List;

import org.springframework.dao.DataAccessException;

import ai.elimu.dao.WordAssessmentEventDao;
import ai.elimu.entity.analytics.WordAssessmentEvent;
import jakarta.persistence.NoResultException;

public class WordAssessmentEventDaoJpa extends GenericDaoJpa<WordAssessmentEvent> implements WordAssessmentEventDao {

@Override
public WordAssessmentEvent read(Calendar timestamp, String androidId, String packageName) throws DataAccessException {
try {
return (WordAssessmentEvent) em.createQuery(
"SELECT event " +
"FROM WordAssessmentEvent event " +
"WHERE event.timestamp = :timestamp " +
"AND event.androidId = :androidId " +
"AND event.packageName = :packageName")
.setParameter("timestamp", timestamp)
.setParameter("androidId", androidId)
.setParameter("packageName", packageName)
.getSingleResult();
} catch (NoResultException e) {
logger.info("WordAssessmentEvent (" + timestamp.getTimeInMillis() + ", " + androidId + ", \"" + packageName + "\") was not found");
return null;
}
}

@Override
public List<WordAssessmentEvent> readAll(String androidId) throws DataAccessException {
return em.createQuery(
"SELECT event " +
"FROM WordAssessmentEvent event " +
"WHERE event.androidId = :androidId " +
"ORDER BY event.id")
.setParameter("androidId", androidId)
.getResultList();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package ai.elimu.entity.analytics;

import jakarta.persistence.Entity;
import lombok.Getter;
import lombok.Setter;

@Getter
@Setter
@Entity
public class WordAssessmentEvent extends AssessmentEvent {

/**
* The word text. E.g. <code>"star"</code>.
*/
private String wordText;

/**
* This field might not be included, e.g. if the assessment task was done in a
* 3rd-party app that did not load the content from the elimu.ai Content Provider.
* In this case, the {@link #wordId} will be {@code null}.
*/
private Long wordId;

/**
* A value in the range [0.0, 1.0].
*/
private Float masteryScore;

/**
* The number of milliseconds passed between the student opening the assessment task
* and submitting a response. E.g. <code>15000</code>.
*/
private Long timeSpentMs;
}
Original file line number Diff line number Diff line change
@@ -1,22 +1,12 @@
package ai.elimu.rest.v2.analytics;

import ai.elimu.dao.ApplicationDao;
import ai.elimu.dao.WordDao;
import ai.elimu.model.v2.enums.Language;
import ai.elimu.util.AnalyticsHelper;
import ai.elimu.util.ConfigHelper;
import ai.elimu.util.DiscordHelper;
import jakarta.servlet.http.HttpServletResponse;
import java.io.File;
import java.io.Reader;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import lombok.RequiredArgsConstructor;
import jakarta.servlet.http.HttpServletResponse;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;

import org.apache.commons.io.FileUtils;
import org.json.JSONObject;
import org.springframework.http.HttpStatus;
Expand All @@ -27,100 +17,75 @@
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.multipart.MultipartFile;

/**
* REST API endpoint for receiving word assessment events from the
* <a href="https://github.com/elimu-ai/analytics">Analytics</a> application.
*/
@RestController
@RequestMapping(value = "/rest/v2/analytics/word-assessment-events/csv", produces = MediaType.APPLICATION_JSON_UTF8_VALUE)
@RequiredArgsConstructor
@RequestMapping(value = "/rest/v2/analytics/word-assessment-events/csv", produces = MediaType.APPLICATION_JSON_VALUE)
@Slf4j
public class WordAssessmentEventsRestController {

// @Autowired
// private WordAssessmentEventDao wordAssessmentEventDao;

private final ApplicationDao applicationDao;

private final WordDao wordDao;

@PostMapping
public String handleUploadCsvRequest(
@RequestParam("file") MultipartFile multipartFile,
HttpServletResponse response
) {
log.info("handleUploadCsvRequest");

String name = multipartFile.getName();
log.info("name: " + name);

// Expected format: "7161a85a0e4751cd_3001012_word-assessment-events_2020-04-23.csv"
String originalFilename = multipartFile.getOriginalFilename();
log.info("originalFilename: " + originalFilename);

// TODO: Send notification to the #📊-data-collection channel in Discord
// Hide parts of the Android ID, e.g. "7161***51cd_3001012_word-learning-events_2020-04-23.csv"
String anonymizedOriginalFilename = originalFilename.substring(0, 4) + "***" + originalFilename.substring(12);
DiscordHelper.sendChannelMessage("Received dataset: `" + anonymizedOriginalFilename + "`", null, null, null, null);

String androidIdExtractedFromFilename = AnalyticsHelper.extractAndroidIdFromCsvFilename(originalFilename);
log.info("androidIdExtractedFromFilename: \"" + androidIdExtractedFromFilename + "\"");

Integer versionCodeExtractedFromFilename = AnalyticsHelper.extractVersionCodeFromCsvFilename(originalFilename);
log.info("versionCodeExtractedFromFilename: " + versionCodeExtractedFromFilename);

String contentType = multipartFile.getContentType();
log.info("contentType: " + contentType);

JSONObject jsonObject = new JSONObject();

try {
byte[] bytes = multipartFile.getBytes();
log.info("bytes.length: " + bytes.length);

// Store a backup of the original CSV file on the filesystem (in case it will be needed for debugging)
File elimuAiDir = new File(System.getProperty("user.home"), ".elimu-ai");
File languageDir = new File(elimuAiDir, "lang-" + Language.valueOf(ConfigHelper.getProperty("content.language")));
File analyticsDir = new File(languageDir, "analytics");
File androidIdDir = new File(analyticsDir, "android-id-" + androidIdExtractedFromFilename);
File versionCodeDir = new File(androidIdDir, "version-code-" + versionCodeExtractedFromFilename);
File wordAssessmentEventsDir = new File(versionCodeDir, "word-assessment-events");
wordAssessmentEventsDir.mkdirs();
File csvFile = new File(wordAssessmentEventsDir, originalFilename);
log.info("Storing CSV file at " + csvFile);
FileUtils.writeByteArrayToFile(csvFile, bytes);
log.info("csvFile.exists(): " + csvFile.exists());

// Iterate each row in the CSV file
Path csvFilePath = Paths.get(csvFile.toURI());
log.info("csvFilePath: " + csvFilePath);
Reader reader = Files.newBufferedReader(csvFilePath);
CSVFormat csvFormat = CSVFormat.DEFAULT
.withHeader(
"id", // The Room database ID
"time",
"android_id",
"package_name",
"word_id",
"word_text",
"mastery_score",
"time_spent_ms"
)
.withSkipHeaderRecord();
CSVParser csvParser = new CSVParser(reader, csvFormat);
for (CSVRecord csvRecord : csvParser) {
log.info("csvRecord: " + csvRecord);

// Convert from CSV to Java

// TODO
}
} catch (Exception ex) {
log.error(ex.getMessage());

jsonObject.put("result", "error");
jsonObject.put("errorMessage", ex.getMessage());
response.setStatus(HttpStatus.INTERNAL_SERVER_ERROR.value());

@PostMapping
public String handleUploadCsvRequest(
@RequestParam("file") MultipartFile multipartFile,
HttpServletResponse response
) {
log.info("handleUploadCsvRequest");

JSONObject jsonResponseObject = new JSONObject();
try {
String contentType = multipartFile.getContentType();
log.info("contentType: " + contentType);

long size = multipartFile.getSize();
log.info("size: " + size);
if (size == 0) {
throw new IllegalArgumentException("Empty file");
}

// Expected format: "7161a85a0e4751cd_3001012_word-assessment-events_2020-04-23.csv"
String originalFilename = multipartFile.getOriginalFilename();
log.info("originalFilename: " + originalFilename);
if (originalFilename.length() != "7161a85a0e4751cd_3001012_word-assessment-events_2020-04-23.csv".length()) {
throw new IllegalArgumentException("Unexpected filename");
}
Comment on lines +47 to +52
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Strengthen filename validation to prevent security vulnerabilities.

The current validation only checks filename length but doesn't validate the actual format pattern. This could allow malicious filenames that match the length but contain path traversal characters or other harmful content.

-            if (originalFilename.length() != "7161a85a0e4751cd_3001012_word-assessment-events_2020-04-23.csv".length()) {
-                throw new IllegalArgumentException("Unexpected filename");
-            }
+            // Validate filename is not null and matches expected pattern
+            if (originalFilename == null || originalFilename.length() != "7161a85a0e4751cd_3001012_word-assessment-events_2020-04-23.csv".length()) {
+                throw new IllegalArgumentException("Unexpected filename");
+            }
+            
+            // Validate filename pattern to prevent path traversal and ensure expected format
+            String expectedPattern = "^[a-f0-9]{16}_\\d{7}_word-assessment-events_\\d{4}-\\d{2}-\\d{2}\\.csv$";
+            if (!originalFilename.matches(expectedPattern)) {
+                throw new IllegalArgumentException("Filename does not match expected pattern");
+            }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
// Expected format: "7161a85a0e4751cd_3001012_word-assessment-events_2020-04-23.csv"
String originalFilename = multipartFile.getOriginalFilename();
log.info("originalFilename: " + originalFilename);
if (originalFilename.length() != "7161a85a0e4751cd_3001012_word-assessment-events_2020-04-23.csv".length()) {
throw new IllegalArgumentException("Unexpected filename");
}
// Expected format: "7161a85a0e4751cd_3001012_word-assessment-events_2020-04-23.csv"
String originalFilename = multipartFile.getOriginalFilename();
log.info("originalFilename: " + originalFilename);
- if (originalFilename.length() != "7161a85a0e4751cd_3001012_word-assessment-events_2020-04-23.csv".length()) {
- throw new IllegalArgumentException("Unexpected filename");
- }
+ // Validate filename is not null and matches expected length
+ if (originalFilename == null || originalFilename.length() != "7161a85a0e4751cd_3001012_word-assessment-events_2020-04-23.csv".length()) {
+ throw new IllegalArgumentException("Unexpected filename");
+ }
+
+ // Validate filename pattern to prevent path traversal and ensure expected format
+ String expectedPattern = "^[a-f0-9]{16}_\\d{7}_word-assessment-events_\\d{4}-\\d{2}-\\d{2}\\.csv$";
+ if (!originalFilename.matches(expectedPattern)) {
+ throw new IllegalArgumentException("Filename does not match expected pattern");
+ }
🤖 Prompt for AI Agents
In
src/main/java/ai/elimu/rest/v2/analytics/WordAssessmentEventsRestController.java
around lines 47 to 52, the filename validation only checks the length, which is
insufficient and may allow malicious filenames. Replace the length check with a
strict pattern validation using a regular expression that matches the expected
filename format, ensuring it excludes path traversal characters and other
invalid content. This will strengthen security by validating the filename
structure explicitly.


String androidIdExtractedFromFilename = AnalyticsHelper.extractAndroidIdFromCsvFilename(originalFilename);
log.info("androidIdExtractedFromFilename: \"" + androidIdExtractedFromFilename + "\"");

Integer versionCodeExtractedFromFilename = AnalyticsHelper.extractVersionCodeFromCsvFilename(originalFilename);
log.info("versionCodeExtractedFromFilename: " + versionCodeExtractedFromFilename);
Comment on lines +54 to +58
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Validate extracted metadata to prevent injection attacks.

The extracted Android ID and version code should be validated before using them in file paths to prevent directory traversal attacks.

             String androidIdExtractedFromFilename = AnalyticsHelper.extractAndroidIdFromCsvFilename(originalFilename);
+            // Validate Android ID format (should be 16 hex characters)
+            if (!androidIdExtractedFromFilename.matches("^[a-f0-9]{16}$")) {
+                throw new IllegalArgumentException("Invalid Android ID format");
+            }
             log.info("androidIdExtractedFromFilename: \"" + androidIdExtractedFromFilename + "\"");
             
             Integer versionCodeExtractedFromFilename = AnalyticsHelper.extractVersionCodeFromCsvFilename(originalFilename);
+            // Validate version code is positive
+            if (versionCodeExtractedFromFilename <= 0) {
+                throw new IllegalArgumentException("Invalid version code");
+            }
             log.info("versionCodeExtractedFromFilename: " + versionCodeExtractedFromFilename);
🤖 Prompt for AI Agents
In
src/main/java/ai/elimu/rest/v2/analytics/WordAssessmentEventsRestController.java
around lines 54 to 58, the extracted Android ID and version code from the
filename are used without validation, which risks directory traversal or
injection attacks. Add validation checks to ensure the Android ID contains only
allowed characters (e.g., alphanumeric) and the version code is a valid integer
within expected bounds before using them in file paths. Reject or sanitize any
invalid values to prevent malicious input from affecting file system operations.


byte[] bytes = multipartFile.getBytes();
log.info("bytes.length: " + bytes.length);

// Store the original CSV file on the filesystem
File elimuAiDir = new File(System.getProperty("user.home"), ".elimu-ai");
File languageDir = new File(elimuAiDir, "lang-" + Language.valueOf(ConfigHelper.getProperty("content.language")));
File analyticsDir = new File(languageDir, "analytics");
File androidIdDir = new File(analyticsDir, "android-id-" + androidIdExtractedFromFilename);
File versionCodeDir = new File(androidIdDir, "version-code-" + versionCodeExtractedFromFilename);
File wordAssessmentEventsDir = new File(versionCodeDir, "word-assessment-events");
wordAssessmentEventsDir.mkdirs();
File csvFile = new File(wordAssessmentEventsDir, originalFilename);
log.info("Storing CSV file at " + csvFile);
FileUtils.writeByteArrayToFile(csvFile, bytes);
log.info("csvFile.exists(): " + csvFile.exists());
Comment on lines +64 to +74
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Improve directory creation error handling and add path sanitization.

The mkdirs() method can fail silently, and the file path construction should include additional validation to prevent issues.

             // Store the original CSV file on the filesystem
             File elimuAiDir = new File(System.getProperty("user.home"), ".elimu-ai");
             File languageDir = new File(elimuAiDir, "lang-" + Language.valueOf(ConfigHelper.getProperty("content.language")));
             File analyticsDir = new File(languageDir, "analytics");
             File androidIdDir = new File(analyticsDir, "android-id-" + androidIdExtractedFromFilename);
             File versionCodeDir = new File(androidIdDir, "version-code-" + versionCodeExtractedFromFilename);
             File wordAssessmentEventsDir = new File(versionCodeDir, "word-assessment-events");
-            wordAssessmentEventsDir.mkdirs();
+            
+            // Ensure directory creation succeeds
+            if (!wordAssessmentEventsDir.exists() && !wordAssessmentEventsDir.mkdirs()) {
+                throw new RuntimeException("Failed to create directory: " + wordAssessmentEventsDir.getAbsolutePath());
+            }
+            
             File csvFile = new File(wordAssessmentEventsDir, originalFilename);
+            
+            // Verify the final path is within expected directory (prevent path traversal)
+            if (!csvFile.getCanonicalPath().startsWith(wordAssessmentEventsDir.getCanonicalPath())) {
+                throw new SecurityException("Invalid file path detected");
+            }
+            
             log.info("Storing CSV file at " + csvFile);
             FileUtils.writeByteArrayToFile(csvFile, bytes);
             log.info("csvFile.exists(): " + csvFile.exists());
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
File elimuAiDir = new File(System.getProperty("user.home"), ".elimu-ai");
File languageDir = new File(elimuAiDir, "lang-" + Language.valueOf(ConfigHelper.getProperty("content.language")));
File analyticsDir = new File(languageDir, "analytics");
File androidIdDir = new File(analyticsDir, "android-id-" + androidIdExtractedFromFilename);
File versionCodeDir = new File(androidIdDir, "version-code-" + versionCodeExtractedFromFilename);
File wordAssessmentEventsDir = new File(versionCodeDir, "word-assessment-events");
wordAssessmentEventsDir.mkdirs();
File csvFile = new File(wordAssessmentEventsDir, originalFilename);
log.info("Storing CSV file at " + csvFile);
FileUtils.writeByteArrayToFile(csvFile, bytes);
log.info("csvFile.exists(): " + csvFile.exists());
// Store the original CSV file on the filesystem
File elimuAiDir = new File(System.getProperty("user.home"), ".elimu-ai");
File languageDir = new File(elimuAiDir, "lang-" + Language.valueOf(ConfigHelper.getProperty("content.language")));
File analyticsDir = new File(languageDir, "analytics");
File androidIdDir = new File(analyticsDir, "android-id-" + androidIdExtractedFromFilename);
File versionCodeDir = new File(androidIdDir, "version-code-" + versionCodeExtractedFromFilename);
File wordAssessmentEventsDir = new File(versionCodeDir, "word-assessment-events");
// Ensure directory creation succeeds
if (!wordAssessmentEventsDir.exists() && !wordAssessmentEventsDir.mkdirs()) {
throw new RuntimeException("Failed to create directory: " + wordAssessmentEventsDir.getAbsolutePath());
}
File csvFile = new File(wordAssessmentEventsDir, originalFilename);
// Verify the final path is within expected directory (prevent path traversal)
if (!csvFile.getCanonicalPath().startsWith(wordAssessmentEventsDir.getCanonicalPath())) {
throw new SecurityException("Invalid file path detected");
}
log.info("Storing CSV file at " + csvFile);
FileUtils.writeByteArrayToFile(csvFile, bytes);
log.info("csvFile.exists(): " + csvFile.exists());
🤖 Prompt for AI Agents
In
src/main/java/ai/elimu/rest/v2/analytics/WordAssessmentEventsRestController.java
around lines 64 to 74, the call to mkdirs() for creating directories does not
check if the operation succeeded, which can cause silent failures. Modify the
code to verify the return value of mkdirs() and log or handle errors if
directory creation fails. Additionally, sanitize and validate the components
used in constructing file paths, such as language,
androidIdExtractedFromFilename, versionCodeExtractedFromFilename, and
originalFilename, to prevent invalid or malicious path inputs.


jsonResponseObject.put("result", "success");
jsonResponseObject.put("successMessage", "The CSV file was uploaded");
response.setStatus(HttpStatus.OK.value());
} catch (Exception ex) {
log.error(ex.getMessage());

jsonResponseObject.put("result", "error");
jsonResponseObject.put("errorMessage", ex.getMessage());
response.setStatus(HttpStatus.INTERNAL_SERVER_ERROR.value());
Comment on lines +79 to +84
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Improve exception handling specificity.

The broad catch (Exception ex) clause may mask important error details and security exceptions should be handled differently from validation errors.

-        } catch (Exception ex) {
-            log.error(ex.getMessage());
+        } catch (IllegalArgumentException ex) {
+            log.warn("Validation error: " + ex.getMessage());
+            
+            jsonResponseObject.put("result", "error");
+            jsonResponseObject.put("errorMessage", ex.getMessage());
+            response.setStatus(HttpStatus.BAD_REQUEST.value());
+        } catch (SecurityException ex) {
+            log.error("Security violation: " + ex.getMessage());
+            
+            jsonResponseObject.put("result", "error");
+            jsonResponseObject.put("errorMessage", "Invalid request");
+            response.setStatus(HttpStatus.BAD_REQUEST.value());
+        } catch (Exception ex) {
+            log.error("Unexpected error: " + ex.getMessage(), ex);
             
             jsonResponseObject.put("result", "error");
-            jsonResponseObject.put("errorMessage", ex.getMessage());
+            jsonResponseObject.put("errorMessage", "Internal server error");
             response.setStatus(HttpStatus.INTERNAL_SERVER_ERROR.value());
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
} catch (Exception ex) {
log.error(ex.getMessage());
jsonResponseObject.put("result", "error");
jsonResponseObject.put("errorMessage", ex.getMessage());
response.setStatus(HttpStatus.INTERNAL_SERVER_ERROR.value());
} catch (IllegalArgumentException ex) {
log.warn("Validation error: " + ex.getMessage());
jsonResponseObject.put("result", "error");
jsonResponseObject.put("errorMessage", ex.getMessage());
response.setStatus(HttpStatus.BAD_REQUEST.value());
} catch (SecurityException ex) {
log.error("Security violation: " + ex.getMessage());
jsonResponseObject.put("result", "error");
jsonResponseObject.put("errorMessage", "Invalid request");
response.setStatus(HttpStatus.BAD_REQUEST.value());
} catch (Exception ex) {
log.error("Unexpected error: " + ex.getMessage(), ex);
jsonResponseObject.put("result", "error");
jsonResponseObject.put("errorMessage", "Internal server error");
response.setStatus(HttpStatus.INTERNAL_SERVER_ERROR.value());
🤖 Prompt for AI Agents
In
src/main/java/ai/elimu/rest/v2/analytics/WordAssessmentEventsRestController.java
between lines 79 and 84, replace the broad catch (Exception ex) with more
specific exception handling. Identify and catch distinct exceptions such as
validation exceptions and security-related exceptions separately, handling each
appropriately. This will improve error clarity and allow different responses or
logging for different error types instead of a generic catch-all.

}

String jsonResponse = jsonResponseObject.toString();
log.info("jsonResponse: " + jsonResponse);
return jsonResponse;
}

String jsonResponse = jsonObject.toString();
log.info("jsonResponse: " + jsonResponse);
return jsonResponse;
}
}
Loading
Loading