Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
95 commits
Select commit Hold shift + click to select a range
f34e016
Elasticsearch code.
May 31, 2024
70c4ce0
Adding basic responses for elasticsearch.
Jun 7, 2024
51a5fa8
Saving highlights.
Jun 11, 2024
52606f4
Saving code cleanup.
Jun 11, 2024
c489de4
Adding EsSearch.
Jun 11, 2024
f05bfb1
Saving partial progress.
Jun 12, 2024
b5667f2
Refinements.
Jun 12, 2024
e78bafb
Small bug fixes.
Jun 12, 2024
9f9abfe
Fixing alignment
Jun 12, 2024
545f7cb
Migrating es jobs naming to be specific.
Jun 12, 2024
ec4c0c6
Adding boilerplate for dataset es search
Jun 12, 2024
897f1be
Adding datasets.
Jun 12, 2024
38c8db6
Adding polish for more data.
Jun 12, 2024
6713e1d
Empty state and other small enhancements.
Jun 12, 2024
bb5ce2d
Adding arrow key functionality.
Jun 18, 2024
cffd9cc
Removing console log
Jun 18, 2024
6f62898
Spotless
Jun 18, 2024
4a5f1aa
Refinements to queries.
Jun 25, 2024
40bc497
Adding debounce.
Jun 26, 2024
19c76c8
Fixing alignment issues.
Jun 26, 2024
c7f5860
Saving updates for password setting via env config for elasticsearch.
Jun 26, 2024
fd37df9
Setting up startup scripts and adding corresponding waits.
Jun 26, 2024
0a4cdcc
Adding logs and more fields for jobs.
Jun 27, 2024
f9023fb
Resolving jackson serialization issue.
Jun 27, 2024
96ab8f8
Small updates for search display.
Jul 1, 2024
e828b2b
Adding onClick handlers.
Jul 3, 2024
5133c42
Fixing null cases, adding more search options for datasets.
phixMe Jul 6, 2024
9241307
Handling enter key.
phixMe Jul 6, 2024
259fea7
Fixing minor encoding and layout issues for spark related open lineag…
phixMe Jul 8, 2024
5440917
Additional fixes for text overflow on names and namespaces.
phixMe Jul 8, 2024
df43ab8
Fixing indexing problem.
phixMe Jul 8, 2024
b6f4691
Transitioning to opensearch.
phixMe Jul 9, 2024
82e00e2
Removing elasticsearch references.
phixMe Jul 9, 2024
37e80ed
Isolation of search code, calling services.
phixMe Jul 9, 2024
843baaf
Adding config to support multiple instances.
phixMe Jul 10, 2024
4faba34
Spotless
phixMe Jul 12, 2024
df0bc84
Adding helm files.
phixMe Jul 12, 2024
bf459ef
Adding in stronger password for search.
phixMe Jul 15, 2024
5d40b2e
Handling debouncing.
phixMe Jul 22, 2024
085a3ef
Adding "ADVANCED_SEARCH" configurable variable for web.
phixMe Jul 29, 2024
635c27b
Fixing some tests.
phixMe Aug 2, 2024
df446e7
Moving indexing down a row.
phixMe Aug 2, 2024
a8cee81
Spotless
phixMe Aug 2, 2024
986cd22
Putting back removed code.
phixMe Aug 2, 2024
fc1bc5d
Merge branch 'refs/heads/main' into feature/es-client
phixMe Aug 3, 2024
3db2bda
Merge spotless resolution.
phixMe Aug 3, 2024
22c0be4
Skipping over search for db migration tests.
phixMe Aug 3, 2024
cd3fa50
Adding search back to migration
phixMe Aug 3, 2024
06dadb3
Trying out ci config setting.
phixMe Aug 3, 2024
04d9546
Removing search from base config as a whole.
phixMe Aug 4, 2024
038cfc9
Pushing out header updates.
phixMe Aug 4, 2024
2767eda
Review comment on search service init.
phixMe Aug 5, 2024
ec4ab41
Fixing up dependencies in docker to apply migrations.
phixMe Aug 5, 2024
593d7b8
Code review updates and naming changes.
phixMe Aug 6, 2024
d13bb06
newline
phixMe Aug 6, 2024
d099b77
Updating for beta vs. non beta endpoints in search resource.
phixMe Aug 6, 2024
e8bfb3b
Moving search resource to its own place.
phixMe Aug 6, 2024
7608b58
Removing prints.
phixMe Aug 6, 2024
2c9cd6c
Removing all helm changes for this work stream.
phixMe Aug 6, 2024
3cb90b2
Adding back lock file contents.
phixMe Aug 6, 2024
247b79d
Merge branch 'main' into feature/es-client
phixMe Aug 6, 2024
4158d54
Adding header
phixMe Aug 6, 2024
e672bf1
Merge remote-tracking branch 'origin/feature/es-client' into feature/…
phixMe Aug 6, 2024
f05bd15
Adding middleware proxy.
phixMe Aug 9, 2024
61672cd
Code review updates.
phixMe Aug 9, 2024
fdeaaa1
Moving from outer gradle to api gradle.
phixMe Aug 9, 2024
f04e9b1
Merge branch 'refs/heads/main' into feature/es-client
phixMe Aug 9, 2024
dfd60e6
Removing extra containers.
phixMe Aug 10, 2024
d4649ff
Removing extra containers.
phixMe Aug 10, 2024
ac5102b
Resolve merge conflicts
wslulciuc Aug 10, 2024
b83254c
Set timeout for seed container to 60s
wslulciuc Aug 10, 2024
73726a7
Fixing `--no-search` and frontend config.
phixMe Aug 12, 2024
907d96e
Merge remote-tracking branch 'origin/feature/es-client' into feature/…
phixMe Aug 12, 2024
15c6697
Add check before indexing ol event
wslulciuc Aug 12, 2024
d47a6c4
feat: add search subproject
yanlibert Aug 13, 2024
2c2040c
feat: init search engine
yanlibert Aug 13, 2024
f6dd50b
feat: add healthcheck
yanlibert Aug 13, 2024
b4f6930
feat: replicate Opensearch API behavior
yanlibert Aug 13, 2024
ecf172a
feat: properly index and search datasets and jobs
yanlibert Aug 22, 2024
e906d37
chore: solve conflicts
yanlibert Sep 3, 2024
e9609a4
feat: working search response
yanlibert Sep 4, 2024
dc77ead
feat: changing the analyzer to an NGramAnalyzer
yanlibert Sep 4, 2024
c506eee
WIP: load event at startup
yanlibert Sep 4, 2024
61631bb
WIP: dedup event at startup
yanlibert Sep 5, 2024
4fd9535
WIP: remove log
yanlibert Sep 5, 2024
dda476e
WIP: batch load event from lineage_events
yanlibert Sep 5, 2024
c7921f5
WIP: reduce memory footprint by removing duplicates
yanlibert Sep 6, 2024
f37d6b5
WIP: remove debug log
yanlibert Sep 6, 2024
00455d6
WIP: thread safe index writing
yanlibert Sep 6, 2024
b8589bc
WIP: load init events in background
yanlibert Sep 6, 2024
17c0d85
WIP: remove the ERROR StatusLogger due to unnecessary opensearch deps
yanlibert Sep 7, 2024
bf3808a
WIP: spotlessApply
yanlibert Sep 7, 2024
7422cbd
WIP: tidy up
yanlibert Sep 7, 2024
52a088d
WIP: add a banner
yanlibert Sep 7, 2024
5689a67
Merge branch 'MarquezProject:main' into feature/lucene-search-engine
yanlibert Sep 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ subprojects {
archivesBaseName = 'marquez-api'
}

// Build settings for the ':search' subproject: it is built with the
// 'application' plugin and its archives are named 'marquez-search'.
project(':search') {
apply plugin: 'application'
archivesBaseName = 'marquez-search'
}

project(':clients:java') {
apply plugin: 'java-library'
archivesBaseName = 'marquez-java'
Expand Down
2 changes: 0 additions & 2 deletions docker-compose.seed.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,6 @@ services:
- ./docker/wait-for-it.sh:/usr/src/app/wait-for-it.sh
- ./docker/seed.sh:/usr/src/app/seed.sh
- ./docker/metadata.json:/usr/src/app/metadata.json
links:
- "db:postgres"
depends_on:
- api
entrypoint: ["./wait-for-it.sh", "api:${API_PORT}", "--timeout=120", "--", "./seed.sh"]
2 changes: 0 additions & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@ services:
- "${API_ADMIN_PORT}:${API_ADMIN_PORT}"
volumes:
- data:/opt/marquez
links:
- "db:postgres"
depends_on:
- db
entrypoint: ["/opt/marquez/wait-for-it.sh", "db:${POSTGRES_PORT}", "--", "./entrypoint.sh"]
Expand Down
50 changes: 50 additions & 0 deletions search/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import com.github.jengelman.gradle.plugins.shadow.transformers.ServiceFileTransformer

// Versions of third-party libraries pinned by this subproject. Each library
// family shares a single version property so its artifacts cannot drift apart.
ext {
  luceneVersion = '9.11.1'
  jdbiVersion = '3.45.4'
}

dependencies {
  implementation project(':api')

  // Lucene modules; all resolved at the same ${luceneVersion}.
  implementation "org.apache.lucene:lucene-core:${luceneVersion}"
  implementation "org.apache.lucene:lucene-queryparser:${luceneVersion}"
  implementation "org.apache.lucene:lucene-analysis-common:${luceneVersion}"
  implementation "org.apache.lucene:lucene-highlighter:${luceneVersion}"

  // JDBI modules; all resolved at the same ${jdbiVersion}.
  implementation "org.jdbi:jdbi3-core:${jdbiVersion}"
  implementation "org.jdbi:jdbi3-sqlobject:${jdbiVersion}"

  // NOTE(review): slf4jVersion and dropwizardVersion are not defined in this
  // script; presumably they are supplied by the root build - confirm.
  implementation "org.slf4j:slf4j-api:${slf4jVersion}"

  implementation "io.dropwizard:dropwizard-core:${dropwizardVersion}"
}

// Entry point used by the 'application' plugin when launching the service.
application {
mainClassName = 'marquez.searchengine.SearchApplication'
}

// Arguments handed to the main class when running via `runShadow`:
// the 'server' command followed by the 'search.yml' configuration file.
runShadow {
args = ['server', 'search.yml']
}

// Configuration of the shaded jar produced for this subproject.
shadowJar {
// Empty classifier: the shaded jar is published without a classifier suffix.
archiveClassifier.set('')
// NOTE(review): presumably merges META-INF/services entries from the
// bundled dependencies - confirm against the Shadow plugin docs.
transform(ServiceFileTransformer)
// Bundle the LICENSE file found in the project directory.
from(projectDir) {
include 'LICENSE'
}
// Manifest attributes recording build provenance and the jar's main class.
manifest {
attributes(
'Created-By': "Gradle ${gradle.gradleVersion}",
'Built-By': System.getProperty('user.name'),
'Build-Jdk': System.getProperty('java.version'),
'Implementation-Title': project.name,
'Implementation-Version': project.version,
'Main-Class': application.mainClass)
}
}

// Task ordering: the distribution archives and start scripts require the
// shaded jar, and the shaded jar in turn requires the plain jar.
tasks.distZip.dependsOn tasks.shadowJar
tasks.distTar.dependsOn tasks.shadowJar
tasks.startScripts.dependsOn tasks.shadowJar
tasks.shadowJar.dependsOn tasks.jar
7 changes: 7 additions & 0 deletions search/search.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# HTTP server settings for the search service.
server:
# Application traffic: plain HTTP on port 9000.
applicationConnectors:
- type: http
port: 9000
# Admin traffic: plain HTTP on port 9001.
adminConnectors:
- type: http
port: 9001
36 changes: 36 additions & 0 deletions search/src/main/java/marquez/searchengine/SearchApplication.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package marquez.searchengine;

import io.dropwizard.Application;
import io.dropwizard.setup.Bootstrap;
import io.dropwizard.setup.Environment;
import java.io.IOException;
import lombok.extern.slf4j.Slf4j;
import marquez.searchengine.db.DatabaseConnection;
import marquez.searchengine.health.SearchHealthCheck;
import marquez.searchengine.resources.SearchResource;
import org.jdbi.v3.core.Jdbi;

/**
 * Dropwizard entry point of the search service.
 *
 * <p>On startup it obtains a {@link Jdbi} instance from {@link DatabaseConnection}, registers a
 * {@link SearchResource} built on top of it with Jersey, and registers a {@link
 * SearchHealthCheck} under the name {@code search-health-check}.
 */
@Slf4j
public class SearchApplication extends Application<SearchConfig> {

  /**
   * Launches the service.
   *
   * @param args command-line arguments, forwarded unchanged to {@link Application#run(String...)}
   * @throws Exception if the application fails to start
   */
  public static void main(String[] args) throws Exception {
    final SearchApplication application = new SearchApplication();
    application.run(args);
  }

  /** Returns the fixed application name, {@code search-service}. */
  @Override
  public String getName() {
    return "search-service";
  }

  /** No bootstrap customisation is performed. */
  @Override
  public void initialize(Bootstrap<SearchConfig> bootstrap) {
    // Intentionally empty.
  }

  /**
   * Wires the REST resource and the health check into the Dropwizard environment.
   *
   * @param configuration parsed service configuration (not consulted by this method)
   * @param environment Dropwizard environment receiving the registrations
   * @throws IOException declared by the signature; propagated from the wiring performed here
   */
  @Override
  public void run(SearchConfig configuration, Environment environment) throws IOException {
    log.info("Application starting...");

    final Jdbi database = DatabaseConnection.initializeJdbi();
    final SearchResource resource = new SearchResource(database);
    environment.jersey().register(resource);

    final SearchHealthCheck healthCheck = new SearchHealthCheck();
    environment.healthChecks().register("search-health-check", healthCheck);
  }
}
13 changes: 13 additions & 0 deletions search/src/main/java/marquez/searchengine/SearchConfig.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package marquez.searchengine;

import com.fasterxml.jackson.annotation.JsonProperty;
import io.dropwizard.Configuration;

/**
 * Configuration of the search service.
 *
 * <p>Adds a single {@code enabled} flag, bound from the JSON/YAML property of the same name, on
 * top of the base Dropwizard {@link Configuration}.
 */
public class SearchConfig extends Configuration {

  /** Value of the {@code enabled} property; {@code true} when the property is not supplied. */
  @JsonProperty
  private boolean enabled = true;

  /**
   * Reports the configured {@code enabled} flag.
   *
   * @return the value of the {@code enabled} property
   */
  public boolean isEnabled() {
    return this.enabled;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
package marquez.searchengine.db;

import org.jdbi.v3.core.Jdbi;

/**
 * Creates the {@link Jdbi} instance used by the search service.
 *
 * <p>Connection settings are resolved from environment variables so the service can be pointed at
 * a database other than a local default one without recompiling. Every variable is optional; when
 * it is unset or blank the value that used to be hard-coded is used instead:
 *
 * <ul>
 *   <li>{@code POSTGRES_HOST} (default {@code localhost})
 *   <li>{@code POSTGRES_PORT} (default {@code 5432})
 *   <li>{@code POSTGRES_DB} (default {@code marquez})
 *   <li>{@code POSTGRES_USER} (default {@code marquez})
 *   <li>{@code POSTGRES_PASSWORD} (default {@code marquez})
 * </ul>
 */
public class DatabaseConnection {

  /**
   * Builds a {@link Jdbi} instance for the PostgreSQL database described by the {@code
   * POSTGRES_*} environment variables (see the class documentation for names and defaults).
   *
   * @return a new {@link Jdbi} created from the resolved JDBC URL, user name and password
   */
  public static Jdbi initializeJdbi() {
    String host = envOrDefault("POSTGRES_HOST", "localhost");
    String port = envOrDefault("POSTGRES_PORT", "5432");
    String database = envOrDefault("POSTGRES_DB", "marquez");
    String username = envOrDefault("POSTGRES_USER", "marquez");
    String password = envOrDefault("POSTGRES_PASSWORD", "marquez");

    String jdbcUrl = "jdbc:postgresql://" + host + ":" + port + "/" + database;

    return Jdbi.create(jdbcUrl, username, password);
  }

  /**
   * Reads an environment variable, falling back to a default when it is unset or blank.
   *
   * @param name name of the environment variable
   * @param fallback value to use when the variable is missing or contains only whitespace
   * @return the variable's value, or {@code fallback}
   */
  private static String envOrDefault(String name, String fallback) {
    String value = System.getenv(name);
    if (value == null || value.trim().isEmpty()) {
      return fallback;
    }
    return value;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package marquez.searchengine.health;

import com.codahale.metrics.health.HealthCheck;

/** Health check for the search service that unconditionally reports a healthy state. */
public class SearchHealthCheck extends HealthCheck {

  /**
   * Performs the check.
   *
   * @return a healthy {@link Result}; no condition is inspected
   * @throws Exception declared by the overridden signature; this implementation does not throw
   */
  @Override
  protected Result check() throws Exception {
    final Result status = Result.healthy();
    return status;
  }
}
105 changes: 105 additions & 0 deletions search/src/main/java/marquez/searchengine/models/IndexResponse.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
package marquez.searchengine.models;

import com.fasterxml.jackson.annotation.JsonProperty;

/**
 * Immutable response body describing the outcome of an index operation.
 *
 * <p>Fields are serialised under the JSON names given by their {@link JsonProperty} annotations
 * ({@code _index}, {@code _id}, {@code _version}, {@code result}, {@code _shards}, {@code
 * _seq_no}, {@code _primary_term}).
 *
 * <p>NOTE(review): the property names look like they mirror an OpenSearch-style index response -
 * confirm against the consumer of this payload.
 */
public class IndexResponse {

  @JsonProperty("_index") private final String index;

  @JsonProperty("_id") private final String id;

  @JsonProperty("_version") private final long version;

  @JsonProperty("result") private final String result;

  @JsonProperty("_shards") private final ShardInfo shardInfo;

  @JsonProperty("_seq_no") private final long seqNo;

  @JsonProperty("_primary_term") private final long primaryTerm;

  /**
   * Creates a fully populated response.
   *
   * @param index value emitted as {@code _index}
   * @param id value emitted as {@code _id}
   * @param version value emitted as {@code _version}
   * @param result value emitted as {@code result}
   * @param shardInfo value emitted as {@code _shards}
   * @param seqNo value emitted as {@code _seq_no}
   * @param primaryTerm value emitted as {@code _primary_term}
   */
  public IndexResponse(
      final String index,
      final String id,
      final long version,
      final String result,
      final ShardInfo shardInfo,
      final long seqNo,
      final long primaryTerm) {
    this.index = index;
    this.id = id;
    this.version = version;
    this.result = result;
    this.shardInfo = shardInfo;
    this.seqNo = seqNo;
    this.primaryTerm = primaryTerm;
  }

  /** @return the {@code _index} value */
  public String getIndex() {
    return this.index;
  }

  /** @return the {@code _id} value */
  public String getId() {
    return this.id;
  }

  /** @return the {@code _version} value */
  public long getVersion() {
    return this.version;
  }

  /** @return the {@code result} value */
  public String getResult() {
    return this.result;
  }

  /** @return the {@code _shards} value */
  public ShardInfo getShardInfo() {
    return this.shardInfo;
  }

  /** @return the {@code _seq_no} value */
  public long getSeqNo() {
    return this.seqNo;
  }

  /** @return the {@code _primary_term} value */
  public long getPrimaryTerm() {
    return this.primaryTerm;
  }

  /** Immutable shard counters serialised as {@code total}, {@code successful}, {@code failed}. */
  public static class ShardInfo {

    @JsonProperty("total") private final int total;

    @JsonProperty("successful") private final int successful;

    @JsonProperty("failed") private final int failed;

    /**
     * Creates the shard counters.
     *
     * @param total value emitted as {@code total}
     * @param successful value emitted as {@code successful}
     * @param failed value emitted as {@code failed}
     */
    public ShardInfo(final int total, final int successful, final int failed) {
      this.total = total;
      this.successful = successful;
      this.failed = failed;
    }

    /** @return the {@code total} value */
    public int getTotal() {
      return this.total;
    }

    /** @return the {@code successful} value */
    public int getSuccessful() {
      return this.successful;
    }

    /** @return the {@code failed} value */
    public int getFailed() {
      return this.failed;
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
package marquez.searchengine.models;

import java.util.List;
import java.util.Map;

/**
 * Mutable bean describing a search request with two optional parts: a {@link Highlight} section
 * and a {@link Query} section. All properties start out as {@code null} and are populated through
 * their setters.
 */
public class SearchRequest {

  private Highlight highlight;
  private Query query;

  /** @return the highlight section, or {@code null} if none was set */
  public Highlight getHighlight() {
    return this.highlight;
  }

  /** @param highlight the highlight section to store */
  public void setHighlight(Highlight highlight) {
    this.highlight = highlight;
  }

  /** @return the query section, or {@code null} if none was set */
  public Query getQuery() {
    return this.query;
  }

  /** @param query the query section to store */
  public void setQuery(Query query) {
    this.query = query;
  }

  /** Highlight section: a map keyed by field name whose values are string-to-string maps. */
  public static class Highlight {

    private Map<String, Map<String, String>> fields;

    /** @return the stored map, or {@code null} if none was set */
    public Map<String, Map<String, String>> getFields() {
      return this.fields;
    }

    /** @param fields the map to store (kept by reference, not copied) */
    public void setFields(Map<String, Map<String, String>> fields) {
      this.fields = fields;
    }
  }

  /** Query section holding a single {@code multi_match} clause. */
  public static class Query {

    // The snake_case name is kept on purpose: the accessor names derived from it are part of the
    // class's public interface.
    private MultiMatch multi_match;

    /** @return the {@code multi_match} clause, or {@code null} if none was set */
    public MultiMatch getMulti_match() {
      return this.multi_match;
    }

    /** @param multi_match the {@code multi_match} clause to store */
    public void setMulti_match(MultiMatch multi_match) {
      this.multi_match = multi_match;
    }

    /** The {@code multi_match} clause: target fields, operator, query text and match type. */
    public static class MultiMatch {

      private List<String> fields;
      private String operator;
      private String query;
      private String type;

      /** @return the list of fields, or {@code null} if none was set */
      public List<String> getFields() {
        return this.fields;
      }

      /** @param fields the list of fields to store (kept by reference, not copied) */
      public void setFields(List<String> fields) {
        this.fields = fields;
      }

      /** @return the operator, or {@code null} if none was set */
      public String getOperator() {
        return this.operator;
      }

      /** @param operator the operator to store */
      public void setOperator(String operator) {
        this.operator = operator;
      }

      /** @return the query text, or {@code null} if none was set */
      public String getQuery() {
        return this.query;
      }

      /** @param query the query text to store */
      public void setQuery(String query) {
        this.query = query;
      }

      /** @return the match type, or {@code null} if none was set */
      public String getType() {
        return this.type;
      }

      /** @param type the match type to store */
      public void setType(String type) {
        this.type = type;
      }
    }
  }
}
Loading