diff --git a/Makefile b/Makefile index 909c06614..d546bbc96 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ NAME="github.com/odpf/meteor" VERSION=$(shell git describe --always --tags 2>/dev/null) COVERFILE="/tmp/app.coverprofile" -PROTON_COMMIT := "2d2177aa02ee885bae094af283ff79a1d800791a" +PROTON_COMMIT := "5267e1fdf3abc8d9a06938290e202efdd060f665" .PHONY: all build clean test all: build diff --git a/agent/agent.go b/agent/agent.go index 6102d87c0..caae27ac0 100644 --- a/agent/agent.go +++ b/agent/agent.go @@ -3,6 +3,7 @@ package agent import ( "context" "fmt" + "runtime/debug" "sync" "time" @@ -170,8 +171,10 @@ func (r *Agent) Run(ctx context.Context, recipe recipe.Recipe) (run Run) { // while stream is listening via stream.Listen(). go func() { defer func() { - if r := recover(); r != nil { - run.Error = fmt.Errorf("agent run: close stream: panic: %s", r) + if rcvr := recover(); rcvr != nil { + r.logger.Error("panic recovered") + r.logger.Info(string(debug.Stack())) + run.Error = fmt.Errorf("agent run: close stream: panic: %s", rcvr) } stream.Close() }() @@ -280,8 +283,8 @@ func (r *Agent) setupSink(ctx context.Context, sr recipe.PluginRecipe, stream *s return err }, defaultBatchSize) - //TODO: the sink closes even though some records remain unpublished - //TODO: once fixed, file sink's Close needs to close *File + // TODO: the sink closes even though some records remain unpublished + // TODO: once fixed, file sink's Close needs to close *File stream.onClose(func() { if err = sink.Close(); err != nil { r.logger.Warn("error closing sink", "sink", sr.Name, "error", err) diff --git a/agent/retrier.go b/agent/retrier.go index 84a246b9a..16c047e99 100644 --- a/agent/retrier.go +++ b/agent/retrier.go @@ -40,7 +40,7 @@ func (r *retrier) retry(ctx context.Context, operation func() error, notify func return err } // if err is RetryError, returns err directly to retry - if errors.Is(err, plugins.RetryError{}) { + if errors.As(err, &plugins.RetryError{}) { return err 
} // if err is not RetryError, wraps error to prevent retrying diff --git a/docs/docs/reference/extractors.md b/docs/docs/reference/extractors.md index 2d50adbe9..8d49b226a 100644 --- a/docs/docs/reference/extractors.md +++ b/docs/docs/reference/extractors.md @@ -64,11 +64,17 @@ Meteor currently supports metadata extraction on these data sources. To perform |:------------------------------------|:----------|:----------|:------------|:-------| | [`caramlstore`][caramlstore-readme] | ✗ | ✅ | ✗ | ✅ | -### Service +### Application -| Type | Ownership | Upstreams | Downstreams | Custom | -|:-------------------------------|:----------|:----------|:------------|:-------| -| [`service_yaml`][service-yaml] | ✅ | ✅ | ✅ | ✅ | ✅ | +| Type | Ownership | Upstreams | Downstreams | Custom | +|:----------------------------------------------|:----------|:----------|:------------|:-------| +| [`application_yaml`][application-yaml-readme] | ✅ | ✅ | ✅ | ✅ | ✅ | + +### Machine Learning Model + +| Type | Ownership | Upstreams | Downstreams | Custom | +|:--------------------------|:----------|:----------|:------------|:-------| +| [`merlin`][merlin-readme] | ✅ | ✅ | ✗ | ✅ | ✅ | @@ -96,4 +102,5 @@ Meteor currently supports metadata extraction on these data sources. 
To perform [gcs-readme]: https://github.com/odpf/meteor/tree/main/plugins/extractors/gcs/README.md [optimus-readme]: https://github.com/odpf/meteor/tree/main/plugins/extractors/optimus/README.md [caramlstore-readme]: https://github.com/odpf/meteor/tree/main/plugins/extractors/caramlstore/README.md -[service-yaml]: https://github.com/odpf/meteor/tree/main/plugins/extractors/service_yaml/README.md +[application-yaml-readme]: https://github.com/odpf/meteor/tree/main/plugins/extractors/application_yaml/README.md +[merlin-readme]: https://github.com/odpf/meteor/tree/main/plugins/extractors/merlin/README.md diff --git a/docs/docs/reference/metadata_models.md b/docs/docs/reference/metadata_models.md index 0e246ad1e..4a894d206 100644 --- a/docs/docs/reference/metadata_models.md +++ b/docs/docs/reference/metadata_models.md @@ -1,45 +1,79 @@ # Meteor Metadata Model -We have a set of defined metadata models which define the structure of metadata that meteor will yield. -To visit the metadata models being used by different extractors please visit [here](../reference/extractors.md). -We are currently using the following metadata models: +We have a set of defined metadata models which define the structure of metadata +that meteor will yield. To visit the metadata models being used by different +extractors please visit [here](extractors.md). We are currently using the +following metadata models: -- [Bucket](https://github.com/odpf/proton/blob/main/odpf/assets/bucket.proto): - Used for metadata being extracted from buckets. Buckets are the basic containers in google cloud services, or Amazon S3, etc that are used fot data storage, and quite popular because of their features of access management, aggregation of usage and services and ease of configurations. - Currently, Meteor provides a metadata extractor for the buckets mentioned [here](../reference/extractors.md) +- [Bucket][proton-bucket]: Used for metadata being extracted from buckets. 
+ Buckets are the basic containers in google cloud services, or Amazon S3, etc + that are used for data storage, and quite popular because of their features of + access management, aggregation of usage and services and ease of + configurations. Currently, Meteor provides a metadata extractor for the + buckets mentioned [here](extractors.md#bucket) -- [Dashboard](https://github.com/odpf/proton/blob/main/odpf/assets/dashboard.proto): - Dashboards are an essential part of data analysis and are used to track, analyze and visualize. - These Dashboard metadata model includes some basic fields like `urn` and `source`, etc and a list of `Chart`. - There are multiple dashboards that are essential for Data Analysis such as metabase, grafana, tableau, etc. - Please refer to the list of Dashboards meteor currently supports [here](../reference/extractors.md). +- [Dashboard][proton-dashboard]: Dashboards are an essential part of data + analysis and are used to track, analyze and visualize. This Dashboard + metadata model includes some basic fields like `urn` and `source`, etc and a + list of `Chart`. There are multiple dashboards that are essential for Data + Analysis such as metabase, grafana, tableau, etc. Please refer to the list of + 'Dashboard' extractors meteor currently + supports [here](extractors.md#dashboard). -- [Chart](https://github.com/odpf/proton/blob/main/odpf/assets/chart.proto): - Charts are included in all the Dashboard and are the result of certain queries in a Dashboard. - Information about them includes the information of the query and few similar details. + - [Chart][proton-dashboard]: Charts are included in all the Dashboard and are + the result of certain queries in a Dashboard. Information about them + includes the information of the query and few similar details. -- [User](https://github.com/odpf/proton/blob/main/odpf/assets/user.proto): - This metadata model is used for defining the output of extraction on Users accounts.
- Some of these sources can be GitHub, Workday, Google Suite, LDAP. - Please refer to the list of user meteor currently supports [here](../reference/extractors.md). +- [User][proton-user]: This metadata model is used for defining the output of + extraction on User accounts. Some of these sources can be GitHub, Workday, + Google Suite, LDAP. Please refer to the list of 'User' extractors meteor + currently supports [here](extractors.md#user). -- [Table](https://github.com/odpf/proton/blob/main/odpf/assets/table.proto): - This metadata model is being used by extractors based around `databases` or for the ones that store data in tabular format. - It contains various fields that include `schema` of the table and other access related information. +- [Table][proton-table]: This metadata model is being used by extractors based + around databases, typically for the ones that store data in tabular format. It + contains various fields that include `schema` of the table and other access + related information. Please refer to the list of 'Table' extractors meteor + currently supports [here](extractors.md#table). -- [Job](https://github.com/odpf/proton/blob/main/odpf/assets/job.proto): - Most of the data is being streamed as queues by kafka or other stack in DE pipeline. - And hence Job is a metadata model built for this purpose. +- [Job][proton-job]: A job can represent a scheduled or recurring task that + performs some transformation in the data engineering pipeline. Job is a + metadata model built for this purpose. Please refer to the list of 'Job' + extractors meteor currently supports [here](extractors.md#job). -`Proto` has been used to define these metadata models. -To check their implementation please refer [here](https://github.com/odpf/proton/tree/main/odpf/assets). +- [Topic][proton-topic]: A topic represents a virtual group for logical group of + messages in message bus like kafka, pubsub, pulsar etc.
Please refer to the + list of 'Topic' extractors meteor currently + supports [here](extractors.md#topic). + +- [Machine Learning Feature Table][proton-featuretable]: A Feature Table is a + table or view that represents a logical group of time-series feature data as + it is found in a data source. Please refer to the list of 'Feature Table' + extractors meteor currently + supports [here](extractors.md#machine-learning-feature-table). + +- [Application][proton-application]: An application represents a service that + typically communicates over well-defined APIs. Please refer to the list of + 'Application' extractors meteor currently + supports [here](extractors.md#application). + +- [Machine Learning Model][proton-model]: A Model represents a Data Science + Model commonly used for Machine Learning (ML). Models are algorithms trained on + data to find patterns or make predictions. Models typically consume ML + features to generate a meaningful output. Please refer to the list of 'Model' + extractors meteor currently + supports [here](extractors.md#machine-learning-model). + +`Proto` has been used to define these metadata models. To check their +implementation please refer [here][proton-assets].
## Usage +[//]: # (@formatter:off) + ```golang import( -"github.com/odpf/meteor/models/odpf/assets/v1beta1" -"github.com/odpf/meteor/models/odpf/assets/facets/v1beta1" + assetsv1beta1 "github.com/odpf/meteor/models/odpf/assets/v1beta1" + "github.com/odpf/meteor/models/odpf/assets/facets/v1beta1" ) func main(){ @@ -64,3 +98,26 @@ func main(){ } } ``` + +[//]: # (@formatter:on) + + +[proton-bucket]: https://github.com/odpf/proton/tree/main/odpf/assets/v1beta2/bucket.proto + +[proton-dashboard]: https://github.com/odpf/proton/tree/main/odpf/assets/v1beta2/dashboard.proto + +[proton-user]: https://github.com/odpf/proton/tree/main/odpf/assets/v1beta2/user.proto + +[proton-table]: https://github.com/odpf/proton/tree/main/odpf/assets/v1beta2/table.proto + +[proton-job]: https://github.com/odpf/proton/tree/main/odpf/assets/v1beta2/job.proto + +[proton-topic]: https://github.com/odpf/proton/tree/main/odpf/assets/v1beta2/topic.proto + +[proton-featuretable]: https://github.com/odpf/proton/tree/main/odpf/assets/v1beta2/feature_table.proto + +[proton-application]: https://github.com/odpf/proton/tree/main/odpf/assets/v1beta2/application.proto + +[proton-model]: https://github.com/odpf/proton/tree/main/odpf/assets/v1beta2/model.proto + +[proton-assets]: https://github.com/odpf/proton/tree/main/odpf/assets/v1beta2 diff --git a/models/odpf/assets/README.md b/models/odpf/assets/README.md index 5fea2200e..bcc4e984f 100644 --- a/models/odpf/assets/README.md +++ b/models/odpf/assets/README.md @@ -1,18 +1,24 @@ # Metadata Models -Metadata models are structs in which metadata of a certain kind will be extracted in order to mainatain the integrity across similar data sources. -For e.g, MySQL and Postgres are supposed to provide similar struct for metadata since both are SQL based databases. 
-Currently meteor provides the extracted metadata as one of the following metadata models: +Metadata models are structs in which metadata of a certain kind will be +extracted in order to maintain the integrity across similar data sources. For +e.g, MySQL and Postgres are supposed to provide similar struct for metadata +since both are SQL based databases. Currently meteor provides the extracted +metadata as one of the following metadata models: -* [Bucket](bucket.pb.gp) -* [Chart](chart.pb.go) -* [Dashboard](dashboard.pb.go) -* [Group](group.pb.go) -* [Job](job.pb.go) -* [Table](table.pb.go) -* [Topic](topic.pb.go) -* [User](user.pb.go) +* [`Bucket`](bucket.pb.go) +* [`Chart`](chart.pb.go) +* [`Dashboard`](dashboard.pb.go) +* [`Group`](group.pb.go) +* [`Job`](job.pb.go) +* [`Table`](table.pb.go) +* [`Topic`](topic.pb.go) +* [`User`](user.pb.go) +* [`FeatureTable`](feature_table.pb.go) +* [`Application`](application.pb.go) +* [`Model`](model.pb.go) -While adding an extractor one needs to provide metadata supported by these models. -If you want some other data model added to the list feel free to raise a issue. -Please refer [docs](../../../docs/data%20models/README.md) for easier reference of how data models are being used. +While adding an extractor one needs to provide metadata supported by these +models. If you want some other data model added to the list feel free to raise an +issue. Please refer [docs](../../../docs/docs/reference/metadata_models.md) for +easier reference of how data models are being used. diff --git a/models/odpf/assets/v1beta2/feature_table.pb.go b/models/odpf/assets/v1beta2/feature_table.pb.go index 2f739058c..cf18a309b 100644 --- a/models/odpf/assets/v1beta2/feature_table.pb.go +++ b/models/odpf/assets/v1beta2/feature_table.pb.go @@ -204,6 +204,10 @@ type FeatureTable_Entity struct { JoinKeys []string `protobuf:"bytes,2,rep,name=join_keys,json=joinKeys,proto3" json:"join_keys,omitempty"` // Optional: Arbitrary metadata.
Labels map[string]string `protobuf:"bytes,3,rep,name=labels,proto3" json:"labels,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` + // Optional: Description of the entity. + Description string `protobuf:"bytes,4,opt,name=description,proto3" json:"description,omitempty"` + // Optional: Data type of the entity. + Type string `protobuf:"bytes,5,opt,name=type,proto3" json:"type,omitempty"` } func (x *FeatureTable_Entity) Reset() { @@ -259,6 +263,20 @@ func (x *FeatureTable_Entity) GetLabels() map[string]string { return nil } +func (x *FeatureTable_Entity) GetDescription() string { + if x != nil { + return x.Description + } + return "" +} + +func (x *FeatureTable_Entity) GetType() string { + if x != nil { + return x.Type + } + return "" +} + var File_odpf_assets_v1beta2_feature_table_proto protoreflect.FileDescriptor var file_odpf_assets_v1beta2_feature_table_proto_rawDesc = []byte{ @@ -275,7 +293,7 @@ var file_odpf_assets_v1beta2_feature_table_proto_rawDesc = []byte{ 0x6c, 0x67, 0x6f, 0x72, 0x69, 0x74, 0x68, 0x6d, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x61, 0x6c, 0x67, 0x6f, 0x72, 0x69, 0x74, 0x68, 0x6d, 0x12, 0x1f, 0x0a, 0x0b, 0x65, 0x6e, 0x74, 0x69, 0x74, 0x79, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0a, - 0x65, 0x6e, 0x74, 0x69, 0x74, 0x79, 0x4e, 0x61, 0x6d, 0x65, 0x22, 0xeb, 0x03, 0x0a, 0x0c, 0x46, + 0x65, 0x6e, 0x74, 0x69, 0x74, 0x79, 0x4e, 0x61, 0x6d, 0x65, 0x22, 0xa1, 0x04, 0x0a, 0x0c, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x54, 0x61, 0x62, 0x6c, 0x65, 0x12, 0x1c, 0x0a, 0x09, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x12, 0x44, 0x0a, 0x08, 0x65, 0x6e, 0x74, @@ -294,7 +312,7 @@ var file_odpf_assets_v1beta2_feature_table_proto_rawDesc = []byte{ 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x66, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 
0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x52, 0x0a, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x54, - 0x69, 0x6d, 0x65, 0x1a, 0xc2, 0x01, 0x0a, 0x06, 0x45, 0x6e, 0x74, 0x69, 0x74, 0x79, 0x12, 0x12, + 0x69, 0x6d, 0x65, 0x1a, 0xf8, 0x01, 0x0a, 0x06, 0x45, 0x6e, 0x74, 0x69, 0x74, 0x79, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x1b, 0x0a, 0x09, 0x6a, 0x6f, 0x69, 0x6e, 0x5f, 0x6b, 0x65, 0x79, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x08, 0x6a, 0x6f, 0x69, 0x6e, 0x4b, 0x65, 0x79, 0x73, 0x12, @@ -302,17 +320,20 @@ var file_odpf_assets_v1beta2_feature_table_proto_rawDesc = []byte{ 0x34, 0x2e, 0x6f, 0x64, 0x70, 0x66, 0x2e, 0x61, 0x73, 0x73, 0x65, 0x74, 0x73, 0x2e, 0x76, 0x31, 0x62, 0x65, 0x74, 0x61, 0x32, 0x2e, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x54, 0x61, 0x62, 0x6c, 0x65, 0x2e, 0x45, 0x6e, 0x74, 0x69, 0x74, 0x79, 0x2e, 0x4c, 0x61, 0x62, 0x65, 0x6c, 0x73, - 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x06, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x1a, 0x39, 0x0a, - 0x0b, 0x4c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, - 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, - 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, - 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x42, 0x53, 0x0a, 0x0e, 0x69, 0x6f, 0x2e, 0x6f, - 0x64, 0x70, 0x66, 0x2e, 0x61, 0x73, 0x73, 0x65, 0x74, 0x73, 0x42, 0x0c, 0x46, 0x65, 0x61, 0x74, - 0x75, 0x72, 0x65, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x5a, 0x33, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, - 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6f, 0x64, 0x70, 0x66, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x6e, - 0x2f, 0x61, 0x73, 0x73, 0x65, 0x74, 0x73, 0x2f, 0x76, 0x31, 0x62, 0x65, 0x74, 0x61, 0x32, 0x3b, - 0x61, 0x73, 0x73, 0x65, 0x74, 0x73, 0x76, 0x31, 0x62, 0x65, 0x74, 0x61, 
0x32, 0x62, 0x06, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x06, 0x6c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x12, 0x20, 0x0a, + 0x0b, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x04, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x0b, 0x64, 0x65, 0x73, 0x63, 0x72, 0x69, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x12, + 0x12, 0x0a, 0x04, 0x74, 0x79, 0x70, 0x65, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x74, + 0x79, 0x70, 0x65, 0x1a, 0x39, 0x0a, 0x0b, 0x4c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x45, 0x6e, 0x74, + 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x42, 0x53, + 0x0a, 0x0e, 0x69, 0x6f, 0x2e, 0x6f, 0x64, 0x70, 0x66, 0x2e, 0x61, 0x73, 0x73, 0x65, 0x74, 0x73, + 0x42, 0x0c, 0x46, 0x65, 0x61, 0x74, 0x75, 0x72, 0x65, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x5a, 0x33, + 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6f, 0x64, 0x70, 0x66, 0x2f, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x6e, 0x2f, 0x61, 0x73, 0x73, 0x65, 0x74, 0x73, 0x2f, 0x76, 0x31, + 0x62, 0x65, 0x74, 0x61, 0x32, 0x3b, 0x61, 0x73, 0x73, 0x65, 0x74, 0x73, 0x76, 0x31, 0x62, 0x65, + 0x74, 0x61, 0x32, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/models/odpf/assets/v1beta2/model.pb.go b/models/odpf/assets/v1beta2/model.pb.go index 3bb438ac4..cdf6cb812 100644 --- a/models/odpf/assets/v1beta2/model.pb.go +++ b/models/odpf/assets/v1beta2/model.pb.go @@ -22,29 +22,79 @@ const ( _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) ) -// Model represents a Data Science Model commonly used for Machine Learning -// (ML). Models are algorithms trained on data to find patterns or make -// predictions. Models typically consume ML features to generate a meaningful -// output. 
The inputs can also include contextual information that is made -// available in realtime as part of the request to the model server. -type Model struct { +// Schema of the model's inputs and outputs. Strongly inspired by +// https://mlflow.org/docs/latest/python_api/mlflow.models.html#mlflow.models.ModelSignature. +type ModelSignature struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - // Optional: Model's namespace or project. - Namespace string `protobuf:"bytes,1,opt,name=namespace,proto3" json:"namespace,omitempty"` - // Flavor of the ML Model. ex: pytorch, tensorflow etc. - Flavor string `protobuf:"bytes,2,opt,name=flavor,proto3" json:"flavor,omitempty"` - // Optional: Algorithm used to train the ML Model. - Algorithm string `protobuf:"bytes,3,opt,name=algorithm,proto3" json:"algorithm,omitempty"` - // The schema of a model’s inputs and outputs. - Signature *Model_Signature `protobuf:"bytes,4,opt,name=signature,proto3" json:"signature,omitempty"` - // Status of the model. ex: pending/ready/serving/terminated etc. 
- State string `protobuf:"bytes,5,opt,name=state,proto3" json:"state,omitempty"` + Inputs []*ModelSignature_Parameter `protobuf:"bytes,1,rep,name=inputs,proto3" json:"inputs,omitempty"` + Outputs []*ModelSignature_Parameter `protobuf:"bytes,2,rep,name=outputs,proto3" json:"outputs,omitempty"` +} + +func (x *ModelSignature) Reset() { + *x = ModelSignature{} + if protoimpl.UnsafeEnabled { + mi := &file_odpf_assets_v1beta2_model_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *ModelSignature) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*ModelSignature) ProtoMessage() {} + +func (x *ModelSignature) ProtoReflect() protoreflect.Message { + mi := &file_odpf_assets_v1beta2_model_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use ModelSignature.ProtoReflect.Descriptor instead. +func (*ModelSignature) Descriptor() ([]byte, []int) { + return file_odpf_assets_v1beta2_model_proto_rawDescGZIP(), []int{0} +} + +func (x *ModelSignature) GetInputs() []*ModelSignature_Parameter { + if x != nil { + return x.Inputs + } + return nil +} + +func (x *ModelSignature) GetOutputs() []*ModelSignature_Parameter { + if x != nil { + return x.Outputs + } + return nil +} + +type ModelVersion struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // The schema of a model version’s inputs and outputs. + Signature *ModelSignature `protobuf:"bytes,1,opt,name=signature,proto3" json:"signature,omitempty"` + // Status of the model version. ex: pending/ready/serving/terminated etc. 
+ Status string `protobuf:"bytes,2,opt,name=status,proto3" json:"status,omitempty"` // Version of the model - Version string `protobuf:"bytes,6,opt,name=version,proto3" json:"version,omitempty"` - // List of attributes the model has. This could include the following: + Version string `protobuf:"bytes,3,opt,name=version,proto3" json:"version,omitempty"` + // List of attributes the model version has. This could include the following: + // - mlflow_run_id[string]: MLFlow expriment run ID associated with the model + // version. Relevant for Merlin models. + // - mlflow_run_url[string]: URL of MLFlow experiment run associated with + // the model. Relevant for Merlin models. // - endpoint_url[string]: Endpoint that the model is serving requests on. // Ex: http://-... // - version_endpoint_url[string]: Endpoint that the model is serving @@ -52,32 +102,35 @@ type Model struct { // http://-... // - traffic[double]: Percentage of traffic being served by this version of // the model. + // - weight[double]: Weightage for the model version endpoint. // - params[map]: Parameters for the Model's run. // - metrics[map]: Metrics for the model's run. - Attributes *structpb.Struct `protobuf:"bytes,7,opt,name=attributes,proto3" json:"attributes,omitempty"` + Attributes *structpb.Struct `protobuf:"bytes,4,opt,name=attributes,proto3" json:"attributes,omitempty"` + // Optional. List of labels the model version has. + Labels map[string]string `protobuf:"bytes,5,rep,name=labels,proto3" json:"labels,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` // The timestamp of the model's creation. CreateTime *timestamppb.Timestamp `protobuf:"bytes,101,opt,name=create_time,json=createTime,proto3" json:"create_time,omitempty"` // The timestamp when the model was last modified. 
UpdateTime *timestamppb.Timestamp `protobuf:"bytes,102,opt,name=update_time,json=updateTime,proto3" json:"update_time,omitempty"` } -func (x *Model) Reset() { - *x = Model{} +func (x *ModelVersion) Reset() { + *x = ModelVersion{} if protoimpl.UnsafeEnabled { - mi := &file_odpf_assets_v1beta2_model_proto_msgTypes[0] + mi := &file_odpf_assets_v1beta2_model_proto_msgTypes[1] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *Model) String() string { +func (x *ModelVersion) String() string { return protoimpl.X.MessageStringOf(x) } -func (*Model) ProtoMessage() {} +func (*ModelVersion) ProtoMessage() {} -func (x *Model) ProtoReflect() protoreflect.Message { - mi := &file_odpf_assets_v1beta2_model_proto_msgTypes[0] +func (x *ModelVersion) ProtoReflect() protoreflect.Message { + mi := &file_odpf_assets_v1beta2_model_proto_msgTypes[1] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -88,102 +141,114 @@ func (x *Model) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use Model.ProtoReflect.Descriptor instead. -func (*Model) Descriptor() ([]byte, []int) { - return file_odpf_assets_v1beta2_model_proto_rawDescGZIP(), []int{0} +// Deprecated: Use ModelVersion.ProtoReflect.Descriptor instead. 
+func (*ModelVersion) Descriptor() ([]byte, []int) { + return file_odpf_assets_v1beta2_model_proto_rawDescGZIP(), []int{1} } -func (x *Model) GetNamespace() string { +func (x *ModelVersion) GetSignature() *ModelSignature { if x != nil { - return x.Namespace + return x.Signature } - return "" + return nil } -func (x *Model) GetFlavor() string { +func (x *ModelVersion) GetStatus() string { if x != nil { - return x.Flavor + return x.Status } return "" } -func (x *Model) GetAlgorithm() string { +func (x *ModelVersion) GetVersion() string { if x != nil { - return x.Algorithm + return x.Version } return "" } -func (x *Model) GetSignature() *Model_Signature { +func (x *ModelVersion) GetAttributes() *structpb.Struct { if x != nil { - return x.Signature + return x.Attributes } return nil } -func (x *Model) GetState() string { +func (x *ModelVersion) GetLabels() map[string]string { if x != nil { - return x.State - } - return "" -} - -func (x *Model) GetVersion() string { - if x != nil { - return x.Version - } - return "" -} - -func (x *Model) GetAttributes() *structpb.Struct { - if x != nil { - return x.Attributes + return x.Labels } return nil } -func (x *Model) GetCreateTime() *timestamppb.Timestamp { +func (x *ModelVersion) GetCreateTime() *timestamppb.Timestamp { if x != nil { return x.CreateTime } return nil } -func (x *Model) GetUpdateTime() *timestamppb.Timestamp { +func (x *ModelVersion) GetUpdateTime() *timestamppb.Timestamp { if x != nil { return x.UpdateTime } return nil } -// Schema of the model's inputs and outputs. Strongly inspired by -// https://mlflow.org/docs/latest/python_api/mlflow.models.html#mlflow.models.ModelSignature. -type Model_Signature struct { +// Model represents a Data Science Model commonly used for Machine Learning +// (ML). Models are algorithms trained on data to find patterns or make +// predictions. Models typically consume ML features to generate a meaningful +// output. 
The inputs can also include contextual information that is made +// available in realtime as part of the request to the model server. +type Model struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields - Inputs []*Model_Signature_Parameter `protobuf:"bytes,1,rep,name=inputs,proto3" json:"inputs,omitempty"` - Outputs []*Model_Signature_Parameter `protobuf:"bytes,2,rep,name=outputs,proto3" json:"outputs,omitempty"` + // Optional: Model's namespace or project. + Namespace string `protobuf:"bytes,1,opt,name=namespace,proto3" json:"namespace,omitempty"` + // Flavor of the ML Model. ex: pytorch, tensorflow etc. + Flavor string `protobuf:"bytes,2,opt,name=flavor,proto3" json:"flavor,omitempty"` + // Optional: Algorithm used to train the ML Model. + Algorithm string `protobuf:"bytes,3,opt,name=algorithm,proto3" json:"algorithm,omitempty"` + // Status of the model. ex: active/deleted etc. + Status string `protobuf:"bytes,4,opt,name=status,proto3" json:"status,omitempty"` + // Versions of the model, similar to experiment runs in MLFlow and model + // version in Merlin. + Versions []*ModelVersion `protobuf:"bytes,5,rep,name=versions,proto3" json:"versions,omitempty"` + // List of attributes the model version has. This could include the following: + // - project_id[double]: ID of project the model is present in. + // - project_name[string]: Name of project the model is present in. + // - mlflow_experiment_id[double]: MLFlow experiment ID associated with the + // model. Relevant for Merlin models. + // - mlflow_experiment_url[string]: URL of MLFlow experiment associated with + // the model. Relevant for Merlin models. + // - endpoint_urls[repeated string]: List of URLs associated with endpoints + // that are serving requests for the model. Relevant for Merlin models. + Attributes *structpb.Struct `protobuf:"bytes,6,opt,name=attributes,proto3" json:"attributes,omitempty"` + // The timestamp of the model's creation. 
+ CreateTime *timestamppb.Timestamp `protobuf:"bytes,101,opt,name=create_time,json=createTime,proto3" json:"create_time,omitempty"` + // The timestamp when the model was last modified. + UpdateTime *timestamppb.Timestamp `protobuf:"bytes,102,opt,name=update_time,json=updateTime,proto3" json:"update_time,omitempty"` } -func (x *Model_Signature) Reset() { - *x = Model_Signature{} +func (x *Model) Reset() { + *x = Model{} if protoimpl.UnsafeEnabled { - mi := &file_odpf_assets_v1beta2_model_proto_msgTypes[1] + mi := &file_odpf_assets_v1beta2_model_proto_msgTypes[2] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *Model_Signature) String() string { +func (x *Model) String() string { return protoimpl.X.MessageStringOf(x) } -func (*Model_Signature) ProtoMessage() {} +func (*Model) ProtoMessage() {} -func (x *Model_Signature) ProtoReflect() protoreflect.Message { - mi := &file_odpf_assets_v1beta2_model_proto_msgTypes[1] +func (x *Model) ProtoReflect() protoreflect.Message { + mi := &file_odpf_assets_v1beta2_model_proto_msgTypes[2] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -194,27 +259,69 @@ func (x *Model_Signature) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use Model_Signature.ProtoReflect.Descriptor instead. -func (*Model_Signature) Descriptor() ([]byte, []int) { - return file_odpf_assets_v1beta2_model_proto_rawDescGZIP(), []int{0, 0} +// Deprecated: Use Model.ProtoReflect.Descriptor instead. 
+func (*Model) Descriptor() ([]byte, []int) { + return file_odpf_assets_v1beta2_model_proto_rawDescGZIP(), []int{2} } -func (x *Model_Signature) GetInputs() []*Model_Signature_Parameter { +func (x *Model) GetNamespace() string { if x != nil { - return x.Inputs + return x.Namespace + } + return "" +} + +func (x *Model) GetFlavor() string { + if x != nil { + return x.Flavor + } + return "" +} + +func (x *Model) GetAlgorithm() string { + if x != nil { + return x.Algorithm + } + return "" +} + +func (x *Model) GetStatus() string { + if x != nil { + return x.Status + } + return "" +} + +func (x *Model) GetVersions() []*ModelVersion { + if x != nil { + return x.Versions } return nil } -func (x *Model_Signature) GetOutputs() []*Model_Signature_Parameter { +func (x *Model) GetAttributes() *structpb.Struct { if x != nil { - return x.Outputs + return x.Attributes + } + return nil +} + +func (x *Model) GetCreateTime() *timestamppb.Timestamp { + if x != nil { + return x.CreateTime + } + return nil +} + +func (x *Model) GetUpdateTime() *timestamppb.Timestamp { + if x != nil { + return x.UpdateTime } return nil } // Specification of name and type of a single column in a dataset. 
-type Model_Signature_Parameter struct { +type ModelSignature_Parameter struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields @@ -227,23 +334,23 @@ type Model_Signature_Parameter struct { Shape []int64 `protobuf:"varint,3,rep,packed,name=shape,proto3" json:"shape,omitempty"` } -func (x *Model_Signature_Parameter) Reset() { - *x = Model_Signature_Parameter{} +func (x *ModelSignature_Parameter) Reset() { + *x = ModelSignature_Parameter{} if protoimpl.UnsafeEnabled { - mi := &file_odpf_assets_v1beta2_model_proto_msgTypes[2] + mi := &file_odpf_assets_v1beta2_model_proto_msgTypes[3] ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) ms.StoreMessageInfo(mi) } } -func (x *Model_Signature_Parameter) String() string { +func (x *ModelSignature_Parameter) String() string { return protoimpl.X.MessageStringOf(x) } -func (*Model_Signature_Parameter) ProtoMessage() {} +func (*ModelSignature_Parameter) ProtoMessage() {} -func (x *Model_Signature_Parameter) ProtoReflect() protoreflect.Message { - mi := &file_odpf_assets_v1beta2_model_proto_msgTypes[2] +func (x *ModelSignature_Parameter) ProtoReflect() protoreflect.Message { + mi := &file_odpf_assets_v1beta2_model_proto_msgTypes[3] if protoimpl.UnsafeEnabled && x != nil { ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) if ms.LoadMessageInfo() == nil { @@ -254,26 +361,26 @@ func (x *Model_Signature_Parameter) ProtoReflect() protoreflect.Message { return mi.MessageOf(x) } -// Deprecated: Use Model_Signature_Parameter.ProtoReflect.Descriptor instead. -func (*Model_Signature_Parameter) Descriptor() ([]byte, []int) { - return file_odpf_assets_v1beta2_model_proto_rawDescGZIP(), []int{0, 0, 0} +// Deprecated: Use ModelSignature_Parameter.ProtoReflect.Descriptor instead. 
+func (*ModelSignature_Parameter) Descriptor() ([]byte, []int) { + return file_odpf_assets_v1beta2_model_proto_rawDescGZIP(), []int{0, 0} } -func (x *Model_Signature_Parameter) GetName() string { +func (x *ModelSignature_Parameter) GetName() string { if x != nil { return x.Name } return "" } -func (x *Model_Signature_Parameter) GetDataType() string { +func (x *ModelSignature_Parameter) GetDataType() string { if x != nil { return x.DataType } return "" } -func (x *Model_Signature_Parameter) GetShape() []int64 { +func (x *ModelSignature_Parameter) GetShape() []int64 { if x != nil { return x.Shape } @@ -290,52 +397,78 @@ var file_odpf_assets_v1beta2_model_proto_rawDesc = []byte{ 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2f, 0x73, 0x74, 0x72, 0x75, 0x63, 0x74, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x1a, 0x1f, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2f, 0x74, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x2e, - 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0xf6, 0x04, 0x0a, 0x05, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x12, - 0x1c, 0x0a, 0x09, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x09, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x12, 0x16, 0x0a, - 0x06, 0x66, 0x6c, 0x61, 0x76, 0x6f, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x66, - 0x6c, 0x61, 0x76, 0x6f, 0x72, 0x12, 0x1c, 0x0a, 0x09, 0x61, 0x6c, 0x67, 0x6f, 0x72, 0x69, 0x74, - 0x68, 0x6d, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x61, 0x6c, 0x67, 0x6f, 0x72, 0x69, - 0x74, 0x68, 0x6d, 0x12, 0x42, 0x0a, 0x09, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x74, 0x75, 0x72, 0x65, - 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x24, 0x2e, 0x6f, 0x64, 0x70, 0x66, 0x2e, 0x61, 0x73, - 0x73, 0x65, 0x74, 0x73, 0x2e, 0x76, 0x31, 0x62, 0x65, 0x74, 0x61, 0x32, 0x2e, 0x4d, 0x6f, 0x64, - 0x65, 0x6c, 0x2e, 0x53, 0x69, 0x67, 0x6e, 0x61, 0x74, 0x75, 0x72, 0x65, 0x52, 0x09, 0x73, 0x69, - 0x67, 0x6e, 0x61, 0x74, 0x75, 0x72, 0x65, 
0x12, 0x14, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, - 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x18, 0x0a, - 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x06, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, - 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x37, 0x0a, 0x0a, 0x61, 0x74, 0x74, 0x72, 0x69, - 0x62, 0x75, 0x74, 0x65, 0x73, 0x18, 0x07, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x67, 0x6f, - 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x53, 0x74, - 0x72, 0x75, 0x63, 0x74, 0x52, 0x0a, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x73, - 0x12, 0x3b, 0x0a, 0x0b, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, - 0x65, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, - 0x70, 0x52, 0x0a, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x54, 0x69, 0x6d, 0x65, 0x12, 0x3b, 0x0a, - 0x0b, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x66, 0x20, 0x01, - 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, - 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x52, 0x0a, - 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x54, 0x69, 0x6d, 0x65, 0x1a, 0xf1, 0x01, 0x0a, 0x09, 0x53, - 0x69, 0x67, 0x6e, 0x61, 0x74, 0x75, 0x72, 0x65, 0x12, 0x46, 0x0a, 0x06, 0x69, 0x6e, 0x70, 0x75, - 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2e, 0x2e, 0x6f, 0x64, 0x70, 0x66, 0x2e, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0xf4, 0x01, 0x0a, 0x0e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x53, + 0x69, 0x67, 0x6e, 0x61, 0x74, 0x75, 0x72, 0x65, 0x12, 0x45, 0x0a, 0x06, 0x69, 0x6e, 0x70, 0x75, + 0x74, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2d, 0x2e, 0x6f, 0x64, 0x70, 0x66, 0x2e, 0x61, 0x73, 0x73, 0x65, 0x74, 0x73, 0x2e, 0x76, 0x31, 0x62, 0x65, 0x74, 0x61, 0x32, 
0x2e, 0x4d, - 0x6f, 0x64, 0x65, 0x6c, 0x2e, 0x53, 0x69, 0x67, 0x6e, 0x61, 0x74, 0x75, 0x72, 0x65, 0x2e, 0x50, - 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x52, 0x06, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x73, - 0x12, 0x48, 0x0a, 0x07, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x73, 0x18, 0x02, 0x20, 0x03, 0x28, - 0x0b, 0x32, 0x2e, 0x2e, 0x6f, 0x64, 0x70, 0x66, 0x2e, 0x61, 0x73, 0x73, 0x65, 0x74, 0x73, 0x2e, - 0x76, 0x31, 0x62, 0x65, 0x74, 0x61, 0x32, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x2e, 0x53, 0x69, - 0x67, 0x6e, 0x61, 0x74, 0x75, 0x72, 0x65, 0x2e, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, - 0x72, 0x52, 0x07, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x73, 0x1a, 0x52, 0x0a, 0x09, 0x50, 0x61, - 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x1b, 0x0a, 0x09, 0x64, - 0x61, 0x74, 0x61, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, - 0x64, 0x61, 0x74, 0x61, 0x54, 0x79, 0x70, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x68, 0x61, 0x70, - 0x65, 0x18, 0x03, 0x20, 0x03, 0x28, 0x03, 0x52, 0x05, 0x73, 0x68, 0x61, 0x70, 0x65, 0x42, 0x51, - 0x0a, 0x0e, 0x69, 0x6f, 0x2e, 0x6f, 0x64, 0x70, 0x66, 0x2e, 0x61, 0x73, 0x73, 0x65, 0x74, 0x73, - 0x42, 0x0a, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x50, 0x72, 0x6f, 0x74, 0x6f, 0x5a, 0x33, 0x67, 0x69, - 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6f, 0x64, 0x70, 0x66, 0x2f, 0x70, 0x72, - 0x6f, 0x74, 0x6f, 0x6e, 0x2f, 0x61, 0x73, 0x73, 0x65, 0x74, 0x73, 0x2f, 0x76, 0x31, 0x62, 0x65, - 0x74, 0x61, 0x32, 0x3b, 0x61, 0x73, 0x73, 0x65, 0x74, 0x73, 0x76, 0x31, 0x62, 0x65, 0x74, 0x61, - 0x32, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x6f, 0x64, 0x65, 0x6c, 0x53, 0x69, 0x67, 0x6e, 0x61, 0x74, 0x75, 0x72, 0x65, 0x2e, 0x50, 0x61, + 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x52, 0x06, 0x69, 0x6e, 0x70, 0x75, 0x74, 0x73, 0x12, + 0x47, 0x0a, 0x07, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x73, 0x18, 
0x02, 0x20, 0x03, 0x28, 0x0b, + 0x32, 0x2d, 0x2e, 0x6f, 0x64, 0x70, 0x66, 0x2e, 0x61, 0x73, 0x73, 0x65, 0x74, 0x73, 0x2e, 0x76, + 0x31, 0x62, 0x65, 0x74, 0x61, 0x32, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x53, 0x69, 0x67, 0x6e, + 0x61, 0x74, 0x75, 0x72, 0x65, 0x2e, 0x50, 0x61, 0x72, 0x61, 0x6d, 0x65, 0x74, 0x65, 0x72, 0x52, + 0x07, 0x6f, 0x75, 0x74, 0x70, 0x75, 0x74, 0x73, 0x1a, 0x52, 0x0a, 0x09, 0x50, 0x61, 0x72, 0x61, + 0x6d, 0x65, 0x74, 0x65, 0x72, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x1b, 0x0a, 0x09, 0x64, 0x61, 0x74, + 0x61, 0x5f, 0x74, 0x79, 0x70, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x64, 0x61, + 0x74, 0x61, 0x54, 0x79, 0x70, 0x65, 0x12, 0x14, 0x0a, 0x05, 0x73, 0x68, 0x61, 0x70, 0x65, 0x18, + 0x03, 0x20, 0x03, 0x28, 0x03, 0x52, 0x05, 0x73, 0x68, 0x61, 0x70, 0x65, 0x22, 0xb8, 0x03, 0x0a, + 0x0c, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x41, 0x0a, + 0x09, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x74, 0x75, 0x72, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, + 0x32, 0x23, 0x2e, 0x6f, 0x64, 0x70, 0x66, 0x2e, 0x61, 0x73, 0x73, 0x65, 0x74, 0x73, 0x2e, 0x76, + 0x31, 0x62, 0x65, 0x74, 0x61, 0x32, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x53, 0x69, 0x67, 0x6e, + 0x61, 0x74, 0x75, 0x72, 0x65, 0x52, 0x09, 0x73, 0x69, 0x67, 0x6e, 0x61, 0x74, 0x75, 0x72, 0x65, + 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, + 0x69, 0x6f, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, + 0x6f, 0x6e, 0x12, 0x37, 0x0a, 0x0a, 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x73, + 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, + 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x52, + 0x0a, 
0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x73, 0x12, 0x45, 0x0a, 0x06, 0x6c, + 0x61, 0x62, 0x65, 0x6c, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2d, 0x2e, 0x6f, 0x64, + 0x70, 0x66, 0x2e, 0x61, 0x73, 0x73, 0x65, 0x74, 0x73, 0x2e, 0x76, 0x31, 0x62, 0x65, 0x74, 0x61, + 0x32, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x2e, 0x4c, + 0x61, 0x62, 0x65, 0x6c, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x06, 0x6c, 0x61, 0x62, 0x65, + 0x6c, 0x73, 0x12, 0x3b, 0x0a, 0x0b, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x5f, 0x74, 0x69, 0x6d, + 0x65, 0x18, 0x65, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, + 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, + 0x61, 0x6d, 0x70, 0x52, 0x0a, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x54, 0x69, 0x6d, 0x65, 0x12, + 0x3b, 0x0a, 0x0b, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x66, + 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, + 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, + 0x52, 0x0a, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x54, 0x69, 0x6d, 0x65, 0x1a, 0x39, 0x0a, 0x0b, + 0x4c, 0x61, 0x62, 0x65, 0x6c, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, 0x0a, 0x03, 0x6b, + 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, 0x12, 0x14, 0x0a, + 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x05, 0x76, 0x61, + 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0xe5, 0x02, 0x0a, 0x05, 0x4d, 0x6f, 0x64, 0x65, + 0x6c, 0x12, 0x1c, 0x0a, 0x09, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x6e, 0x61, 0x6d, 0x65, 0x73, 0x70, 0x61, 0x63, 0x65, 0x12, + 0x16, 0x0a, 0x06, 0x66, 0x6c, 0x61, 0x76, 0x6f, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x06, 0x66, 0x6c, 0x61, 0x76, 0x6f, 0x72, 
0x12, 0x1c, 0x0a, 0x09, 0x61, 0x6c, 0x67, 0x6f, 0x72, + 0x69, 0x74, 0x68, 0x6d, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x09, 0x61, 0x6c, 0x67, 0x6f, + 0x72, 0x69, 0x74, 0x68, 0x6d, 0x12, 0x16, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, + 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x3d, 0x0a, + 0x08, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x05, 0x20, 0x03, 0x28, 0x0b, 0x32, + 0x21, 0x2e, 0x6f, 0x64, 0x70, 0x66, 0x2e, 0x61, 0x73, 0x73, 0x65, 0x74, 0x73, 0x2e, 0x76, 0x31, + 0x62, 0x65, 0x74, 0x61, 0x32, 0x2e, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x56, 0x65, 0x72, 0x73, 0x69, + 0x6f, 0x6e, 0x52, 0x08, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x37, 0x0a, 0x0a, + 0x61, 0x74, 0x74, 0x72, 0x69, 0x62, 0x75, 0x74, 0x65, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0b, + 0x32, 0x17, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, + 0x75, 0x66, 0x2e, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x52, 0x0a, 0x61, 0x74, 0x74, 0x72, 0x69, + 0x62, 0x75, 0x74, 0x65, 0x73, 0x12, 0x3b, 0x0a, 0x0b, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x5f, + 0x74, 0x69, 0x6d, 0x65, 0x18, 0x65, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, + 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, + 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x52, 0x0a, 0x63, 0x72, 0x65, 0x61, 0x74, 0x65, 0x54, 0x69, + 0x6d, 0x65, 0x12, 0x3b, 0x0a, 0x0b, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x5f, 0x74, 0x69, 0x6d, + 0x65, 0x18, 0x66, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, + 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, + 0x61, 0x6d, 0x70, 0x52, 0x0a, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x54, 0x69, 0x6d, 0x65, 0x42, + 0x51, 0x0a, 0x0e, 0x69, 0x6f, 0x2e, 0x6f, 0x64, 0x70, 0x66, 0x2e, 0x61, 0x73, 0x73, 0x65, 0x74, + 0x73, 0x42, 0x0a, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x50, 0x72, 0x6f, 0x74, 0x6f, 
0x5a, 0x33, 0x67, + 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6f, 0x64, 0x70, 0x66, 0x2f, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x6e, 0x2f, 0x61, 0x73, 0x73, 0x65, 0x74, 0x73, 0x2f, 0x76, 0x31, 0x62, + 0x65, 0x74, 0x61, 0x32, 0x3b, 0x61, 0x73, 0x73, 0x65, 0x74, 0x73, 0x76, 0x31, 0x62, 0x65, 0x74, + 0x61, 0x32, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -350,26 +483,33 @@ func file_odpf_assets_v1beta2_model_proto_rawDescGZIP() []byte { return file_odpf_assets_v1beta2_model_proto_rawDescData } -var file_odpf_assets_v1beta2_model_proto_msgTypes = make([]protoimpl.MessageInfo, 3) +var file_odpf_assets_v1beta2_model_proto_msgTypes = make([]protoimpl.MessageInfo, 5) var file_odpf_assets_v1beta2_model_proto_goTypes = []interface{}{ - (*Model)(nil), // 0: odpf.assets.v1beta2.Model - (*Model_Signature)(nil), // 1: odpf.assets.v1beta2.Model.Signature - (*Model_Signature_Parameter)(nil), // 2: odpf.assets.v1beta2.Model.Signature.Parameter - (*structpb.Struct)(nil), // 3: google.protobuf.Struct - (*timestamppb.Timestamp)(nil), // 4: google.protobuf.Timestamp + (*ModelSignature)(nil), // 0: odpf.assets.v1beta2.ModelSignature + (*ModelVersion)(nil), // 1: odpf.assets.v1beta2.ModelVersion + (*Model)(nil), // 2: odpf.assets.v1beta2.Model + (*ModelSignature_Parameter)(nil), // 3: odpf.assets.v1beta2.ModelSignature.Parameter + nil, // 4: odpf.assets.v1beta2.ModelVersion.LabelsEntry + (*structpb.Struct)(nil), // 5: google.protobuf.Struct + (*timestamppb.Timestamp)(nil), // 6: google.protobuf.Timestamp } var file_odpf_assets_v1beta2_model_proto_depIdxs = []int32{ - 1, // 0: odpf.assets.v1beta2.Model.signature:type_name -> odpf.assets.v1beta2.Model.Signature - 3, // 1: odpf.assets.v1beta2.Model.attributes:type_name -> google.protobuf.Struct - 4, // 2: odpf.assets.v1beta2.Model.create_time:type_name -> google.protobuf.Timestamp - 4, // 3: odpf.assets.v1beta2.Model.update_time:type_name -> google.protobuf.Timestamp - 2, // 4: 
odpf.assets.v1beta2.Model.Signature.inputs:type_name -> odpf.assets.v1beta2.Model.Signature.Parameter - 2, // 5: odpf.assets.v1beta2.Model.Signature.outputs:type_name -> odpf.assets.v1beta2.Model.Signature.Parameter - 6, // [6:6] is the sub-list for method output_type - 6, // [6:6] is the sub-list for method input_type - 6, // [6:6] is the sub-list for extension type_name - 6, // [6:6] is the sub-list for extension extendee - 0, // [0:6] is the sub-list for field type_name + 3, // 0: odpf.assets.v1beta2.ModelSignature.inputs:type_name -> odpf.assets.v1beta2.ModelSignature.Parameter + 3, // 1: odpf.assets.v1beta2.ModelSignature.outputs:type_name -> odpf.assets.v1beta2.ModelSignature.Parameter + 0, // 2: odpf.assets.v1beta2.ModelVersion.signature:type_name -> odpf.assets.v1beta2.ModelSignature + 5, // 3: odpf.assets.v1beta2.ModelVersion.attributes:type_name -> google.protobuf.Struct + 4, // 4: odpf.assets.v1beta2.ModelVersion.labels:type_name -> odpf.assets.v1beta2.ModelVersion.LabelsEntry + 6, // 5: odpf.assets.v1beta2.ModelVersion.create_time:type_name -> google.protobuf.Timestamp + 6, // 6: odpf.assets.v1beta2.ModelVersion.update_time:type_name -> google.protobuf.Timestamp + 1, // 7: odpf.assets.v1beta2.Model.versions:type_name -> odpf.assets.v1beta2.ModelVersion + 5, // 8: odpf.assets.v1beta2.Model.attributes:type_name -> google.protobuf.Struct + 6, // 9: odpf.assets.v1beta2.Model.create_time:type_name -> google.protobuf.Timestamp + 6, // 10: odpf.assets.v1beta2.Model.update_time:type_name -> google.protobuf.Timestamp + 11, // [11:11] is the sub-list for method output_type + 11, // [11:11] is the sub-list for method input_type + 11, // [11:11] is the sub-list for extension type_name + 11, // [11:11] is the sub-list for extension extendee + 0, // [0:11] is the sub-list for field type_name } func init() { file_odpf_assets_v1beta2_model_proto_init() } @@ -379,7 +519,7 @@ func file_odpf_assets_v1beta2_model_proto_init() { } if !protoimpl.UnsafeEnabled { 
file_odpf_assets_v1beta2_model_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*Model); i { + switch v := v.(*ModelSignature); i { case 0: return &v.state case 1: @@ -391,7 +531,7 @@ func file_odpf_assets_v1beta2_model_proto_init() { } } file_odpf_assets_v1beta2_model_proto_msgTypes[1].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*Model_Signature); i { + switch v := v.(*ModelVersion); i { case 0: return &v.state case 1: @@ -403,7 +543,19 @@ func file_odpf_assets_v1beta2_model_proto_init() { } } file_odpf_assets_v1beta2_model_proto_msgTypes[2].Exporter = func(v interface{}, i int) interface{} { - switch v := v.(*Model_Signature_Parameter); i { + switch v := v.(*Model); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_odpf_assets_v1beta2_model_proto_msgTypes[3].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*ModelSignature_Parameter); i { case 0: return &v.state case 1: @@ -421,7 +573,7 @@ func file_odpf_assets_v1beta2_model_proto_init() { GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: file_odpf_assets_v1beta2_model_proto_rawDesc, NumEnums: 0, - NumMessages: 3, + NumMessages: 5, NumExtensions: 0, NumServices: 0, }, diff --git a/plugins/errors.go b/plugins/errors.go index 7aeed75d5..4d76cd0a6 100644 --- a/plugins/errors.go +++ b/plugins/errors.go @@ -65,11 +65,6 @@ func (e RetryError) Unwrap() error { return e.Err } -func (e RetryError) Is(target error) bool { - _, ok := target.(RetryError) - return ok -} - func NewRetryError(err error) error { if err == nil { return nil diff --git a/plugins/extractors/application_yaml/README.md b/plugins/extractors/application_yaml/README.md index f967ad94f..fcadc057d 100644 --- a/plugins/extractors/application_yaml/README.md +++ b/plugins/extractors/application_yaml/README.md @@ -91,6 +91,6 @@ Refer to the [contribution 
guidelines](../../../docs/docs/contribute/guide.md#adding-a-new-extractor) for information on contributing to this module. -[proton-asset]: https://github.com/odpf/proton/blob/2d2177a/odpf/assets/v1beta2/asset.proto#L14 +[proton-asset]: https://github.com/odpf/proton/blob/5267e1f/odpf/assets/v1beta2/asset.proto#L14 -[proton-application]: https://github.com/odpf/proton/blob/2d2177a/odpf/assets/v1beta2/application.proto#L11 +[proton-application]: https://github.com/odpf/proton/blob/5267e1f/odpf/assets/v1beta2/application.proto#L11 diff --git a/plugins/extractors/caramlstore/README.md b/plugins/extractors/caramlstore/README.md index 076e77bc7..a752fb0c3 100644 --- a/plugins/extractors/caramlstore/README.md +++ b/plugins/extractors/caramlstore/README.md @@ -64,5 +64,5 @@ for information on contributing to this module. [caraml-store]: https://github.com/caraml-dev/caraml-store [coreservice.proto]: https://github.com/caraml-dev/caraml-store/blob/v0.1.1/caraml-store-protobuf/src/main/proto/feast/core/CoreService.proto#L12 -[proton-asset]: https://github.com/odpf/proton/blob/5350bde/odpf/assets/v1beta2/asset.proto#L14 -[proton-featuretable]: https://github.com/odpf/proton/blob/5350bde/odpf/assets/v1beta2/feature_table.proto#L32 +[proton-asset]: https://github.com/odpf/proton/blob/5267e1f/odpf/assets/v1beta2/asset.proto#L14 +[proton-featuretable]: https://github.com/odpf/proton/blob/5267e1f/odpf/assets/v1beta2/feature_table.proto#L32 diff --git a/plugins/extractors/caramlstore/asset_builder.go b/plugins/extractors/caramlstore/asset_builder.go index dfd1b8743..aa51f68e2 100644 --- a/plugins/extractors/caramlstore/asset_builder.go +++ b/plugins/extractors/caramlstore/asset_builder.go @@ -4,7 +4,6 @@ import ( "errors" "fmt" - "github.com/odpf/meteor/models" v1beta2 "github.com/odpf/meteor/models/odpf/assets/v1beta2" "github.com/odpf/meteor/plugins" "github.com/odpf/meteor/plugins/extractors/caramlstore/internal/core" @@ -52,7 +51,7 @@ func (b featureTableBuilder) buildAsset(ft 
*core.FeatureTable) (*v1beta2.Asset, } return &v1beta2.Asset{ - Urn: models.NewURN(service, b.scope, typ, b.project+"-"+ft.Spec.Name), + Urn: plugins.CaraMLStoreURN(b.scope, b.project, ft.Spec.Name), Name: ft.Spec.Name, Service: service, Type: typ, @@ -146,7 +145,7 @@ func (b featureTableBuilder) buildUpstreams(ft *core.FeatureTable) ([]*v1beta2.R } ups = append(ups, &v1beta2.Resource{ - Urn: models.NewURN("kafka", plugins.KafkaServersToScope(opts.BootstrapServers), "topic", opts.Topic), + Urn: plugins.KafkaURN(opts.BootstrapServers, opts.Topic), Service: "kafka", Type: "topic", }) diff --git a/plugins/extractors/caramlstore/caramlstore_test.go b/plugins/extractors/caramlstore/caramlstore_test.go index d67d3e480..4b739ab80 100644 --- a/plugins/extractors/caramlstore/caramlstore_test.go +++ b/plugins/extractors/caramlstore/caramlstore_test.go @@ -167,7 +167,7 @@ func TestExtract(t *testing.T) { Filter: &core.ListEntitiesRequest_Filter{Project: "dobu"}, }).Return(&core.ListEntitiesResponse{}, nil) var featureTablesResp core.ListFeatureTablesResponse - testutils.LoadJSONIntoProto(t, "testdata/mocked-feature-tables-sauron.json", &featureTablesResp) + testutils.LoadJSON(t, "testdata/mocked-feature-tables-sauron.json", &featureTablesResp) m.EXPECT().ListFeatureTables(testutils.OfTypeContext(), &core.ListFeatureTablesRequest{ Filter: &core.ListFeatureTablesRequest_Filter{Project: "dobu"}, }).Return(&featureTablesResp, nil) @@ -197,23 +197,23 @@ func TestExtract(t *testing.T) { ) var entitiesResp core.ListEntitiesResponse - testutils.LoadJSONIntoProto(t, "testdata/mocked-entities-sauron.json", &entitiesResp) + testutils.LoadJSON(t, "testdata/mocked-entities-sauron.json", &entitiesResp) mockServer.EXPECT().ListEntities(testutils.OfTypeContext(), &core.ListEntitiesRequest{ Filter: &core.ListEntitiesRequest_Filter{Project: "sauron"}, }).Return(&entitiesResp, nil) var featureTablesResp core.ListFeatureTablesResponse - testutils.LoadJSONIntoProto(t, 
"testdata/mocked-feature-tables-sauron.json", &featureTablesResp) + testutils.LoadJSON(t, "testdata/mocked-feature-tables-sauron.json", &featureTablesResp) mockServer.EXPECT().ListFeatureTables(testutils.OfTypeContext(), &core.ListFeatureTablesRequest{ Filter: &core.ListFeatureTablesRequest_Filter{Project: "sauron"}, }).Return(&featureTablesResp, nil) - testutils.LoadJSONIntoProto(t, "testdata/mocked-entities-food-tensoba.json", &entitiesResp) + testutils.LoadJSON(t, "testdata/mocked-entities-food-tensoba.json", &entitiesResp) mockServer.EXPECT().ListEntities(testutils.OfTypeContext(), &core.ListEntitiesRequest{ Filter: &core.ListEntitiesRequest_Filter{Project: "food-tensoba"}, }).Return(&entitiesResp, nil) - testutils.LoadJSONIntoProto(t, "testdata/mocked-feature-tables-food-tensoba.json", &featureTablesResp) + testutils.LoadJSON(t, "testdata/mocked-feature-tables-food-tensoba.json", &featureTablesResp) mockServer.EXPECT().ListFeatureTables(testutils.OfTypeContext(), &core.ListFeatureTablesRequest{ Filter: &core.ListFeatureTablesRequest_Filter{Project: "food-tensoba"}, }).Return(&featureTablesResp, nil) diff --git a/plugins/extractors/caramlstore/testdata/expected-assets.json b/plugins/extractors/caramlstore/testdata/expected-assets.json index 7a513c894..0d8c21376 100644 --- a/plugins/extractors/caramlstore/testdata/expected-assets.json +++ b/plugins/extractors/caramlstore/testdata/expected-assets.json @@ -1,6 +1,6 @@ [ { - "urn": "urn:caramlstore:test-caramlstore:feature_table:sauron-merchant_uuid_t2_discovery", + "urn": "urn:caramlstore:test-caramlstore:feature_table:sauron.merchant_uuid_t2_discovery", "name": "merchant_uuid_t2_discovery", "service": "caramlstore", "type": "feature_table", @@ -88,7 +88,7 @@ } }, { - "urn": "urn:caramlstore:test-caramlstore:feature_table:sauron-avg_dispatch_arrival_time_10_mins", + "urn": "urn:caramlstore:test-caramlstore:feature_table:sauron.avg_dispatch_arrival_time_10_mins", "name": "avg_dispatch_arrival_time_10_mins", 
"service": "caramlstore", "type": "feature_table", @@ -136,7 +136,7 @@ } }, { - "urn": "urn:caramlstore:test-caramlstore:feature_table:food-tensoba-merchant_uuid_t2_discovery", + "urn": "urn:caramlstore:test-caramlstore:feature_table:food-tensoba.merchant_uuid_t2_discovery", "name": "merchant_uuid_t2_discovery", "service": "caramlstore", "type": "feature_table", @@ -224,7 +224,7 @@ } }, { - "urn": "urn:caramlstore:test-caramlstore:feature_table:food-tensoba-avg_dispatch_arrival_time_10_mins", + "urn": "urn:caramlstore:test-caramlstore:feature_table:food-tensoba.avg_dispatch_arrival_time_10_mins", "name": "avg_dispatch_arrival_time_10_mins", "service": "caramlstore", "type": "feature_table", diff --git a/plugins/extractors/merlin/README.md b/plugins/extractors/merlin/README.md new file mode 100644 index 000000000..e8f7890ff --- /dev/null +++ b/plugins/extractors/merlin/README.md @@ -0,0 +1,125 @@ +# merlin + +Extractor for Machine Learning(ML) Models from [Merlin][merlin]. + +The extractor uses the REST API exposed by Merlin to extract models. The REST +API has been documented with Swagger and can be seen [here][merlin-swagger]. + +## Usage + +```yaml +source: + name: merlin + scope: staging + config: + url: my-company.com/api/merlin/ + service_account_base64: | + ____base64_encoded_service_account_credentials____ +``` + +## Inputs + +| Key | Value | Example | Description | Required? | +|:-------------------------|:---------|:-----------------------------------------|:--------------------------------------------------------------------------|:----------| +| `url` | `string` | `my-company.com/api/merlin/` | Merlin's API base URL | ✅ | +| `service_account_base64` | `string` | `____BASE64_ENCODED_SERVICE_ACCOUNT____` | Service Account credentials in base64 encoded string. 
| ❌ | +| `request_timeout` | `string` | `10s` | Timeout for HTTP requests to Merlin API | ❌ | +| `worker_count` | `int` | `5` | Number of workers to spawn for extracting projects in parallel from Merlin. | ❌ | + +### Notes + +- Leaving `service_account_base64` blank will default + to [Google's default authentication][google-default-auth]. It is recommended + if the Meteor instance runs inside the same Google Cloud environment as the + BigQuery project. + +## Outputs + +The models are mapped to an [`Asset`][proton-asset] with model specific metadata +stored using [`Model`][proton-model]. Please refer to the proto definitions for +more information. + +A single model asset includes all the active model versions. A model version is +considered active if it has an endpoint. + +| Field | Value | Sample Value | +|:-----------------------------------|:-----------------------------------------------------------------------|:--------------------------------------------------------------| +| `resource.urn` | `urn:merlin:{scope}:model:{model.project_id}.{model.id}` | `urn:merlin:staging:model:15.1512` | +| `resource.name` | `{model.name}` | `tensorflow-sample` | +| `resource.service` | `merlin` | `merlin` | +| `resource.type` | `model` | `model` | +| `resource.url` | `{model.endpoints[0].url}` | `tensorflow-sample.integration-test.models.mycompany.com` | +| `namespace` | `{project.name}` | `integration-test` | +| `flavor` | `model.type` | `pyfunc` | +| `versions` | [`[]ModelVersion`](#modelversion) | | +| `attributes.project_id` | `project.id` | `23` | +| `attributes.mlflow_experiment_id` | `model.mlflow_experiment_id` | `721` | +| `attributes.mlflow_experiment_url` | `model.mlflow_url` | `http://mlflow.mycompany.com/#/experiments/721` | +| `attributes.endpoint_urls[]` | `model.endpoints[].url` | `["tensorflow-sample.integration-test.models.mycompany.com"]` | +| `create_time` | `model.created_at` | `2021-03-01T18:42:50.564685Z` | +| `update_time` | `model.updated_at` | 
`2022-01-27T10:21:26.121941Z` | +| `resource.owners[].urn` | `{project.administrators[]}` | `giga.chad@knowyourmeme.com` | +| `resource.owners[].email` | `{project.administrators[]}` | `giga.chad@knowyourmeme.com` | +| `lineage.upstreams` | [`[]Resource` upstreams](#resource-upstreams) | | +| `resource.labels` | `{"team": {project.team}, "stream": {project.stream}} + project.labels` | `{"stream": "relevance","team": "search"}` | + +### `ModelVersion` + +A [`ModelVersion`][proton-modelversion] is used to represent each combination of +Merlin model's version and its 'endpoint' destination. A single model version +will have an 'endpoint' for each environment it is deployed in. Please refer to the +proto definitions for more information. + +| Field | Value | Sample Value | +|:----------------------------------|:---------------------------------------|:---------------------------------------------------------------------------------------------------| +| `status` | `model_version.status` | `running` | +| `version` | `model_version.id` | `11` | +| `attributes.endpoint_id` | `endpoint.id` | `187` | +| `attributes.mlflow_run_id` | `model_version.mlflow_run_id` | `3c7067f3770441ebbd66a0dce91b8724` | +| `attributes.mlflow_run_url` | `model_version.mlflow_url` | `http://mlflow.mycompany.com/#/experiments/721/runs/3c7067f3770441ebbd66a0dce91b8724` | +| `attributes.endpoint_url` | `endpoint.url` | `tensorflow-sample.integration-test.models.mycompany.com` | +| `attributes.version_endpoint_url` | `version_endpoint.url` | `http://tensorflow-sample-11.integration-test.models.mycompany.com/v1/models/tensorflow-sample-11` | +| `attributes.monitoring_url` | `version_endpoint.monitoring_url` | `https://grafana.mycompany.com/graph/d/z9MBKR1Az/model-version-dashboard?params` | +| `attributes.message` | `version_endpoint.message` | `timeout creating inference service` | +| `attributes.environment_name` | `endpoint.environment_name` | `aws-staging` | +| `attributes.deployment_mode` | 
`version_endpoint.deployment_mode` | `serverless` | +| `attributes.service_name` | `version_endpoint.service_name` | `tensorflow-sample-11-predictor-default.integration-test.models.mycompany.com` | +| `attributes.env_vars` | `version_endpoint.env_vars` | `{"INIT_HEAP_SIZE_IN_MB": "2250","WORKERS": "1"}` | +| `attributes.transformer` | `version_endpoint.transformer` | Attributes including `transformer.{enabled, type, image, command, args, env_vars}` | +| `attributes.weight` | `endpoint.rule.destinations[].weight` | `100` | +| `labels` | `model_version.labels` | | +| `create_time` | `model_version.created_at` | `2022-11-13T07:21:07.888150Z` | +| `update_time` | `model_version.updated_at` | `2022-11-13T07:21:07.888150Z` | + +### `Resource` upstreams + +The extractor currently has limited support for constructing the upstreams for +a Model that utilises the env vars for the `standard` transformer. It parses the +feature table specs that specify the project name and feature table name of the +[CaraML Store][caraml-store] Feature Table from the env vars. This information +is used to construct the upstreams for the model. + +| Field | Value | Sample Value | +|:----------|:---------------------------------------------------------------|:-----------------------------------------------------------| +| `urn` | `urn:caramlstore:{scope}:feature_table:{ft.project}.{ft.name}` | `urn:caramlstore:staging:feature_table:sauron.merchant_uuid_t2_discovery` | +| `type` | `feature_table` | `feature_table` | +| `service` | `caramlstore` | `caramlstore` | + +## Contributing + +Refer to the [contribution guidelines](../../../docs/docs/contribute/guide.md#adding-a-new-extractor) +for information on contributing to this module. 
+ +[merlin]: https://github.com/gojek/merlin + +[merlin-swagger]: https://github.com/gojek/merlin/blob/v0.24.0/swagger.yaml + +[google-default-auth]: https://cloud.google.com/docs/authentication/production#automatically + +[proton-asset]: https://github.com/odpf/proton/blob/5267e1f/odpf/assets/v1beta2/asset.proto#L14 + +[proton-model]: https://github.com/odpf/proton/blob/5267e1f/odpf/assets/v1beta2/model.proto#L73 + +[proton-modelversion]: https://github.com/odpf/proton/blob/5267e1f/odpf/assets/v1beta2/model.proto#L31 + +[caraml-store]: https://github.com/caraml-dev/caraml-store diff --git a/plugins/extractors/merlin/internal/merlin/api_error.go b/plugins/extractors/merlin/internal/merlin/api_error.go new file mode 100644 index 000000000..0cd1b379e --- /dev/null +++ b/plugins/extractors/merlin/internal/merlin/api_error.go @@ -0,0 +1,17 @@ +package merlin + +import "fmt" + +type APIError struct { + Method string + Endpoint string + Status int + Msg string +} + +func (e *APIError) Error() string { + return fmt.Sprintf( + "[%s]: %s: unexpected response status '%d': %s", + e.Method, e.Endpoint, e.Status, e.Msg, + ) +} diff --git a/plugins/extractors/merlin/internal/merlin/merlin_client.go b/plugins/extractors/merlin/internal/merlin/merlin_client.go new file mode 100644 index 000000000..669522414 --- /dev/null +++ b/plugins/extractors/merlin/internal/merlin/merlin_client.go @@ -0,0 +1,189 @@ +package merlin + +import ( + "context" + "encoding/json" + "fmt" + "io" + "net/http" + "regexp" + "time" + + "golang.org/x/oauth2" + "golang.org/x/oauth2/google" +) + +var authScopes = []string{"https://www.googleapis.com/auth/userinfo.email"} + +type Client struct { + urlb URLBuilderSource + http *http.Client + timeout time.Duration +} + +type ClientParams struct { + BaseURL string + ServiceAccountJSON []byte + Timeout time.Duration +} + +func NewClient(ctx context.Context, params ClientParams) (Client, error) { + httpClient, err := authenticatedClient(ctx, 
params.ServiceAccountJSON, authScopes...) + if err != nil { + return Client{}, fmt.Errorf("new Merlin client: %w", err) + } + + urlb, err := NewURLBuilderSource(params.BaseURL) + if err != nil { + return Client{}, fmt.Errorf("new Merlin client: %w", err) + } + + return Client{ + urlb: urlb, + http: httpClient, + timeout: params.Timeout, + }, nil +} + +func (c Client) Projects(ctx context.Context) ([]Project, error) { + ctx, cancel := context.WithTimeout(ctx, c.timeout) + defer cancel() + + u := c.urlb.New().Path("/v1/projects").URL() + req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil) + if err != nil { + return nil, fmt.Errorf("merlin client: fetch projects: new request: %w", err) + } + + var projects []Project + if err := c.exec(req, &projects); err != nil { + return nil, fmt.Errorf("merlin client: fetch projects: %w", err) + } + + return projects, nil +} + +func (c Client) Models(ctx context.Context, projectID int64) ([]Model, error) { + ctx, cancel := context.WithTimeout(ctx, c.timeout) + defer cancel() + + u := c.urlb.New(). + Path("/v1/projects/{projectID}/models"). + PathParamInt("projectID", projectID). + URL() + req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil) + if err != nil { + return nil, fmt.Errorf("merlin client: project ID '%d': fetch models: new request: %w", projectID, err) + } + + var models []Model + if err := c.exec(req, &models); err != nil { + return nil, fmt.Errorf("merlin client: project ID '%d': fetch models: %w", projectID, err) + } + + return models, nil +} + +func (c Client) ModelVersion(ctx context.Context, modelID, versionID int64) (ModelVersion, error) { + ctx, cancel := context.WithTimeout(ctx, c.timeout) + defer cancel() + + u := c.urlb.New(). + Path("/v1/models/{modelID}/versions/{versionID}"). + PathParamInt("modelID", modelID). + PathParamInt("versionID", versionID). 
+ URL() + req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil) + if err != nil { + return ModelVersion{}, fmt.Errorf( + "merlin client: model ID '%d': fetch version '%d': new request: %w", modelID, versionID, err, + ) + } + + var result ModelVersion + if err := c.exec(req, &result); err != nil { + return ModelVersion{}, fmt.Errorf( + "merlin client: project ID '%d': fetch version '%d': %w", modelID, versionID, err, + ) + } + + return result, nil +} + +func (c Client) exec(req *http.Request, result interface{}) error { + resp, err := c.http.Do(req) + defer drainBody(resp) + if err != nil { + return fmt.Errorf("execute request: %w", err) + } + + if resp.StatusCode != http.StatusOK { + msg, err := failureMsg(resp) + if err != nil { + return err + } + + return &APIError{ + Method: req.Method, + Endpoint: req.URL.String(), + Status: resp.StatusCode, + Msg: msg, + } + } + + if err := json.NewDecoder(resp.Body).Decode(&result); err != nil { + return fmt.Errorf("decode response: %w", err) + } + + return nil +} + +func authenticatedClient(ctx context.Context, serviceAccountJSON []byte, scopes ...string) (*http.Client, error) { + if len(serviceAccountJSON) == 0 { + return google.DefaultClient(ctx, scopes...) + } + + creds, err := google.CredentialsFromJSON(ctx, serviceAccountJSON, authScopes...) 
+ if err != nil { + return nil, fmt.Errorf("google credentials from JSON: %w", err) + } + + return oauth2.NewClient(ctx, creds.TokenSource), nil +} + +// drainBody drains and closes the response body to avoid the following +// gotcha: +// http://devs.cloudimmunity.com/gotchas-and-common-mistakes-in-go-golang/index.html#close_http_resp_body +func drainBody(resp *http.Response) { + if resp == nil { + return + } + + _, _ = io.Copy(io.Discard, resp.Body) + _ = resp.Body.Close() +} + +func failureMsg(resp *http.Response) (string, error) { + data, err := io.ReadAll(resp.Body) + if err != nil { + return "", fmt.Errorf("read response body: %w", err) + } + + if !isJSONContent(resp.Header.Get("Content-Type")) || !json.Valid(data) { + return (string)(data), nil + } + + var body struct { + Error string `json:"error"` + } + if err := json.Unmarshal(data, &body); err != nil { + return "", fmt.Errorf("decode failure response: %w", err) + } + + return body.Error, nil +} + +// Source: https://github.com/go-resty/resty/blob/v2.2.0/client.go#L64 +var jsonCheck = regexp.MustCompile(`(?i:(application|text)/(json|.*\+json|json\-.*)(;|$))`) + +func isJSONContent(ct string) bool { return jsonCheck.MatchString(ct) } diff --git a/plugins/extractors/merlin/internal/merlin/merlin_client_test.go b/plugins/extractors/merlin/internal/merlin/merlin_client_test.go new file mode 100644 index 000000000..e42abd84f --- /dev/null +++ b/plugins/extractors/merlin/internal/merlin/merlin_client_test.go @@ -0,0 +1,575 @@ +//go:build plugins +// +build plugins + +package merlin + +import ( + "context" + "fmt" + "net/http" + "net/http/httptest" + "strings" + "testing" + "time" + + testutils "github.com/odpf/meteor/test/utils" + "github.com/stretchr/testify/assert" + "golang.org/x/oauth2" +) + +var ( + ctx = context.Background() + credsJSON = []byte(`{"type":"service_account","project_id":"company-data-platform","private_key_id":"698vxv308w3i68p938040bz817r95b1e0k4kmvqs","private_key":"-----BEGIN PRIVATE 
KEY-----\nMIIEoQIBAAKCAQBVzQ0WPuaqdwMNapCGKdKUR/MOgWNByruT60SJwd5lY/2Sjx1w\nQ4sJ6xk/+Tz7bT3CgNBAPQ+rZfLD2fdQJIBeYElRcHw6a2PA/6TaX2e4qq0+5xk3\ngnItlqZm0hQElZd76LNlMcItHNmneLICowTOdzl0hUd2IgrqLB545v3KOGfwoEAp\nz3mPm/iF1+zTPWy041w7ajvWK2N3mRygKoP79ne2gDuN2+QHmW8wPFzQ3pdQZU65\n1npgP9N4wRHQT8vowTUSYdSRZG1p1MPKCXsrqhMUv7yLdrOcacAvcZqYOcMhJQ1p\nbYpsinDR65ARDduMeKoEUkFb3hf2zPUcdYNhAgMBAAECggEAQldOxCGUlr94o7n+\nz02tHavYGiIfDfLkQIYLs3wsKjc7DEQOHgyLh/q4xkc/SKR5uVeCLflIkV09bQOu\nftAKVW6bohWYaE86jTLdU1+rQhTt6ZIkZFA/WlJ+jUfn5HeJ7mvJsffcTKde/2eK\nNBG6GK4Exbx7ubKuv8unMBJiryUycioPykWZEVYl72+0IBsKCQOX39Fd/pgJF9jL\nFPelgCsrvPA/3lodgQu3m8VENlu4G6z3kPQghAvI37xC9NlUNVvx1yxCukQhf0zQ\nQ55kUTwgZ9sIGGcI/2K6H1YHv+m3vnM5D5iL9eTHn1HnlGtplQJhmhKjCxXIpbHx\nQToOwQKBgQCcEZP6H3nq3eH7d5ro1fvA6YEoERfzIzaU4Kk3Sb9e1tXjYSz8ccNv\nK3gZsHV2YZy3q9mCYnc0oPwwx5dSwhzpOrBrwvyopPbkKpD9WCXtZtRkwRTN7CXR\nE+2eSSpu2y14SKysPQoDZmyJo8bs7rseLQTiZeUPlYdlP6adOGSX+QKBgQCMvVqE\n6nbX41DcLJuUxT026T9zncnpRu3gkfyY0O5QF8/Vcq6y5LxdQtyMNbcbkDY8isAM\nwTP4KaXPul38TOCjfG3MODDbzmeQ27qKL/9Ueyi812BN4XIrpguoPKgFtlyi1JNH\nZiUtimedOoNG4LuuDEqeNyW1Qm/WlQu5fqKwqQKBgGscuVW6Ep+6RuWisePJMO62\nk9ke2jQZ39UP17NFXx1FDyjuQcTEg2AiElx3OjbUSY3ZWP/eenfZYRxNb7Lx3IvJ\nptleyq8oAPaZrEbkH6uunmjEB3ZI869qIPQ4vPG2ZZ+fKTtQ7TVmL2nLyLRGKJBO\nT4LecfZfJry7katnz8ppAoGAI0FXyI33YVNHMTBXdOgH0paRV4QCTVaARk4rqZhE\n6nlcjcqhqpyT9wTFvLXD/bqda4MSYt+PBi5go+26l3Ymm62Sz6KP0rAcz3PLgcxO\nOLp1VQDa1geQkxCQQP+Y032ALSX1EuCqlYLjO8aplfq76PiZRJLp9kMDQwypGDl5\nxakCgYAm7pO0LA/hTvdrZ7zGUIfTTZxf1qD+W0iUh2MtyaZM9uQhDoaahf7f2TT/\nt2+wlyIlHMdUxfDYf8U5owl9IysqaPMZsQmYNgYmXpW8/AhNcKFnslyrtd57Of3C\nlFHpNwfjNlxDTsql2kWbcwJbY0EblPRItplE7gDlUvfgSNTj+g==\n-----END PRIVATE 
KEY-----\n","client_email":"systems-meteor@company-data-platform.iam.gserviceaccount.com","client_id":"043161688880430795893","auth_uri":"https://accounts.google.com/o/oauth2/auth","token_uri":"https://oauth2.googleapis.com/token","auth_provider_x509_cert_url":"https://www.googleapis.com/oauth2/v1/certs","client_x509_cert_url":"https://www.googleapis.com/robot/v1/metadata/x509/systems-meteor%40company-data-platform.iam.gserviceaccount.com"}`) +) + +func TestNewClient(t *testing.T) { + cases := []struct { + name string + params ClientParams + errStr string + }{ + { + name: "Valid", + params: ClientParams{ + BaseURL: "http://company.com/api/merlin/", + ServiceAccountJSON: credsJSON, + }, + }, + { + name: "WithoutCredentials", + params: ClientParams{BaseURL: "http://company.com/api/merlin/"}, + errStr: "google: could not find default credentials", + }, + { + name: "WithInvalidCredentials", + params: ClientParams{ + BaseURL: "http://company.com/api/merlin/", + ServiceAccountJSON: []byte(`{"chuck": "norris"}`), + }, + errStr: "google credentials from JSON: missing 'type' field in credentials", + }, + { + name: "WithInvalidURL", + params: ClientParams{ + BaseURL: "Gintama - Yorozuya Gin-chan", + ServiceAccountJSON: credsJSON, + }, + errStr: `invalid input: parse "http://Gintama - Yorozuya Gin-chan": invalid character " " in host name`, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + _, err := NewClient(ctx, tc.params) + if tc.errStr != "" { + assert.ErrorContains(t, err, tc.errStr) + } else { + assert.NoError(t, err) + } + }) + } +} + +func TestProjects(t *testing.T) { + cases := []struct { + name string + status int + response interface{} + expected []Project + expectedErr error + }{ + { + name: "Valid", + status: http.StatusOK, + response: 
`[{"id":1,"name":"one-piece","mlflow_tracking_url":"http://mlflow.company.com","administrators":["s-ds-gitlab-runner@company-staging.iam.gserviceaccount.com","gol.d.roger@onepiece.com"],"readers":null,"team":"pirates","stream":"roger","labels":null,"created_at":"2019-11-19T11:05:11.41501Z","updated_at":"2022-06-23T09:31:25.834714Z"},{"id":100,"name":"strongest-man","mlflow_tracking_url":"http://mlflow.company.com","administrators":["edward.newgate@onepiece.com"],"readers":["ace.d.portgas@onepiece.com"],"team":"pirates","stream":"whitebeard","labels":null,"created_at":"2021-09-20T05:20:53.540571Z","updated_at":"2021-09-20T05:20:53.540571Z"},{"id":200,"name":"kurohige","mlflow_tracking_url":"http://mlflow.company.com","administrators":["teach.d.marshall@onepiece.com","s-ds-gitlab-runner@company-staging.iam.gserviceaccount.com"],"readers":["jesus.burgess@gojek.com"],"team":"pirates","stream":"blackbeard","labels":null,"created_at":"2021-10-29T02:12:43.142433Z","updated_at":"2022-05-08T17:56:32.924721Z"}]`, + expected: []Project{ + { + ID: 1, + Name: "one-piece", + MlflowTrackingURL: "http://mlflow.company.com", + Administrators: []string{"s-ds-gitlab-runner@company-staging.iam.gserviceaccount.com", "gol.d.roger@onepiece.com"}, + Team: "pirates", + Stream: "roger", + CreatedAt: "2019-11-19T11:05:11.41501Z", + UpdatedAt: "2022-06-23T09:31:25.834714Z", + }, + { + ID: 100, + Name: "strongest-man", + MlflowTrackingURL: "http://mlflow.company.com", + Administrators: []string{"edward.newgate@onepiece.com"}, + Readers: []string{"ace.d.portgas@onepiece.com"}, + Team: "pirates", + Stream: "whitebeard", + CreatedAt: "2021-09-20T05:20:53.540571Z", + UpdatedAt: "2021-09-20T05:20:53.540571Z", + }, + { + ID: 200, + Name: "kurohige", + MlflowTrackingURL: "http://mlflow.company.com", + Administrators: []string{"teach.d.marshall@onepiece.com", "s-ds-gitlab-runner@company-staging.iam.gserviceaccount.com"}, + Readers: []string{"jesus.burgess@gojek.com"}, + Team: "pirates", + Stream: 
"blackbeard", + CreatedAt: "2021-10-29T02:12:43.142433Z", + UpdatedAt: "2022-05-08T17:56:32.924721Z", + }, + }, + }, + { + name: "ValidWithNoProjects", + status: http.StatusOK, + response: `[]`, + expected: []Project{}, + }, + { + name: "Unavailable", + status: http.StatusServiceUnavailable, + response: `Service unavailable, go away`, + expectedErr: &APIError{ + Method: http.MethodGet, + Endpoint: "/api/merlin/v1/projects", + Status: http.StatusServiceUnavailable, + Msg: "Service unavailable, go away", + }, + }, + { + name: "UnavailableWithJSONResponse", + status: http.StatusServiceUnavailable, + response: `{"error": "Service unavailable, go away"}`, + expectedErr: &APIError{ + Method: http.MethodGet, + Endpoint: "/api/merlin/v1/projects", + Status: http.StatusServiceUnavailable, + Msg: "Service unavailable, go away", + }, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + token := "MyIncrediblyPowerfulAccessToken" + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, r.Method, http.MethodGet) + assert.Equal(t, r.URL.Path, "/api/merlin/v1/projects") + assert.Equal(t, r.Header.Get("Authorization"), "Bearer "+token) + + testutils.Respond(t, w, tc.status, tc.response) + })) + defer srv.Close() + + c, err := NewClient(ctxWithClient(t, token), ClientParams{ + BaseURL: srv.URL + "/api/merlin", + ServiceAccountJSON: credsJSON, + Timeout: 1 * time.Second, + }) + assert.NoError(t, err) + + projects, err := c.Projects(ctx) + if tc.expectedErr != nil { + assert.ErrorIs(t, err, tc.expectedErr) + assert.Nil(t, projects) + return + } + + assert.NoError(t, err) + assert.Equal(t, tc.expected, projects) + }) + } + + t.Run("Timeout", func(t *testing.T) { + timeout := 100 * time.Millisecond + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(timeout * 2) + })) + defer srv.Close() + + c, err := NewClient(ctxWithClient(t, 
"MyIncrediblyPowerfulAccessToken"), ClientParams{ + BaseURL: srv.URL + "/api/merlin", + ServiceAccountJSON: credsJSON, + Timeout: timeout, + }) + assert.NoError(t, err) + + _, err = c.Projects(ctx) + assert.ErrorIs(t, err, context.DeadlineExceeded) + }) +} + +func TestModels(t *testing.T) { + cases := []struct { + name string + projectID int64 + status int + response interface{} + expected []Model + expectedErr error + assertErr func(*testing.T, error) + }{ + { + name: "Valid", + projectID: 9999, + status: http.StatusOK, + response: `[{"id":12,"project_id":1,"mlflow_experiment_id":12,"name":"model.bst","type":"xgboost","mlflow_url":"http://mlflow.company.com/#/experiments/12","endpoints":[],"created_at":"2020-01-10T08:18:46.887143Z","updated_at":"2020-01-10T08:18:46.887143Z"},{"id":80,"project_id":1,"mlflow_experiment_id":83,"name":"resource-request","type":"sklearn","mlflow_url":"http://mlflow.company.com/#/experiments/83","endpoints":[{"id":450,"status":"terminated","url":"resource-request.sample.models.company.com","rule":{"destinations":[{"version_endpoint_id":"ad247eca-6e37-4d7f-90ba-f482890b5896","version_endpoint":{"id":"ad247eca-6e37-4d7f-90ba-f482890b5896","version_id":2,"status":"running","url":"http://resource-request-2.sample.models.company.com/v1/models/resource-request-2","service_name":"resource-request-2-predictor-default.sample.models.company.com","environment_name":"staging","monitoring_url":"","message":"","env_vars":null,"transformer":{"enabled":false,"transformer_type":"custom","image":"","command":"","args":"","env_vars":[],"created_at":"2022-05-30T07:52:32.790496Z","updated_at":"2022-05-30T07:52:51.047734Z"},"deployment_mode":"","created_at":"2022-05-30T07:43:21.252389Z","updated_at":"2022-05-30T07:52:51.046666Z"},"weight":100}]},"environment_name":"staging","created_at":"2022-05-30T07:54:04.137016Z","updated_at":"2022-06-14T07:12:09.538048Z"}],"created_at":"2020-04-23T10:07:40.105711Z","updated_at":"2022-06-14T07:12:09.536419Z"},{"id":689,"pr
oject_id":1,"mlflow_experiment_id":692,"name":"pyfunc-standard-transfo","type":"pyfunc","mlflow_url":"http://mlflow.company.com/#/experiments/692","endpoints":[{"id":174,"status":"terminated","url":"pyfunc-standard-transfo.sample.models.company.com","rule":{"destinations":[{"version_endpoint_id":"b53752c9-a4cb-48ac-b955-de468b75b2eb","version_endpoint":{"id":"b53752c9-a4cb-48ac-b955-de468b75b2eb","version_id":7,"status":"running","url":"http://pyfunc-standard-transfo-7.sample.models.company.com/v1/models/pyfunc-standard-transfo-7","service_name":"pyfunc-standard-transfo-7-predictor-default.sample.models.company.com","environment_name":"staging","monitoring_url":"","message":"","env_vars":[{"name":"MODEL_NAME","value":"pyfunc-standard-transfo-7"},{"name":"MODEL_DIR","value":"gs://mlp/mlflow/692/061ew38v3b7kp088s9b49kzr68v5ixvm3/artifacts/model"},{"name":"WORKERS","value":"1"}],"transformer":{"enabled":true,"transformer_type":"standard","image":"asia.gcr.io/company-staging/merlin-transformer:v0.10.0-rc2","command":"","args":"","env_vars":[{"name":"LOG_LEVEL","value":"DEBUG"},{"name":"STANDARD_TRANSFORMER_CONFIG","value":"{\"transformerConfig\":{\"feast\":[{\"project\":\"default\",\"entities\":[{\"name\":\"merchant_id\",\"valueType\":\"STRING\",\"jsonPath\":\"$.merchants[*].id\"}],\"features\":[{\"name\":\"merchant_t1_discovery:t1_estimate\",\"valueType\":\"DOUBLE\",\"defaultValue\":\"0\"}]}]}}"},{"name":"FEAST_FEATURE_STATUS_MONITORING_ENABLED","value":"true"},{"name":"FEAST_FEATURE_VALUE_MONITORING_ENABLED","value":"true"}],"created_at":"2021-02-05T05:26:42.759879Z","updated_at":"2021-02-05T05:40:20.092802Z"},"deployment_mode":"","created_at":"2021-02-05T05:26:42.768235Z","updated_at":"2021-02-05T05:40:20.091784Z"},"weight":100}]},"environment_name":"staging","created_at":"2021-02-05T08:33:26.204561Z","updated_at":"2021-07-22T08:13:34.64483Z"}],"created_at":"2021-02-04T13:05:31.593956Z","updated_at":"2021-07-22T08:13:34.642949Z"}]`, + expected: []Model{ + { + ID: 
12, + ProjectID: 1, + MlflowExperimentID: 12, + Name: "model.bst", + Type: "xgboost", + MlflowURL: "http://mlflow.company.com/#/experiments/12", + Endpoints: []ModelEndpoint{}, + CreatedAt: time.Date(2020, time.January, 10, 8, 18, 46, 887143000, time.UTC), + UpdatedAt: time.Date(2020, time.January, 10, 8, 18, 46, 887143000, time.UTC), + }, + { + ID: 80, + ProjectID: 1, + MlflowExperimentID: 83, + Name: "resource-request", + Type: "sklearn", + MlflowURL: "http://mlflow.company.com/#/experiments/83", + Endpoints: []ModelEndpoint{{ + ID: 450, + Status: "terminated", + URL: "resource-request.sample.models.company.com", + Rule: ModelEndpointRule{Destinations: []ModelEndpointRuleDestination{{ + VersionEndpointID: "ad247eca-6e37-4d7f-90ba-f482890b5896", + VersionEndpoint: &VersionEndpoint{ + ID: "ad247eca-6e37-4d7f-90ba-f482890b5896", + VersionID: 2, + Status: "running", + URL: "http://resource-request-2.sample.models.company.com/v1/models/resource-request-2", + ServiceName: "resource-request-2-predictor-default.sample.models.company.com", + EnvironmentName: "staging", + Transformer: Transformer{ + TransformerType: "custom", + EnvVars: []EnvVar{}, + CreatedAt: time.Date(2022, time.May, 30, 7, 52, 32, 790496000, time.UTC), + UpdatedAt: time.Date(2022, time.May, 30, 7, 52, 51, 47734000, time.UTC), + }, + CreatedAt: time.Date(2022, time.May, 30, 7, 43, 21, 252389000, time.UTC), + UpdatedAt: time.Date(2022, time.May, 30, 7, 52, 51, 46666000, time.UTC), + }, + Weight: 100, + }}}, + EnvironmentName: "staging", + CreatedAt: time.Date(2022, time.May, 30, 7, 54, 4, 137016000, time.UTC), + UpdatedAt: time.Date(2022, time.June, 14, 7, 12, 9, 538048000, time.UTC), + }}, + CreatedAt: time.Date(2020, time.April, 23, 10, 7, 40, 105711000, time.UTC), + UpdatedAt: time.Date(2022, time.June, 14, 7, 12, 9, 536419000, time.UTC), + }, + { + ID: 689, + ProjectID: 1, + MlflowExperimentID: 692, + Name: "pyfunc-standard-transfo", + Type: "pyfunc", + MlflowURL: 
"http://mlflow.company.com/#/experiments/692", + Endpoints: []ModelEndpoint{{ + ID: 174, + Status: "terminated", + URL: "pyfunc-standard-transfo.sample.models.company.com", + Rule: ModelEndpointRule{Destinations: []ModelEndpointRuleDestination{{ + VersionEndpointID: "b53752c9-a4cb-48ac-b955-de468b75b2eb", + VersionEndpoint: &VersionEndpoint{ + ID: "b53752c9-a4cb-48ac-b955-de468b75b2eb", + VersionID: 7, + Status: "running", + URL: "http://pyfunc-standard-transfo-7.sample.models.company.com/v1/models/pyfunc-standard-transfo-7", + ServiceName: "pyfunc-standard-transfo-7-predictor-default.sample.models.company.com", + EnvironmentName: "staging", + EnvVars: []EnvVar{ + {Name: "MODEL_NAME", Value: "pyfunc-standard-transfo-7"}, + {Name: "MODEL_DIR", Value: "gs://mlp/mlflow/692/061ew38v3b7kp088s9b49kzr68v5ixvm3/artifacts/model"}, + {Name: "WORKERS", Value: "1"}, + }, + Transformer: Transformer{ + Enabled: true, + TransformerType: "standard", + Image: "asia.gcr.io/company-staging/merlin-transformer:v0.10.0-rc2", + EnvVars: []EnvVar{ + {Name: "LOG_LEVEL", Value: "DEBUG"}, + {Name: "STANDARD_TRANSFORMER_CONFIG", Value: `{"transformerConfig":{"feast":[{"project":"default","entities":[{"name":"merchant_id","valueType":"STRING","jsonPath":"$.merchants[*].id"}],"features":[{"name":"merchant_t1_discovery:t1_estimate","valueType":"DOUBLE","defaultValue":"0"}]}]}}`}, + {Name: "FEAST_FEATURE_STATUS_MONITORING_ENABLED", Value: "true"}, + {Name: "FEAST_FEATURE_VALUE_MONITORING_ENABLED", Value: "true"}, + }, + CreatedAt: time.Date(2021, time.February, 5, 5, 26, 42, 759879000, time.UTC), + UpdatedAt: time.Date(2021, time.February, 5, 5, 40, 20, 92802000, time.UTC), + }, + CreatedAt: time.Date(2021, time.February, 5, 5, 26, 42, 768235000, time.UTC), + UpdatedAt: time.Date(2021, time.February, 5, 5, 40, 20, 91784000, time.UTC), + }, + Weight: 100, + }}}, + EnvironmentName: "staging", + CreatedAt: time.Date(2021, time.February, 5, 8, 33, 26, 204561000, time.UTC), + UpdatedAt: 
time.Date(2021, time.July, 22, 8, 13, 34, 644830000, time.UTC), + }}, + CreatedAt: time.Date(2021, time.February, 4, 13, 5, 31, 593956000, time.UTC), + UpdatedAt: time.Date(2021, time.July, 22, 8, 13, 34, 642949000, time.UTC), + }, + }, + }, + { + name: "ValidWithNoModels", + projectID: 876, + status: http.StatusOK, + response: `[]`, + expected: []Model{}, + }, + { + name: "NotFound", + projectID: 808, + status: http.StatusNotFound, + response: `Project with project_id 808 not found`, + expectedErr: &APIError{ + Method: http.MethodGet, + Endpoint: "/api/merlin/v1/projects/808/models", + Status: http.StatusNotFound, + Msg: "Project with project_id 808 not found", + }, + }, + { + name: "Unavailable", + projectID: 503, + status: http.StatusServiceUnavailable, + response: `{"error": "Service unavailable, go away"}`, + expectedErr: &APIError{ + Method: http.MethodGet, + Endpoint: "/api/merlin/v1/projects/503/models", + Status: http.StatusServiceUnavailable, + Msg: "Service unavailable, go away", + }, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + token := "MyIncrediblyPowerfulAccessToken" + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, r.Method, http.MethodGet) + assert.Equal(t, r.URL.Path, fmt.Sprintf("/api/merlin/v1/projects/%d/models", tc.projectID)) + assert.Equal(t, r.Header.Get("Authorization"), "Bearer "+token) + + testutils.Respond(t, w, tc.status, tc.response) + })) + defer srv.Close() + + c, err := NewClient(ctxWithClient(t, token), ClientParams{ + BaseURL: srv.URL + "/api/merlin", + ServiceAccountJSON: credsJSON, + Timeout: 1 * time.Second, + }) + assert.NoError(t, err) + + models, err := c.Models(ctx, tc.projectID) + if tc.expectedErr != nil { + assert.ErrorIs(t, err, tc.expectedErr) + assert.Nil(t, models) + return + } + + assert.NoError(t, err) + assert.Equal(t, tc.expected, models) + }) + } + + t.Run("Timeout", func(t *testing.T) { + timeout := 100 * 
time.Millisecond + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(timeout * 2) + })) + defer srv.Close() + + c, err := NewClient(ctxWithClient(t, "MyIncrediblyPowerfulAccessToken"), ClientParams{ + BaseURL: srv.URL + "/api/merlin", + ServiceAccountJSON: credsJSON, + Timeout: timeout, + }) + assert.NoError(t, err) + + _, err = c.Models(ctx, 10) + assert.ErrorIs(t, err, context.DeadlineExceeded) + }) +} + +func TestModelVersion(t *testing.T) { + cases := []struct { + name string + modelID int64 + versionID int64 + status int + response interface{} + expected ModelVersion + expectedErr error + assertErr func(*testing.T, error) + }{ + { + name: "Valid", + modelID: 9999, + status: http.StatusOK, + response: `{"id":2,"model_id":80,"mlflow_run_id":"040ewv8v3b9kpb1rs9i388z86kv7m53x","mlflow_url":"http://mlflow.company.com/#/experiments/83/runs/040ewv8v3b9kpb1rs9i388z86kv7m53x","endpoints":[{"id":"ad247eca-6e37-4d7f-90ba-f482890b5896","version_id":2,"status":"terminated","url":"http://resource-request-2.sample.models.company.com/v1/models/resource-request-2","service_name":"resource-request-2-predictor-default.sample.models.company.com","environment_name":"id-staging","message":"","env_vars":null,"transformer":{"enabled":false,"transformer_type":"custom","image":"","command":"","args":"","env_vars":[],"created_at":"2022-05-30T07:52:32.790496Z","updated_at":"2022-06-14T07:12:13.513532Z"},"deployment_mode":"serverless","created_at":"2022-05-30T07:43:21.252389Z","updated_at":"2022-06-14T07:12:13.512746Z"}],"labels":null,"created_at":"2022-05-30T07:43:18.120248Z","updated_at":"2022-05-30T07:43:18.120248Z"}`, + expected: ModelVersion{ + ID: 2, + ModelD: 80, + MlflowRunID: "040ewv8v3b9kpb1rs9i388z86kv7m53x", + MlflowURL: "http://mlflow.company.com/#/experiments/83/runs/040ewv8v3b9kpb1rs9i388z86kv7m53x", + Endpoints: []VersionEndpoint{{ + ID: "ad247eca-6e37-4d7f-90ba-f482890b5896", + VersionID: 2, + Status: "terminated", + 
URL: "http://resource-request-2.sample.models.company.com/v1/models/resource-request-2", + ServiceName: "resource-request-2-predictor-default.sample.models.company.com", + EnvironmentName: "id-staging", + EnvVars: []EnvVar(nil), + Transformer: Transformer{ + TransformerType: "custom", + EnvVars: []EnvVar{}, + CreatedAt: time.Date(2022, time.May, 30, 7, 52, 32, 790496000, time.UTC), + UpdatedAt: time.Date(2022, time.June, 14, 7, 12, 13, 513532000, time.UTC), + }, + DeploymentMode: "serverless", + CreatedAt: time.Date(2022, time.May, 30, 7, 43, 21, 252389000, time.UTC), + UpdatedAt: time.Date(2022, time.June, 14, 7, 12, 13, 512746000, time.UTC), + }}, + Labels: map[string]string(nil), + CreatedAt: time.Date(2022, time.May, 30, 7, 43, 18, 120248000, time.UTC), + UpdatedAt: time.Date(2022, time.May, 30, 7, 43, 18, 120248000, time.UTC), + }, + }, + { + name: "NotFound", + modelID: 808, + versionID: 202, + status: http.StatusNotFound, + response: `{"error":"Model version 808 for version 202"}`, + expectedErr: &APIError{ + Method: http.MethodGet, + Endpoint: "/api/merlin/v1/models/808/versions/202", + Status: http.StatusNotFound, + Msg: "Model version 808 for version 202", + }, + }, + { + name: "Unavailable", + modelID: 503, + versionID: 3, + status: http.StatusServiceUnavailable, + response: `Service unavailable, go away`, + expectedErr: &APIError{ + Method: http.MethodGet, + Endpoint: "/api/merlin/v1/models/503/versions/3", + Status: http.StatusServiceUnavailable, + Msg: "Service unavailable, go away", + }, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + token := "MyIncrediblyPowerfulAccessToken" + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + assert.Equal(t, r.Method, http.MethodGet) + assert.Equal(t, r.URL.Path, fmt.Sprintf( + "/api/merlin/v1/models/%d/versions/%d", tc.modelID, tc.versionID, + )) + assert.Equal(t, r.Header.Get("Authorization"), "Bearer "+token) + + testutils.Respond(t, w, 
tc.status, tc.response) + })) + defer srv.Close() + + c, err := NewClient(ctxWithClient(t, token), ClientParams{ + BaseURL: srv.URL + "/api/merlin", + ServiceAccountJSON: credsJSON, + Timeout: 1 * time.Second, + }) + assert.NoError(t, err) + + mdlv, err := c.ModelVersion(ctx, tc.modelID, tc.versionID) + if tc.expectedErr != nil { + assert.ErrorIs(t, err, tc.expectedErr) + assert.Zero(t, mdlv) + return + } + + assert.NoError(t, err) + assert.Equal(t, tc.expected, mdlv) + }) + } + + t.Run("Timeout", func(t *testing.T) { + timeout := 100 * time.Millisecond + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + time.Sleep(timeout * 2) + })) + defer srv.Close() + + c, err := NewClient(ctxWithClient(t, "MyIncrediblyPowerfulAccessToken"), ClientParams{ + BaseURL: srv.URL + "/api/merlin", + ServiceAccountJSON: credsJSON, + Timeout: timeout, + }) + assert.NoError(t, err) + + _, err = c.ModelVersion(ctx, 1, 2) + assert.ErrorIs(t, err, context.DeadlineExceeded) + }) +} + +func ctxWithClient(t *testing.T, token string) context.Context { + return context.WithValue(ctx, oauth2.HTTPClient, &http.Client{ + Transport: mockOauthRoundTripper{ + T: t, + AccessToken: token, + Base: http.DefaultTransport, + }, + }) +} + +type mockOauthRoundTripper struct { + T *testing.T + AccessToken string + Base http.RoundTripper +} + +func (m mockOauthRoundTripper) RoundTrip(req *http.Request) (*http.Response, error) { + if m.match(req) { + return &http.Response{ + Status: http.StatusText(http.StatusOK), + StatusCode: http.StatusOK, + Proto: "HTTP/1.1", + ProtoMajor: 1, + ProtoMinor: 1, + Header: make(http.Header), + Body: testutils.ValueAsJSONReader(m.T, map[string]interface{}{ + "access_token": m.AccessToken, + "expires_in": 3599, + "token_type": "Bearer", + }), + Uncompressed: true, + }, nil + } + + return m.Base.RoundTrip(req) +} + +func (m mockOauthRoundTripper) match(r *http.Request) bool { + return r.Method == http.MethodPost && + r.URL.Host == 
"oauth2.googleapis.com" && + r.URL.Path == "/token" +} + +func (e *APIError) Is(err error) bool { + // Override comparison just in tests to simplify matching without host in + // the endpoint. + other, ok := err.(*APIError) + if !ok { + return false + } + + return e.Method == other.Method && + strings.Contains(e.Endpoint, other.Endpoint) && + e.Status == other.Status && + e.Msg == other.Msg +} diff --git a/plugins/extractors/merlin/internal/merlin/model.go b/plugins/extractors/merlin/internal/merlin/model.go new file mode 100644 index 000000000..3af9045af --- /dev/null +++ b/plugins/extractors/merlin/internal/merlin/model.go @@ -0,0 +1,75 @@ +package merlin + +import "time" + +type Model struct { + ID int64 `json:"id"` + ProjectID int64 `json:"project_id"` + MlflowExperimentID int64 `json:"mlflow_experiment_id"` + Name string `json:"name"` + Type string `json:"type"` + MlflowURL string `json:"mlflow_url"` + Endpoints []ModelEndpoint `json:"endpoints"` + CreatedAt time.Time `json:"created_at"` + UpdatedAt time.Time `json:"updated_at"` +} + +type ModelEndpoint struct { + ID int64 `json:"id"` + Status string `json:"status"` // pending/running/serving/failed/terminated + URL string `json:"url"` + Rule ModelEndpointRule `json:"rule"` + EnvironmentName string `json:"environment_name"` + CreatedAt time.Time `json:"created_at"` + UpdatedAt time.Time `json:"updated_at"` + // ModelID int64 `json:"model_id"` + // Environment *Environment `json:"environment"` +} + +type ModelEndpointRule struct { + Destinations []ModelEndpointRuleDestination `json:"destinations"` + // Mirror *VersionEndpoint `json:"mirror"` +} + +type ModelEndpointRuleDestination struct { + VersionEndpointID string `json:"version_endpoint_id"` + VersionEndpoint *VersionEndpoint `json:"version_endpoint"` + Weight int64 `json:"weight"` +} + +type VersionEndpoint struct { + ID string `json:"id"` + VersionID int64 `json:"version_id"` + Status string `json:"status"` // pending/running/serving/failed/terminated + 
URL string `json:"url"` + ServiceName string `json:"service_name"` + EnvironmentName string `json:"environment_name"` + MonitoringURL string `json:"monitoring_url"` + Message string `json:"message"` + EnvVars []EnvVar `json:"env_vars"` + Transformer Transformer `json:"transformer"` + DeploymentMode string `json:"deployment_mode"` // serverless/raw_deployment + CreatedAt time.Time `json:"created_at"` + UpdatedAt time.Time `json:"updated_at"` + // Environment *Environment `json:"environment"` + // ResourceRequest *ResourceRequest `json:"resource_request"` + // AutoscalingPolicy *AutoscalingPolicy `json:"autoscaling_policy"` + // Logger *Logger `json:"logger"` +} + +type EnvVar struct { + Name string `json:"name"` + Value string `json:"value"` +} + +type Transformer struct { + Enabled bool `json:"enabled"` + TransformerType string `json:"transformer_type"` + Image string `json:"image"` + Command string `json:"command"` + Args string `json:"args"` + EnvVars []EnvVar `json:"env_vars"` + CreatedAt time.Time `json:"created_at"` + UpdatedAt time.Time `json:"updated_at"` + // ResourceRequest *ResourceRequest `json:"resource_request"` +} diff --git a/plugins/extractors/merlin/internal/merlin/model_version.go b/plugins/extractors/merlin/internal/merlin/model_version.go new file mode 100644 index 000000000..195d41e20 --- /dev/null +++ b/plugins/extractors/merlin/internal/merlin/model_version.go @@ -0,0 +1,17 @@ +package merlin + +import "time" + +type ModelVersion struct { + ID int64 `json:"id"` + ModelD int64 `json:"model_id"` + MlflowRunID string `json:"mlflow_run_id"` + MlflowURL string `json:"mlflow_url"` + Endpoints []VersionEndpoint `json:"endpoints"` + Labels map[string]string `json:"labels"` + CreatedAt time.Time `json:"created_at"` + UpdatedAt time.Time `json:"updated_at"` + // ArtifactURI string `json:"artifact_uri"` + // Properties interface{} `json:"properties"` + // CustomPredictor *CustomPredictor `json:"custom_predictor"` +} diff --git 
a/plugins/extractors/merlin/internal/merlin/project.go b/plugins/extractors/merlin/internal/merlin/project.go new file mode 100644 index 000000000..c5a3c8a98 --- /dev/null +++ b/plugins/extractors/merlin/internal/merlin/project.go @@ -0,0 +1,19 @@ +package merlin + +type Project struct { + ID int64 `json:"id"` + Name string `json:"name"` + MlflowTrackingURL string `json:"mlflow_tracking_url"` + Administrators []string `json:"administrators"` // List of emails + Readers []string `json:"readers"` + Team string `json:"team"` + Stream string `json:"stream"` + Labels []Label `json:"labels"` + CreatedAt string `json:"created_at"` + UpdatedAt string `json:"updated_at"` +} + +type Label struct { + Key string `json:"key"` + Value string `json:"value"` +} diff --git a/plugins/extractors/merlin/internal/merlin/url_builder.go b/plugins/extractors/merlin/internal/merlin/url_builder.go new file mode 100644 index 000000000..6a166f6dc --- /dev/null +++ b/plugins/extractors/merlin/internal/merlin/url_builder.go @@ -0,0 +1,199 @@ +package merlin + +import ( + "fmt" + "net/url" + "path" + "strconv" + "strings" +) + +// URLBuilderSource is used to create URLBuilder instances. +// +// The URLBuilder created using URLBuilderSource will include the base +// URL and query parameters present on the URLBuilderSource instance. +// +// Ideally, URLBuilderSource instance should be created only once for a +// given base URL. +type URLBuilderSource struct { + base *url.URL + qry url.Values +} + +// NewURLBuilderSource builds a URLBuilderSource instance by parsing the +// baseURL. +// +// The baseURL is expected to specify the host. If no scheme is +// specified, it defaults to http scheme. 
+func NewURLBuilderSource(baseURL string) (URLBuilderSource, error) { + u, err := url.Parse(baseURL) + if err != nil { + return URLBuilderSource{}, fmt.Errorf("new url builder: invalid input: %w", err) + } + + if u.Scheme == "" { + return NewURLBuilderSource("http://" + baseURL) + } + + return URLBuilderSource{ + base: u, + qry: u.Query(), + }, nil +} + +// New creates a new instance of URLBuilder with the base URL +// and query parameters carried over from URLBuilderSource. +func (b URLBuilderSource) New() *URLBuilder { + u := *b.base // create a copy + return &URLBuilder{ + url: &u, + pathParams: make(map[string]string), + qry: urlValuesCopy(b.qry), + } +} + +// URLBuilder is used to build a URL. +// +// URLBuilderSource should be used to create an instance of URLBuilder. +// +// b, err := httputil.NewURLBuilderSource("https://api.example.com/") +// if err != nil { +// // handle error +// } +// +// u := b.New(). +// Path("/users/{id}/posts"). +// PathParam("id", id). +// QueryParam("limit", limit). +// QueryParam("offset", offset). +// URL() +// +// // { id: 123, limit: 10, offset: 120 } +// // https://api.example.com/users/123/posts?limit=10&offset=120 +// +// r, err := http.NewRequestWithContext(context.Background(), http.MethodGet, u.String(), nil) +// if err != nil { +// // handle error +// } +// +// // send HTTP request. +type URLBuilder struct { + url *url.URL + path string + pathParams map[string]string + qry url.Values +} + +// Path sets the path template for the URL. +// +// Path parameters of the format "{paramName}" are supported and can be +// substituted using PathParam*. +func (u *URLBuilder) Path(p string) *URLBuilder { + u.path = p + return u +} + +// PathParam sets the path parameter name and value which needs to be +// substituted in the path template. 
Substitution happens when the URL +// is built using URLBuilder.URL() +func (u *URLBuilder) PathParam(name, value string) *URLBuilder { + u.pathParams[name] = value + return u +} + +// PathParamInt sets the path parameter name and value which needs to be +// substituted in the path template. Substitution happens when the URL +// is built using URLBuilder.URL() +func (u *URLBuilder) PathParamInt(name string, value int64) *URLBuilder { + u.pathParams[name] = strconv.FormatInt(value, 10) + return u +} + +// PathParams sets the path parameter names and values which need to be +// substituted in the path template. Substitution happens when the URL +// is built using URLBuilder.URL() +func (u *URLBuilder) PathParams(params map[string]string) *URLBuilder { + for name, value := range params { + u.pathParams[name] = value + } + return u +} + +// QueryParam sets the query parameter with the given values. If a value +// was previously set, it is replaced. +func (u *URLBuilder) QueryParam(key string, values ...string) *URLBuilder { + u.qry.Del(key) + for _, v := range values { + u.qry.Add(key, v) + } + return u +} + +// QueryParamInt sets the query parameter with the given values. If a +// value was previously set, it is replaced. +func (u *URLBuilder) QueryParamInt(key string, values ...int64) *URLBuilder { + u.qry.Del(key) + for _, v := range values { + u.qry.Add(key, strconv.FormatInt(v, 10)) + } + return u +} + +// QueryParamBool sets the query parameter with the given value. If a +// value was previously set, it is replaced. +func (u *URLBuilder) QueryParamBool(key string, value bool) *URLBuilder { + u.qry.Set(key, strconv.FormatBool(value)) + return u +} + +// QueryParamFloat sets the query parameter with the given values. If a +// value was previously set, it is replaced. 
+func (u *URLBuilder) QueryParamFloat(key string, values ...float64) *URLBuilder { + u.qry.Del(key) + for _, v := range values { + u.qry.Add(key, strconv.FormatFloat(v, 'f', -1, 64)) + } + return u +} + +// QueryParams sets the query parameters. If a value was previously set +// for any of the given parameters, it is replaced. +func (u *URLBuilder) QueryParams(params url.Values) *URLBuilder { + for key, values := range params { + u.qry.Del(key) + for _, v := range values { + u.qry.Add(key, v) + } + } + return u +} + +// URL constructs and returns an instance of URL. +// +// The constructed URL has the complete path and query parameters setup. +// The path parameters are substituted before being joined with the base +// URL. +func (u *URLBuilder) URL() *url.URL { + urlv := u.url + + p := u.path + for name, value := range u.pathParams { + p = strings.Replace(p, "{"+name+"}", url.PathEscape(value), -1) + } + urlv.Path = path.Join(urlv.Path, p) + + urlv.RawQuery = u.qry.Encode() + + return urlv +} + +func urlValuesCopy(src url.Values) url.Values { + dst := make(url.Values, len(src)) + for key, values := range src { + dst[key] = make([]string, 0, len(values)) + for _, v := range values { + dst.Add(key, v) + } + } + return dst +} diff --git a/plugins/extractors/merlin/internal/merlin/url_builder_test.go b/plugins/extractors/merlin/internal/merlin/url_builder_test.go new file mode 100644 index 000000000..1cd911539 --- /dev/null +++ b/plugins/extractors/merlin/internal/merlin/url_builder_test.go @@ -0,0 +1,197 @@ +//go:build plugins +// +build plugins + +package merlin_test + +import ( + "net/url" + "testing" + + "github.com/odpf/meteor/plugins/extractors/merlin/internal/merlin" + "github.com/stretchr/testify/assert" +) + +func TestNewURLBuilderSource(t *testing.T) { + cases := []struct { + name string + baseURL string + expected *url.URL + expectedErr string + }{ + { + name: "Simple", + baseURL: "https://api.example.com", + expected: &url.URL{ + Scheme: "https", + Host: 
"api.example.com", + }, + }, + { + name: "WithQueryParams", + baseURL: "http://example.com?limit=10&offset=120", + expected: &url.URL{ + Scheme: "http", + Host: "example.com", + RawQuery: "limit=10&offset=120", + }, + }, + { + name: "WithoutScheme", + baseURL: "api.example.com", + expected: &url.URL{ + Scheme: "http", + Host: "api.example.com", + }, + }, + { + name: "InvalidBaseURL", + baseURL: ":foo", + expectedErr: `invalid input: parse ":foo": missing protocol scheme`, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + b, err := merlin.NewURLBuilderSource(tc.baseURL) + if tc.expectedErr != "" { + assert.ErrorContains(t, err, tc.expectedErr) + return + } + + assert.NoError(t, err) + assert.Equal(t, tc.expected, b.New().URL()) + }) + } +} + +func TestURLBuilder(t *testing.T) { + b, err := merlin.NewURLBuilderSource("https://api.example.com/v1/?limit=10&mode=light") + if err != nil { + t.Fatalf("NewURLBuilderSource(): %s", err) + } + + cases := []struct { + name string + url *url.URL + expected *url.URL + }{ + { + name: "Simple", + url: b.New().Path("users").URL(), + expected: &url.URL{ + Scheme: "https", + Host: "api.example.com", + Path: "/v1/users", + RawQuery: "limit=10&mode=light", + }, + }, + { + name: "PathParams", + url: b.New(). + Path("/users/{userID}/posts/{postID}/comments"). + PathParam("userID", "foo"). + PathParamInt("postID", 42). + URL(), + expected: &url.URL{ + Scheme: "https", + Host: "api.example.com", + Path: "/v1/users/foo/posts/42/comments", + RawQuery: "limit=10&mode=light", + }, + }, + { + name: "DoublePathParam", + url: b.New(). + Path("/path/{p1}/{p2}/{p1}"). + PathParam("p1", "a"). + PathParam("p2", "b"). + URL(), + expected: &url.URL{ + Scheme: "https", + Host: "api.example.com", + Path: "/v1/path/a/b/a", + RawQuery: "limit=10&mode=light", + }, + }, + { + name: "EscapePathParam", + url: b.New(). + Path("/posts/{title}"). + PathParam("title", `Letters & "Special" Characters`). 
+ URL(), + expected: &url.URL{ + Scheme: "https", + Host: "api.example.com", + Path: "/v1/posts/Letters%20&%20%22Special%22%20Characters", + RawQuery: "limit=10&mode=light", + }, + }, + { + name: "PathParamsMap", + url: b.New(). + Path("{app}/users/{userID}/posts/{postID}/comments"). + PathParams(map[string]string{ + "app": "myapp", + "userID": "1", + "postID": "42", + }). + URL(), + expected: &url.URL{ + Scheme: "https", + Host: "api.example.com", + Path: "/v1/myapp/users/1/posts/42/comments", + RawQuery: "limit=10&mode=light", + }, + }, + { + name: "QueryParams", + url: b.New(). + Path("/search"). + QueryParam("author_id", "foo", "bar"). + QueryParamInt("limit", 20). + QueryParamInt("ints", 1, 3, 5, 7). + QueryParamBool("recent", true). + QueryParamFloat("min_rating", 4.5). + QueryParamFloat("floats", 0, -2, 4.6735593624473). + URL(), + expected: &url.URL{ + Scheme: "https", + Host: "api.example.com", + Path: "/v1/search", + RawQuery: "author_id=foo&author_id=bar&floats=0&floats=-2&floats=4.6735593624473&ints=1&ints=3&ints=5&ints=7&limit=20&min_rating=4.5&mode=light&recent=true", + }, + }, + { + name: "EscapeQueryParam", + url: b.New(). + Path("/search"). + QueryParam("text", "foo bar/®"). + URL(), + expected: &url.URL{ + Scheme: "https", + Host: "api.example.com", + Path: "/v1/search", + RawQuery: "limit=10&mode=light&text=foo+bar%2F%C2%AE", + }, + }, + { + name: "QueryParamValues", + url: b.New(). + QueryParams(url.Values{ + "mode": {"dark"}, + "offset": {"20"}, + }). 
+ URL(), + expected: &url.URL{ + Scheme: "https", + Host: "api.example.com", + Path: "/v1", + RawQuery: "limit=10&mode=dark&offset=20", + }, + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.expected, tc.url) + }) + } +} diff --git a/plugins/extractors/merlin/internal/mocks/merlin_client_mock.go b/plugins/extractors/merlin/internal/mocks/merlin_client_mock.go new file mode 100644 index 000000000..89ee1ad24 --- /dev/null +++ b/plugins/extractors/merlin/internal/mocks/merlin_client_mock.go @@ -0,0 +1,178 @@ +// Code generated by mockery v2.14.0. DO NOT EDIT. + +package mocks + +import ( + context "context" + + merlin "github.com/odpf/meteor/plugins/extractors/merlin/internal/merlin" + + mock "github.com/stretchr/testify/mock" +) + +// MerlinClient is an autogenerated mock type for the Client type +type MerlinClient struct { + mock.Mock +} + +type MerlinClient_Expecter struct { + mock *mock.Mock +} + +func (_m *MerlinClient) EXPECT() *MerlinClient_Expecter { + return &MerlinClient_Expecter{mock: &_m.Mock} +} + +// ModelVersion provides a mock function with given fields: ctx, modelID, versionID +func (_m *MerlinClient) ModelVersion(ctx context.Context, modelID int64, versionID int64) (merlin.ModelVersion, error) { + ret := _m.Called(ctx, modelID, versionID) + + var r0 merlin.ModelVersion + if rf, ok := ret.Get(0).(func(context.Context, int64, int64) merlin.ModelVersion); ok { + r0 = rf(ctx, modelID, versionID) + } else { + r0 = ret.Get(0).(merlin.ModelVersion) + } + + var r1 error + if rf, ok := ret.Get(1).(func(context.Context, int64, int64) error); ok { + r1 = rf(ctx, modelID, versionID) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// MerlinClient_ModelVersion_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'ModelVersion' +type MerlinClient_ModelVersion_Call struct { + *mock.Call +} + +// ModelVersion is a helper method to define mock.On call +// - ctx 
context.Context +// - modelID int64 +// - versionID int64 +func (_e *MerlinClient_Expecter) ModelVersion(ctx interface{}, modelID interface{}, versionID interface{}) *MerlinClient_ModelVersion_Call { + return &MerlinClient_ModelVersion_Call{Call: _e.mock.On("ModelVersion", ctx, modelID, versionID)} +} + +func (_c *MerlinClient_ModelVersion_Call) Run(run func(ctx context.Context, modelID int64, versionID int64)) *MerlinClient_ModelVersion_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(context.Context), args[1].(int64), args[2].(int64)) + }) + return _c +} + +func (_c *MerlinClient_ModelVersion_Call) Return(_a0 merlin.ModelVersion, _a1 error) *MerlinClient_ModelVersion_Call { + _c.Call.Return(_a0, _a1) + return _c +} + +// Models provides a mock function with given fields: ctx, projectID +func (_m *MerlinClient) Models(ctx context.Context, projectID int64) ([]merlin.Model, error) { + ret := _m.Called(ctx, projectID) + + var r0 []merlin.Model + if rf, ok := ret.Get(0).(func(context.Context, int64) []merlin.Model); ok { + r0 = rf(ctx, projectID) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).([]merlin.Model) + } + } + + var r1 error + if rf, ok := ret.Get(1).(func(context.Context, int64) error); ok { + r1 = rf(ctx, projectID) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// MerlinClient_Models_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Models' +type MerlinClient_Models_Call struct { + *mock.Call +} + +// Models is a helper method to define mock.On call +// - ctx context.Context +// - projectID int64 +func (_e *MerlinClient_Expecter) Models(ctx interface{}, projectID interface{}) *MerlinClient_Models_Call { + return &MerlinClient_Models_Call{Call: _e.mock.On("Models", ctx, projectID)} +} + +func (_c *MerlinClient_Models_Call) Run(run func(ctx context.Context, projectID int64)) *MerlinClient_Models_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(context.Context), 
args[1].(int64)) + }) + return _c +} + +func (_c *MerlinClient_Models_Call) Return(_a0 []merlin.Model, _a1 error) *MerlinClient_Models_Call { + _c.Call.Return(_a0, _a1) + return _c +} + +// Projects provides a mock function with given fields: ctx +func (_m *MerlinClient) Projects(ctx context.Context) ([]merlin.Project, error) { + ret := _m.Called(ctx) + + var r0 []merlin.Project + if rf, ok := ret.Get(0).(func(context.Context) []merlin.Project); ok { + r0 = rf(ctx) + } else { + if ret.Get(0) != nil { + r0 = ret.Get(0).([]merlin.Project) + } + } + + var r1 error + if rf, ok := ret.Get(1).(func(context.Context) error); ok { + r1 = rf(ctx) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// MerlinClient_Projects_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'Projects' +type MerlinClient_Projects_Call struct { + *mock.Call +} + +// Projects is a helper method to define mock.On call +// - ctx context.Context +func (_e *MerlinClient_Expecter) Projects(ctx interface{}) *MerlinClient_Projects_Call { + return &MerlinClient_Projects_Call{Call: _e.mock.On("Projects", ctx)} +} + +func (_c *MerlinClient_Projects_Call) Run(run func(ctx context.Context)) *MerlinClient_Projects_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(context.Context)) + }) + return _c +} + +func (_c *MerlinClient_Projects_Call) Return(_a0 []merlin.Project, _a1 error) *MerlinClient_Projects_Call { + _c.Call.Return(_a0, _a1) + return _c +} + +type mockConstructorTestingTNewMerlinClient interface { + mock.TestingT + Cleanup(func()) +} + +// NewMerlinClient creates a new instance of MerlinClient. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. 
+func NewMerlinClient(t mockConstructorTestingTNewMerlinClient) *MerlinClient { + mock := &MerlinClient{} + mock.Mock.Test(t) + + t.Cleanup(func() { mock.AssertExpectations(t) }) + + return mock +} diff --git a/plugins/extractors/merlin/merlin.go b/plugins/extractors/merlin/merlin.go new file mode 100644 index 000000000..2a55cd0e1 --- /dev/null +++ b/plugins/extractors/merlin/merlin.go @@ -0,0 +1,288 @@ +package merlin + +import ( + "context" + _ "embed" // used to print the embedded assets + "encoding/base64" + "fmt" + "net/http" + "runtime/debug" + "sync" + "time" + + "github.com/MakeNowJust/heredoc" + "github.com/odpf/meteor/models" + "github.com/odpf/meteor/plugins" + "github.com/odpf/meteor/plugins/extractors/merlin/internal/merlin" + "github.com/odpf/meteor/registry" + "github.com/odpf/salt/log" + "github.com/pkg/errors" +) + +// init register the extractor to the catalog +func init() { + if err := registry.Extractors.Register("merlin", func() plugins.Extractor { + return New(plugins.GetLog(), newHTTPClient) + }); err != nil { + panic(err) + } +} + +//go:embed README.md +var summary string + +// Config holds the set of configuration for the Merlin extractor. +type Config struct { + URL string `mapstructure:"url" validate:"required"` + ServiceAccountBase64 string `mapstructure:"service_account_base64"` + RequestTimeout time.Duration `mapstructure:"request_timeout" validate:"min=1ms" default:"10s"` + WorkerCount int `mapstructure:"worker_count" validate:"min=1" default:"5"` +} + +var sampleConfig = heredoc.Doc(` + url: merlin.my-company.com + service_account_base64: |- + ____base64_encoded_service_account_credentials____ +`) + +var info = plugins.Info{ + Description: "Merlin ML models metadata", + SampleConfig: sampleConfig, + Summary: summary, + Tags: []string{"merlin", "ml", "model", "extractor"}, +} + +// Extractor manages the communication with the Merlin service. 
+type Extractor struct { + plugins.BaseExtractor + + logger log.Logger + newClient NewClientFunc + client Client + config Config +} + +type NewClientFunc func(ctx context.Context, cfg Config) (Client, error) + +//go:generate mockery --name=Client -r --case underscore --with-expecter --structname MerlinClient --filename merlin_client_mock.go --output=./internal/mocks + +type Client interface { + Projects(ctx context.Context) ([]merlin.Project, error) + Models(ctx context.Context, projectID int64) ([]merlin.Model, error) + ModelVersion(ctx context.Context, modelID, versionID int64) (merlin.ModelVersion, error) +} + +// New returns a pointer to an initialized Extractor Object +func New(logger log.Logger, newClient NewClientFunc) *Extractor { + e := &Extractor{ + logger: logger, + newClient: newClient, + } + e.BaseExtractor = plugins.NewBaseExtractor(info, &e.config) + + return e +} + +// Init initializes the extractor +func (e *Extractor) Init(ctx context.Context, config plugins.Config) error { + if err := e.BaseExtractor.Init(ctx, config); err != nil { + return fmt.Errorf("init Merlin extractor: %w", err) + } + + client, err := e.newClient(ctx, e.config) + if err != nil { + return fmt.Errorf("init Merlin extractor: %w", err) + } + + e.client = client + + return nil +} + +func (e *Extractor) Extract(ctx context.Context, emit plugins.Emit) error { + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + projects, err := e.client.Projects(ctx) + if err != nil { + if shouldRetry(err) { + return plugins.NewRetryError(err) + } + + return err + } + + errCh := e.startWorkers(ctx, jobQueue(ctx, projects), e.config.WorkerCount, emit) + select { + case <-ctx.Done(): + return ctx.Err() + + case err, ok := <-errCh: + if ok { + return err + } + } + + return nil +} + +func (e *Extractor) startWorkers( + ctx context.Context, jobs <-chan merlin.Project, workerCnt int, emit plugins.Emit, +) <-chan error { + var wg sync.WaitGroup + wg.Add(workerCnt) + + errCh := make(chan error) + 
for i := 0; i < workerCnt; i++ { + go func() { + defer wg.Done() + + sendErr := func(err error) { + select { + case <-ctx.Done(): + return + + case errCh <- err: + } + } + + for { + select { + case <-ctx.Done(): + return + + case p, ok := <-jobs: + if !ok { + return + } + + if err := e.extractProject(ctx, p, emit); err != nil { + sendErr(err) + return + } + } + } + }() + } + + go func() { + wg.Wait() + close(errCh) + }() + + return errCh +} + +func (e *Extractor) extractProject(ctx context.Context, prj merlin.Project, emit plugins.Emit) (err error) { + defer func() { + if r := recover(); r != nil { + e.logger.Error("panic recovered") + e.logger.Info(string(debug.Stack())) + if e, ok := r.(error); ok { + err = fmt.Errorf("extract project '%d': panic: %w", prj.ID, e) + } else { + err = fmt.Errorf("extract project '%d': panic: %v", prj.ID, r) + } + } + }() + + mdls, err := e.client.Models(ctx, prj.ID) + if err != nil { + e.logger.Error("merlin extractor", "project", prj.ID, "err", err) + return nil + } + + for _, mdl := range mdls { + if len(mdl.Endpoints) == 0 { + continue + } + + versions := make(map[int64]merlin.ModelVersion) + for _, endpoint := range mdl.Endpoints { + for _, dest := range endpoint.Rule.Destinations { + if dest.VersionEndpoint == nil { + continue + } + + versionID := dest.VersionEndpoint.VersionID + version, err := e.client.ModelVersion(ctx, mdl.ID, versionID) + if err != nil { + e.logger.Error("merlin extractor", "project", prj.ID, "model", mdl.ID, "err", err) + continue + } + + versions[versionID] = version + } + } + + asset, err := modelBuilder{ + scope: e.UrnScope, + project: prj, + model: mdl, + versions: versions, + }.buildAsset() + if err != nil { + e.logger.Error( + "merlin extractor", + "project", prj.ID, + "model", mdl.ID, + "err", err, + ) + continue + } + + emit(models.NewRecord(asset)) + } + + return nil +} + +func newHTTPClient(ctx context.Context, cfg Config) (Client, error) { + params := merlin.ClientParams{ + BaseURL: cfg.URL, + 
Timeout: cfg.RequestTimeout, + } + + if len(cfg.ServiceAccountBase64) != 0 { + credsJSON, err := base64.StdEncoding.DecodeString(cfg.ServiceAccountBase64) + if err != nil { + return nil, fmt.Errorf("new Merlin client: decode base64: %w", err) + } + + params.ServiceAccountJSON = credsJSON + } + + return merlin.NewClient(ctx, params) +} + +func jobQueue(ctx context.Context, projects []merlin.Project) <-chan merlin.Project { + jobs := make(chan merlin.Project) + + go func() { + defer close(jobs) + + for _, p := range projects { + select { + case <-ctx.Done(): + return + + case jobs <- p: + } + } + }() + + return jobs +} + +func shouldRetry(err error) bool { + if errors.Is(err, context.DeadlineExceeded) { + return true + } + + var e *merlin.APIError + if errors.As(err, &e) && (e.Status >= 500 || e.Status == http.StatusTooManyRequests) { + return true + } + + return false +} diff --git a/plugins/extractors/merlin/merlin_test.go b/plugins/extractors/merlin/merlin_test.go new file mode 100644 index 000000000..846d5d320 --- /dev/null +++ b/plugins/extractors/merlin/merlin_test.go @@ -0,0 +1,225 @@ +//go:build plugins +// +build plugins + +package merlin + +import ( + "context" + "errors" + "fmt" + "testing" + "time" + + "github.com/odpf/meteor/plugins" + "github.com/odpf/meteor/plugins/extractors/merlin/internal/merlin" + intrnlmcks "github.com/odpf/meteor/plugins/extractors/merlin/internal/mocks" + "github.com/odpf/meteor/test/mocks" + testutils "github.com/odpf/meteor/test/utils" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +const ( + urnScope = "test-merlin" + credsBase64 = 
`eyJ0eXBlIjoic2VydmljZV9hY2NvdW50IiwicHJvamVjdF9pZCI6ImNvbXBhbnktZGF0YS1wbGF0Zm9ybSIsInByaXZhdGVfa2V5X2lkIjoiNjk4dnh2MzA4dzNpNjhwOTM4MDQwYno4MTdyOTViMWUwazRrbXZxcyIsInByaXZhdGVfa2V5IjoiLS0tLS1CRUdJTiBQUklWQVRFIEtFWS0tLS0tXG5NSUlFb1FJQkFBS0NBUUJWelEwV1B1YXFkd01OYXBDR0tkS1VSL01PZ1dOQnlydVQ2MFNKd2Q1bFkvMlNqeDF3XG5RNHNKNnhrLytUejdiVDNDZ05CQVBRK3JaZkxEMmZkUUpJQmVZRWxSY0h3NmEyUEEvNlRhWDJlNHFxMCs1eGszXG5nbkl0bHFabTBoUUVsWmQ3NkxObE1jSXRITm1uZUxJQ293VE9kemwwaFVkMklncnFMQjU0NXYzS09HZndvRUFwXG56M21QbS9pRjErelRQV3kwNDF3N2FqdldLMk4zbVJ5Z0tvUDc5bmUyZ0R1TjIrUUhtVzh3UEZ6UTNwZFFaVTY1XG4xbnBnUDlONHdSSFFUOHZvd1RVU1lkU1JaRzFwMU1QS0NYc3JxaE1Vdjd5TGRyT2NhY0F2Y1pxWU9jTWhKUTFwXG5iWXBzaW5EUjY1QVJEZHVNZUtvRVVrRmIzaGYyelBVY2RZTmhBZ01CQUFFQ2dnRUFRbGRPeENHVWxyOTRvN24rXG56MDJ0SGF2WUdpSWZEZkxrUUlZTHMzd3NLamM3REVRT0hneUxoL3E0eGtjL1NLUjV1VmVDTGZsSWtWMDliUU91XG5mdEFLVlc2Ym9oV1lhRTg2alRMZFUxK3JRaFR0NlpJa1pGQS9XbEoralVmbjVIZUo3bXZKc2ZmY1RLZGUvMmVLXG5OQkc2R0s0RXhieDd1Ykt1djh1bk1CSmlyeVV5Y2lvUHlrV1pFVllsNzIrMElCc0tDUU9YMzlGZC9wZ0pGOWpMXG5GUGVsZ0NzcnZQQS8zbG9kZ1F1M204VkVObHU0RzZ6M2tQUWdoQXZJMzd4QzlObFVOVnZ4MXl4Q3VrUWhmMHpRXG5RNTVrVVR3Z1o5c0lHR2NJLzJLNkgxWUh2K20zdm5NNUQ1aUw5ZVRIbjFIbmxHdHBsUUpobWhLakN4WElwYkh4XG5RVG9Pd1FLQmdRQ2NFWlA2SDNucTNlSDdkNXJvMWZ2QTZZRW9FUmZ6SXphVTRLazNTYjllMXRYallTejhjY052XG5LM2dac0hWMllaeTNxOW1DWW5jMG9Qd3d4NWRTd2h6cE9yQnJ3dnlvcFBia0twRDlXQ1h0WnRSa3dSVE43Q1hSXG5FKzJlU1NwdTJ5MTRTS3lzUFFvRFpteUpvOGJzN3JzZUxRVGlaZVVQbFlkbFA2YWRPR1NYK1FLQmdRQ012VnFFXG42bmJYNDFEY0xKdVV4VDAyNlQ5em5jbnBSdTNna2Z5WTBPNVFGOC9WY3E2eTVMeGRRdHlNTmJjYmtEWThpc0FNXG53VFA0S2FYUHVsMzhUT0NqZkczTU9ERGJ6bWVRMjdxS0wvOVVleWk4MTJCTjRYSXJwZ3VvUEtnRnRseWkxSk5IXG5aaVV0aW1lZE9vTkc0THV1REVxZU55VzFRbS9XbFF1NWZxS3dxUUtCZ0dzY3VWVzZFcCs2UnVXaXNlUEpNTzYyXG5rOWtlMmpRWjM5VVAxN05GWHgxRkR5anVRY1RFZzJBaUVseDNPamJVU1kzWldQL2VlbmZaWVJ4TmI3THgzSXZKXG5wdGxleXE4b0FQYVpyRWJrSDZ1dW5takVCM1pJODY5cUlQUTR2UEcyWlorZktUdFE3VFZtTDJuTHlMUkdLSkJPXG5UNExlY2ZaZkpyeTdrYXRuejhwcEFvR0FJMEZYeUkzM1lWTkhNVEJYZE9nSDBwYVJWNFFDVFZhQVJrNHJxWmhFXG42bmxjamNxaHFweVQ
5d1RGdkxYRC9icWRhNE1TWXQrUEJpNWdvKzI2bDNZbW02MlN6NktQMHJBY3ozUExnY3hPXG5PTHAxVlFEYTFnZVFreENRUVArWTAzMkFMU1gxRXVDcWxZTGpPOGFwbGZxNzZQaVpSSkxwOWtNRFF3eXBHRGw1XG54YWtDZ1lBbTdwTzBMQS9oVHZkclo3ekdVSWZUVFp4ZjFxRCtXMGlVaDJNdHlhWk05dVFoRG9hYWhmN2YyVFQvXG50Mit3bHlJbEhNZFV4ZkRZZjhVNW93bDlJeXNxYVBNWnNRbVlOZ1ltWHBXOC9BaE5jS0Zuc2x5cnRkNTdPZjNDXG5sRkhwTndmak5seERUc3FsMmtXYmN3SmJZMEVibFBSSXRwbEU3Z0RsVXZmZ1NOVGorZz09XG4tLS0tLUVORCBQUklWQVRFIEtFWS0tLS0tXG4iLCJjbGllbnRfZW1haWwiOiJzeXN0ZW1zLW1ldGVvckBjb21wYW55LWRhdGEtcGxhdGZvcm0uaWFtLmdzZXJ2aWNlYWNjb3VudC5jb20iLCJjbGllbnRfaWQiOiIwNDMxNjE2ODg4ODA0MzA3OTU4OTMiLCJhdXRoX3VyaSI6Imh0dHBzOi8vYWNjb3VudHMuZ29vZ2xlLmNvbS9vL29hdXRoMi9hdXRoIiwidG9rZW5fdXJpIjoiaHR0cHM6Ly9vYXV0aDIuZ29vZ2xlYXBpcy5jb20vdG9rZW4iLCJhdXRoX3Byb3ZpZGVyX3g1MDlfY2VydF91cmwiOiJodHRwczovL3d3dy5nb29nbGVhcGlzLmNvbS9vYXV0aDIvdjEvY2VydHMiLCJjbGllbnRfeDUwOV9jZXJ0X3VybCI6Imh0dHBzOi8vd3d3Lmdvb2dsZWFwaXMuY29tL3JvYm90L3YxL21ldGFkYXRhL3g1MDkvc3lzdGVtcy1tZXRlb3IlNDBjb21wYW55LWRhdGEtcGxhdGZvcm0uaWFtLmdzZXJ2aWNlYWNjb3VudC5jb20ifQ==` + hostURL = "merlin.com/api/merlin" +) + +var ctx = context.Background() + +func TestInit(t *testing.T) { + t.Run("should return error if config is invalid", func(t *testing.T) { + extr := New(testutils.Logger, func(ctx context.Context, cfg Config) (Client, error) { + return nil, errors.New("unexpected call") + }) + err := extr.Init(ctx, plugins.Config{ + URNScope: urnScope, + RawConfig: map[string]interface{}{}, + }) + + assert.ErrorAs(t, err, &plugins.InvalidConfigError{}) + }) + + t.Run("should try to create a new client if config is valid", func(t *testing.T) { + extr := New(testutils.Logger, func(ctx context.Context, cfg Config) (Client, error) { + expected := Config{ + URL: hostURL, + ServiceAccountBase64: credsBase64, + RequestTimeout: 30 * time.Second, + WorkerCount: 10, + } + assert.Equal(t, expected, cfg) + return intrnlmcks.NewMerlinClient(t), nil + }) + err := extr.Init(ctx, plugins.Config{ + URNScope: urnScope, + RawConfig: 
map[string]interface{}{ + "url": hostURL, + "service_account_base64": credsBase64, + "request_timeout": "30s", + "worker_count": 10, + }, + }) + assert.NoError(t, err) + }) + + t.Run("should return error if credentials is not a base64 string", func(t *testing.T) { + extr := New(testutils.Logger, newHTTPClient) + err := extr.Init(ctx, plugins.Config{ + URNScope: urnScope, + RawConfig: map[string]interface{}{ + "url": hostURL, + "service_account_base64": "Good Times Bad Times", + }, + }) + assert.Error(t, err) + }) +} + +func TestExtract(t *testing.T) { + cases := []struct { + name string + err error + isRetryErr bool + }{ + { + name: "ObtuseError", + err: errors.New("Mambo No. 5"), + isRetryErr: false, + }, + { + name: "ContextDeadlineExceeded", + err: fmt.Errorf("wrapped for dramatic effect: %w", context.DeadlineExceeded), + isRetryErr: true, + }, + { + name: "5xx", + err: fmt.Errorf("egg wrap: %w", &merlin.APIError{Status: 503}), + isRetryErr: true, + }, + { + name: "429", + err: fmt.Errorf("salad wrap: %w", &merlin.APIError{Status: 429}), + isRetryErr: true, + }, + } + for _, tc := range cases { + t.Run("ProjectsCallFailure/"+tc.name, func(t *testing.T) { + m := intrnlmcks.NewMerlinClient(t) + m.EXPECT().Projects(testutils.OfTypeContext()). + Return(nil, tc.err) + extr := initialisedExtr(t, m) + + err := extr.Extract(ctx, mocks.NewEmitter().Push) + assert.Error(t, err) + assert.Equal(t, tc.isRetryErr, errors.As(err, &plugins.RetryError{})) + }) + } + + t.Run("it should tolerate models, version fetch failures", func(t *testing.T) { + mc := intrnlmcks.NewMerlinClient(t) + + var projects []merlin.Project + testutils.LoadJSON(t, "testdata/mocked-projects.json", &projects) + mc.EXPECT().Projects(testutils.OfTypeContext()). 
+ Return(projects, nil) + + var models []merlin.Model + testutils.LoadJSON(t, "testdata/mocked-models-1.json", &models) + mc.EXPECT().Models(testutils.OfTypeContext(), int64(1)).Return(models, nil) + mc.EXPECT().ModelVersion(testutils.OfTypeContext(), int64(80), int64(2)). + Return(merlin.ModelVersion{}, errors.New("I Want It All")) + mc.EXPECT().ModelVersion(testutils.OfTypeContext(), int64(689), int64(7)). + Return(merlin.ModelVersion{}, errors.New("Knights In White Satin")) + + mc.EXPECT().Models(testutils.OfTypeContext(), int64(100)). + Return(nil, errors.New("Losing My Edge")) + mc.EXPECT().Models(testutils.OfTypeContext(), int64(200)). + Return(nil, errors.New("Paranoid")) + + emitter := mocks.NewEmitter() + err := initialisedExtr(t, mc).Extract(ctx, emitter.Push) + assert.NoError(t, err) + + actual := emitter.GetAllData() + assert.Empty(t, actual) + }) + + t.Run("should build models from Merlin", func(t *testing.T) { + mc := intrnlmcks.NewMerlinClient(t) + + var projects []merlin.Project + testutils.LoadJSON(t, "testdata/mocked-projects.json", &projects) + mc.EXPECT().Projects(testutils.OfTypeContext()). + Return(projects, nil) + + var models []merlin.Model + testutils.LoadJSON(t, "testdata/mocked-models-1.json", &models) + mc.EXPECT().Models(testutils.OfTypeContext(), int64(1)).Return(models, nil) + + var mv merlin.ModelVersion + testutils.LoadJSON(t, "testdata/mocked-model-version-80-2.json", &mv) + mc.EXPECT().ModelVersion(testutils.OfTypeContext(), int64(80), int64(2)). + Return(mv, nil) + + mv = merlin.ModelVersion{} + testutils.LoadJSON(t, "testdata/mocked-model-version-689-7.json", &mv) + mc.EXPECT().ModelVersion(testutils.OfTypeContext(), int64(689), int64(7)). 
+ Return(mv, nil) + + models = nil + testutils.LoadJSON(t, "testdata/mocked-models-100.json", &models) + mc.EXPECT().Models(testutils.OfTypeContext(), int64(100)).Return(models, nil) + + mv = merlin.ModelVersion{} + testutils.LoadJSON(t, "testdata/mocked-model-version-1376-47.json", &mv) + mc.EXPECT().ModelVersion(testutils.OfTypeContext(), int64(1376), int64(47)). + Return(mv, nil) + + mv = merlin.ModelVersion{} + testutils.LoadJSON(t, "testdata/mocked-model-version-284-582.json", &mv) + mc.EXPECT().ModelVersion(testutils.OfTypeContext(), int64(284), int64(582)). + Return(mv, nil) + + mc.EXPECT().Models(testutils.OfTypeContext(), int64(200)).Return(nil, nil) + + emitter := mocks.NewEmitter() + err := initialisedExtr(t, mc).Extract(ctx, emitter.Push) + assert.NoError(t, err) + + actual := emitter.GetAllData() + testutils.AssertProtosWithJSONFile(t, "testdata/expected-assets.json", actual) + }) + + t.Run("it should recover from panics in workers", func(t *testing.T) { + mc := intrnlmcks.NewMerlinClient(t) + + var projects []merlin.Project + testutils.LoadJSON(t, "testdata/mocked-projects.json", &projects) + mc.EXPECT().Projects(testutils.OfTypeContext()). + Return(projects, nil) + + panicErr := errors.New("Starman") + mc.EXPECT().Models(testutils.OfTypeContext(), int64(1)). 
+ Run(func(ctx context.Context, projectID int64) { + panic(panicErr) + }) + + err := initialisedExtr(t, mc).Extract(ctx, mocks.NewEmitter().Push) + assert.ErrorIs(t, err, panicErr) + }) +} + +func initialisedExtr(t *testing.T, m *intrnlmcks.MerlinClient) *Extractor { + extr := New(testutils.Logger, func(context.Context, Config) (Client, error) { + return m, nil + }) + require.NoError(t, extr.Init(ctx, plugins.Config{ + URNScope: urnScope, + RawConfig: map[string]interface{}{ + "url": hostURL, + "worker_count": 1, + }, + })) + return extr +}
diff --git a/plugins/extractors/merlin/model_asset_builder.go b/plugins/extractors/merlin/model_asset_builder.go
new file mode 100644
index 000000000..12e392fe6
--- /dev/null
+++ b/plugins/extractors/merlin/model_asset_builder.go
@@ -0,0 +1,312 @@
+package merlin
+
+import (
+	"encoding/json"
+	"fmt"
+	"sort"
+	"strconv"
+
+	"github.com/odpf/meteor/models"
+	v1beta2 "github.com/odpf/meteor/models/odpf/assets/v1beta2"
+	"github.com/odpf/meteor/plugins"
+	"github.com/odpf/meteor/plugins/extractors/merlin/internal/merlin"
+	"google.golang.org/protobuf/types/known/anypb"
+	"google.golang.org/protobuf/types/known/structpb"
+	"google.golang.org/protobuf/types/known/timestamppb"
+)
+
+// Service name and asset type stamped on every asset and URN built in this file.
+const (
+	service = "merlin"
+	typ     = "model"
+)
+
+// modelBuilder assembles a single asset from a Merlin model, the project it
+// belongs to, and a lookup of model versions keyed by version ID.
+type modelBuilder struct {
+	scope    string
+	project  merlin.Project
+	model    merlin.Model
+	versions map[int64]merlin.ModelVersion
+}
+
+// buildAsset converts the builder's model into an asset: the model metadata is
+// packed into Data, and URL, owners, lineage and labels are filled in alongside.
+// Any failing step is wrapped with the model and project IDs.
+func (b modelBuilder) buildAsset() (*v1beta2.Asset, error) {
+	fail := func(step string, err error) (*v1beta2.Asset, error) {
+		return nil, fmt.Errorf(
+			"build %s for model '%d' in project '%d': %w",
+			step, b.model.ID, b.project.ID, err,
+		)
+	}
+
+	versions, err := b.buildVersions()
+	if err != nil {
+		return fail("versions", err)
+	}
+
+	urls := b.buildEndpointURLs()
+
+	model, err := anypb.New(&v1beta2.Model{
+		Namespace: b.project.Name,
+		Flavor:    b.model.Type,
+		Versions:  versions,
+		Attributes: &structpb.Struct{
+			Fields: map[string]*structpb.Value{
+				"project_id":            intToValue(b.project.ID),
+				"mlflow_experiment_id":  intToValue(b.model.MlflowExperimentID),
+				"mlflow_experiment_url": structpb.NewStringValue(b.model.MlflowURL),
+				"endpoint_urls":         stringSliceToValue(urls),
+			},
+		},
+		CreateTime: timestamppb.New(b.model.CreatedAt),
+		UpdateTime: timestamppb.New(b.model.UpdatedAt),
+	})
+	if err != nil {
+		return fail("encode model metadata", err)
+	}
+
+	lineage, err := b.buildLineage()
+	if err != nil {
+		return fail("lineage", err)
+	}
+
+	// A model without endpoints has no URL to advertise; leave it empty
+	// rather than indexing into an empty slice, which would panic.
+	var url string
+	if len(urls) > 0 {
+		url = urls[0]
+	}
+
+	return &v1beta2.Asset{
+		Urn:     models.NewURN(service, b.scope, typ, fmt.Sprintf("%d.%d", b.project.ID, b.model.ID)),
+		Name:    b.model.Name,
+		Service: service,
+		Type:    typ,
+		Url:     url,
+		Data:    model,
+		Owners:  b.buildOwners(),
+		Lineage: lineage,
+		Labels:  b.buildLabels(),
+	}, nil
+}
+
+// buildLineage wraps the model's upstream feature tables in a Lineage message.
+func (b modelBuilder) buildLineage() (*v1beta2.Lineage, error) {
+	upstreams, err := b.buildUpstreams()
+	if err != nil {
+		return nil, fmt.Errorf("build upstreams: %w", err)
+	}
+
+	return &v1beta2.Lineage{Upstreams: upstreams}, nil
+}
+
+// featureTable is the subset of a feature table spec that is needed for lineage.
+// Based on https://github.com/gojek/merlin/blob/v0.24.0/api/pkg/transformer/spec/feast.pb.go#L350
+type featureTable struct {
+	Name    string `json:"name"`
+	Project string `json:"project"`
+}
+
+// buildUpstreams collects the distinct feature tables declared by the
+// transformers of every version endpoint the model routes to and returns them
+// as caramlstore resources sorted by URN.
+func (b modelBuilder) buildUpstreams() ([]*v1beta2.Resource, error) {
+	fts := make(map[featureTable]struct{})
+	for _, endpoint := range b.model.Endpoints {
+		for _, dest := range endpoint.Rule.Destinations {
+			if dest.VersionEndpoint == nil {
+				continue
+			}
+
+			specs, err := decodeFeatureTableSpecs(dest.VersionEndpoint.Transformer)
+			if err != nil {
+				return nil, err
+			}
+
+			for _, f := range specs {
+				fts[f] = struct{}{}
+			}
+		}
+	}
+
+	var upstreams []*v1beta2.Resource
+	for ft := range fts {
+		upstreams = append(upstreams, &v1beta2.Resource{
+			Urn:     plugins.CaraMLStoreURN(b.scope, ft.Project, ft.Name),
+			Service: "caramlstore",
+			Type:    "feature_table",
+		})
+	}
+	// For testability, we need a deterministic output. So sort the upstreams
+	sort.Slice(upstreams, func(i, j int) bool {
+		return upstreams[i].Urn < upstreams[j].Urn
+	})
+	return upstreams, nil
+}
+
+// buildVersions emits one ModelVersion per endpoint destination that carries a
+// version endpoint. It fails if a referenced version ID is missing from
+// b.versions.
+func (b modelBuilder) buildVersions() ([]*v1beta2.ModelVersion, error) {
+	var versions []*v1beta2.ModelVersion
+	for _, endpoint := range b.model.Endpoints {
+		for _, dest := range endpoint.Rule.Destinations {
+			vEp := dest.VersionEndpoint
+			if vEp == nil {
+				continue
+			}
+
+			mdlv, ok := b.versions[vEp.VersionID]
+			if !ok {
+				return nil, fmt.Errorf("model version not found: %d", vEp.VersionID)
+			}
+
+			versions = append(versions, &v1beta2.ModelVersion{
+				Status:  vEp.Status,
+				Version: strconv.FormatInt(mdlv.ID, 10),
+				Attributes: &structpb.Struct{
+					Fields: map[string]*structpb.Value{
+						"endpoint_id":          intToValue(endpoint.ID),
+						"mlflow_run_id":        structpb.NewStringValue(mdlv.MlflowRunID),
+						"mlflow_run_url":       structpb.NewStringValue(mdlv.MlflowURL),
+						"endpoint_url":         structpb.NewStringValue(endpoint.URL),
+						"version_endpoint_url": structpb.NewStringValue(vEp.URL),
+						"monitoring_url":       structpb.NewStringValue(vEp.MonitoringURL),
+						"message":              structpb.NewStringValue(vEp.Message),
+						"environment_name":     structpb.NewStringValue(endpoint.EnvironmentName),
+						"deployment_mode":      structpb.NewStringValue(vEp.DeploymentMode),
+						"service_name":         structpb.NewStringValue(vEp.ServiceName),
+						"env_vars":             envVarsToValue(vEp.EnvVars),
+						"transformer":          transformerAttrs(vEp.Transformer),
+						"weight":               intToValue(dest.Weight),
+					},
+				},
+				Labels:     mdlv.Labels,
+				CreateTime: timestamppb.New(mdlv.CreatedAt),
+				UpdateTime: timestamppb.New(mdlv.UpdatedAt),
+			})
+		}
+	}
+
+	return versions, nil
+}
+
+// buildEndpointURLs lists the URL of every model endpoint, in endpoint order.
+// The result is nil when the model has no endpoints.
+func (b modelBuilder) buildEndpointURLs() []string {
+	var urls []string
+	for _, endpoint := range b.model.Endpoints {
+		urls = append(urls, endpoint.URL)
+	}
+	return urls
+}
+
+// buildOwners maps project administrators to owners, dropping repeats while
+// keeping first-seen order. The administrator string is used as URN and email.
+func (b modelBuilder) buildOwners() []*v1beta2.Owner {
+	var owners []*v1beta2.Owner
+	emails := make(map[string]struct{}, len(b.project.Administrators))
+	for _, admin := range b.project.Administrators {
+		if _, ok := emails[admin]; ok {
+			continue
+		}
+
+		owners = append(owners, &v1beta2.Owner{
+			Urn:   admin,
+			Email: admin,
+		})
+		emails[admin] = struct{}{}
+	}
+	return owners
+}
+
+// buildLabels merges the project's team and stream with its custom labels; a
+// custom label keyed "team" or "stream" overrides the built-in value.
+func (b modelBuilder) buildLabels() map[string]string {
+	labels := map[string]string{
+		"team":   b.project.Team,
+		"stream": b.project.Stream,
+	}
+	for _, l := range b.project.Labels {
+		labels[l.Key] = l.Value
+	}
+
+	return labels
+}
+
+// decodeFeatureTableSpecs parses the FEAST_FEATURE_TABLE_SPECS_JSONS env var of
+// an enabled "standard" transformer. It returns nil when the transformer does
+// not qualify or the variable is absent; only the first occurrence is read.
+func decodeFeatureTableSpecs(tr merlin.Transformer) ([]featureTable, error) {
+	if !tr.Enabled || tr.TransformerType != "standard" {
+		return nil, nil
+	}
+
+	for _, envvar := range tr.EnvVars {
+		if envvar.Name != "FEAST_FEATURE_TABLE_SPECS_JSONS" {
+			continue
+		}
+
+		var specs []featureTable
+		if err := json.Unmarshal(([]byte)(envvar.Value), &specs); err != nil {
+			return nil, fmt.Errorf("decode FEAST_FEATURE_TABLE_SPECS_JSONS %w", err)
+		}
+
+		return specs, nil
+	}
+
+	return nil, nil
+}
+
+// transformerAttrs renders a transformer as a struct value. A disabled
+// transformer yields only the "enabled" field; command and args are included
+// only when a command is set.
+func transformerAttrs(tr merlin.Transformer) *structpb.Value {
+	attrs := map[string]*structpb.Value{
+		"enabled": structpb.NewBoolValue(tr.Enabled),
+	}
+	if !tr.Enabled {
+		return structpb.NewStructValue(&structpb.Struct{Fields: attrs})
+	}
+
+	attrs["type"] = structpb.NewStringValue(tr.TransformerType)
+	attrs["image"] = structpb.NewStringValue(tr.Image)
+	if tr.Command != "" {
+		attrs["command"] = structpb.NewStringValue(tr.Command)
+		attrs["args"] = structpb.NewStringValue(tr.Args)
+	}
+
+	attrs["env_vars"] = envVarsToValue(tr.EnvVars)
+
+	return structpb.NewStructValue(&structpb.Struct{Fields: attrs})
+}
+
+// stringSliceToValue converts urls into a list value of string values.
+func stringSliceToValue(urls []string) *structpb.Value {
+	var l structpb.ListValue
+	for _, u := range urls {
+		l.Values = append(l.Values, structpb.NewStringValue(u))
+	}
+
+	return structpb.NewListValue(&l)
+}
+
+// intToValue converts n to a number value by way of float64.
+func intToValue(n int64) *structpb.Value { return structpb.NewNumberValue((float64)(n)) }
+
+// envVarsToValue turns env vars into a name-to-value struct, or a null value
+// when there are none. Later duplicates of a name overwrite earlier ones.
+func envVarsToValue(vars []merlin.EnvVar) *structpb.Value {
+	if len(vars) == 0 {
+		return structpb.NewNullValue()
+	}
+
+	attrs := make(map[string]*structpb.Value, len(vars))
+	for _, envvar := range vars {
+		attrs[envvar.Name] = structpb.NewStringValue(envvar.Value)
+	}
+
+	return structpb.NewStructValue(&structpb.Struct{Fields: attrs})
+}
diff --git a/plugins/extractors/merlin/testdata/expected-assets.json b/plugins/extractors/merlin/testdata/expected-assets.json new file mode 100644 index 000000000..11c55e90a --- /dev/null +++ b/plugins/extractors/merlin/testdata/expected-assets.json @@ -0,0 +1,320 @@ +[ + { + "data": { + "@type": "type.googleapis.com/odpf.assets.v1beta2.Model", + "attributes": { + "endpoint_urls": ["resource-request.sample.models.company.com"], + "mlflow_experiment_id": 83, + "mlflow_experiment_url": "http://mlflow.company.com/#/experiments/83", + "project_id": 1 + }, + "create_time": "2020-04-23T10:07:40.105711Z", + "flavor": "sklearn", + "namespace": "one-piece", + "update_time": "2022-06-14T07:12:09.536419Z", + "versions": [ + { + "attributes": { + "deployment_mode": "", + "endpoint_id": 450, + "endpoint_url": "resource-request.sample.models.company.com", + "env_vars": null, + "environment_name": "staging", + "message": "", + "mlflow_run_id": "061ew38v3b7kp088s9b49kzr68v5ixvm3", + "mlflow_run_url": "http://mlflow.company.com/#/experiments/83/runs/061ew38v3b7kp088s9b49kzr68v5ixvm3", + "monitoring_url": "", + "service_name": "resource-request-2-predictor-default.sample.models.company.com", + "transformer": {"enabled": false}, + "version_endpoint_url": "http://resource-request-2.sample.models.company.com/v1/models/resource-request-2", + "weight": 100 + }, + "create_time": "2022-05-30T07:43:18.120248Z", + "status": "running", + "update_time": "2022-05-30T07:43:18.120248Z", + "version": "2" + } + ] + }, + "labels": {"stream": "roger", "team": "pirates"}, + "lineage": {}, + "name": "resource-request", + "owners": [ + { + "email": "s-ds-gitlab-runner@company-staging.iam.gserviceaccount.com", + "urn": "s-ds-gitlab-runner@company-staging.iam.gserviceaccount.com" + },
+ { + "email": "gol.d.roger@onepiece.com", + "urn": "gol.d.roger@onepiece.com" + } + ], + "service": "merlin", + "type": "model", + "url": "resource-request.sample.models.company.com", + "urn": "urn:merlin:test-merlin:model:1.80" + }, + { + "data": { + "@type": "type.googleapis.com/odpf.assets.v1beta2.Model", + "attributes": { + "endpoint_urls": ["pyfunc-standard-transfo.sample.models.company.com"], + "mlflow_experiment_id": 692, + "mlflow_experiment_url": "http://mlflow.company.com/#/experiments/692", + "project_id": 1 + }, + "create_time": "2021-02-04T13:05:31.593956Z", + "flavor": "pyfunc", + "namespace": "one-piece", + "update_time": "2021-07-22T08:13:34.642949Z", + "versions": [ + { + "attributes": { + "deployment_mode": "", + "endpoint_id": 174, + "endpoint_url": "pyfunc-standard-transfo.sample.models.company.com", + "env_vars": { + "MODEL_DIR": "gs://mlp/mlflow/692/061ew38v3b7kp088s9b49kzr68v5ixvm3/artifacts/model", + "MODEL_NAME": "pyfunc-standard-transfo-7", + "WORKERS": "1" + }, + "environment_name": "staging", + "message": "", + "mlflow_run_id": "061ew38v3b7kp088s9b49kzr68v5ixvm3", + "mlflow_run_url": "http://mlflow.company.com/#/experiments/692/runs/061ew38v3b7kp088s9b49kzr68v5ixvm3", + "monitoring_url": "", + "service_name": "pyfunc-standard-transfo-7-predictor-default.sample.models.company.com", + "transformer": { + "enabled": true, + "env_vars": { + "FEAST_FEATURE_STATUS_MONITORING_ENABLED": "true", + "FEAST_FEATURE_VALUE_MONITORING_ENABLED": "true", + "LOG_LEVEL": "DEBUG", + "STANDARD_TRANSFORMER_CONFIG": "{\"transformerConfig\":{\"feast\":[{\"project\":\"default\",\"entities\":[{\"name\":\"merchant_id\",\"valueType\":\"STRING\",\"jsonPath\":\"$.merchants[*].id\"}],\"features\":[{\"name\":\"merchant_t1_discovery:t1_estimate\",\"valueType\":\"DOUBLE\",\"defaultValue\":\"0\"}]}]}}" + }, + "image": "asia.gcr.io/company-staging/merlin-transformer:v0.10.0-rc2", + "type": "standard" + }, + "version_endpoint_url": 
"http://pyfunc-standard-transfo-7.sample.models.company.com/v1/models/pyfunc-standard-transfo-7", + "weight": 100 + }, + "create_time": "2021-02-05T05:26:38.212696Z", + "status": "running", + "update_time": "2021-02-05T05:26:38.212696Z", + "version": "7" + } + ] + }, + "labels": {"stream": "roger", "team": "pirates"}, + "lineage": {}, + "name": "pyfunc-standard-transfo", + "owners": [ + { + "email": "s-ds-gitlab-runner@company-staging.iam.gserviceaccount.com", + "urn": "s-ds-gitlab-runner@company-staging.iam.gserviceaccount.com" + }, + { + "email": "gol.d.roger@onepiece.com", + "urn": "gol.d.roger@onepiece.com" + } + ], + "service": "merlin", + "type": "model", + "url": "pyfunc-standard-transfo.sample.models.company.com", + "urn": "urn:merlin:test-merlin:model:1.689" + }, + { + "data": { + "@type": "type.googleapis.com/odpf.assets.v1beta2.Model", + "attributes": { + "endpoint_urls": [ + "inca-restaurant-image.inca-restaurant.models.company.com", + "inca-restaurant-image.inca-restaurant.models.company.com" + ], + "mlflow_experiment_id": 1466, + "mlflow_experiment_url": "http://mlflow.company.com/#/experiments/1466", + "project_id": 100 + }, + "create_time": "2022-05-25T04:57:24.972027Z", + "flavor": "pyfunc", + "namespace": "strongest-man", + "update_time": "2022-11-10T06:13:02.181028Z", + "versions": [ + { + "attributes": { + "deployment_mode": "serverless", + "endpoint_id": 569, + "endpoint_url": "inca-restaurant-image.inca-restaurant.models.company.com", + "env_vars": null, + "environment_name": "staging", + "message": "", + "mlflow_run_id": "f190512242584707b79831ee3d5e406c", + "mlflow_run_url": "http://mlflow.company.com/#/experiments/1466/runs/f190512242584707b79831ee3d5e406c", + "monitoring_url": "", + "service_name": "inca-restaurant-image-47.inca-restaurant.models.company.com", + "transformer": {"enabled": false}, + "version_endpoint_url": "http://inca-restaurant-image-47.inca-restaurant.models.company.com/v1/models/inca-restaurant-image-47", + "weight": 
100 + }, + "create_time": "2022-10-28T06:11:52.148118Z", + "labels": {"env": "staging", "ver": "2.1.0"}, + "status": "running", + "update_time": "2022-10-28T06:11:52.148118Z", + "version": "47" + }, + { + "attributes": { + "deployment_mode": "serverless", + "endpoint_id": 443, + "endpoint_url": "inca-restaurant-image.inca-restaurant.models.company.com", + "env_vars": null, + "environment_name": "global-staging", + "message": "", + "mlflow_run_id": "f190512242584707b79831ee3d5e406c", + "mlflow_run_url": "http://mlflow.company.com/#/experiments/1466/runs/f190512242584707b79831ee3d5e406c", + "monitoring_url": "", + "service_name": "inca-restaurant-image-47.inca-restaurant.models.company.com", + "transformer": {"enabled": false}, + "version_endpoint_url": "http://inca-restaurant-image-47.inca-restaurant.models.company.com/v1/models/inca-restaurant-image-47", + "weight": 100 + }, + "create_time": "2022-10-28T06:11:52.148118Z", + "labels": {"env": "staging", "ver": "2.1.0"}, + "status": "running", + "update_time": "2022-10-28T06:11:52.148118Z", + "version": "47" + } + ] + }, + "labels": {"stream": "whitebeard", "team": "pirates"}, + "lineage": {}, + "name": "inca-restaurant-image", + "owners": [ + { + "email": "edward.newgate@onepiece.com", + "urn": "edward.newgate@onepiece.com" + } + ], + "service": "merlin", + "type": "model", + "url": "inca-restaurant-image.inca-restaurant.models.company.com", + "urn": "urn:merlin:test-merlin:model:100.1376" + }, + { + "data": { + "@type": "type.googleapis.com/odpf.assets.v1beta2.Model", + "attributes": { + "endpoint_urls": ["id-car-b-default.jaeger.models.company.com"], + "mlflow_experiment_id": 287, + "mlflow_experiment_url": "http://mlflow.company.com/#/experiments/287", + "project_id": 100 + }, + "create_time": "2020-06-23T14:40:23.168186Z", + "flavor": "pyfunc", + "namespace": "strongest-man", + "update_time": "2022-11-09T15:34:07.647932Z", + "versions": [ + { + "attributes": { + "deployment_mode": "serverless", + "endpoint_id": 
63, + "endpoint_url": "id-car-b-default.jaeger.models.company.com", + "env_vars": { + "COUNTRY": "id", + "MODEL_VERSION": "B", + "PEAK_HOURS": "6,7,8,16,17,18,19", + "SERVICE_TYPE": "car", + "WORKERS": "1" + }, + "environment_name": "staging", + "message": "", + "mlflow_run_id": "543f674167704d94bbc03025e50b78b7", + "mlflow_run_url": "http://mlflow.company.com/#/experiments/287/runs/543f674167704d94bbc03025e50b78b7", + "monitoring_url": "", + "service_name": "id-car-b-default-582.jaeger.models.company.com", + "transformer": { + "enabled": true, + "env_vars": { + "APP_NAME": "jaeger-cancellation-id-car-b", + "COUNTRY": "id", + "FEAST_BATCH_SIZE": "90", + "FEAST_CACHE_DURATION_SECOND": "60", + "FEAST_CACHE_ENABLED": "True", + "FEAST_FEATURE_CONFIG_PATH": "/data/features.json", + "FEAST_FEATURE_STATUS_MONITORING_ENABLED": "True", + "FEAST_FEATURE_TABLE_SPECS_JSONS": "[{\"name\":\"jaeger_car_origin_geohash_acceptance\",\"project\":\"jaeger\",\"maxAge\":\"3455990s\",\"entities\":[\"origin_geohash_7\"]},{\"name\":\"jaeger_car_destination_geohash_acceptance\",\"project\":\"jaeger\",\"maxAge\":\"3455990s\",\"entities\":[\"destination_geohash_7\"]},{\"name\":\"jaeger_car_standing_geohash_acceptance\",\"project\":\"jaeger\",\"maxAge\":\"3455990s\",\"entities\":[\"standing_geohash_7\"]},{\"name\":\"jaeger_car_driver_acceptance\",\"project\":\"jaeger\",\"maxAge\":\"3455990s\",\"entities\":[\"driver_id\"]},{\"name\":\"jaeger_car_customer_acceptance\",\"project\":\"jaeger\",\"maxAge\":\"3455990s\",\"entities\":[\"customer_id\"]}]", + "FEAST_FEATURE_VALUE_MONITORING_ENABLED": "False", + "FEAST_PROJECT": "jaeger", + "FEAST_SERVING_HOST": "online-serving.s.feast.company.com", + "FEAST_SERVING_PORT": "15010", + "GOOGLE_CLOUD_PROJECT": "staging", + "INIT_HEAP_SIZE_IN_MB": "300", + "JAEGER_DISABLED": "False", + "JAEGER_SAMPLER_PARAM": "0.01", + "JAEGER_SAMPLER_TYPE": "probabilistic", + "MLFLOW_DOWNLOAD_PATH": "/data", + "MLFLOW_RUN_ID": "543f674167704d94bbc03025e50b78b7", + 
"MLFLOW_TRACKING_URI": "http://jaeger.mlflow.global.company.com/", + "MODEL_VERSION": "B", + "ORDER_FEATURE_CONFIG_PATH": "/data/order_features.json", + "PEAK_HOURS": "6,7,8,16,17,18,19", + "PUBLIC_HOLIDAY_FILE": "/data/public_holidays_feast_0_9.csv", + "SERVICE_TYPE": "car", + "STANDARD_TRANSFORMER_CONFIG": "{\"transformerConfig\":{\"preprocess\":{\"inputs\":[{\"tables\":[{\"name\":\"customer_order\",\"columns\":[{\"name\":\"order_id\",\"fromJson\":{\"jsonPath\":\"$.order_id\"}},{\"name\":\"service_area_id\",\"fromJson\":{\"jsonPath\":\"$.service_area_id\"}},{\"name\":\"order_time\",\"fromJson\":{\"jsonPath\":\"$.order_time\"}},{\"name\":\"start_latitude\",\"fromJson\":{\"jsonPath\":\"$.bid.from.latitude\"}},{\"name\":\"start_longitude\",\"fromJson\":{\"jsonPath\":\"$.bid.from.longitude\"}},{\"name\":\"end_latitude\",\"fromJson\":{\"jsonPath\":\"$.bid.to.latitude\"}},{\"name\":\"end_longitude\",\"fromJson\":{\"jsonPath\":\"$.bid.to.longitude\"}}]},{\"name\":\"driver_order\",\"baseTable\":{\"fromJson\":{\"jsonPath\":\"$.drivers\"}},\"columns\":[{\"name\":\"order_time\",\"fromJson\":{\"jsonPath\":\"$.order_time\"}},{\"name\":\"start_latitude\",\"fromJson\":{\"jsonPath\":\"$.bid.from.latitude\"}},{\"name\":\"start_longitude\",\"fromJson\":{\"jsonPath\":\"$.bid.from.longitude\"}},{\"name\":\"driver_latitude\",\"fromJson\":{\"jsonPath\":\"$.drivers[*].driver_location.latitude\"}},{\"name\":\"driver_longitude\",\"fromJson\":{\"jsonPath\":\"$.drivers[*].driver_location.longitude\"}},{\"name\":\"driver_elevation\",\"fromJson\":{\"jsonPath\":\"$.drivers[*].driver_location.elevation\"}},{\"name\":\"driver_gps_accuracy\",\"fromJson\":{\"jsonPath\":\"$.drivers[*].driver_location.gps_accuracy\"}},{\"name\":\"standing_geohash_7\",\"expression\":\"Geohash(\\\"$.drivers[*].driver_location.latitude\\\", \\\"$.drivers[*].driver_location.longitude\\\", 
7)\"}]},{\"name\":\"first_customer_location\",\"baseTable\":{\"fromJson\":{\"jsonPath\":\"$.drivers[*].first_customer_drop_off_location\"}},\"columns\":[{\"name\":\"driver_id\",\"fromJson\":{\"jsonPath\":\"$.drivers[*].driver_id\"}}]}]},{\"feast\":[{\"project\":\"jaeger\",\"entities\":[{\"name\":\"origin_geohash_7\",\"valueType\":\"STRING\",\"udf\":\"Geohash(\\\"$.bid.from.latitude\\\", \\\"$.bid.from.longitude\\\", 7)\"}],\"features\":[{\"name\":\"jaeger_car_origin_geohash_acceptance:origin_geohash_7_num_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_origin_geohash_acceptance:origin_geohash_7_prop_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_origin_geohash_acceptance:origin_geohash_7_avg_distance_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_origin_geohash_acceptance:origin_geohash_7_avg_polar_angle_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_origin_geohash_acceptance:origin_geohash_7_avg_customer_distance_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_origin_geohash_acceptance:origin_geohash_7_avg_distance_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_origin_geohash_acceptance:origin_geohash_7_avg_polar_angle_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_origin_geohash_acceptance:origin_geohash_7_avg_customer_distance_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"}],\"tableName\":\"origin_geohash_7\",\"source\":\"BIGTABLE\"},{\"project\":\"jaeger\",\"entities\":[{\"name\":\"destination_geohash_7\",\"valueType\":\"STRING\",\"udf\":\"Geohash(\\\"$.bid.to.latitude\\\", \\\"$.bid.to.longitude\\\", 
7)\"}],\"features\":[{\"name\":\"jaeger_car_destination_geohash_acceptance:destination_geohash_7_num_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_destination_geohash_acceptance:destination_geohash_7_prop_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_destination_geohash_acceptance:destination_geohash_7_avg_distance_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_destination_geohash_acceptance:destination_geohash_7_avg_distance_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"}],\"tableName\":\"destination_geohash_7\",\"source\":\"BIGTABLE\"},{\"project\":\"jaeger\",\"entities\":[{\"name\":\"standing_geohash_7\",\"valueType\":\"STRING\",\"udf\":\"Geohash(\\\"$.drivers.driver_location.latitude\\\", \\\"$.drivers.driver_location.longitude\\\", 7)\"}],\"features\":[{\"name\":\"jaeger_car_standing_geohash_acceptance:standing_geohash_7_num_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_standing_geohash_acceptance:standing_geohash_7_prop_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_standing_geohash_acceptance:standing_geohash_7_avg_polar_angle_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_standing_geohash_acceptance:standing_geohash_7_avg_customer_distance_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_standing_geohash_acceptance:standing_geohash_7_avg_polar_angle_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_standing_geohash_acceptance:standing_geohash_7_avg_customer_distance_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"}],\"tableName\":\"standing_geohash_7\",\"source\":\"BIGTABLE\"},{\"project\":\"jaeger\",\"entities\":[{\"name\":\"driver_id\",\"valueType\":\"STRING\",\"jsonPath\":\"$.drivers.driver_id\"}],\"features\":[{\"name\":\"jaeger_car_driver_accept
ance:driver_id_num_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_prop_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_avg_distance_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_avg_customer_distance_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_avg_distance_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_avg_customer_distance_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_standing_accepted_1\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_standing_accepted_2\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_standing_accepted_3\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_standing_rej_ignore_1\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_standing_rej_ignore_2\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_standing_rej_ignore_3\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_origin_accepted_1\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_origin_accepted_2\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_origin_accepted_3\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_origin_rej_ignore_1\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_origin_rej_ignor
e_2\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_origin_rej_ignore_3\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_destination_accepted_1\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_destination_accepted_2\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_destination_accepted_3\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_destination_rej_ignore_1\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_destination_rej_ignore_2\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_destination_rej_ignore_3\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_donut_count_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_donut_count_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_num_accepted_recent\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_prop_accepted_recent\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_avg_distance_accepted_recent\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_avg_customer_distance_accepted_recent\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_avg_distance_rej_ignore_recent\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_avg_customer_distance_rej_ignore_recent\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_drive
r_acceptance:driver_id_donut_count_accepted_recent\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_donut_count_rej_ignore_recent\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"}],\"tableName\":\"driver_id\",\"source\":\"BIGTABLE\"},{\"project\":\"jaeger\",\"entities\":[{\"name\":\"customer_id\",\"valueType\":\"STRING\",\"jsonPath\":\"$.customer_id\"}],\"features\":[{\"name\":\"jaeger_car_customer_acceptance:customer_id_num_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_prop_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_avg_distance_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_avg_customer_distance_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_avg_distance_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_avg_customer_distance_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_origin_accepted_1\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_origin_accepted_2\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_origin_accepted_3\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_origin_rej_ignore_1\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_origin_rej_ignore_2\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_origin_rej_ignore_3\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_i
d_destination_accepted_1\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_destination_accepted_2\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_destination_accepted_3\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_destination_rej_ignore_1\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_destination_rej_ignore_2\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_destination_rej_ignore_3\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"}],\"tableName\":\"customer_id\",\"source\":\"BIGTABLE\"}]}],\"transformations\":[{\"tableJoin\":{\"leftTable\":\"driver_order\",\"rightTable\":\"standing_geohash_7\",\"outputTable\":\"driver_tb1\",\"how\":\"LEFT\",\"onColumn\":\"standing_geohash_7\"}},{\"tableJoin\":{\"leftTable\":\"driver_tb1\",\"rightTable\":\"driver_id\",\"outputTable\":\"driver_tb2\",\"how\":\"LEFT\",\"onColumn\":\"driver_id\"}},{\"tableJoin\":{\"leftTable\":\"driver_tb2\",\"rightTable\":\"first_customer_location\",\"outputTable\":\"driver_tb3\",\"how\":\"LEFT\",\"onColumn\":\"driver_id\"}},{\"tableJoin\":{\"leftTable\":\"origin_geohash_7\",\"rightTable\":\"destination_geohash_7\",\"outputTable\":\"customer_feast_tb1\",\"how\":\"CONCAT\"}},{\"tableJoin\":{\"leftTable\":\"customer_feast_tb1\",\"rightTable\":\"customer_id\",\"outputTable\":\"customer_feast_tb2\",\"how\":\"CONCAT\"}},{\"tableTransformation\":{\"inputTable\":\"driver_tb3\",\"outputTable\":\"driver_tb3\",\"steps\":[{\"updateColumns\":[{\"column\":\"customer_distance\",\"expression\":\"map(HaversineDistance(driver_tb3.Col('driver_latitude'), driver_tb3.Col('driver_longitude'), driver_tb3.Col('start_latitude'), driver_tb3.Col('start_longitude')), {# * 
1000})\"},{\"column\":\"polar_angle\",\"expression\":\"PolarAngle(driver_tb3.Col('driver_latitude'), driver_tb3.Col('driver_longitude'),driver_tb3.Col('start_latitude'), driver_tb3.Col('start_longitude'))\"}]}]}}],\"outputs\":[{\"jsonOutput\":{\"jsonTemplate\":{\"fields\":[{\"fieldName\":\"driver_features\",\"fromTable\":{\"tableName\":\"driver_tb3\",\"format\":\"RECORD\"}},{\"fieldName\":\"customer_features\",\"fromTable\":{\"tableName\":\"customer_order\",\"format\":\"RECORD\"}},{\"fieldName\":\"customer_feast_features\",\"fromTable\":{\"tableName\":\"customer_feast_tb2\",\"format\":\"RECORD\"}},{\"fieldName\":\"bid\",\"fromJson\":{\"jsonPath\":\"$.bid\"}},{\"fieldName\":\"order_details\",\"expression\":\"JsonExtract(\\\"$.details\\\", \\\"$\\\")\"}]}}}]},\"postprocess\":{}}}" + }, + "image": "asia.gcr.io/staging/merlin-transformer:0.24.0-rc3", + "type": "standard" + }, + "version_endpoint_url": "http://id-car-b-default-582.jaeger.models.company.com/v1/models/id-car-b-default-582", + "weight": 100 + }, + "create_time": "2022-11-09T15:23:26.716082Z", + "status": "running", + "update_time": "2022-11-09T15:23:26.716082Z", + "version": "582" + } + ] + }, + "labels": {"stream": "whitebeard", "team": "pirates"}, + "lineage": { + "upstreams": [ + { + "service": "caramlstore", + "type": "feature_table", + "urn": "urn:caramlstore:test-merlin:feature_table:jaeger.jaeger_car_customer_acceptance" + }, + { + "service": "caramlstore", + "type": "feature_table", + "urn": "urn:caramlstore:test-merlin:feature_table:jaeger.jaeger_car_destination_geohash_acceptance" + }, + { + "service": "caramlstore", + "type": "feature_table", + "urn": "urn:caramlstore:test-merlin:feature_table:jaeger.jaeger_car_driver_acceptance" + }, + { + "service": "caramlstore", + "type": "feature_table", + "urn": "urn:caramlstore:test-merlin:feature_table:jaeger.jaeger_car_origin_geohash_acceptance" + }, + { + "service": "caramlstore", + "type": "feature_table", + "urn": 
"urn:caramlstore:test-merlin:feature_table:jaeger.jaeger_car_standing_geohash_acceptance" + } + ] + }, + "name": "id-car-b-default", + "owners": [ + { + "email": "edward.newgate@onepiece.com", + "urn": "edward.newgate@onepiece.com" + } + ], + "service": "merlin", + "type": "model", + "url": "id-car-b-default.jaeger.models.company.com", + "urn": "urn:merlin:test-merlin:model:100.284" + } +] diff --git a/plugins/extractors/merlin/testdata/mocked-model-version-1376-47.json b/plugins/extractors/merlin/testdata/mocked-model-version-1376-47.json new file mode 100644 index 000000000..0f5aedbcd --- /dev/null +++ b/plugins/extractors/merlin/testdata/mocked-model-version-1376-47.json @@ -0,0 +1,172 @@ +{ + "id": 47, + "model_id": 1376, + "model": { + "id": 1376, + "name": "inca-restaurant-image", + "project_id": 100, + "mlflow_experiment_id": 1466, + "type": "pyfunc", + "mlflow_url": "", + "endpoints": null, + "created_at": "2022-05-25T04:57:24.972027Z", + "updated_at": "2022-11-10T06:13:02.181028Z" + }, + "mlflow_run_id": "f190512242584707b79831ee3d5e406c", + "mlflow_url": "http://mlflow.company.com/#/experiments/1466/runs/f190512242584707b79831ee3d5e406c", + "artifact_uri": "gs://mlp/mlflow/1466/f190512242584707b79831ee3d5e406c/artifacts", + "endpoints": [ + { + "id": "8f7c20c7-01c9-4542-8a38-be1d804d49f2", + "version_id": 47, + "model_id": 1376, + "status": "serving", + "url": "http://inca-restaurant-image-47.inca-restaurant.models.company.com/v1/models/inca-restaurant-image-47", + "service_name": "inca-restaurant-image-47.inca-restaurant.models.company.com", + "monitoring_url": "https://lens.company.com/graph/d/GZwEmWzeM/merlin-overview-dashboard?var-cluster=s-company-models-v2&var-model=inca-restaurant-image&var-model_version=inca-restaurant-image-47&var-project=inca-restaurant", + "environment": { + "id": 1, + "name": "staging", + "cluster": "s-company-models-v2", + "is_default": true, + "region": "id", + "gcp_project": "staging", + "max_cpu": "8", + "max_memory": 
"8Gi", + "default_resource_request": { + "min_replica": 0, + "max_replica": 2, + "cpu_request": "1", + "memory_request": "1Gi" + }, + "default_transformer_resource_request": { + "min_replica": 0, + "max_replica": 2, + "cpu_request": "1", + "memory_request": "1Gi" + }, + "is_prediction_job_enabled": false, + "is_default_prediction_job": null, + "default_prediction_job_resource_request": null, + "created_at": "2020-04-20T16:21:52.496669Z", + "updated_at": "2022-11-18T06:44:52.976654Z" + }, + "environment_name": "staging", + "message": "", + "resource_request": { + "min_replica": 0, + "max_replica": 2, + "cpu_request": "2", + "memory_request": "1Gi" + }, + "env_vars": null, + "transformer": { + "id": "16911", + "enabled": false, + "version_endpoint_id": "8f7c20c7-01c9-4542-8a38-be1d804d49f2", + "transformer_type": "custom", + "image": "", + "resource_request": { + "min_replica": 0, + "max_replica": 2, + "cpu_request": "500m", + "memory_request": "512Mi" + }, + "env_vars": [], + "created_at": "2022-11-04T06:28:18.513204Z", + "updated_at": "2022-11-04T06:31:28.404372Z" + }, + "logger": { + "model": {"enabled": false, "mode": "all"}, + "transformer": {"enabled": false, "mode": "all"} + }, + "deployment_mode": "serverless", + "autoscaling_policy": {"metrics_type": "concurrency", "target_value": 1}, + "protocol": "HTTP_JSON", + "created_at": "2022-11-04T06:28:18.536545Z", + "updated_at": "2022-11-04T06:31:28.402672Z" + }, + { + "id": "41fe8c2f-a570-4aa1-8228-0f7db368b32d", + "version_id": 47, + "model_id": 1376, + "status": "terminated", + "url": "http://inca-restaurant-image-47.inca-restaurant.models.company.com/v1/models/inca-restaurant-image-47", + "service_name": "inca-restaurant-image-47.inca-restaurant.models.company.com", + "monitoring_url": "https://lens.company.com/graph/d/JZiMaVZnk/merlin-overview-dashboard?var-cluster=s-company-global-models-v2&var-model=inca-restaurant-image&var-model_version=inca-restaurant-image-47&var-project=inca-restaurant", + 
"environment": { + "id": 2, + "name": "global-staging", + "cluster": "s-company-global-models-v2", + "is_default": null, + "region": "gl", + "gcp_project": "staging", + "max_cpu": "8", + "max_memory": "8Gi", + "default_resource_request": { + "min_replica": 0, + "max_replica": 2, + "cpu_request": "1", + "memory_request": "1Gi" + }, + "default_transformer_resource_request": { + "min_replica": 0, + "max_replica": 2, + "cpu_request": "1", + "memory_request": "1Gi" + }, + "is_prediction_job_enabled": true, + "is_default_prediction_job": true, + "default_prediction_job_resource_request": { + "driver_cpu_request": "2", + "driver_memory_request": "2Gi", + "executor_replica": 3, + "executor_cpu_request": "2", + "executor_memory_request": "2Gi" + }, + "created_at": "2020-04-20T16:21:52.504704Z", + "updated_at": "2022-11-17T10:55:35.781333Z" + }, + "environment_name": "global-staging", + "message": "", + "resource_request": { + "min_replica": 0, + "max_replica": 2, + "cpu_request": "2", + "memory_request": "1Gi" + }, + "env_vars": null, + "transformer": { + "id": "16701", + "enabled": false, + "version_endpoint_id": "41fe8c2f-a570-4aa1-8228-0f7db368b32d", + "transformer_type": "custom", + "image": "", + "resource_request": { + "min_replica": 0, + "max_replica": 2, + "cpu_request": "500m", + "memory_request": "512Mi" + }, + "env_vars": [], + "created_at": "2022-10-28T07:09:12.230198Z", + "updated_at": "2022-11-10T06:13:07.192331Z" + }, + "logger": { + "model": {"enabled": false, "mode": "all"}, + "transformer": {"enabled": false, "mode": "all"} + }, + "deployment_mode": "serverless", + "autoscaling_policy": {"metrics_type": "concurrency", "target_value": 1}, + "protocol": "HTTP_JSON", + "created_at": "2022-10-28T06:12:22.623595Z", + "updated_at": "2022-11-10T06:13:07.190693Z" + } + ], + "properties": null, + "labels": {"env": "staging", "ver": "2.1.0"}, + "python_version": "3.7.*", + "custom_predictor": null, + "created_at": "2022-10-28T06:11:52.148118Z", + "updated_at": 
"2022-10-28T06:11:52.148118Z" +} diff --git a/plugins/extractors/merlin/testdata/mocked-model-version-284-582.json b/plugins/extractors/merlin/testdata/mocked-model-version-284-582.json new file mode 100644 index 000000000..d3c24764e --- /dev/null +++ b/plugins/extractors/merlin/testdata/mocked-model-version-284-582.json @@ -0,0 +1,151 @@ +{ + "id": 582, + "model_id": 284, + "model": { + "id": 284, + "name": "id-car-b-default", + "project_id": 100, + "mlflow_experiment_id": 287, + "type": "pyfunc", + "mlflow_url": "", + "endpoints": null, + "created_at": "2020-06-23T14:40:23.168186Z", + "updated_at": "2022-11-17T15:23:55.621564Z" + }, + "mlflow_run_id": "543f674167704d94bbc03025e50b78b7", + "mlflow_url": "http://mlflow.company.com/#/experiments/287/runs/543f674167704d94bbc03025e50b78b7", + "artifact_uri": "gs://mlp/mlflow/287/543f674167704d94bbc03025e50b78b7/artifacts", + "endpoints": [ + { + "id": "e355ce59-eb2b-4c75-883d-c517ad530cf0", + "version_id": 582, + "model_id": 284, + "status": "terminated", + "url": "http://id-car-b-default-582.jaeger.models.company.com/v1/models/id-car-b-default-582", + "service_name": "id-car-b-default-582.jaeger.models.company.com", + "monitoring_url": "https://lens.company.com/graph/d/GZwEmWzeM/merlin-overview-dashboard?var-cluster=s-company-models-v2&var-model=id-car-b-default&var-model_version=id-car-b-default-582&var-project=jaeger", + "environment": { + "id": 1, + "name": "staging", + "cluster": "s-company-models-v2", + "is_default": true, + "region": "id", + "gcp_project": "staging", + "max_cpu": "8", + "max_memory": "8Gi", + "default_resource_request": { + "min_replica": 0, + "max_replica": 2, + "cpu_request": "1", + "memory_request": "1Gi" + }, + "default_transformer_resource_request": { + "min_replica": 0, + "max_replica": 2, + "cpu_request": "1", + "memory_request": "1Gi" + }, + "is_prediction_job_enabled": false, + "is_default_prediction_job": null, + "default_prediction_job_resource_request": null, + "created_at": 
"2020-04-20T16:21:52.496669Z", + "updated_at": "2022-11-18T06:44:52.976654Z" + }, + "environment_name": "staging", + "message": "", + "resource_request": { + "min_replica": 0, + "max_replica": 2, + "cpu_request": "2", + "memory_request": "200Mi" + }, + "env_vars": [ + {"name": "WORKERS", "value": "1"}, + {"name": "SERVICE_TYPE", "value": "car"}, + {"name": "PEAK_HOURS", "value": "6,7,8,16,17,18,19"}, + {"name": "COUNTRY", "value": "id"}, + {"name": "MODEL_VERSION", "value": "B"} + ], + "transformer": { + "id": "17069", + "enabled": true, + "version_endpoint_id": "e355ce59-eb2b-4c75-883d-c517ad530cf0", + "transformer_type": "standard", + "image": "asia.gcr.io/staging/merlin-transformer:0.24.0-rc3", + "resource_request": { + "min_replica": 0, + "max_replica": 4, + "cpu_request": "4", + "memory_request": "4Gi" + }, + "env_vars": [ + {"name": "APP_NAME", "value": "jaeger-cancellation-id-car-b"}, + {"name": "GOOGLE_CLOUD_PROJECT", "value": "staging"}, + {"name": "COUNTRY", "value": "id"}, + {"name": "MODEL_VERSION", "value": "B"}, + {"name": "SERVICE_TYPE", "value": "car"}, + {"name": "PEAK_HOURS", "value": "6,7,8,16,17,18,19"}, + { + "name": "FEAST_SERVING_HOST", + "value": "online-serving.s.feast.company.com" + }, + {"name": "FEAST_SERVING_PORT", "value": "15010"}, + {"name": "FEAST_PROJECT", "value": "jaeger"}, + {"name": "FEAST_CACHE_ENABLED", "value": "True"}, + {"name": "FEAST_BATCH_SIZE", "value": "90"}, + {"name": "FEAST_CACHE_DURATION_SECOND", "value": "60"}, + {"name": "FEAST_FEATURE_STATUS_MONITORING_ENABLED", "value": "True"}, + {"name": "FEAST_FEATURE_VALUE_MONITORING_ENABLED", "value": "False"}, + {"name": "JAEGER_SAMPLER_TYPE", "value": "probabilistic"}, + {"name": "JAEGER_SAMPLER_PARAM", "value": "0.01"}, + {"name": "JAEGER_DISABLED", "value": "False"}, + { + "name": "MLFLOW_TRACKING_URI", + "value": "http://jaeger.mlflow.global.company.com/" + }, + { + "name": "MLFLOW_RUN_ID", "value": "543f674167704d94bbc03025e50b78b7" + }, + {"name": 
"MLFLOW_DOWNLOAD_PATH", "value": "/data"}, + { + "name": "ORDER_FEATURE_CONFIG_PATH", + "value": "/data/order_features.json" + }, + { + "name": "FEAST_FEATURE_CONFIG_PATH", + "value": "/data/features.json" + }, + { + "name": "PUBLIC_HOLIDAY_FILE", + "value": "/data/public_holidays_feast_0_9.csv" + }, + {"name": "INIT_HEAP_SIZE_IN_MB", "value": "300"}, + { + "name": "STANDARD_TRANSFORMER_CONFIG", + "value": "{\"transformerConfig\":{\"preprocess\":{\"inputs\":[{\"tables\":[{\"name\":\"customer_order\",\"columns\":[{\"name\":\"order_id\",\"fromJson\":{\"jsonPath\":\"$.order_id\"}},{\"name\":\"service_area_id\",\"fromJson\":{\"jsonPath\":\"$.service_area_id\"}},{\"name\":\"order_time\",\"fromJson\":{\"jsonPath\":\"$.order_time\"}},{\"name\":\"start_latitude\",\"fromJson\":{\"jsonPath\":\"$.bid.from.latitude\"}},{\"name\":\"start_longitude\",\"fromJson\":{\"jsonPath\":\"$.bid.from.longitude\"}},{\"name\":\"end_latitude\",\"fromJson\":{\"jsonPath\":\"$.bid.to.latitude\"}},{\"name\":\"end_longitude\",\"fromJson\":{\"jsonPath\":\"$.bid.to.longitude\"}}]},{\"name\":\"driver_order\",\"baseTable\":{\"fromJson\":{\"jsonPath\":\"$.drivers\"}},\"columns\":[{\"name\":\"order_time\",\"fromJson\":{\"jsonPath\":\"$.order_time\"}},{\"name\":\"start_latitude\",\"fromJson\":{\"jsonPath\":\"$.bid.from.latitude\"}},{\"name\":\"start_longitude\",\"fromJson\":{\"jsonPath\":\"$.bid.from.longitude\"}},{\"name\":\"driver_latitude\",\"fromJson\":{\"jsonPath\":\"$.drivers[*].driver_location.latitude\"}},{\"name\":\"driver_longitude\",\"fromJson\":{\"jsonPath\":\"$.drivers[*].driver_location.longitude\"}},{\"name\":\"driver_elevation\",\"fromJson\":{\"jsonPath\":\"$.drivers[*].driver_location.elevation\"}},{\"name\":\"driver_gps_accuracy\",\"fromJson\":{\"jsonPath\":\"$.drivers[*].driver_location.gps_accuracy\"}},{\"name\":\"standing_geohash_7\",\"expression\":\"Geohash(\\\"$.drivers[*].driver_location.latitude\\\", \\\"$.drivers[*].driver_location.longitude\\\", 
7)\"}]},{\"name\":\"first_customer_location\",\"baseTable\":{\"fromJson\":{\"jsonPath\":\"$.drivers[*].first_customer_drop_off_location\"}},\"columns\":[{\"name\":\"driver_id\",\"fromJson\":{\"jsonPath\":\"$.drivers[*].driver_id\"}}]}]},{\"feast\":[{\"project\":\"jaeger\",\"entities\":[{\"name\":\"origin_geohash_7\",\"valueType\":\"STRING\",\"udf\":\"Geohash(\\\"$.bid.from.latitude\\\", \\\"$.bid.from.longitude\\\", 7)\"}],\"features\":[{\"name\":\"jaeger_car_origin_geohash_acceptance:origin_geohash_7_num_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_origin_geohash_acceptance:origin_geohash_7_prop_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_origin_geohash_acceptance:origin_geohash_7_avg_distance_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_origin_geohash_acceptance:origin_geohash_7_avg_polar_angle_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_origin_geohash_acceptance:origin_geohash_7_avg_customer_distance_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_origin_geohash_acceptance:origin_geohash_7_avg_distance_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_origin_geohash_acceptance:origin_geohash_7_avg_polar_angle_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_origin_geohash_acceptance:origin_geohash_7_avg_customer_distance_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"}],\"tableName\":\"origin_geohash_7\",\"source\":\"BIGTABLE\"},{\"project\":\"jaeger\",\"entities\":[{\"name\":\"destination_geohash_7\",\"valueType\":\"STRING\",\"udf\":\"Geohash(\\\"$.bid.to.latitude\\\", \\\"$.bid.to.longitude\\\", 
7)\"}],\"features\":[{\"name\":\"jaeger_car_destination_geohash_acceptance:destination_geohash_7_num_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_destination_geohash_acceptance:destination_geohash_7_prop_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_destination_geohash_acceptance:destination_geohash_7_avg_distance_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_destination_geohash_acceptance:destination_geohash_7_avg_distance_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"}],\"tableName\":\"destination_geohash_7\",\"source\":\"BIGTABLE\"},{\"project\":\"jaeger\",\"entities\":[{\"name\":\"standing_geohash_7\",\"valueType\":\"STRING\",\"udf\":\"Geohash(\\\"$.drivers.driver_location.latitude\\\", \\\"$.drivers.driver_location.longitude\\\", 7)\"}],\"features\":[{\"name\":\"jaeger_car_standing_geohash_acceptance:standing_geohash_7_num_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_standing_geohash_acceptance:standing_geohash_7_prop_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_standing_geohash_acceptance:standing_geohash_7_avg_polar_angle_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_standing_geohash_acceptance:standing_geohash_7_avg_customer_distance_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_standing_geohash_acceptance:standing_geohash_7_avg_polar_angle_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_standing_geohash_acceptance:standing_geohash_7_avg_customer_distance_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"}],\"tableName\":\"standing_geohash_7\",\"source\":\"BIGTABLE\"},{\"project\":\"jaeger\",\"entities\":[{\"name\":\"driver_id\",\"valueType\":\"STRING\",\"jsonPath\":\"$.drivers.driver_id\"}],\"features\":[{\"name\":\"jaeger_car_driver_accept
ance:driver_id_num_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_prop_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_avg_distance_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_avg_customer_distance_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_avg_distance_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_avg_customer_distance_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_standing_accepted_1\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_standing_accepted_2\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_standing_accepted_3\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_standing_rej_ignore_1\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_standing_rej_ignore_2\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_standing_rej_ignore_3\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_origin_accepted_1\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_origin_accepted_2\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_origin_accepted_3\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_origin_rej_ignore_1\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_origin_rej_ignor
e_2\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_origin_rej_ignore_3\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_destination_accepted_1\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_destination_accepted_2\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_destination_accepted_3\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_destination_rej_ignore_1\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_destination_rej_ignore_2\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_destination_rej_ignore_3\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_donut_count_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_donut_count_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_num_accepted_recent\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_prop_accepted_recent\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_avg_distance_accepted_recent\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_avg_customer_distance_accepted_recent\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_avg_distance_rej_ignore_recent\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_avg_customer_distance_rej_ignore_recent\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_drive
r_acceptance:driver_id_donut_count_accepted_recent\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_donut_count_rej_ignore_recent\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"}],\"tableName\":\"driver_id\",\"source\":\"BIGTABLE\"},{\"project\":\"jaeger\",\"entities\":[{\"name\":\"customer_id\",\"valueType\":\"STRING\",\"jsonPath\":\"$.customer_id\"}],\"features\":[{\"name\":\"jaeger_car_customer_acceptance:customer_id_num_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_prop_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_avg_distance_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_avg_customer_distance_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_avg_distance_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_avg_customer_distance_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_origin_accepted_1\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_origin_accepted_2\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_origin_accepted_3\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_origin_rej_ignore_1\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_origin_rej_ignore_2\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_origin_rej_ignore_3\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_i
d_destination_accepted_1\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_destination_accepted_2\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_destination_accepted_3\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_destination_rej_ignore_1\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_destination_rej_ignore_2\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_destination_rej_ignore_3\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"}],\"tableName\":\"customer_id\",\"source\":\"BIGTABLE\"}]}],\"transformations\":[{\"tableJoin\":{\"leftTable\":\"driver_order\",\"rightTable\":\"standing_geohash_7\",\"outputTable\":\"driver_tb1\",\"how\":\"LEFT\",\"onColumn\":\"standing_geohash_7\"}},{\"tableJoin\":{\"leftTable\":\"driver_tb1\",\"rightTable\":\"driver_id\",\"outputTable\":\"driver_tb2\",\"how\":\"LEFT\",\"onColumn\":\"driver_id\"}},{\"tableJoin\":{\"leftTable\":\"driver_tb2\",\"rightTable\":\"first_customer_location\",\"outputTable\":\"driver_tb3\",\"how\":\"LEFT\",\"onColumn\":\"driver_id\"}},{\"tableJoin\":{\"leftTable\":\"origin_geohash_7\",\"rightTable\":\"destination_geohash_7\",\"outputTable\":\"customer_feast_tb1\",\"how\":\"CONCAT\"}},{\"tableJoin\":{\"leftTable\":\"customer_feast_tb1\",\"rightTable\":\"customer_id\",\"outputTable\":\"customer_feast_tb2\",\"how\":\"CONCAT\"}},{\"tableTransformation\":{\"inputTable\":\"driver_tb3\",\"outputTable\":\"driver_tb3\",\"steps\":[{\"updateColumns\":[{\"column\":\"customer_distance\",\"expression\":\"map(HaversineDistance(driver_tb3.Col('driver_latitude'), driver_tb3.Col('driver_longitude'), driver_tb3.Col('start_latitude'), driver_tb3.Col('start_longitude')), {# * 
1000})\"},{\"column\":\"polar_angle\",\"expression\":\"PolarAngle(driver_tb3.Col('driver_latitude'), driver_tb3.Col('driver_longitude'),driver_tb3.Col('start_latitude'), driver_tb3.Col('start_longitude'))\"}]}]}}],\"outputs\":[{\"jsonOutput\":{\"jsonTemplate\":{\"fields\":[{\"fieldName\":\"driver_features\",\"fromTable\":{\"tableName\":\"driver_tb3\",\"format\":\"RECORD\"}},{\"fieldName\":\"customer_features\",\"fromTable\":{\"tableName\":\"customer_order\",\"format\":\"RECORD\"}},{\"fieldName\":\"customer_feast_features\",\"fromTable\":{\"tableName\":\"customer_feast_tb2\",\"format\":\"RECORD\"}},{\"fieldName\":\"bid\",\"fromJson\":{\"jsonPath\":\"$.bid\"}},{\"fieldName\":\"order_details\",\"expression\":\"JsonExtract(\\\"$.details\\\", \\\"$\\\")\"}]}}}]},\"postprocess\":{}}}" + }, + { + "name": "FEAST_FEATURE_TABLE_SPECS_JSONS", + "value": "[{\"name\":\"jaeger_car_origin_geohash_acceptance\",\"project\":\"jaeger\",\"maxAge\":\"3455990s\",\"entities\":[\"origin_geohash_7\"]},{\"name\":\"jaeger_car_destination_geohash_acceptance\",\"project\":\"jaeger\",\"maxAge\":\"3455990s\",\"entities\":[\"destination_geohash_7\"]},{\"name\":\"jaeger_car_standing_geohash_acceptance\",\"project\":\"jaeger\",\"maxAge\":\"3455990s\",\"entities\":[\"standing_geohash_7\"]},{\"name\":\"jaeger_car_driver_acceptance\",\"project\":\"jaeger\",\"maxAge\":\"3455990s\",\"entities\":[\"driver_id\"]},{\"name\":\"jaeger_car_customer_acceptance\",\"project\":\"jaeger\",\"maxAge\":\"3455990s\",\"entities\":[\"customer_id\"]}]" + } + ], + "created_at": "2022-11-09T15:23:47.27563Z", + "updated_at": "2022-11-10T15:32:28.24309Z" + }, + "deployment_mode": "serverless", + "autoscaling_policy": { + "metrics_type": "concurrency", + "target_value": 1 + }, + "protocol": "HTTP_JSON", + "created_at": "2022-11-09T15:23:47.296498Z", + "updated_at": "2022-11-10T15:32:28.241306Z" + } + ], + "properties": null, + "labels": null, + "python_version": "3.7.*", + "custom_predictor": null, + "created_at": 
"2022-11-09T15:23:26.716082Z", + "updated_at": "2022-11-09T15:23:26.716082Z" +} diff --git a/plugins/extractors/merlin/testdata/mocked-model-version-689-7.json b/plugins/extractors/merlin/testdata/mocked-model-version-689-7.json new file mode 100644 index 000000000..282cd172e --- /dev/null +++ b/plugins/extractors/merlin/testdata/mocked-model-version-689-7.json @@ -0,0 +1,111 @@ +{ + "id": 7, + "model_id": 689, + "model": { + "id": 689, + "name": "pyfunc-standard-transfo", + "project_id": 1, + "mlflow_experiment_id": 692, + "type": "pyfunc", + "mlflow_url": "", + "endpoints": null, + "created_at": "2021-02-04T13:05:31.593956Z", + "updated_at": "2021-07-22T08:13:34.642949Z" + }, + "mlflow_run_id": "061ew38v3b7kp088s9b49kzr68v5ixvm3", + "mlflow_url": "http://mlflow.company.com/#/experiments/692/runs/061ew38v3b7kp088s9b49kzr68v5ixvm3", + "artifact_uri": "gs://s-gods-mlp/mlflow/692/061ew38v3b7kp088s9b49kzr68v5ixvm3/artifacts", + "endpoints": [ + { + "id": "81dbbb50-8cea-48cf-85c9-2ccdfcb91d24", + "version_id": 7, + "model_id": 689, + "status": "terminated", + "url": "http://pyfunc-standard-transfo-7.sample.models.company.com/v1/models/pyfunc-standard-transfo-7", + "service_name": "pyfunc-standard-transfo-7-predictor-default.sample.models.company.com", + "monitoring_url": "https://lens.company.com/graph/d/GZwEmWzeM/merlin-overview-dashboard?var-cluster=s-company-models-v2&var-model=pyfunc-standard-transfo&var-model_version=pyfunc-standard-transfo-7&var-project=sample", + "environment": { + "id": 1, + "name": "staging", + "cluster": "s-company-models-v2", + "is_default": true, + "region": "id", + "gcp_project": "staging", + "max_cpu": "8", + "max_memory": "8Gi", + "default_resource_request": { + "min_replica": 0, + "max_replica": 2, + "cpu_request": "1", + "memory_request": "1Gi" + }, + "default_transformer_resource_request": { + "min_replica": 0, + "max_replica": 2, + "cpu_request": "1", + "memory_request": "1Gi" + }, + "is_prediction_job_enabled": false, + 
"is_default_prediction_job": null, + "default_prediction_job_resource_request": null, + "created_at": "2020-04-20T16:21:52.496669Z", + "updated_at": "2022-11-18T02:38:14.852768Z" + }, + "environment_name": "staging", + "message": "", + "resource_request": { + "min_replica": 0, + "max_replica": 4, + "cpu_request": "2", + "memory_request": "1Gi" + }, + "env_vars": [ + {"name": "MODEL_NAME", "value": "pyfunc-standard-transfo-7"}, + { + "name": "MODEL_DIR", + "value": "gs://s-gods-mlp/mlflow/692/061ew38v3b7kp088s9b49kzr68v5ixvm3/artifacts/model" + }, + {"name": "WORKERS", "value": "1"} + ], + "transformer": { + "id": "1707", + "enabled": true, + "version_endpoint_id": "81dbbb50-8cea-48cf-85c9-2ccdfcb91d24", + "transformer_type": "standard", + "image": "asia.gcr.io/staging/merlin-transformer:v0.10.0-rc2", + "resource_request": { + "min_replica": 0, + "max_replica": 1, + "cpu_request": "1", + "memory_request": "200Mi" + }, + "env_vars": [ + {"name": "LOG_LEVEL", "value": "DEBUG"}, + { + "name": "STANDARD_TRANSFORMER_CONFIG", + "value": "{\"transformerConfig\":{\"feast\":[{\"project\":\"default\",\"entities\":[{\"name\":\"merchant_id\",\"valueType\":\"STRING\",\"jsonPath\":\"$.merchants[*].id\"}],\"features\":[{\"name\":\"merchant_t1_discovery:t1_estimate\",\"valueType\":\"DOUBLE\",\"defaultValue\":\"0\"}]}]}}" + }, + {"name": "FEAST_FEATURE_STATUS_MONITORING_ENABLED", "value": "true"}, + {"name": "FEAST_FEATURE_VALUE_MONITORING_ENABLED", "value": "true"} + ], + "created_at": "2021-02-05T05:26:42.759879Z", + "updated_at": "2021-07-22T08:13:38.013914Z" + }, + "logger": { + "model": {"enabled": true, "mode": "all"}, + "transformer": null + }, + "deployment_mode": "serverless", + "autoscaling_policy": null, + "protocol": "", + "created_at": "2021-02-05T05:26:42.768235Z", + "updated_at": "2021-07-22T08:13:38.012912Z" + } + ], + "properties": null, + "labels": null, + "python_version": "3.7.*", + "custom_predictor": null, + "created_at": "2021-02-05T05:26:38.212696Z", + 
"updated_at": "2021-02-05T05:26:38.212696Z" +} diff --git a/plugins/extractors/merlin/testdata/mocked-model-version-80-2.json b/plugins/extractors/merlin/testdata/mocked-model-version-80-2.json new file mode 100644 index 000000000..1aeed2a8b --- /dev/null +++ b/plugins/extractors/merlin/testdata/mocked-model-version-80-2.json @@ -0,0 +1,96 @@ +{ + "id": 2, + "model_id": 80, + "model": { + "id": 80, + "name": "resource-request", + "project_id": 1, + "mlflow_experiment_id": 83, + "type": "sklearn", + "mlflow_url": "", + "endpoints": null, + "created_at": "2020-04-23T10:07:40.105711Z", + "updated_at": "2022-06-14T07:12:09.536419Z" + }, + "mlflow_run_id": "061ew38v3b7kp088s9b49kzr68v5ixvm3", + "mlflow_url": "http://mlflow.company.com/#/experiments/83/runs/061ew38v3b7kp088s9b49kzr68v5ixvm3", + "artifact_uri": "gs://mlp/mlflow/692/061ew38v3b7kp088s9b49kzr68v5ixvm3/artifacts/model", + "endpoints": [ + { + "id": "01fee5b9-1fd9-4664-8c2d-52073af405f8", + "version_id": 2, + "model_id": 80, + "status": "terminated", + "url": "http://resource-request-2.sample.models.company.com/v1/models/resource-request-2", + "service_name": "resource-request-2-predictor-default.sample.models.company.com", + "monitoring_url": "https://lens.company.com/graph/d/yqCUcuYQQ/merlin-overview-dashboard?var-cluster=s-company-models-v2&var-model=resource-request&var-model_version=resource-request-2&var-project=sample", + "environment": { + "id": 1, + "name": "staging", + "cluster": "s-company-models-v2", + "is_default": true, + "region": "id", + "gcp_project": "company-staging", + "max_cpu": "8", + "max_memory": "8Gi", + "default_resource_request": { + "min_replica": 0, + "max_replica": 2, + "cpu_request": "1", + "memory_request": "1Gi" + }, + "default_transformer_resource_request": { + "min_replica": 0, + "max_replica": 2, + "cpu_request": "1", + "memory_request": "1Gi" + }, + "is_prediction_job_enabled": false, + "is_default_prediction_job": null, + "default_prediction_job_resource_request": null, + 
"created_at": "2020-04-20T16:21:52.496669Z", + "updated_at": "2022-11-18T02:38:14.852768Z" + }, + "environment_name": "staging", + "message": "", + "resource_request": { + "min_replica": 0, + "max_replica": 2, + "cpu_request": "1", + "memory_request": "1Gi" + }, + "env_vars": null, + "transformer": { + "id": "11930", + "enabled": false, + "version_endpoint_id": "01fee5b9-1fd9-4664-8c2d-52073af405f8", + "transformer_type": "custom", + "image": "", + "resource_request": { + "min_replica": 0, + "max_replica": 2, + "cpu_request": "500m", + "memory_request": "512Mi" + }, + "env_vars": [], + "created_at": "2022-05-30T07:52:32.790496Z", + "updated_at": "2022-06-14T07:12:13.513532Z" + }, + "logger": { + "model": {"enabled": false, "mode": "all"}, + "transformer": {"enabled": false, "mode": "all"} + }, + "deployment_mode": "serverless", + "autoscaling_policy": null, + "protocol": "", + "created_at": "2022-05-30T07:43:21.252389Z", + "updated_at": "2022-06-14T07:12:13.512746Z" + } + ], + "properties": null, + "labels": null, + "python_version": "3.7.*", + "custom_predictor": null, + "created_at": "2022-05-30T07:43:18.120248Z", + "updated_at": "2022-05-30T07:43:18.120248Z" +} diff --git a/plugins/extractors/merlin/testdata/mocked-models-1.json b/plugins/extractors/merlin/testdata/mocked-models-1.json new file mode 100644 index 000000000..49b67283a --- /dev/null +++ b/plugins/extractors/merlin/testdata/mocked-models-1.json @@ -0,0 +1,138 @@ +[ + { + "id": 12, + "project_id": 1, + "mlflow_experiment_id": 12, + "name": "model.bst", + "type": "xgboost", + "mlflow_url": "http://mlflow.company.com/#/experiments/12", + "endpoints": [], + "created_at": "2020-01-10T08:18:46.887143Z", + "updated_at": "2020-01-10T08:18:46.887143Z" + }, + { + "id": 80, + "project_id": 1, + "mlflow_experiment_id": 83, + "name": "resource-request", + "type": "sklearn", + "mlflow_url": "http://mlflow.company.com/#/experiments/83", + "endpoints": [ + { + "id": 450, + "status": "terminated", + "url": 
"resource-request.sample.models.company.com", + "rule": { + "destinations": [ + { + "version_endpoint_id": "01fee5b9-1fd9-4664-8c2d-52073af405f8", + "version_endpoint": { + "id": "01fee5b9-1fd9-4664-8c2d-52073af405f8", + "version_id": 2, + "status": "running", + "url": "http://resource-request-2.sample.models.company.com/v1/models/resource-request-2", + "service_name": "resource-request-2-predictor-default.sample.models.company.com", + "environment_name": "staging", + "monitoring_url": "", + "message": "", + "env_vars": null, + "transformer": { + "enabled": false, + "transformer_type": "custom", + "image": "", + "command": "", + "args": "", + "env_vars": [], + "created_at": "2022-05-30T07:52:32.790496Z", + "updated_at": "2022-05-30T07:52:51.047734Z" + }, + "deployment_mode": "", + "created_at": "2022-05-30T07:43:21.252389Z", + "updated_at": "2022-05-30T07:52:51.046666Z" + }, + "weight": 100 + } + ] + }, + "environment_name": "staging", + "created_at": "2022-05-30T07:54:04.137016Z", + "updated_at": "2022-06-14T07:12:09.538048Z" + } + ], + "created_at": "2020-04-23T10:07:40.105711Z", + "updated_at": "2022-06-14T07:12:09.536419Z" + }, + { + "id": 689, + "project_id": 1, + "mlflow_experiment_id": 692, + "name": "pyfunc-standard-transfo", + "type": "pyfunc", + "mlflow_url": "http://mlflow.company.com/#/experiments/692", + "endpoints": [ + { + "id": 174, + "status": "terminated", + "url": "pyfunc-standard-transfo.sample.models.company.com", + "rule": { + "destinations": [ + { + "version_endpoint_id": "81dbbb50-8cea-48cf-85c9-2ccdfcb91d24", + "version_endpoint": { + "id": "81dbbb50-8cea-48cf-85c9-2ccdfcb91d24", + "version_id": 7, + "status": "running", + "url": "http://pyfunc-standard-transfo-7.sample.models.company.com/v1/models/pyfunc-standard-transfo-7", + "service_name": "pyfunc-standard-transfo-7-predictor-default.sample.models.company.com", + "environment_name": "staging", + "monitoring_url": "", + "message": "", + "env_vars": [ + {"name": "MODEL_NAME", "value": 
"pyfunc-standard-transfo-7"}, + { + "name": "MODEL_DIR", + "value": "gs://mlp/mlflow/692/061ew38v3b7kp088s9b49kzr68v5ixvm3/artifacts/model" + }, + {"name": "WORKERS", "value": "1"} + ], + "transformer": { + "enabled": true, + "transformer_type": "standard", + "image": "asia.gcr.io/company-staging/merlin-transformer:v0.10.0-rc2", + "command": "", + "args": "", + "env_vars": [ + {"name": "LOG_LEVEL", "value": "DEBUG"}, + { + "name": "STANDARD_TRANSFORMER_CONFIG", + "value": "{\"transformerConfig\":{\"feast\":[{\"project\":\"default\",\"entities\":[{\"name\":\"merchant_id\",\"valueType\":\"STRING\",\"jsonPath\":\"$.merchants[*].id\"}],\"features\":[{\"name\":\"merchant_t1_discovery:t1_estimate\",\"valueType\":\"DOUBLE\",\"defaultValue\":\"0\"}]}]}}" + }, + { + "name": "FEAST_FEATURE_STATUS_MONITORING_ENABLED", + "value": "true" + }, + { + "name": "FEAST_FEATURE_VALUE_MONITORING_ENABLED", + "value": "true" + } + ], + "created_at": "2021-02-05T05:26:42.759879Z", + "updated_at": "2021-02-05T05:40:20.092802Z" + }, + "deployment_mode": "", + "created_at": "2021-02-05T05:26:42.768235Z", + "updated_at": "2021-02-05T05:40:20.091784Z" + }, + "weight": 100 + } + ] + }, + "environment_name": "staging", + "created_at": "2021-02-05T08:33:26.204561Z", + "updated_at": "2021-07-22T08:13:34.64483Z" + } + ], + "created_at": "2021-02-04T13:05:31.593956Z", + "updated_at": "2021-07-22T08:13:34.642949Z" + } +] diff --git a/plugins/extractors/merlin/testdata/mocked-models-100.json b/plugins/extractors/merlin/testdata/mocked-models-100.json new file mode 100644 index 000000000..dae674a81 --- /dev/null +++ b/plugins/extractors/merlin/testdata/mocked-models-100.json @@ -0,0 +1,454 @@ +[ + { + "created_at": "2022-05-25T04:57:24.972027Z", + "endpoints": [ + { + "created_at": "2022-11-04T06:31:28.372368Z", + "environment": { + "cluster": "s-company-models-v2", + "created_at": "2020-04-20T16:21:52.496669Z", + "default_prediction_job_resource_request": null, + "default_resource_request": { + 
"cpu_request": "2", + "max_replica": 4, + "memory_request": "1Gi", + "min_replica": 0 + }, + "default_transformer_resource_request": { + "cpu_request": "2", + "max_replica": 4, + "memory_request": "1Gi", + "min_replica": 0 + }, + "gcp_project": "staging", + "id": 1, + "is_default": true, + "is_default_prediction_job": null, + "is_prediction_job_enabled": false, + "max_cpu": "8", + "max_memory": "8Gi", + "name": "staging", + "region": "id", + "updated_at": "2022-11-10T12:17:04.445183Z" + }, + "environment_name": "staging", + "id": 569, + "model": null, + "model_id": 1376, + "protocol": "HTTP_JSON", + "rule": { + "destinations": [ + { + "version_endpoint": { + "autoscaling_policy": { + "metrics_type": "concurrency", + "target_value": 1 + }, + "created_at": "2022-11-04T06:28:18.536545Z", + "deployment_mode": "serverless", + "env_vars": null, + "environment": { + "cluster": "s-company-models-v2", + "created_at": "2020-04-20T16:21:52.496669Z", + "default_prediction_job_resource_request": null, + "default_resource_request": { + "cpu_request": "2", + "max_replica": 4, + "memory_request": "1Gi", + "min_replica": 0 + }, + "default_transformer_resource_request": { + "cpu_request": "2", + "max_replica": 4, + "memory_request": "1Gi", + "min_replica": 0 + }, + "gcp_project": "staging", + "id": 1, + "is_default": true, + "is_default_prediction_job": null, + "is_prediction_job_enabled": false, + "max_cpu": "8", + "max_memory": "8Gi", + "name": "staging", + "region": "id", + "updated_at": "2022-11-04T06:30:30.935493Z" + }, + "environment_name": "staging", + "id": "8f7c20c7-01c9-4542-8a38-be1d804d49f2", + "logger": { + "model": {"enabled": false, "mode": "all"}, + "transformer": {"enabled": false, "mode": "all"} + }, + "message": "", + "model_id": 1376, + "protocol": "HTTP_JSON", + "resource_request": { + "cpu_request": "2", + "max_replica": 2, + "memory_request": "1Gi", + "min_replica": 0 + }, + "service_name": "inca-restaurant-image-47.inca-restaurant.models.company.com", + 
"status": "running", + "transformer": { + "created_at": "2022-11-04T06:28:18.513204Z", + "enabled": false, + "env_vars": [], + "id": "16911", + "image": "", + "resource_request": { + "cpu_request": "500m", + "max_replica": 2, + "memory_request": "512Mi", + "min_replica": 0 + }, + "transformer_type": "custom", + "updated_at": "2022-11-04T06:30:30.939953Z", + "version_endpoint_id": "8f7c20c7-01c9-4542-8a38-be1d804d49f2" + }, + "updated_at": "2022-11-04T06:30:30.937905Z", + "url": "http://inca-restaurant-image-47.inca-restaurant.models.company.com/v1/models/inca-restaurant-image-47", + "version_id": 47 + }, + "version_endpoint_id": "8f7c20c7-01c9-4542-8a38-be1d804d49f2", + "weight": 100 + } + ] + }, + "status": "serving", + "updated_at": "2022-11-04T06:31:28.372368Z", + "url": "inca-restaurant-image.inca-restaurant.models.company.com" + }, + { + "created_at": "2022-05-25T06:53:28.075479Z", + "environment": { + "cluster": "s-company-global-models-v2", + "created_at": "2020-04-20T16:21:52.504704Z", + "default_prediction_job_resource_request": { + "driver_cpu_request": "2", + "driver_memory_request": "2Gi", + "executor_cpu_request": "2", + "executor_memory_request": "2Gi", + "executor_replica": 3 + }, + "default_resource_request": { + "cpu_request": "2", + "max_replica": 4, + "memory_request": "1Gi", + "min_replica": 0 + }, + "default_transformer_resource_request": { + "cpu_request": "2", + "max_replica": 4, + "memory_request": "1Gi", + "min_replica": 0 + }, + "gcp_project": "staging", + "id": 2, + "is_default": null, + "is_default_prediction_job": true, + "is_prediction_job_enabled": true, + "max_cpu": "8", + "max_memory": "8Gi", + "name": "global-staging", + "region": "gl", + "updated_at": "2022-11-10T06:13:07.188409Z" + }, + "environment_name": "global-staging", + "id": 443, + "model": null, + "model_id": 1376, + "protocol": "HTTP_JSON", + "rule": { + "destinations": [ + { + "version_endpoint": { + "autoscaling_policy": { + "metrics_type": "concurrency", + 
"target_value": 1 + }, + "created_at": "2022-10-28T06:12:22.623595Z", + "deployment_mode": "serverless", + "env_vars": null, + "environment": { + "cluster": "s-company-global-models-v2", + "created_at": "2020-04-20T16:21:52.504704Z", + "default_prediction_job_resource_request": { + "driver_cpu_request": "2", + "driver_memory_request": "2Gi", + "executor_cpu_request": "2", + "executor_memory_request": "2Gi", + "executor_replica": 3 + }, + "default_resource_request": { + "cpu_request": "2", + "max_replica": 4, + "memory_request": "1Gi", + "min_replica": 0 + }, + "default_transformer_resource_request": { + "cpu_request": "2", + "max_replica": 4, + "memory_request": "1Gi", + "min_replica": 0 + }, + "gcp_project": "staging", + "id": 2, + "is_default": null, + "is_default_prediction_job": true, + "is_prediction_job_enabled": true, + "max_cpu": "8", + "max_memory": "8Gi", + "name": "global-staging", + "region": "gl", + "updated_at": "2022-11-04T06:33:09.44217Z" + }, + "environment_name": "global-staging", + "id": "41fe8c2f-a570-4aa1-8228-0f7db368b32d", + "logger": { + "model": {"enabled": false, "mode": "all"}, + "transformer": {"enabled": false, "mode": "all"} + }, + "message": "", + "model_id": 1376, + "protocol": "HTTP_JSON", + "resource_request": { + "cpu_request": "2", + "max_replica": 2, + "memory_request": "1Gi", + "min_replica": 0 + }, + "service_name": "inca-restaurant-image-47.inca-restaurant.models.company.com", + "status": "running", + "transformer": { + "created_at": "2022-10-28T07:09:12.230198Z", + "enabled": false, + "env_vars": [], + "id": "16701", + "image": "", + "resource_request": { + "cpu_request": "500m", + "max_replica": 2, + "memory_request": "512Mi", + "min_replica": 0 + }, + "transformer_type": "custom", + "updated_at": "2022-11-04T06:33:09.446768Z", + "version_endpoint_id": "41fe8c2f-a570-4aa1-8228-0f7db368b32d" + }, + "updated_at": "2022-11-04T06:33:09.44496Z", + "url": 
"http://inca-restaurant-image-47.inca-restaurant.models.company.com/v1/models/inca-restaurant-image-47", + "version_id": 47 + }, + "version_endpoint_id": "41fe8c2f-a570-4aa1-8228-0f7db368b32d", + "weight": 100 + } + ] + }, + "status": "terminated", + "updated_at": "2022-11-10T06:13:02.185052Z", + "url": "inca-restaurant-image.inca-restaurant.models.company.com" + } + ], + "id": 1376, + "mlflow_experiment_id": 1466, + "mlflow_url": "http://mlflow.company.com/#/experiments/1466", + "name": "inca-restaurant-image", + "project_id": 100, + "type": "pyfunc", + "updated_at": "2022-11-10T06:13:02.181028Z" + }, + { + "created_at": "2020-06-23T14:40:23.168186Z", + "endpoints": [ + { + "created_at": "2020-06-23T14:48:16.864671Z", + "environment": { + "cluster": "s-company-models-v2", + "created_at": "2020-04-20T16:21:52.496669Z", + "default_prediction_job_resource_request": null, + "default_resource_request": { + "cpu_request": "2", + "max_replica": 4, + "memory_request": "1Gi", + "min_replica": 0 + }, + "default_transformer_resource_request": { + "cpu_request": "2", + "max_replica": 4, + "memory_request": "1Gi", + "min_replica": 0 + }, + "gcp_project": "staging", + "id": 1, + "is_default": true, + "is_default_prediction_job": null, + "is_prediction_job_enabled": false, + "max_cpu": "8", + "max_memory": "8Gi", + "name": "staging", + "region": "id", + "updated_at": "2022-11-10T12:17:04.445183Z" + }, + "environment_name": "staging", + "id": 63, + "model": null, + "model_id": 284, + "protocol": "HTTP_JSON", + "rule": { + "destinations": [ + { + "version_endpoint": { + "autoscaling_policy": { + "metrics_type": "concurrency", + "target_value": 1 + }, + "created_at": "2022-11-09T15:23:47.296498Z", + "deployment_mode": "serverless", + "env_vars": [ + {"name": "WORKERS", "value": "1"}, + {"name": "SERVICE_TYPE", "value": "car"}, + {"name": "PEAK_HOURS", "value": "6,7,8,16,17,18,19"}, + {"name": "COUNTRY", "value": "id"}, + {"name": "MODEL_VERSION", "value": "B"} + ], + "environment": 
{ + "cluster": "s-company-models-v2", + "created_at": "2020-04-20T16:21:52.496669Z", + "default_prediction_job_resource_request": null, + "default_resource_request": { + "cpu_request": "2", + "max_replica": 4, + "memory_request": "1Gi", + "min_replica": 0 + }, + "default_transformer_resource_request": { + "cpu_request": "2", + "max_replica": 4, + "memory_request": "1Gi", + "min_replica": 0 + }, + "gcp_project": "staging", + "id": 1, + "is_default": true, + "is_default_prediction_job": null, + "is_prediction_job_enabled": false, + "max_cpu": "8", + "max_memory": "8Gi", + "name": "staging", + "region": "id", + "updated_at": "2022-11-09T15:33:56.806113Z" + }, + "environment_name": "staging", + "id": "e355ce59-eb2b-4c75-883d-c517ad530cf0", + "message": "", + "model_id": 284, + "protocol": "HTTP_JSON", + "resource_request": { + "cpu_request": "2", + "max_replica": 2, + "memory_request": "200Mi", + "min_replica": 0 + }, + "service_name": "id-car-b-default-582.jaeger.models.company.com", + "status": "running", + "transformer": { + "created_at": "2022-11-09T15:23:47.27563Z", + "enabled": true, + "env_vars": [ + { + "name": "APP_NAME", + "value": "jaeger-cancellation-id-car-b" + }, + {"name": "GOOGLE_CLOUD_PROJECT", "value": "staging"}, + {"name": "COUNTRY", "value": "id"}, + {"name": "MODEL_VERSION", "value": "B"}, + {"name": "SERVICE_TYPE", "value": "car"}, + {"name": "PEAK_HOURS", "value": "6,7,8,16,17,18,19"}, + { + "name": "FEAST_SERVING_HOST", + "value": "online-serving.s.feast.company.com" + }, + {"name": "FEAST_SERVING_PORT", "value": "15010"}, + {"name": "FEAST_PROJECT", "value": "jaeger"}, + {"name": "FEAST_CACHE_ENABLED", "value": "True"}, + {"name": "FEAST_BATCH_SIZE", "value": "90"}, + {"name": "FEAST_CACHE_DURATION_SECOND", "value": "60"}, + { + "name": "FEAST_FEATURE_STATUS_MONITORING_ENABLED", + "value": "True" + }, + { + "name": "FEAST_FEATURE_VALUE_MONITORING_ENABLED", + "value": "False" + }, + {"name": "JAEGER_SAMPLER_TYPE", "value": "probabilistic"}, + 
{"name": "JAEGER_SAMPLER_PARAM", "value": "0.01"}, + {"name": "JAEGER_DISABLED", "value": "False"}, + { + "name": "MLFLOW_TRACKING_URI", + "value": "http://jaeger.mlflow.global.company.com/" + }, + { + "name": "MLFLOW_RUN_ID", + "value": "543f674167704d94bbc03025e50b78b7" + }, + {"name": "MLFLOW_DOWNLOAD_PATH", "value": "/data"}, + { + "name": "ORDER_FEATURE_CONFIG_PATH", + "value": "/data/order_features.json" + }, + { + "name": "FEAST_FEATURE_CONFIG_PATH", + "value": "/data/features.json" + }, + { + "name": "PUBLIC_HOLIDAY_FILE", + "value": "/data/public_holidays_feast_0_9.csv" + }, + {"name": "INIT_HEAP_SIZE_IN_MB", "value": "300"}, + { + "name": "STANDARD_TRANSFORMER_CONFIG", + "value": "{\"transformerConfig\":{\"preprocess\":{\"inputs\":[{\"tables\":[{\"name\":\"customer_order\",\"columns\":[{\"name\":\"order_id\",\"fromJson\":{\"jsonPath\":\"$.order_id\"}},{\"name\":\"service_area_id\",\"fromJson\":{\"jsonPath\":\"$.service_area_id\"}},{\"name\":\"order_time\",\"fromJson\":{\"jsonPath\":\"$.order_time\"}},{\"name\":\"start_latitude\",\"fromJson\":{\"jsonPath\":\"$.bid.from.latitude\"}},{\"name\":\"start_longitude\",\"fromJson\":{\"jsonPath\":\"$.bid.from.longitude\"}},{\"name\":\"end_latitude\",\"fromJson\":{\"jsonPath\":\"$.bid.to.latitude\"}},{\"name\":\"end_longitude\",\"fromJson\":{\"jsonPath\":\"$.bid.to.longitude\"}}]},{\"name\":\"driver_order\",\"baseTable\":{\"fromJson\":{\"jsonPath\":\"$.drivers\"}},\"columns\":[{\"name\":\"order_time\",\"fromJson\":{\"jsonPath\":\"$.order_time\"}},{\"name\":\"start_latitude\",\"fromJson\":{\"jsonPath\":\"$.bid.from.latitude\"}},{\"name\":\"start_longitude\",\"fromJson\":{\"jsonPath\":\"$.bid.from.longitude\"}},{\"name\":\"driver_latitude\",\"fromJson\":{\"jsonPath\":\"$.drivers[*].driver_location.latitude\"}},{\"name\":\"driver_longitude\",\"fromJson\":{\"jsonPath\":\"$.drivers[*].driver_location.longitude\"}},{\"name\":\"driver_elevation\",\"fromJson\":{\"jsonPath\":\"$.drivers[*].driver_location.elevation\"}},{\"nam
e\":\"driver_gps_accuracy\",\"fromJson\":{\"jsonPath\":\"$.drivers[*].driver_location.gps_accuracy\"}},{\"name\":\"standing_geohash_7\",\"expression\":\"Geohash(\\\"$.drivers[*].driver_location.latitude\\\", \\\"$.drivers[*].driver_location.longitude\\\", 7)\"}]},{\"name\":\"first_customer_location\",\"baseTable\":{\"fromJson\":{\"jsonPath\":\"$.drivers[*].first_customer_drop_off_location\"}},\"columns\":[{\"name\":\"driver_id\",\"fromJson\":{\"jsonPath\":\"$.drivers[*].driver_id\"}}]}]},{\"feast\":[{\"project\":\"jaeger\",\"entities\":[{\"name\":\"origin_geohash_7\",\"valueType\":\"STRING\",\"udf\":\"Geohash(\\\"$.bid.from.latitude\\\", \\\"$.bid.from.longitude\\\", 7)\"}],\"features\":[{\"name\":\"jaeger_car_origin_geohash_acceptance:origin_geohash_7_num_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_origin_geohash_acceptance:origin_geohash_7_prop_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_origin_geohash_acceptance:origin_geohash_7_avg_distance_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_origin_geohash_acceptance:origin_geohash_7_avg_polar_angle_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_origin_geohash_acceptance:origin_geohash_7_avg_customer_distance_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_origin_geohash_acceptance:origin_geohash_7_avg_distance_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_origin_geohash_acceptance:origin_geohash_7_avg_polar_angle_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_origin_geohash_acceptance:origin_geohash_7_avg_customer_distance_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"}],\"tableName\":\"origin_geohash_7\",\"source\":\"BIGTABLE\"},{\"project\":\"jaeger\",\"entities\":[{\"name\":\"destination_geohash_7\",\"valueType\":\"STRING\",\"udf\":\"Geohash(
\\\"$.bid.to.latitude\\\", \\\"$.bid.to.longitude\\\", 7)\"}],\"features\":[{\"name\":\"jaeger_car_destination_geohash_acceptance:destination_geohash_7_num_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_destination_geohash_acceptance:destination_geohash_7_prop_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_destination_geohash_acceptance:destination_geohash_7_avg_distance_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_destination_geohash_acceptance:destination_geohash_7_avg_distance_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"}],\"tableName\":\"destination_geohash_7\",\"source\":\"BIGTABLE\"},{\"project\":\"jaeger\",\"entities\":[{\"name\":\"standing_geohash_7\",\"valueType\":\"STRING\",\"udf\":\"Geohash(\\\"$.drivers.driver_location.latitude\\\", \\\"$.drivers.driver_location.longitude\\\", 7)\"}],\"features\":[{\"name\":\"jaeger_car_standing_geohash_acceptance:standing_geohash_7_num_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_standing_geohash_acceptance:standing_geohash_7_prop_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_standing_geohash_acceptance:standing_geohash_7_avg_polar_angle_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_standing_geohash_acceptance:standing_geohash_7_avg_customer_distance_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_standing_geohash_acceptance:standing_geohash_7_avg_polar_angle_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_standing_geohash_acceptance:standing_geohash_7_avg_customer_distance_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"}],\"tableName\":\"standing_geohash_7\",\"source\":\"BIGTABLE\"},{\"project\":\"jaeger\",\"entities\":[{\"name\":\"driver_id\",\"valueType\":\"STRING\",\"jsonPath\":\"$.drivers.driver_id
\"}],\"features\":[{\"name\":\"jaeger_car_driver_acceptance:driver_id_num_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_prop_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_avg_distance_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_avg_customer_distance_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_avg_distance_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_avg_customer_distance_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_standing_accepted_1\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_standing_accepted_2\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_standing_accepted_3\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_standing_rej_ignore_1\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_standing_rej_ignore_2\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_standing_rej_ignore_3\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_origin_accepted_1\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_origin_accepted_2\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_origin_accepted_3\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_origin_rej_ignore_1\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"
jaeger_car_driver_acceptance:driver_id_origin_rej_ignore_2\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_origin_rej_ignore_3\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_destination_accepted_1\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_destination_accepted_2\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_destination_accepted_3\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_destination_rej_ignore_1\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_destination_rej_ignore_2\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_destination_rej_ignore_3\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_donut_count_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_donut_count_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_num_accepted_recent\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_prop_accepted_recent\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_avg_distance_accepted_recent\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_avg_customer_distance_accepted_recent\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_avg_distance_rej_ignore_recent\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_avg_customer_distance_rej_ignore_recent\",\"valueType\":\"FLOAT\
",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_donut_count_accepted_recent\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_driver_acceptance:driver_id_donut_count_rej_ignore_recent\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"}],\"tableName\":\"driver_id\",\"source\":\"BIGTABLE\"},{\"project\":\"jaeger\",\"entities\":[{\"name\":\"customer_id\",\"valueType\":\"STRING\",\"jsonPath\":\"$.customer_id\"}],\"features\":[{\"name\":\"jaeger_car_customer_acceptance:customer_id_num_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_prop_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_avg_distance_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_avg_customer_distance_accepted\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_avg_distance_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_avg_customer_distance_rej_ignore\",\"valueType\":\"FLOAT\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_origin_accepted_1\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_origin_accepted_2\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_origin_accepted_3\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_origin_rej_ignore_1\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_origin_rej_ignore_2\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_origin_rej_ignore_3\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"
},{\"name\":\"jaeger_car_customer_acceptance:customer_id_destination_accepted_1\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_destination_accepted_2\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_destination_accepted_3\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_destination_rej_ignore_1\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_destination_rej_ignore_2\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"},{\"name\":\"jaeger_car_customer_acceptance:customer_id_destination_rej_ignore_3\",\"valueType\":\"INT64\",\"defaultValue\":\"-1\"}],\"tableName\":\"customer_id\",\"source\":\"BIGTABLE\"}]}],\"transformations\":[{\"tableJoin\":{\"leftTable\":\"driver_order\",\"rightTable\":\"standing_geohash_7\",\"outputTable\":\"driver_tb1\",\"how\":\"LEFT\",\"onColumn\":\"standing_geohash_7\"}},{\"tableJoin\":{\"leftTable\":\"driver_tb1\",\"rightTable\":\"driver_id\",\"outputTable\":\"driver_tb2\",\"how\":\"LEFT\",\"onColumn\":\"driver_id\"}},{\"tableJoin\":{\"leftTable\":\"driver_tb2\",\"rightTable\":\"first_customer_location\",\"outputTable\":\"driver_tb3\",\"how\":\"LEFT\",\"onColumn\":\"driver_id\"}},{\"tableJoin\":{\"leftTable\":\"origin_geohash_7\",\"rightTable\":\"destination_geohash_7\",\"outputTable\":\"customer_feast_tb1\",\"how\":\"CONCAT\"}},{\"tableJoin\":{\"leftTable\":\"customer_feast_tb1\",\"rightTable\":\"customer_id\",\"outputTable\":\"customer_feast_tb2\",\"how\":\"CONCAT\"}},{\"tableTransformation\":{\"inputTable\":\"driver_tb3\",\"outputTable\":\"driver_tb3\",\"steps\":[{\"updateColumns\":[{\"column\":\"customer_distance\",\"expression\":\"map(HaversineDistance(driver_tb3.Col('driver_latitude'), driver_tb3.Col('driver_longitude'), driver_tb3.Col('start_latitude'), driver_tb3.Col('start_longitude')), {# * 
1000})\"},{\"column\":\"polar_angle\",\"expression\":\"PolarAngle(driver_tb3.Col('driver_latitude'), driver_tb3.Col('driver_longitude'),driver_tb3.Col('start_latitude'), driver_tb3.Col('start_longitude'))\"}]}]}}],\"outputs\":[{\"jsonOutput\":{\"jsonTemplate\":{\"fields\":[{\"fieldName\":\"driver_features\",\"fromTable\":{\"tableName\":\"driver_tb3\",\"format\":\"RECORD\"}},{\"fieldName\":\"customer_features\",\"fromTable\":{\"tableName\":\"customer_order\",\"format\":\"RECORD\"}},{\"fieldName\":\"customer_feast_features\",\"fromTable\":{\"tableName\":\"customer_feast_tb2\",\"format\":\"RECORD\"}},{\"fieldName\":\"bid\",\"fromJson\":{\"jsonPath\":\"$.bid\"}},{\"fieldName\":\"order_details\",\"expression\":\"JsonExtract(\\\"$.details\\\", \\\"$\\\")\"}]}}}]},\"postprocess\":{}}}" + }, + { + "name": "FEAST_FEATURE_TABLE_SPECS_JSONS", + "value": "[{\"name\":\"jaeger_car_origin_geohash_acceptance\",\"project\":\"jaeger\",\"maxAge\":\"3455990s\",\"entities\":[\"origin_geohash_7\"]},{\"name\":\"jaeger_car_destination_geohash_acceptance\",\"project\":\"jaeger\",\"maxAge\":\"3455990s\",\"entities\":[\"destination_geohash_7\"]},{\"name\":\"jaeger_car_standing_geohash_acceptance\",\"project\":\"jaeger\",\"maxAge\":\"3455990s\",\"entities\":[\"standing_geohash_7\"]},{\"name\":\"jaeger_car_driver_acceptance\",\"project\":\"jaeger\",\"maxAge\":\"3455990s\",\"entities\":[\"driver_id\"]},{\"name\":\"jaeger_car_customer_acceptance\",\"project\":\"jaeger\",\"maxAge\":\"3455990s\",\"entities\":[\"customer_id\"]}]" + } + ], + "id": "17069", + "image": "asia.gcr.io/staging/merlin-transformer:0.24.0-rc3", + "resource_request": { + "cpu_request": "4", + "max_replica": 4, + "memory_request": "4Gi", + "min_replica": 0 + }, + "transformer_type": "standard", + "updated_at": "2022-11-09T15:33:56.809769Z", + "version_endpoint_id": "e355ce59-eb2b-4c75-883d-c517ad530cf0" + }, + "updated_at": "2022-11-09T15:33:56.808307Z", + "url": 
"http://id-car-b-default-582.jaeger.models.company.com/v1/models/id-car-b-default-582", + "version_id": 582 + }, + "version_endpoint_id": "e355ce59-eb2b-4c75-883d-c517ad530cf0", + "weight": 100 + } + ] + }, + "status": "serving", + "updated_at": "2022-11-09T15:34:07.651591Z", + "url": "id-car-b-default.jaeger.models.company.com" + } + ], + "id": 284, + "mlflow_experiment_id": 287, + "mlflow_url": "http://mlflow.company.com/#/experiments/287", + "name": "id-car-b-default", + "project_id": 100, + "type": "pyfunc", + "updated_at": "2022-11-09T15:34:07.647932Z" + } +] diff --git a/plugins/extractors/merlin/testdata/mocked-projects.json b/plugins/extractors/merlin/testdata/mocked-projects.json new file mode 100644 index 000000000..7e9586def --- /dev/null +++ b/plugins/extractors/merlin/testdata/mocked-projects.json @@ -0,0 +1,44 @@ +[ + { + "id": 1, + "name": "one-piece", + "mlflow_tracking_url": "http://mlflow.company.com", + "administrators": [ + "s-ds-gitlab-runner@company-staging.iam.gserviceaccount.com", + "gol.d.roger@onepiece.com" + ], + "readers": null, + "team": "pirates", + "stream": "roger", + "labels": null, + "created_at": "2019-11-19T11:05:11.41501Z", + "updated_at": "2022-06-23T09:31:25.834714Z" + }, + { + "id": 100, + "name": "strongest-man", + "mlflow_tracking_url": "http://mlflow.company.com", + "administrators": ["edward.newgate@onepiece.com"], + "readers": ["ace.d.portgas@onepiece.com"], + "team": "pirates", + "stream": "whitebeard", + "labels": null, + "created_at": "2021-09-20T05:20:53.540571Z", + "updated_at": "2021-09-20T05:20:53.540571Z" + }, + { + "id": 200, + "name": "kurohige", + "mlflow_tracking_url": "http://mlflow.company.com", + "administrators": [ + "teach.d.marshall@onepiece.com", + "s-ds-gitlab-runner@company-staging.iam.gserviceaccount.com" + ], + "readers": ["jesus.burgess@gojek.com"], + "team": "pirates", + "stream": "blackbeard", + "labels": null, + "created_at": "2021-10-29T02:12:43.142433Z", + "updated_at": 
"2022-05-08T17:56:32.924721Z" + } +] diff --git a/plugins/extractors/populate.go b/plugins/extractors/populate.go index 40d6f8f61..8de765f41 100644 --- a/plugins/extractors/populate.go +++ b/plugins/extractors/populate.go @@ -16,6 +16,7 @@ import ( _ "github.com/odpf/meteor/plugins/extractors/gsuite" _ "github.com/odpf/meteor/plugins/extractors/kafka" _ "github.com/odpf/meteor/plugins/extractors/mariadb" + _ "github.com/odpf/meteor/plugins/extractors/merlin" _ "github.com/odpf/meteor/plugins/extractors/metabase" _ "github.com/odpf/meteor/plugins/extractors/mongodb" _ "github.com/odpf/meteor/plugins/extractors/mssql" diff --git a/plugins/processors/script/README.md b/plugins/processors/script/README.md index 5e7c12799..5da759d3b 100644 --- a/plugins/processors/script/README.md +++ b/plugins/processors/script/README.md @@ -224,29 +224,29 @@ for information on contributing to this module. [tengo-stdlib]: https://github.com/d5/tengo/blob/v2.13.0/docs/stdlib.md -[proton-asset]: https://github.com/odpf/proton/blob/0acbe8a/odpf/assets/v1beta2/asset.proto#L14 +[proton-asset]: https://github.com/odpf/proton/blob/5267e1f/odpf/assets/v1beta2/asset.proto#L14 -[proton-bucket]: https://github.com/odpf/proton/blob/0acbe8a/odpf/assets/v1beta2/bucket.proto#L13 +[proton-bucket]: https://github.com/odpf/proton/blob/5267e1f/odpf/assets/v1beta2/bucket.proto#L13 -[proton-dashboard]: https://github.com/odpf/proton/blob/0acbe8a/odpf/assets/v1beta2/dashboard.proto#L14 +[proton-dashboard]: https://github.com/odpf/proton/blob/5267e1f/odpf/assets/v1beta2/dashboard.proto#L14 -[proton-experiment]: https://github.com/odpf/proton/blob/0acbe8a/odpf/assets/v1beta2/experiment.proto#L15 +[proton-experiment]: https://github.com/odpf/proton/blob/5267e1f/odpf/assets/v1beta2/experiment.proto#L15 -[proton-featuretable]: https://github.com/odpf/proton/blob/0acbe8a/odpf/assets/v1beta2/feature_table.proto#L32 +[proton-featuretable]: 
https://github.com/odpf/proton/blob/5267e1f/odpf/assets/v1beta2/feature_table.proto#L32 -[proton-group]: https://github.com/odpf/proton/blob/0acbe8a/odpf/assets/v1beta2/group.proto#L12 +[proton-group]: https://github.com/odpf/proton/blob/5267e1f/odpf/assets/v1beta2/group.proto#L12 -[proton-job]: https://github.com/odpf/proton/blob/0acbe8a/odpf/assets/v1beta2/job.proto#L13 +[proton-job]: https://github.com/odpf/proton/blob/5267e1f/odpf/assets/v1beta2/job.proto#L13 -[proton-metric]: https://github.com/odpf/proton/blob/0acbe8a/odpf/assets/v1beta2/metric.proto#L13 +[proton-metric]: https://github.com/odpf/proton/blob/5267e1f/odpf/assets/v1beta2/metric.proto#L13 -[proton-model]: https://github.com/odpf/proton/blob/0acbe8a/odpf/assets/v1beta2/model.proto#L17 +[proton-model]: https://github.com/odpf/proton/blob/5267e1f/odpf/assets/v1beta2/model.proto#L73 -[proton-service]: https://github.com/odpf/proton/blob/0acbe8a/odpf/assets/v1beta2/service.proto#L11 +[proton-service]: https://github.com/odpf/proton/blob/5267e1f/odpf/assets/v1beta2/service.proto#L11 -[proton-table]: https://github.com/odpf/proton/blob/0acbe8a/odpf/assets/v1beta2/table.proto#L14 +[proton-table]: https://github.com/odpf/proton/blob/5267e1f/odpf/assets/v1beta2/table.proto#L14 -[proton-topic]: https://github.com/odpf/proton/blob/0acbe8a/odpf/assets/v1beta2/topic.proto#L14 +[proton-topic]: https://github.com/odpf/proton/blob/5267e1f/odpf/assets/v1beta2/topic.proto#L14 -[proton-user]: https://github.com/odpf/proton/blob/0acbe8a/odpf/assets/v1beta2/user.proto#L15 +[proton-user]: https://github.com/odpf/proton/blob/5267e1f/odpf/assets/v1beta2/user.proto#L15 diff --git a/plugins/sinks/compass/sink_test.go b/plugins/sinks/compass/sink_test.go index 7e5ffb99c..2e6b987d7 100644 --- a/plugins/sinks/compass/sink_test.go +++ b/plugins/sinks/compass/sink_test.go @@ -7,7 +7,6 @@ import ( "bytes" "context" "encoding/json" - "errors" "fmt" "io" "net/http" @@ -111,7 +110,7 @@ func TestSink(t *testing.T) { } err = 
compassSink.Sink(ctx, []models.Record{models.NewRecord(data)}) require.Error(t, err) - assert.True(t, errors.Is(err, plugins.RetryError{})) + assert.ErrorAs(t, err, &plugins.RetryError{}) }) } }) diff --git a/plugins/sinks/http/http_test.go b/plugins/sinks/http/http_test.go index 72180e7eb..888197b5e 100644 --- a/plugins/sinks/http/http_test.go +++ b/plugins/sinks/http/http_test.go @@ -16,7 +16,6 @@ import ( "github.com/odpf/meteor/plugins" h "github.com/odpf/meteor/plugins/sinks/http" testutils "github.com/odpf/meteor/test/utils" - "github.com/pkg/errors" "github.com/stretchr/testify/assert" "google.golang.org/protobuf/types/known/anypb" ) @@ -122,7 +121,7 @@ func TestSink(t *testing.T) { assert.NoError(t, err) defer httpSink.Close() err = httpSink.Sink(context.TODO(), getExpectedVal(t)) - assert.True(t, errors.Is(err, plugins.RetryError{})) + assert.ErrorAs(t, err, &plugins.RetryError{}) port += 2 }) } diff --git a/plugins/sinks/shield/sink_test.go b/plugins/sinks/shield/sink_test.go index 39f60608d..9451b5f23 100644 --- a/plugins/sinks/shield/sink_test.go +++ b/plugins/sinks/shield/sink_test.go @@ -19,7 +19,7 @@ import ( v1beta2 "github.com/odpf/meteor/models/odpf/assets/v1beta2" "github.com/odpf/meteor/plugins" - shield "github.com/odpf/meteor/plugins/sinks/shield" + "github.com/odpf/meteor/plugins/sinks/shield" shieldProto "github.com/odpf/shield/proto/v1beta1" "github.com/stretchr/testify/assert" @@ -102,7 +102,7 @@ func TestSink(t *testing.T) { err = shieldSink.Sink(ctx, []models.Record{models.NewRecord(data)}) require.Error(t, err) - assert.True(t, errors.Is(err, plugins.RetryError{})) + assert.ErrorAs(t, err, &plugins.RetryError{}) }) diff --git a/plugins/sinks/stencil/stencil_test.go b/plugins/sinks/stencil/stencil_test.go index 3c7906136..5b3b06980 100644 --- a/plugins/sinks/stencil/stencil_test.go +++ b/plugins/sinks/stencil/stencil_test.go @@ -7,7 +7,6 @@ import ( "bytes" "context" "encoding/json" - "errors" "fmt" "io" "net/http" @@ -107,7 +106,7 @@ 
func TestSink(t *testing.T) { Data: table, } err = stencilSink.Sink(ctx, []models.Record{models.NewRecord(asset)}) - assert.True(t, errors.Is(err, plugins.RetryError{})) + assert.ErrorAs(t, err, &plugins.RetryError{}) }) } }) diff --git a/plugins/util.go b/plugins/util.go index a706c18d1..963ad15f6 100644 --- a/plugins/util.go +++ b/plugins/util.go @@ -80,6 +80,10 @@ func BigQueryURN(projectID, datasetID, tableID string) string { return models.NewURN("bigquery", projectID, "table", fqn) } +func KafkaURN(bootstrapServers, topic string) string { + return models.NewURN("kafka", KafkaServersToScope(bootstrapServers), "topic", topic) +} + func KafkaServersToScope(servers string) string { if strings.IndexRune(servers, ',') > 0 { // there are multiple bootstrap servers, just strip port, sort and join @@ -105,6 +109,10 @@ func KafkaServersToScope(servers string) string { return host } +func CaraMLStoreURN(scope, project, featureTable string) string { + return models.NewURN("caramlstore", scope, "feature_table", project+"."+featureTable) +} + func parseBQTableFQN(fqn string) (projectID, datasetID, tableID string, err error) { // fqn is the ID of the table in projectID:datasetID.tableID format. 
if !strings.ContainsRune(fqn, ':') || strings.IndexRune(fqn, '.') < strings.IndexRune(fqn, ':') { diff --git a/plugins/util_test.go b/plugins/util_test.go index e93b699d4..a82f23b29 100644 --- a/plugins/util_test.go +++ b/plugins/util_test.go @@ -55,6 +55,39 @@ func TestBigQueryTableFQNToURN(t *testing.T) { } } +func TestKafkaURN(t *testing.T) { + cases := []struct { + name string + servers string + topic string + expected string + }{ + { + name: "Simple", + servers: "celestial-dragons-prodstream.yonkou.io:9999", + topic: "staging_feast09_mixed_granularity_demand_forecast_3es", + expected: "urn:kafka:celestial-dragons-prodstream.yonkou.io:topic:staging_feast09_mixed_granularity_demand_forecast_3es", + }, + { + name: "MultipleBootstrapServers", + servers: "2-my-kafka.company.com:9999,1-my-kafka.company.com:9999", + topic: "staging_feast09_mixed_granularity_demand_forecast_3es", + expected: "urn:kafka:1-my-kafka.company.com,2-my-kafka.company.com:topic:staging_feast09_mixed_granularity_demand_forecast_3es", + }, + { + name: "SlugBootstrapServer", + servers: "1-my-kafka", + topic: "staging_feast09_mixed_granularity_demand_forecast_3es", + expected: "urn:kafka:1-my-kafka:topic:staging_feast09_mixed_granularity_demand_forecast_3es", + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + assert.Equal(t, tc.expected, plugins.KafkaURN(tc.servers, tc.topic)) + }) + } +} + func TestKafkaServersToScope(t *testing.T) { cases := map[string]string{ "int-dagstream-kafka.yonkou.io:9999": "int-dagstream-kafka.yonkou.io", @@ -70,3 +103,11 @@ func TestKafkaServersToScope(t *testing.T) { }) } } + +func TestCaraMLStoreURN(t *testing.T) { + assert.Equal( + t, + "urn:caramlstore:my_scope:feature_table:my_project.my_ft", + plugins.CaraMLStoreURN("my_scope", "my_project", "my_ft"), + ) +} diff --git a/test/utils/http_helpers.go b/test/utils/http_helpers.go new file mode 100644 index 000000000..d89c8bf49 --- /dev/null +++ b/test/utils/http_helpers.go @@ -0,0 +1,50 @@ 
+package utils + +import ( + "bytes" + "encoding/json" + "io" + "net/http" + "testing" +) + +func ValueAsJSONReader(t *testing.T, v interface{}) io.ReadCloser { + t.Helper() + + var buf bytes.Buffer + if err := json.NewEncoder(&buf).Encode(v); err != nil { + t.Fatalf("ValueAsJSONReader() = %v", err) + } + return io.NopCloser(&buf) +} + +func Respond(t *testing.T, w http.ResponseWriter, status int, v interface{}) { + t.Helper() + + if v == nil { + w.WriteHeader(status) + return + } + + switch body := v.(type) { + case string: + Respond(t, w, status, ([]byte)(body)) + return + + case []byte: + if !json.Valid(body) { + w.WriteHeader(status) + if _, err := w.Write(body); err != nil { + t.Fatalf("Respond() = %v", err) + } + return + } + v = (json.RawMessage)(body) + } + w.Header().Set("Content-Type", "application/json; charset=utf-8") + w.WriteHeader(status) + + if err := json.NewEncoder(w).Encode(v); err != nil { + t.Fatalf("Respond() = %v", err) + } +} diff --git a/test/utils/load.go b/test/utils/load.go index 63534cbd7..f85347c9b 100644 --- a/test/utils/load.go +++ b/test/utils/load.go @@ -1,6 +1,7 @@ package utils import ( + "encoding/json" "os" "testing" @@ -9,12 +10,16 @@ import ( "google.golang.org/protobuf/proto" ) -func LoadJSONIntoProto(t *testing.T, filePath string, m proto.Message) { +func LoadJSON(t *testing.T, filePath string, v interface{}) { t.Helper() data, err := os.ReadFile(filePath) require.NoError(t, err) - err = protojson.Unmarshal(data, m) + if m, ok := v.(proto.Message); ok { + err = protojson.Unmarshal(data, m) + } else { + err = json.Unmarshal(data, v) + } require.NoError(t, err) }