Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cmd/scheduler/app/options/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ type ServerOption struct {
DefaultQueue string
PrintVersion bool
EnableMetrics bool
EnablePprof bool
ListenAddress string
EnablePriorityClass bool
EnableCSIStorage bool
Expand Down Expand Up @@ -138,6 +139,7 @@ func (s *ServerOption) AddFlags(fs *pflag.FlagSet) {
"Enable tracking of available storage capacity that CSI drivers provide; it is false by default")
fs.BoolVar(&s.EnableHealthz, "enable-healthz", false, "Enable the health check; it is false by default")
fs.BoolVar(&s.EnableMetrics, "enable-metrics", false, "Enable the metrics function; it is false by default")
fs.BoolVar(&s.EnablePprof, "enable-pprof", false, "Enable the pprof endpoint; it is false by default")
fs.StringSliceVar(&s.NodeSelector, "node-selector", nil, "volcano only work with the labeled node, like: --node-selector=volcano.sh/role:train --node-selector=volcano.sh/role:serving")
fs.BoolVar(&s.EnableCacheDumper, "cache-dumper", true, "Enable the cache dumper, it's true by default")
fs.StringVar(&s.CacheDumpFileDir, "cache-dump-dir", "/tmp", "The target dir where the json file put at when dump cache info to json file")
Expand Down
37 changes: 31 additions & 6 deletions cmd/scheduler/app/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ import (
"context"
"fmt"
"net/http"
"net/http/pprof"
"os"
"time"

"volcano.sh/apis/pkg/apis/helpers"

"volcano.sh/volcano/cmd/scheduler/app/options"
"volcano.sh/volcano/pkg/kube"
"volcano.sh/volcano/pkg/scheduler"
Expand Down Expand Up @@ -85,11 +85,8 @@ func Run(opt *options.ServerOption) error {
panic(err)
}

if opt.EnableMetrics {
go func() {
http.Handle("/metrics", promHandler())
klog.Fatalf("Prometheus Http Server failed %s", http.ListenAndServe(opt.ListenAddress, nil))
}()
if opt.EnableMetrics || opt.EnablePprof {
go startMetricsServer(opt)
}

if opt.EnableHealthz {
Expand Down Expand Up @@ -160,3 +157,31 @@ func promHandler() http.Handler {
prometheus.DefaultRegisterer.Unregister(collectors.NewGoCollector())
return promhttp.InstrumentMetricHandler(prometheus.DefaultRegisterer, promhttp.HandlerFor(prometheus.Gatherers{prometheus.DefaultGatherer, legacyregistry.DefaultGatherer}, promhttp.HandlerOpts{}))
}

// startMetricsServer runs the scheduler's auxiliary HTTP server on
// opt.ListenAddress and blocks for as long as that server is serving.
//
// Routes are registered on a dedicated mux rather than http.DefaultServeMux:
//   - "/metrics" is mounted only when opt.EnableMetrics is set;
//   - the "/debug/pprof/" handlers are mounted only when opt.EnablePprof is set.
//
// If ListenAndServe returns an error it is logged and the function returns;
// the process is not terminated here.
func startMetricsServer(opt *options.ServerOption) {
	router := http.NewServeMux()

	if opt.EnableMetrics {
		router.Handle("/metrics", promHandler())
	}

	if opt.EnablePprof {
		// pprof.Index is mounted on the subtree pattern, so it also receives
		// every /debug/pprof/<name> request that has no dedicated route below.
		pprofRoutes := []struct {
			pattern string
			handle  func(http.ResponseWriter, *http.Request)
		}{
			{"/debug/pprof/", pprof.Index},
			{"/debug/pprof/cmdline", pprof.Cmdline},
			{"/debug/pprof/profile", pprof.Profile},
			{"/debug/pprof/symbol", pprof.Symbol},
			{"/debug/pprof/trace", pprof.Trace},
		}
		for _, route := range pprofRoutes {
			router.HandleFunc(route.pattern, route.handle)
		}
	}

	// NOTE(review): pprof.Profile and pprof.Trace refuse requests whose
	// duration reaches the server's WriteTimeout — confirm that
	// helpers.DefaultWriteTimeout leaves room for the default CPU profile.
	srv := &http.Server{
		Addr:              opt.ListenAddress,
		Handler:           router,
		ReadHeaderTimeout: helpers.DefaultReadHeaderTimeout,
		ReadTimeout:       helpers.DefaultReadTimeout,
		WriteTimeout:      helpers.DefaultWriteTimeout,
	}

	err := srv.ListenAndServe()
	if err == nil {
		return
	}
	klog.Errorf("start metrics/pprof http server failed: %v", err)
}
3 changes: 0 additions & 3 deletions cmd/scheduler/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,6 @@ import (
"runtime"
"time"

// init pprof server
_ "net/http/pprof"

"github.com/spf13/pflag"
_ "go.uber.org/automaxprocs"

Expand Down
7 changes: 5 additions & 2 deletions cmd/webhook-manager/app/server.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,11 @@ func Run(config *options.Config) error {
signal.Notify(stopChannel, syscall.SIGTERM, syscall.SIGINT)

server := &http.Server{
Addr: config.ListenAddress + ":" + strconv.Itoa(config.Port),
TLSConfig: configTLS(config, restConfig),
Addr: config.ListenAddress + ":" + strconv.Itoa(config.Port),
TLSConfig: configTLS(config, restConfig),
ReadHeaderTimeout: helpers.DefaultReadHeaderTimeout,
ReadTimeout: helpers.DefaultReadTimeout,
WriteTimeout: helpers.DefaultWriteTimeout,
}
go func() {
err = server.ListenAndServeTLS("", "")
Expand Down
Empty file modified docs/design/jobflow/README.md
100755 → 100644
Empty file.
3 changes: 3 additions & 0 deletions installer/helm/chart/volcano/templates/scheduler.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,9 @@ spec:
- --scheduler-conf=/volcano.scheduler/{{base .Values.basic.scheduler_config_file}}
- --enable-healthz=true
- --enable-metrics=true
{{- if .Values.custom.scheduler_pprof_enable }}
- --enable-pprof=true
{{- end }}
- --leader-elect={{ .Values.custom.leader_elect_enable }}
{{- if .Values.custom.leader_elect_enable }}
- --lock-object-namespace={{ .Release.Namespace }}
Expand Down
1 change: 1 addition & 0 deletions installer/helm/chart/volcano/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ custom:
controller_replicas: 1
scheduler_enable: true
scheduler_replicas: 1
scheduler_pprof_enable: false
leader_elect_enable: false
enabled_admissions: "/jobs/mutate,/jobs/validate,/podgroups/mutate,/pods/validate,/pods/mutate,/queues/mutate,/queues/validate"

Expand Down
Empty file modified pkg/controllers/jobflow/constant.go
100755 → 100644
Empty file.
Empty file modified pkg/controllers/jobflow/jobflow_controller.go
100755 → 100644
Empty file.
Empty file modified pkg/controllers/jobflow/jobflow_controller_action.go
100755 → 100644
Empty file.
Empty file modified pkg/controllers/jobflow/jobflow_controller_action_test.go
100755 → 100644
Empty file.
Empty file modified pkg/controllers/jobflow/jobflow_controller_handler.go
100755 → 100644
Empty file.
Empty file modified pkg/controllers/jobflow/jobflow_controller_handler_test.go
100755 → 100644
Empty file.
Empty file modified pkg/controllers/jobflow/jobflow_controller_util.go
100755 → 100644
Empty file.
Empty file modified pkg/controllers/jobflow/jobflow_controller_util_test.go
100755 → 100644
Empty file.
Empty file modified pkg/controllers/jobflow/state/factory.go
100755 → 100644
Empty file.
Empty file modified pkg/controllers/jobflow/state/failed.go
100755 → 100644
Empty file.
Empty file modified pkg/controllers/jobflow/state/pending.go
100755 → 100644
Empty file.
Empty file modified pkg/controllers/jobflow/state/running.go
100755 → 100644
Empty file.
Empty file modified pkg/controllers/jobflow/state/succeed.go
100755 → 100644
Empty file.
Empty file modified pkg/controllers/jobflow/state/terminating.go
100755 → 100644
Empty file.
Empty file modified pkg/controllers/jobtemplate/constant.go
100755 → 100644
Empty file.
Empty file modified pkg/controllers/jobtemplate/jobTemplate_controller_util_test.go
100755 → 100644
Empty file.
Empty file modified pkg/controllers/jobtemplate/jobtemplate_controller.go
100755 → 100644
Empty file.
Empty file modified pkg/controllers/jobtemplate/jobtemplate_controller_action.go
100755 → 100644
Empty file.
Empty file.
Empty file modified pkg/controllers/jobtemplate/jobtemplate_controller_handler.go
100755 → 100644
Empty file.
Empty file.
Empty file modified pkg/controllers/jobtemplate/jobtemplate_controller_util.go
100755 → 100644
Empty file.
5 changes: 5 additions & 0 deletions pkg/scheduler/metrics/source/metrics_client_elasticsearch.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"time"

"github.com/elastic/go-elasticsearch/v7"
"k8s.io/klog/v2"
)

const (
Expand Down Expand Up @@ -65,6 +66,10 @@ func NewElasticsearchMetricsClient(conf map[string]string) (*ElasticsearchMetric
}
var err error
insecureSkipVerify := conf["tls.insecureSkipVerify"] == "true"
if insecureSkipVerify {
klog.Warningf("WARNING: TLS certificate verification is disabled which is insecure. This should not be used in production environments")
}

e.es, err = elasticsearch.NewClient(elasticsearch.Config{
Addresses: []string{address},
Username: conf["elasticsearch.username"],
Expand Down
4 changes: 4 additions & 0 deletions pkg/scheduler/metrics/source/metrics_client_prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ func (p *PrometheusMetricsClient) NodeMetricsAvg(ctx context.Context, nodeName s
var client api.Client
var err error
insecureSkipVerify := p.conf["tls.insecureSkipVerify"] == "true"
if insecureSkipVerify {
klog.Warningf("WARNING: TLS certificate verification is disabled which is insecure. This should not be used in production environments")
}

tr := &http.Transport{
TLSClientConfig: &tls.Config{
InsecureSkipVerify: insecureSkipVerify,
Expand Down
7 changes: 6 additions & 1 deletion pkg/util/socket.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ import (
"golang.org/x/sys/unix"
"k8s.io/apimachinery/pkg/util/runtime"
"k8s.io/klog/v2"

"volcano.sh/apis/pkg/apis/helpers"
)

const (
Expand Down Expand Up @@ -201,7 +203,10 @@ func listenUnix(componentName string, socketDir string) (net.Listener, error) {
// serveOnListener serves HTTP on the already-open listener l, routing every
// request through the mux m. The server is configured with the read-header,
// read and write timeouts taken from the helpers package. The call blocks
// inside server.Serve and returns whatever error Serve reports.
func serveOnListener(l net.Listener, m *http.ServeMux) error {
server := http.Server{
Handler: m,
Handler: m,
ReadHeaderTimeout: helpers.DefaultReadHeaderTimeout,
ReadTimeout: helpers.DefaultReadTimeout,
WriteTimeout: helpers.DefaultWriteTimeout,
}
return server.Serve(l)
}
Expand Down
Empty file modified pkg/webhooks/admission/pods/mutate/annotation.go
100755 → 100644
Empty file.
Empty file modified pkg/webhooks/admission/pods/mutate/factory.go
100755 → 100644
Empty file.
Empty file modified pkg/webhooks/admission/pods/mutate/namespace.go
100755 → 100644
Empty file.
Loading