Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ FROM alpine:3.23

RUN apk add --no-cache ca-certificates tzdata curl

# Health check
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
CMD wget -q --spider http://localhost:18790/health || exit 1

# Copy binary
COPY --from=builder /src/build/picoclaw /usr/local/bin/picoclaw

Expand Down
11 changes: 11 additions & 0 deletions cmd/picoclaw/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"fmt"
"io"
"io/fs"
"net/http"
"os"
"os/signal"
"path/filepath"
Expand All @@ -28,6 +29,7 @@ import (
"github.com/sipeed/picoclaw/pkg/config"
"github.com/sipeed/picoclaw/pkg/cron"
"github.com/sipeed/picoclaw/pkg/devices"
"github.com/sipeed/picoclaw/pkg/health"
"github.com/sipeed/picoclaw/pkg/heartbeat"
"github.com/sipeed/picoclaw/pkg/logger"
"github.com/sipeed/picoclaw/pkg/migrate"
Expand Down Expand Up @@ -658,6 +660,14 @@ func gatewayCmd() {
fmt.Printf("Error starting channels: %v\n", err)
}

healthServer := health.NewServer(cfg.Gateway.Host, cfg.Gateway.Port)
go func() {
if err := healthServer.Start(); err != nil && err != http.ErrServerClosed {
logger.ErrorCF("health", "Health server error", map[string]interface{}{"error": err.Error()})
}
}()
fmt.Printf("✓ Health endpoints available at http://%s:%d/health and /ready\n", cfg.Gateway.Host, cfg.Gateway.Port)

go agentLoop.Run(ctx)

sigChan := make(chan os.Signal, 1)
Expand All @@ -666,6 +676,7 @@ func gatewayCmd() {

fmt.Println("\nShutting down...")
cancel()
healthServer.Stop(context.Background())
deviceService.Stop()
heartbeatService.Stop()
cronService.Stop()
Expand Down
164 changes: 164 additions & 0 deletions pkg/health/server.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,164 @@
package health

import (
	"context"
	"encoding/json"
	"fmt"
	"net"
	"net/http"
	"sync"
	"time"
)

// Server is a small HTTP server that exposes health and readiness state.
// Its handlers are registered on a private mux by NewServer.
type Server struct {
// server is the underlying net/http server that Start, StartContext and
// Stop operate on.
server *http.Server
// mu guards ready and checks.
mu sync.RWMutex
// ready is the readiness flag reported by the readiness handler; it is
// flipped by Start, StartContext, Stop and SetReady.
ready bool
// checks holds the most recently recorded result for each named check.
checks map[string]Check
// startTime is captured when the Server is constructed and is used to
// compute the uptime reported in responses.
startTime time.Time
}

// Check is the recorded outcome of a single named check, as serialized in
// readiness responses.
type Check struct {
// Name is the identifier the check was registered under.
Name string `json:"name"`
// Status is the textual result of the check ("ok" or "fail" when produced
// by statusString).
Status string `json:"status"`
// Message is optional detail returned by the check; it is omitted from the
// JSON output when empty.
Message string `json:"message,omitempty"`
// Timestamp is when the result was recorded.
Timestamp time.Time `json:"timestamp"`
}

// StatusResponse is the JSON body written by the health and readiness
// handlers.
type StatusResponse struct {
// Status is the overall state: "ok" from the health handler, "ready" or
// "not ready" from the readiness handler.
Status string `json:"status"`
// Uptime is the time elapsed since the server was constructed, formatted
// with time.Duration.String. It is left empty in "not ready" responses.
Uptime string `json:"uptime"`
// Checks maps check names to their recorded results; it is omitted from
// the JSON output when empty.
Checks map[string]Check `json:"checks,omitempty"`
}

// NewServer builds a health Server that will listen on host:port once
// Start or StartContext is called. It registers the /health and /ready
// handlers on a private mux and records the construction time as the
// start of the uptime clock. The returned server starts out not ready.
//
// host may be empty (listen on all interfaces), a hostname, an IPv4
// address, or an IPv6 literal with or without surrounding brackets.
func NewServer(host string, port int) *Server {
	s := &Server{
		ready:     false,
		checks:    make(map[string]Check),
		startTime: time.Now(),
	}

	mux := http.NewServeMux()
	mux.HandleFunc("/health", s.healthHandler)
	mux.HandleFunc("/ready", s.readyHandler)

	// Accept an already-bracketed IPv6 literal ("[::1]") so JoinHostPort
	// does not wrap it a second time.
	if n := len(host); n >= 2 && host[0] == '[' && host[n-1] == ']' {
		host = host[1 : n-1]
	}

	// JoinHostPort brackets IPv6 literals; a plain "%s:%d" would yield an
	// unparseable address such as "::1:8080".
	addr := net.JoinHostPort(host, fmt.Sprintf("%d", port))

	s.server = &http.Server{
		Addr:         addr,
		Handler:      mux,
		ReadTimeout:  5 * time.Second,
		WriteTimeout: 5 * time.Second,
	}

	return s
}

// Start marks the server as ready and then serves HTTP on the configured
// address. It blocks until the underlying server stops and returns the
// error from ListenAndServe.
func (s *Server) Start() error {
	s.SetReady(true)
	return s.server.ListenAndServe()
}

// StartContext marks the server as ready, serves HTTP in a background
// goroutine, and blocks until either the listener returns or ctx is done.
//
// If the listener returns first, its error is returned. If ctx is done
// first, the server is shut down through Stop, so the readiness flag is
// cleared just as it is for an explicit Stop call, and the shutdown error
// (if any) is returned.
func (s *Server) StartContext(ctx context.Context) error {
	s.mu.Lock()
	s.ready = true
	s.mu.Unlock()

	// Buffered so the serving goroutine can deliver its result and exit even
	// when nobody is receiving any more (the ctx.Done path).
	errCh := make(chan error, 1)
	go func() {
		errCh <- s.server.ListenAndServe()
	}()

	select {
	case err := <-errCh:
		return err
	case <-ctx.Done():
		// ctx is already cancelled, so shutdown runs under a fresh context.
		return s.Stop(context.Background())
	}
}

// Stop clears the readiness flag and then shuts the HTTP server down,
// passing ctx through to http.Server.Shutdown. It returns whatever
// Shutdown returns.
func (s *Server) Stop(ctx context.Context) error {
	s.SetReady(false)
	return s.server.Shutdown(ctx)
}

// SetReady sets the readiness flag reported by the readiness handler.
func (s *Server) SetReady(ready bool) {
	s.mu.Lock()
	defer s.mu.Unlock()

	s.ready = ready
}

// RegisterCheck runs checkFn once and stores its outcome under name,
// replacing any result previously stored under the same name. The stored
// result is what the readiness handler reports; checkFn is not retained
// or re-run afterwards.
//
// checkFn returns whether the check passed and an optional message.
func (s *Server) RegisterCheck(name string, checkFn func() (bool, string)) {
	// Run the callback before taking the lock: it is caller-supplied code,
	// so it may be slow or may call back into the Server (e.g. SetReady),
	// which would block readiness requests or deadlock if the lock were held.
	ok, msg := checkFn()
	result := Check{
		Name:      name,
		Status:    statusString(ok),
		Message:   msg,
		Timestamp: time.Now(),
	}

	s.mu.Lock()
	s.checks[name] = result
	s.mu.Unlock()
}

// healthHandler answers every request with 200 OK and a JSON
// StatusResponse whose status is "ok" and whose uptime is the time elapsed
// since the server was constructed. It does not consult the readiness
// flag or the registered checks.
func (s *Server) healthHandler(w http.ResponseWriter, r *http.Request) {
	payload := StatusResponse{
		Status: "ok",
		Uptime: time.Since(s.startTime).String(),
	}

	w.Header().Set("Content-Type", "application/json")
	w.WriteHeader(http.StatusOK)
	json.NewEncoder(w).Encode(payload)
}

// readyHandler reports readiness as JSON. It responds 503 with status
// "not ready" when the readiness flag is false or any recorded check has
// status "fail"; otherwise it responds 200 with status "ready" and the
// current uptime. The recorded checks are included in both cases.
func (s *Server) readyHandler(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "application/json")

	// Copy the shared state under the read lock so the response is built
	// from a consistent snapshot without holding the lock while writing.
	s.mu.RLock()
	healthy := s.ready
	snapshot := make(map[string]Check)
	for name, result := range s.checks {
		snapshot[name] = result
	}
	s.mu.RUnlock()

	// A single failing check is enough to withhold readiness.
	if healthy {
		for _, result := range snapshot {
			if result.Status == "fail" {
				healthy = false
				break
			}
		}
	}

	if !healthy {
		w.WriteHeader(http.StatusServiceUnavailable)
		json.NewEncoder(w).Encode(StatusResponse{
			Status: "not ready",
			Checks: snapshot,
		})
		return
	}

	w.WriteHeader(http.StatusOK)
	json.NewEncoder(w).Encode(StatusResponse{
		Status: "ready",
		Uptime: time.Since(s.startTime).String(),
		Checks: snapshot,
	})
}

// statusString maps a check outcome to its textual status: "ok" for a
// passing check and "fail" otherwise.
func statusString(ok bool) string {
	label := "fail"
	if ok {
		label = "ok"
	}
	return label
}