From 03a3f98604bc6ac3db9a0ab0cca7c65428ebee75 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 16 Feb 2026 15:17:41 +0000 Subject: [PATCH 1/2] chore(deps): update dependency jekyll-octicons to v19 --- jekyll-build-json/Gemfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jekyll-build-json/Gemfile b/jekyll-build-json/Gemfile index 26e7462..d10690f 100644 --- a/jekyll-build-json/Gemfile +++ b/jekyll-build-json/Gemfile @@ -10,7 +10,7 @@ gem "github-pages", "= 232" # It is not directly included in the github-pages gem list of dependencies, # even though it is included in the original GitHub Pages build infrastructure. gem "jekyll-include-cache", "= 0.2.1" -gem "jekyll-octicons", "~> 14.2" +gem "jekyll-octicons", "~> 19.0" gem "minima", "~> 2.0" # Work-around for webrick no longer included in Ruby 3.0 (https://github.com/jekyll/jekyll/issues/8523) From 13f384c02a040d5cf86dd528553d45d2db5cdf49 Mon Sep 17 00:00:00 2001 From: "Marlon (Esolitos) Saglia" Date: Wed, 25 Feb 2026 08:52:23 +0100 Subject: [PATCH 2/2] chore: remove unused action --- .github/workflows/jekyll-feed-to-vespa.yml | 33 ---- jekyll-build-json/Dockerfile | 30 ---- jekyll-build-json/Gemfile | 17 -- jekyll-build-json/action.yml | 27 ---- jekyll-build-json/entrypoint.sh | 58 ------- jekyll-build-json/vespa_index_generator.rb | 180 --------------------- 6 files changed, 345 deletions(-) delete mode 100644 .github/workflows/jekyll-feed-to-vespa.yml delete mode 100644 jekyll-build-json/Dockerfile delete mode 100644 jekyll-build-json/Gemfile delete mode 100644 jekyll-build-json/action.yml delete mode 100755 jekyll-build-json/entrypoint.sh delete mode 100644 jekyll-build-json/vespa_index_generator.rb diff --git a/.github/workflows/jekyll-feed-to-vespa.yml b/.github/workflows/jekyll-feed-to-vespa.yml deleted file mode 100644 index d88fb36..0000000 --- a/.github/workflows/jekyll-feed-to-vespa.yml +++ /dev/null @@ -1,33 +0,0 @@ ---- -name: (WIP) Feed Jekyll posts to Vespa - -on: - workflow_call: - inputs: - log_level: - description: 'Log level for Jekyll build.' - required: false - default: 'info' - type: string - -defaults: - run: - # Specify to ensure "pipefail and errexit" are set. - # Ref: https://docs.github.com/en/actions/writing-workflows/workflow-syntax-for-github-actions#defaultsrunshell - shell: bash - -jobs: - lint: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v6 - - - name: Build with Jekyll - uses: vespa-engine/gh-actions/jekyll-build-json@main - with: - log_level: ${{ github.event.inputs.log_level }} - - - name: List JSON - run: | - find _site -type f -name '*.json' diff --git a/jekyll-build-json/Dockerfile b/jekyll-build-json/Dockerfile deleted file mode 100644 index 1079b9b..0000000 --- a/jekyll-build-json/Dockerfile +++ /dev/null @@ -1,30 +0,0 @@ -ARG RUBY_VERSION=3.3 -FROM ruby:$RUBY_VERSION-slim - -RUN apt-get update \ - && apt-get install -y \ - build-essential \ - git \ - locales - -COPY Gemfile Gemfile - -ENV NOKOGIRI_USE_SYSTEM_LIBRARIES=true - -RUN gem env && \ - bundle install --jobs 20 --retry 5 - -RUN \ - echo "en_US UTF-8" > /etc/locale.gen && \ - locale-gen en-US.UTF-8 - -ENV LANG=en_US.UTF-8 -ENV LANGUAGE=en_US.UTF-8 -ENV LC_ALL=en_US.UTF-8 - -COPY entrypoint.sh /entrypoint.sh - -RUN mkdir -p /opt/jekyll/plugins -COPY vespa_index_generator.rb /opt/jekyll/plugins/vespa_index_generator.rb - -ENTRYPOINT ["/entrypoint.sh"] diff --git a/jekyll-build-json/Gemfile b/jekyll-build-json/Gemfile deleted file mode 100644 index d10690f..0000000 --- a/jekyll-build-json/Gemfile +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. -# frozen_string_literal: true - -source "https://rubygems.org" - -# Manage our dependency on the version of the github-pages gem here. -gem "github-pages", "= 232" - -# Explicitly include this gem here. -# It is not directly included in the github-pages gem list of dependencies, -# even though it is included in the original GitHub Pages build infrastructure. -gem "jekyll-include-cache", "= 0.2.1" -gem "jekyll-octicons", "~> 19.0" -gem "minima", "~> 2.0" - -# Work-around for webrick no longer included in Ruby 3.0 (https://github.com/jekyll/jekyll/issues/8523) -gem "webrick" diff --git a/jekyll-build-json/action.yml b/jekyll-build-json/action.yml deleted file mode 100644 index 51b0d65..0000000 --- a/jekyll-build-json/action.yml +++ /dev/null @@ -1,27 +0,0 @@ -name: 'Build Jekyll for Vespa Search' -description: 'A simple GH Action for producing Jekyll build artifacts compatible with Vespa JSON' -author: 'Vespa.ai' -inputs: - source: - description: 'Directory where the source files reside (relative to the workspace).' - required: false - default: ./ - destination: - description: 'Output directory of the build. Although it can be nested inside the source, it cannot be the same as the source directory (relative to the workspace).' - required: false - default: ./_site - config: - description: 'Jekyll configuration file path relative to the workspace root.' - required: false - default: ./_config.yml - log_level: - description: 'Log level for Jekyll build.' - required: false - default: 'info' - token: - description: 'GitHub token' - required: true - default: ${{ github.token }} -runs: - using: 'docker' - image: 'docker://ghcr.io/vespa-engine/gh-actions/jekyll-build-json' diff --git a/jekyll-build-json/entrypoint.sh b/jekyll-build-json/entrypoint.sh deleted file mode 100755 index 6de16a5..0000000 --- a/jekyll-build-json/entrypoint.sh +++ /dev/null @@ -1,58 +0,0 @@ -#!/bin/bash - -if [[ "${RUNNER_DEBUG:-}" == "1" ]]; then - set -x -fi - -set -e -set -o pipefail -set -o nounset - -SOURCE_DIRECTORY="${GITHUB_WORKSPACE}/${INPUT_SOURCE}" -DESTINATION_DIRECTORY="${GITHUB_WORKSPACE}/${INPUT_DESTINATION}" -JEKYLL_CONFIG_FILE="${GITHUB_WORKSPACE}/${INPUT_CONFIG}" -JEKYLL_ARGS=("-p" "/opt/jekyll/plugins") - -# Set environment variables required by supported plugins -export JEKYLL_ENV="production" -export JEKYLL_GITHUB_TOKEN="${INPUT_TOKEN}" -export JEKYLL_BUILD_REVISION="${GITHUB_SHA}" -export JEKYLL_LOG_LEVEL="${INPUT_LOG_LEVEL}" - -if [[ "${JEKYLL_LOG_LEVEL}" = "verbose" || "${JEKYLL_LOG_LEVEL}" = "debug" ]]; then - JEKYLL_ARGS+=("-V") -fi - -if [[ "${RUNNER_DEBUG:-}" == "1" ]]; then - gem env - bundle env - - echo "SOURCE_DIRECTORY: ${SOURCE_DIRECTORY}" - echo "DESTINATION_DIRECTORY: ${DESTINATION_DIRECTORY}" - echo "JEKYLL_CONFIG_FILE: ${JEKYLL_CONFIG_FILE}" - echo "GITHUB_WORKSPACE: ${GITHUB_WORKSPACE}" - - echo "====== START Config file =====" - cat "${JEKYLL_CONFIG_FILE}" - echo "====== END Config file =====" -fi - -{ cd "${BUNDLE_APP_CONFIG}" || { echo "::error::pages gem not found"; exit 1; }; } - -# Run the command, capturing the output -build_output="$(bundle exec jekyll build ${JEKYLL_ARGS[@]} --config "${JEKYLL_CONFIG_FILE}" --source "${SOURCE_DIRECTORY}" --destination "${DESTINATION_DIRECTORY}")" - -# Capture the exit code -exit_code=$? - -if [ $exit_code -ne 0 ]; then - # Remove the newlines from the build_output as annotation not support multiline - error=$(echo "$build_output" | tr '\n' ' ' | tr -s ' ') - echo "::error::$error" -else - # Display the build_output directly - echo "$build_output" -fi - -# Exit with the captured exit code -exit $exit_code diff --git a/jekyll-build-json/vespa_index_generator.rb b/jekyll-build-json/vespa_index_generator.rb deleted file mode 100644 index ba56850..0000000 --- a/jekyll-build-json/vespa_index_generator.rb +++ /dev/null @@ -1,180 +0,0 @@ -# Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. - -require 'json' -require 'nokogiri' -require 'kramdown/parser/kramdown' - -module Jekyll - - class VespaIndexGenerator < Jekyll::Generator - priority :lowest - safe true - - def generate(site) - @site = site - puts "::debug::VespaIndexGenerator" - puts "site.config: #{(site.config).inspect}" - - raise "Wrong parameter type, expected Jekyll::Site, got #{site.class}" unless site.is_a?(Jekyll::Site) - raise "Search configuration not found" unless site.config.key?("search") - - search_config = site.config["search"] - namespace = search_config["namespace"] - operations = [] - if site.pages.empty? - # Drop out with an error - puts "::error::No pages found!" - return nil - end - - puts "::debug::Pages found: #{site.pages.size}" - site.pages.each do |page| - if should_skip?(search_config, page) || page.data["index"] != true - puts "::debug::Page not indexed: #{page.url}, index flag: #{page.data['index']}" - next - end - - puts "::debug::Processing page: #{page.url}" - url = page.url - url += 'index.html' if url[-1, 1] == '/' - - text = extract_text(page) - outlinks = extract_links(page) - headers = extract_headers(page) - keywords = get_keywords(page) - - fields = { - :path => url, - :namespace => namespace, - :title => page.data["title"], - :content => text, - :html => get_html(page), - :term_count => text.split.length(), - :last_updated => Time.now.to_i - } - fields[:outlinks] = outlinks if !outlinks.empty? - fields[:headers] = headers if !headers.empty? - fields[:keywords] = keywords if !keywords.empty? - operations.push({ - :put => "id:" + namespace + ":doc::" + namespace + url, - :fields => fields - }) - end - json = JSON.pretty_generate(operations) - # puts "::debug::Writing index file: #{namespace}_index.json" - # File.open(__dir__ + namespace + "_index.json", "w") { |f| f.write(json) } - - puts "::debug::Writing index file: #{namespace}_index.json" - index_file = PageWithoutAFile.new(@site, __dir__, "", namespace + "_index.json") - index_file.content = json - index_file.data["layout"] = nil - index_file - end - - def should_skip?(search_config, page) - exclude_pages = search_config["exclude_pages"] || [] - return exclude_pages.include?(page) || is_empty(page) - end - - def is_empty(page) - # The generated client-side redirects should not be indexed - - # they have no title and node content - return page.content == "" && !page.data["title"] - end - - def get_html(page) - if page.name[page.name.rindex('.')+1..-1] == "md" - doc = Kramdown::Document.new(page.content).to_html - else - doc = page.content - end - end - - def get_doc(page) - if page.name[page.name.rindex('.')+1..-1] == "md" - doc = Nokogiri::HTML(Kramdown::Document.new(page.content).to_html) - else - doc = Nokogiri::HTML(page.content) - end - end - - def reset_xml_pre(doc) - # The highlighter works on un-quoted XML, so some docs have non-HTML elements like - # Read and set such fields again for proper quoting and later text extraction (dirty hack ...) - doc.search('pre').each do |pre| - if pre.to_s =~ /\{% highlight xml %}/ - pre.content = pre.to_s.gsub("\n", " ") - .gsub(/
\s*\{% highlight xml %}(.+?)\{% endhighlight %}<\/pre>/, '\1')
-                end
-            end
-            return doc
-        end
-
-        def extract_text(page)
-            doc = reset_xml_pre(get_doc(page))
-            doc.search('th,td').each{ |e| e.after "\n" }
-            doc.search('style').each{ |e| e.remove }
-            content = doc.xpath("//text()").to_s
-                .gsub("\r"," ")
-                .gsub("\n"," ")
-            return strip_liquid(content)
-        end
-
-        def extract_links(page)
-            doc = get_doc(page)
-            links = doc.css('a').map { |link| link['href'] || ""}
-            links.reject{ |l| l.empty? }.map{ |l| l }
-            return links
-        end
-
-        def extract_headers(page)
-            doc = get_doc(page)
-            headers = doc.css('h1,h2,h3,h4').map { |header| header.content.gsub("\r"," ").gsub("\n"," ") || ""}
-            headers.reject{ |h| h.empty? }.map{ |h| h }
-            return headers
-        end
-
-        def get_keywords(page)
-            doc = get_doc(page)
-            keywords = []
-            if page.data["keywords"]
-                page.data["keywords"].split(/,/).each do |k|
-                    k = k.strip
-                    keywords.push(k) if ! k.empty?
-                end
-            end
-            return keywords
-        end
-
-        def strip_liquid(text)
-            return text.gsub(/\{%(.+?)%}/) { "#{ process_liquid($1) }" } # .+? is a lazy match, match only once
-        end
-
-        def process_liquid(match)
-        # https://ruby-doc.org/core-3.1.2/Regexp.html for the quotes
-        # ToDo: define the quote pattern (\"|\p{Pi}|\p{Pf}|') once and build regex using this as a parameter
-        #
-        # This is a poor man's solution to clean the data for search -
-        # the alternative is building the site and _then_ extract data
-        # That will however add jekyll build as a dependency for feeding, so keeping this simple for now
-            return match.gsub(/^\s*highlight\s*\w*/, "")
-                     .gsub(/^\s*(raw|endraw|endhighlight)/, "")
-                     .gsub(/^\s*include\s*(deprecated|important|note|query|warning).html\s*content=\s*(\"|\p{Pi}|\p{Pf}|')/, "")
-                     .gsub(/^\s*include\s*video-include.html\s.*video-title=\s*(\"|\p{Pi}|\p{Pf}|')/, "Find at vespa.ai/resources: ")
-                     .gsub(/^\s*include\s*pre-req.html\s*memory=\s*(\"|\p{Pi}|\p{Pf}|')(.*)/)  { "#{ process_pre_req($2) }" }
-                     .gsub(/(\"|\p{Pi}|\p{Pf}|')\s*$/, "")
-        end
-
-        def process_pre_req(match)
-            return match.gsub(/([0-9]*)\s*GB/, '
-                Docker: Docker Desktop for Mac/Windows, or Docker on Linux.
-                Operating system: Linux, macOS or Windows 10 Pro.
-                Architecture: x86_64 or arm64.
-                Minimum \1 GB RAM dedicated to Docker (the default is 2 GB on macOS). Memory recommendations.
-                Homebrew to install the Vespa CLI, or download Vespa CLI from Github releases.')
-                .gsub(/(\"|\p{Pi}|\p{Pf}|')\s*extra-reqs=\s*(\"|\p{Pi}|\p{Pf}|')/, "")
-        end
-
-    end
-
-end