Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
46 commits
Select commit Hold shift + click to select a range
ddabd0b
Add hybrid search and AI chat using Meilisearch and OpenAI
Oct 21, 2025
40d4226
blur
Nov 14, 2025
9a05a04
blur
Nov 14, 2025
3b5986e
focus
Nov 14, 2025
c1c1d2f
ai emoji and md
Nov 14, 2025
9a86d62
thinking shrinkage fix
Nov 14, 2025
f461d35
chatbot option
Nov 14, 2025
6ab4444
Trigger preview redeploy
Nov 14, 2025
52404e0
Revert "chatbot option"
Nov 14, 2025
713c2b6
meilisearch-scrape.yml
Nov 19, 2025
eb21c8f
meilisearch-scrape.yml
Nov 19, 2025
e6e63cb
updated creds
Nov 19, 2025
e5585cd
add scraper to testing branch
Nov 20, 2025
438aac5
restore settings after scrape
Nov 20, 2025
89c2134
moved restoration to a different file
Nov 20, 2025
16bcc32
Fix Meilisearch document structure and settings order
Nov 20, 2025
e677419
formatting match
Nov 20, 2025
7bb7e07
change workflow order
Nov 20, 2025
0e9bef3
add wait time
Nov 20, 2025
26265cc
reorder index settings again
Nov 20, 2025
1b53631
switching to official workflow
Nov 20, 2025
82a8776
untitled to titles
Nov 20, 2025
572a191
headings
Nov 20, 2025
fd91f98
headings
Nov 21, 2025
6f0baff
search optimization
Nov 21, 2025
6bcdbb4
ai search results fix
Nov 21, 2025
af1a5bd
search refining
Nov 21, 2025
b6cb750
search results refining
Nov 21, 2025
15916ae
dynamic count for results
Nov 21, 2025
8487886
number results found from bar
Nov 21, 2025
6265620
results formatting
Nov 21, 2025
881d5ef
search for darkmode
Nov 21, 2025
0f99bae
update docusaurus
Nov 21, 2025
264b586
remove algolia
Nov 21, 2025
d333787
ranking fix
Nov 21, 2025
a889123
better filters
Nov 21, 2025
ffd7145
disable filtering to test search
Nov 21, 2025
141c16a
filtering works!
Nov 21, 2025
df77bca
filtering changes
Nov 22, 2025
cf405f3
filters
Nov 22, 2025
d9cdab0
filters
Nov 22, 2025
18a0d67
filters
Nov 22, 2025
6b962c3
Fix search: remove 'semgrep' from synonyms to prevent over-expansion
Nov 24, 2025
118fc43
filtering
Nov 24, 2025
3812926
filtering
Nov 24, 2025
a77fe70
Remove 'community edition' and 'free version' from oss synonyms to fi…
Nov 24, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
89 changes: 89 additions & 0 deletions .github/workflows/meilisearch-scrape.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Crawls the deployed docs site into Meilisearch, then re-applies the index
# settings, which the crawl may have reset.
name: Meilisearch scrape

on:
  workflow_dispatch:
    inputs:
      parameter:
        description: Run from dispatch
  push:
    branches:
      - main
      - meilisearch-testing-clean

jobs:
  scrape:
    runs-on: ubuntu-latest
    steps:
      - name: Check out code
        uses: actions/checkout@v4

      # Fixed 480-second pause before crawling; this step only waits, it does
      # not poll the deployment.
      - name: Wait for deployment
        run: sleep 480s
        shell: bash

      # NOTE(review): this action is referenced by the mutable `main` branch;
      # consider pinning it to a release tag or commit SHA.
      - name: Run Meilisearch Cloud Crawler
        uses: meilisearch/actions/cloud-crawler@main
        with:
          token: ${{ secrets.MEILISEARCH_CLOUD_CRAWLER_TOKEN }}

      - name: Restore index settings after scraping
        env:
          MEILISEARCH_HOST_URL: ${{ secrets.MEILISEARCH_HOST_URL }}
          MEILISEARCH_API_KEY: ${{ secrets.MEILISEARCH_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: |
          echo "Restoring index settings (scraper may have reset them)..."

          settings_url="${MEILISEARCH_HOST_URL}/indexes/semgrep_docs/settings"

          # PATCH one settings payload to the index and stop the job unless
          # the server answers 202.
          #   $1  label used in log lines ("base" or "embedder")
          #   $2  value handed to curl -d: inline JSON, or @file to send a file
          patch_settings() {
            local label="$1"
            local payload="$2"
            local response http_code

            # -sS hides the progress meter but still reports transfer errors.
            response=$(curl -sS -w "\n%{http_code}" -X PATCH \
              "$settings_url" \
              -H "Authorization: Bearer ${MEILISEARCH_API_KEY}" \
              -H "Content-Type: application/json" \
              -d "$payload")

            # -w appended the HTTP status code as the last line of the output.
            http_code=$(echo "$response" | tail -n1)
            echo "${label^} settings response code: $http_code"

            if [ "$http_code" != "202" ]; then
              echo "Failed to apply ${label} settings"
              echo "$response"
              exit 1
            fi
          }

          # Apply base settings from repo file
          patch_settings base @meilisearch-settings.json

          echo "Checking if OPENAI_API_KEY is set..."
          if [ -z "$OPENAI_API_KEY" ]; then
            echo "ERROR: OPENAI_API_KEY is not set in GitHub Secrets!"
            echo "Please add it at: https://github.com/semgrep/semgrep-docs/settings/secrets/actions"
            exit 1
          fi

          # Apply embedder separately (requires secret). The payload is built
          # with jq so the key is JSON-escaped instead of being pasted into a
          # hand-written JSON string.
          embedder_payload=$(jq -nc --arg apiKey "$OPENAI_API_KEY" '{
            "embedders": {
              "default": {
                "source": "openAi",
                "model": "text-embedding-3-small",
                "apiKey": $apiKey,
                "dimensions": 1536,
                "documentTemplate": "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}"
              }
            }
          }')
          patch_settings embedder "$embedder_payload"

          # NOTE(review): a 202 presumably means the update was accepted and
          # queued, not that it finished; the fixed pause below does not
          # check the task outcome.
          echo "✅ Index settings restored successfully!"
          echo "Waiting for embedder to process (this takes a few minutes)..."
          sleep 60

1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,4 @@ yarn-error.log*

# Ignore .history
/.history/
meili_data/
48 changes: 0 additions & 48 deletions docusaurus.config.js
Original file line number Diff line number Diff line change
Expand Up @@ -13,27 +13,7 @@ module.exports = {
organizationName: 'semgrep', // Usually your GitHub org/user name.
projectName: 'semgrep', // Usually your repo name.
trailingSlash: false,
themes: ['@markprompt/docusaurus-theme-search'],
themeConfig: {
markprompt: {
projectKey: 'jbhF5LligltdKaJucMjDcWcRodaVpzqE',
trigger: { floating: false },
systemPrompt: 'You are a kind AI who loves to help people!',
model: 'gpt-4',
display: 'dialog',
search: {
enabled: true,
provider: {
name: 'algolia',
apiKey: 'f53612c29d04a2ff71dce6e3b2f76752',
appId: 'RGEY1AKPUC',
indexName: 'docs',
},
},
chat: {
assistantId: '5af10a40-7ed8-4aa1-9e7a-65d2858445af',
}
},
docs: {
sidebar: {
hideable: true,
Expand Down Expand Up @@ -181,34 +161,6 @@ module.exports = {
darkTheme: darkCodeTheme,
additionalLanguages: ['java', 'ruby', 'php', 'csharp', 'rust', 'scala', 'kotlin', 'bash', 'json'],
},
//algolia: {
// apiKey: 'f53612c29d04a2ff71dce6e3b2f76752',
// indexName: 'docs',

// // Optional: see doc section below
// contextualSearch: false,

// // Optional: see doc section below
// appId: 'RGEY1AKPUC',

// // Optional: Algolia search parameters
// searchParameters: {},
// facetFilters: [],
// //... other Algolia params
// "customRanking": [
// "desc(weight.page_rank)"
// ],
// "ranking": [
// "desc(weight.page_rank)",
// "custom",
// "filters",
// "typo",
// "attribute",
// "words",
// "exact",
// "proximity"
// ]
//},
image: 'https://semgrep.dev/thumbnail.png',
//announcementBar: {
// id: 'office-hours',
Expand Down
96 changes: 96 additions & 0 deletions meilisearch-settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
{
"synonyms": {
"autofix": ["autofix", "automatic fixes", "remediation", "code fixes"],
"ci": ["ci", "continuous integration", "pipeline", "github actions", "gitlab ci", "automation"],
"config": ["config", "configuration", "settings", "setup"],
"create": ["create", "write", "build", "develop", "make", "author"],
"custom": ["custom", "own", "personal", "user-defined", "bespoke"],
"deployment": ["deployment", "setup", "configuration", "installation", "integration"],
"findings": ["findings", "issues", "vulnerabilities", "results", "matches", "detections"],
"github": ["github", "git", "version control", "repository"],
"gitlab": ["gitlab", "git", "version control", "repository"],
"go": ["go", "golang"],
"ignore": ["ignore", "exclude", "suppress", "disable"],
"java": ["java", "spring", "maven", "gradle"],
"javascript": ["javascript", "js", "node.js", "nodejs", "typescript", "ts"],
"metavariables": ["metavariables", "variables", "placeholders", "pattern variables"],
"oss": ["oss", "open source"],
"patterns": ["patterns", "rules", "expressions", "syntax patterns", "matching"],
"policy": ["policy", "policies", "governance", "compliance"],
"pro": ["pro", "semgrep pro", "commercial", "paid version", "enterprise"],
"python": ["python", "py", "django", "flask"],
"rules": ["rules", "patterns", "detectors", "checks", "rule writing", "rule creation"],
"rulewriting": ["rule writing", "write rules", "create rules", "custom rules", "rule development", "rule authoring"],
"saml": ["saml", "identity provider", "idp", "federation"],
"sast": ["sast", "static application security testing", "code security", "security analysis"],
"sca": ["sca", "supply chain", "dependencies", "vulnerabilities", "dependency scanning"],
"scan": ["scan", "scanning", "analysis", "check", "run"],
"scp": ["scp", "semgrep cloud platform", "semgrep app", "semgrep platform"],
"secrets": ["secrets", "api keys", "tokens", "credentials", "sensitive data"],
"sms": ["sms", "semgrep managed scanning", "managed scans", "cloud scanning"],
"ssc": ["ssc", "semgrep supply chain", "supply chain security", "dependency security"],
"sso": ["sso", "single sign-on", "single sign on", "authentication"],
"taint": ["taint", "taint analysis", "data flow", "taint mode", "taint tracking"],
"workflow": ["workflow", "pipeline", "automation", "ci/cd", "devops"]
},
"stopWords": [
"what", "is", "are", "how", "to", "the", "a", "an", "do", "does", "can", "i", "my"
],
"searchableAttributes": [
"hierarchy_lvl1",
"hierarchy_lvl2",
"hierarchy_lvl3",
"hierarchy_lvl0",
"hierarchy.lvl1",
"hierarchy.lvl2",
"hierarchy.lvl3",
"hierarchy.lvl0",
"content",
"url"
],
"displayedAttributes": [
"*"
],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness"
],
"filterableAttributes": [
"type",
"language",
"version",
"docusaurus_tag",
"hierarchy_lvl0",
"hierarchy_lvl1",
"hierarchy_lvl2",
"hierarchy.lvl0",
"hierarchy.lvl1",
"hierarchy.lvl2"
],
"sortableAttributes": [
"hierarchy.lvl0",
"hierarchy.lvl1",
"hierarchy.lvl2"
],
"distinctAttribute": "url",
"typoTolerance": {
"enabled": true,
"minWordSizeForTypos": {
"oneTypo": 3,
"twoTypos": 7
},
"disableOnWords": [],
"disableOnAttributes": []
},
"faceting": {
"maxValuesPerFacet": 100
},
"pagination": {
"maxTotalHits": 1000
}
}

4 changes: 4 additions & 0 deletions netlify.toml
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# Functions settings: the directory holding function sources and the bundler
# used for them.
[functions]
directory = "netlify/functions"
node_bundler = "esbuild"

[[redirects]]
from = "/*"
to = "/docs/404.html"
Expand Down
Loading
Loading