From 46a2b7e740ca2a9484863d618ccc02a79b80591c Mon Sep 17 00:00:00 2001 From: urasmutlu Date: Wed, 11 Feb 2026 00:50:01 +0000 Subject: [PATCH] feat(gmail): add --safe flag to get and thread get commands Sanitize email output by stripping HTML via a full parser, removing URLs to prevent phishing/tracking, and decoding HTML entities. In JSON mode a sanitized bodies map is provided and raw body data is cleared. Co-Authored-By: Claude Opus 4.6 --- README.md | 8 + internal/cmd/gmail_get.go | 48 +++-- internal/cmd/gmail_get_cmd_test.go | 156 +++++++++++++++++ internal/cmd/gmail_thread.go | 150 ++++++++++++++-- internal/cmd/gmail_thread_run_test.go | 100 +++++++++++ internal/cmd/gmail_thread_test.go | 241 ++++++++++++++++++++++++++ 6 files changed, 681 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index d06683b2..3169acb1 100644 --- a/README.md +++ b/README.md @@ -537,8 +537,10 @@ gog gmail search 'newer_than:7d' --max 10 gog gmail thread get gog gmail thread get --download # Download attachments to current dir gog gmail thread get --download --out-dir ./attachments +gog gmail thread get --safe # Safe mode (see below) gog gmail get gog gmail get --format metadata +gog gmail get --safe # Safe mode (see below) gog gmail attachment gog gmail attachment --out ./attachment.bin gog gmail url # Print Gmail web URL @@ -595,6 +597,12 @@ gog gmail watch serve --bind 0.0.0.0 --verify-oidc --oidc-email --hook gog gmail history --since ``` +Safe mode (`--safe`): +- Strips all HTML using a full parser (not regex), removing scripts, styles, and tags +- Replaces all URLs with `[url removed]` to prevent phishing and tracking +- Decodes HTML entities to catch obfuscated URLs +- In JSON mode, provides a sanitized `bodies` map and clears raw body data from the payload + Gmail watch (Pub/Sub push): - Create Pub/Sub topic + push subscription (OIDC preferred; shared token ok for dev). - Full flow + payload details: `docs/watch.md`. diff --git a/internal/cmd/gmail_get.go b/internal/cmd/gmail_get.go index 2c3625d8..1f86d92a 100644 --- a/internal/cmd/gmail_get.go +++ b/internal/cmd/gmail_get.go @@ -15,6 +15,7 @@ type GmailGetCmd struct { MessageID string `arg:"" name:"messageId" help:"Message ID"` Format string `name:"format" help:"Message format: full|metadata|raw" default:"full"` Headers string `name:"headers" help:"Metadata headers (comma-separated; only for --format=metadata)"` + Safe bool `name:"safe" help:"Sanitize output: strip HTML, remove URLs, decode entities"` } const ( @@ -78,18 +79,31 @@ func (c *GmailGetCmd) Run(ctx context.Context, flags *RootFlags) error { "subject": headerValue(msg.Payload, "Subject"), "date": headerValue(msg.Payload, "Date"), } + if c.Safe { + for k, v := range headers { + headers[k] = sanitizeText(v) + } + } payload := map[string]any{ "message": msg, "headers": headers, } - if unsubscribe != "" { + if unsubscribe != "" && !c.Safe { payload["unsubscribe"] = unsubscribe } if format == gmailFormatFull { - if body := bestBodyText(msg.Payload); body != "" { + if c.Safe { + safeBody, isHTML := bestBodyForDisplay(msg.Payload) + if safeBody != "" { + payload["body"] = sanitizeBodyText(safeBody, isHTML) + } + } else if body := bestBodyText(msg.Payload); body != "" { payload["body"] = body } } + if c.Safe { + clearPayloadBodies(msg.Payload) + } if format == gmailFormatFull || format == gmailFormatMetadata { attachments := collectAttachments(msg.Payload) if len(attachments) > 0 { @@ -117,11 +131,17 @@ func (c *GmailGetCmd) Run(ctx context.Context, flags *RootFlags) error { u.Out().Println(string(decoded)) return nil case gmailFormatMetadata, gmailFormatFull: - u.Out().Printf("from\t%s", headerValue(msg.Payload, "From")) - u.Out().Printf("to\t%s", headerValue(msg.Payload, "To")) - u.Out().Printf("subject\t%s", headerValue(msg.Payload, "Subject")) + if c.Safe { + u.Out().Printf("from\t%s", sanitizeText(headerValue(msg.Payload, "From"))) + u.Out().Printf("to\t%s", sanitizeText(headerValue(msg.Payload, "To"))) + u.Out().Printf("subject\t%s", sanitizeText(headerValue(msg.Payload, "Subject"))) + } else { + u.Out().Printf("from\t%s", headerValue(msg.Payload, "From")) + u.Out().Printf("to\t%s", headerValue(msg.Payload, "To")) + u.Out().Printf("subject\t%s", headerValue(msg.Payload, "Subject")) + } u.Out().Printf("date\t%s", headerValue(msg.Payload, "Date")) - if unsubscribe != "" { + if unsubscribe != "" && !c.Safe { u.Out().Printf("unsubscribe\t%s", unsubscribe) } attachments := attachmentOutputs(collectAttachments(msg.Payload)) @@ -130,10 +150,18 @@ func (c *GmailGetCmd) Run(ctx context.Context, flags *RootFlags) error { printAttachmentLines(u.Out(), attachments) } if format == gmailFormatFull { - body := bestBodyText(msg.Payload) - if body != "" { - u.Out().Println("") - u.Out().Println(body) + if c.Safe { + body, isHTML := bestBodyForDisplay(msg.Payload) + if body != "" { + u.Out().Println("") + u.Out().Println(sanitizeBodyText(body, isHTML)) + } + } else { + body := bestBodyText(msg.Payload) + if body != "" { + u.Out().Println("") + u.Out().Println(body) + } } } return nil diff --git a/internal/cmd/gmail_get_cmd_test.go b/internal/cmd/gmail_get_cmd_test.go index f30de232..66a04cea 100644 --- a/internal/cmd/gmail_get_cmd_test.go +++ b/internal/cmd/gmail_get_cmd_test.go @@ -474,3 +474,159 @@ func TestGmailGetCmd_RawEmpty(t *testing.T) { t.Fatalf("unexpected stderr: %q", errOut) } } + +func TestGmailGetCmd_Safe_JSON(t *testing.T) { + origNew := newGmailService + t.Cleanup(func() { newGmailService = origNew }) + + htmlBody := base64.RawURLEncoding.EncodeToString([]byte( + `

Hello https://phish.com/steal

`, + )) + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if !strings.Contains(r.URL.Path, "/gmail/v1/users/me/messages/") { + http.NotFound(w, r) + return + } + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]any{ + "id": "m1", + "threadId": "t1", + "labelIds": []string{"INBOX"}, + "payload": map[string]any{ + "mimeType": "text/html", + "body": map[string]any{"data": htmlBody}, + "headers": []map[string]any{ + {"name": "From", "value": "a@example.com"}, + {"name": "To", "value": "b@example.com"}, + {"name": "Subject", "value": "Visit https://evil.com now"}, + {"name": "Date", "value": "Fri, 26 Dec 2025 10:00:00 +0000"}, + {"name": "List-Unsubscribe", "value": ""}, + }, + }, + }) + })) + defer srv.Close() + + svc, err := gmail.NewService(context.Background(), + option.WithoutAuthentication(), + option.WithHTTPClient(srv.Client()), + option.WithEndpoint(srv.URL+"/"), + ) + if err != nil { + t.Fatalf("NewService: %v", err) + } + newGmailService = func(context.Context, string) (*gmail.Service, error) { return svc, nil } + + flags := &RootFlags{Account: "a@b.com"} + out := captureStdout(t, func() { + _ = captureStderr(t, func() { + u, uiErr := ui.New(ui.Options{Stdout: io.Discard, Stderr: io.Discard, Color: "never"}) + if uiErr != nil { + t.Fatalf("ui.New: %v", uiErr) + } + ctx := ui.WithUI(context.Background(), u) + ctx = outfmt.WithMode(ctx, outfmt.Mode{JSON: true}) + + cmd := &GmailGetCmd{Safe: true} + if err := runKong(t, cmd, []string{"m1", "--format", "full", "--safe"}, ctx, flags); err != nil { + t.Fatalf("execute: %v", err) + } + }) + }) + + var parsed map[string]any + if err := json.Unmarshal([]byte(out), &parsed); err != nil { + t.Fatalf("json parse: %v", err) + } + + // Body should be sanitized + body, _ := parsed["body"].(string) + if strings.Contains(body, "https://") { + t.Fatalf("--safe body should not contain URLs, got: %q", body) + } + if !strings.Contains(body, "Hello") { + t.Fatalf("--safe body should contain 'Hello', got: %q", body) + } + + // Unsubscribe should not be present + if _, ok := parsed["unsubscribe"]; ok { + t.Fatalf("--safe JSON should not include unsubscribe link") + } + + // Headers should be sanitized + headers, _ := parsed["headers"].(map[string]any) + subject, _ := headers["subject"].(string) + if strings.Contains(subject, "https://") { + t.Fatalf("--safe subject should not contain URLs, got: %q", subject) + } + if !strings.Contains(subject, "[url removed]") { + t.Fatalf("--safe subject should contain [url removed], got: %q", subject) + } +} + +func TestGmailGetCmd_Safe_Text(t *testing.T) { + origNew := newGmailService + t.Cleanup(func() { newGmailService = origNew }) + + bodyData := base64.RawURLEncoding.EncodeToString([]byte("Hello visit https://phish.com/login for details")) + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if !strings.Contains(r.URL.Path, "/gmail/v1/users/me/messages/") { + http.NotFound(w, r) + return + } + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(map[string]any{ + "id": "m1", + "threadId": "t1", + "labelIds": []string{"INBOX"}, + "payload": map[string]any{ + "mimeType": "text/plain", + "body": map[string]any{"data": bodyData}, + "headers": []map[string]any{ + {"name": "From", "value": "a@example.com"}, + {"name": "To", "value": "b@example.com"}, + {"name": "Subject", "value": "Urgent https://evil.com action"}, + {"name": "Date", "value": "Fri, 26 Dec 2025 10:00:00 +0000"}, + {"name": "List-Unsubscribe", "value": ""}, + }, + }, + }) + })) + defer srv.Close() + + svc, err := gmail.NewService(context.Background(), + option.WithoutAuthentication(), + option.WithHTTPClient(srv.Client()), + option.WithEndpoint(srv.URL+"/"), + ) + if err != nil { + t.Fatalf("NewService: %v", err) + } + newGmailService = func(context.Context, string) (*gmail.Service, error) { return svc, nil } + + flags := &RootFlags{Account: "a@b.com"} + out := captureStdout(t, func() { + _ = captureStderr(t, func() { + u, uiErr := ui.New(ui.Options{Stdout: os.Stdout, Stderr: io.Discard, Color: "never"}) + if uiErr != nil { + t.Fatalf("ui.New: %v", uiErr) + } + ctx := ui.WithUI(context.Background(), u) + + cmd := &GmailGetCmd{Safe: true} + if err := runKong(t, cmd, []string{"m1", "--format", "full", "--safe"}, ctx, flags); err != nil { + t.Fatalf("execute: %v", err) + } + }) + }) + + if strings.Contains(out, "https://") { + t.Fatalf("--safe text output should not contain URLs, got: %q", out) + } + if !strings.Contains(out, "[url removed]") { + t.Fatalf("--safe text output should contain [url removed], got: %q", out) + } + if strings.Contains(out, "unsubscribe") { + t.Fatalf("--safe text output should not show unsubscribe link, got: %q", out) + } +} diff --git a/internal/cmd/gmail_thread.go b/internal/cmd/gmail_thread.go index 52ccac96..97a3a2fd 100644 --- a/internal/cmd/gmail_thread.go +++ b/internal/cmd/gmail_thread.go @@ -6,6 +6,7 @@ import ( "encoding/base64" "errors" "fmt" + htmlpkg "html" "io" "mime" "mime/quotedprintable" @@ -15,6 +16,7 @@ import ( "regexp" "strings" + "golang.org/x/net/html" "golang.org/x/net/html/charset" "google.golang.org/api/gmail/v1" @@ -46,6 +48,102 @@ func stripHTMLTags(s string) string { return strings.TrimSpace(s) } +// --safe mode sanitization functions. + +// urlPattern matches HTTP and HTTPS URLs. +var urlPattern = regexp.MustCompile(`https?://[^\s<>"'` + "`" + `\]\)]+`) + +// blockElements is the set of HTML elements that produce visual line breaks. +var blockElements = map[string]bool{ + "div": true, "p": true, "br": true, "li": true, "tr": true, + "h1": true, "h2": true, "h3": true, "h4": true, "h5": true, "h6": true, + "blockquote": true, "pre": true, "hr": true, "table": true, + "ul": true, "ol": true, "dl": true, "dt": true, "dd": true, + "section": true, "article": true, "header": true, "footer": true, +} + +// safeExtractTextFromHTML uses the golang.org/x/net/html tokenizer to properly +// extract text content from HTML, skipping script and style blocks. +// Unlike stripHTMLTags (regex-based), this uses a full HTML parser for +// robust handling of malformed HTML, making it suitable for --safe mode. +func safeExtractTextFromHTML(s string) string { + tokenizer := html.NewTokenizer(strings.NewReader(s)) + var buf strings.Builder + skip := false + for { + tt := tokenizer.Next() + switch tt { + case html.ErrorToken: + // Collapse whitespace in the final result. + result := whitespacePattern.ReplaceAllString(buf.String(), " ") + return strings.TrimSpace(result) + case html.StartTagToken, html.SelfClosingTagToken: + tn, _ := tokenizer.TagName() + tag := string(tn) + if tag == "script" || tag == "style" { + skip = true + } + if blockElements[tag] { + buf.WriteByte(' ') + } + case html.EndTagToken: + tn, _ := tokenizer.TagName() + tag := string(tn) + if tag == "script" || tag == "style" { + skip = false + } + if blockElements[tag] { + buf.WriteByte(' ') + } + case html.TextToken: + if !skip { + buf.Write(tokenizer.Text()) + } + } + } +} + +// stripURLs replaces all HTTP/HTTPS URLs with [url removed]. +func stripURLs(s string) string { + return urlPattern.ReplaceAllString(s, "[url removed]") +} + +// sanitizeBodyText sanitizes email body content for safe display. +// It extracts text from HTML (if needed), decodes HTML entities, and strips URLs. +func sanitizeBodyText(body string, isHTML bool) string { + if body == "" { + return "" + } + text := body + if isHTML { + text = safeExtractTextFromHTML(text) + } + text = htmlpkg.UnescapeString(text) + text = stripURLs(text) + text = whitespacePattern.ReplaceAllString(text, " ") + return strings.TrimSpace(text) +} + +// sanitizeText applies lightweight sanitization to header values and other text. +func sanitizeText(s string) string { + s = htmlpkg.UnescapeString(s) + return stripURLs(s) +} + +// clearPayloadBodies recursively clears body data on all text/* MIME parts +// to prevent raw content from leaking into JSON output. +func clearPayloadBodies(p *gmail.MessagePart) { + if p == nil { + return + } + if strings.HasPrefix(strings.ToLower(p.MimeType), "text/") && p.Body != nil { + p.Body.Data = "" + } + for _, part := range p.Parts { + clearPayloadBodies(part) + } +} + type GmailThreadCmd struct { Get GmailThreadGetCmd `cmd:"" name:"get" default:"withargs" help:"Get a thread with all messages (optionally download attachments)"` Modify GmailThreadModifyCmd `cmd:"" name:"modify" help:"Modify labels on all messages in a thread"` @@ -56,6 +154,7 @@ type GmailThreadGetCmd struct { ThreadID string `arg:"" name:"threadId" help:"Thread ID"` Download bool `name:"download" help:"Download attachments"` Full bool `name:"full" help:"Show full message bodies"` + Safe bool `name:"safe" help:"Sanitize output: strip HTML, remove URLs, decode entities"` OutputDir OutputDirFlag `embed:""` } @@ -108,6 +207,22 @@ func (c *GmailThreadGetCmd) Run(ctx context.Context, flags *RootFlags) error { downloadedFiles = append(downloadedFiles, attachmentDownloadSummaries(downloads)...) } } + if c.Safe && thread != nil { + bodies := make(map[string]string, len(thread.Messages)) + for _, msg := range thread.Messages { + if msg == nil || msg.Id == "" { + continue + } + body, isHTML := bestBodyForDisplay(msg.Payload) + bodies[msg.Id] = sanitizeBodyText(body, isHTML) + clearPayloadBodies(msg.Payload) + } + return outfmt.WriteJSON(os.Stdout, map[string]any{ + "thread": thread, + "bodies": bodies, + "downloaded": downloadedFiles, + }) + } return outfmt.WriteJSON(os.Stdout, map[string]any{ "thread": thread, "downloaded": downloadedFiles, @@ -127,18 +242,29 @@ func (c *GmailThreadGetCmd) Run(ctx context.Context, flags *RootFlags) error { continue } u.Out().Printf("=== Message %d/%d: %s ===", i+1, len(thread.Messages), msg.Id) - u.Out().Printf("From: %s", headerValue(msg.Payload, "From")) - u.Out().Printf("To: %s", headerValue(msg.Payload, "To")) - u.Out().Printf("Subject: %s", headerValue(msg.Payload, "Subject")) + if c.Safe { + u.Out().Printf("From: %s", sanitizeText(headerValue(msg.Payload, "From"))) + u.Out().Printf("To: %s", sanitizeText(headerValue(msg.Payload, "To"))) + u.Out().Printf("Subject: %s", sanitizeText(headerValue(msg.Payload, "Subject"))) + } else { + u.Out().Printf("From: %s", headerValue(msg.Payload, "From")) + u.Out().Printf("To: %s", headerValue(msg.Payload, "To")) + u.Out().Printf("Subject: %s", headerValue(msg.Payload, "Subject")) + } u.Out().Printf("Date: %s", headerValue(msg.Payload, "Date")) u.Out().Println("") body, isHTML := bestBodyForDisplay(msg.Payload) if body != "" { - cleanBody := body - if isHTML { - // Strip HTML tags for cleaner text output - cleanBody = stripHTMLTags(body) + var cleanBody string + if c.Safe { + cleanBody = sanitizeBodyText(body, isHTML) + } else { + cleanBody = body + if isHTML { + // Strip HTML tags for cleaner text output + cleanBody = stripHTMLTags(body) + } } // Limit body preview to avoid overwhelming output // Use runes to avoid breaking multi-byte UTF-8 characters @@ -361,8 +487,8 @@ func bestBodyText(p *gmail.MessagePart) string { if plain != "" { return plain } - html := findPartBody(p, "text/html") - return html + htmlBody := findPartBody(p, "text/html") + return htmlBody } func bestBodyForDisplay(p *gmail.MessagePart) (string, bool) { @@ -376,11 +502,11 @@ func bestBodyForDisplay(p *gmail.MessagePart) (string, bool) { } return plain, false } - html := findPartBody(p, "text/html") - if html == "" { + htmlBody := findPartBody(p, "text/html") + if htmlBody == "" { return "", false } - return html, true + return htmlBody, true } func findPartBody(p *gmail.MessagePart, mimeType string) string { diff --git a/internal/cmd/gmail_thread_run_test.go b/internal/cmd/gmail_thread_run_test.go index 648f6da2..8e686ca9 100644 --- a/internal/cmd/gmail_thread_run_test.go +++ b/internal/cmd/gmail_thread_run_test.go @@ -248,3 +248,103 @@ func TestGmailThreadGetAndAttachments_JSON(t *testing.T) { t.Fatalf("unexpected empty attachments output: %q", emptyAttachOut) } } + +func TestGmailThreadGet_Safe(t *testing.T) { + origNew := newGmailService + t.Cleanup(func() { newGmailService = origNew }) + + htmlBody := base64.RawURLEncoding.EncodeToString([]byte( + `

Hello visit https://phish.com/login now

`, + )) + threadResp := map[string]any{ + "id": "t1", + "messages": []map[string]any{ + { + "id": "m1", + "payload": map[string]any{ + "headers": []map[string]any{ + {"name": "From", "value": "a@example.com"}, + {"name": "To", "value": "b@example.com"}, + {"name": "Subject", "value": "Check https://evil.com now"}, + {"name": "Date", "value": "Mon, 1 Jan 2025 00:00:00 +0000"}, + }, + "mimeType": "text/html", + "body": map[string]any{ + "data": htmlBody, + }, + }, + }, + }, + } + + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + path := strings.TrimPrefix(r.URL.Path, "/gmail/v1") + if r.Method == http.MethodGet && path == "/users/me/threads/t1" { + w.Header().Set("Content-Type", "application/json") + _ = json.NewEncoder(w).Encode(threadResp) + return + } + http.NotFound(w, r) + })) + defer srv.Close() + + svc, err := gmail.NewService(context.Background(), + option.WithoutAuthentication(), + option.WithHTTPClient(srv.Client()), + option.WithEndpoint(srv.URL+"/"), + ) + if err != nil { + t.Fatalf("NewService: %v", err) + } + newGmailService = func(context.Context, string) (*gmail.Service, error) { return svc, nil } + + // Test text output with --safe + textOut := captureStdout(t, func() { + _ = captureStderr(t, func() { + if err := Execute([]string{"--account", "a@b.com", "gmail", "thread", "get", "t1", "--safe"}); err != nil { + t.Fatalf("Execute --safe text: %v", err) + } + }) + }) + if strings.Contains(textOut, "https://") { + t.Fatalf("--safe text output should not contain URLs, got: %q", textOut) + } + if !strings.Contains(textOut, "[url removed]") { + t.Fatalf("--safe text output should contain [url removed], got: %q", textOut) + } + if !strings.Contains(textOut, "Hello") { + t.Fatalf("--safe text output should contain plain text 'Hello', got: %q", textOut) + } + if strings.Contains(textOut, "safe text", + want: "safe text", + }, + { + name: "style block removed", + input: "visible", + want: "visible", + }, + { + name: "nested tags", + input: "
inner
", + want: "inner", + }, + { + name: "block elements add spaces", + input: "

first

second

", + want: "first second", + }, + { + name: "malformed HTML consumed safely", + input: `a & b

", + want: "a & b", + }, + { + name: "complex email HTML", + input: `
Hello

World

`, + want: "Hello World", + }, + { + name: "empty input", + input: "", + want: "", + }, + { + name: "plain text unchanged", + input: "no tags here", + want: "no tags here", + }, + { + name: "self closing tags", + input: "line1
line2", + want: "line1 line2", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := safeExtractTextFromHTML(tt.input) + if got != tt.want { + t.Errorf("safeExtractTextFromHTML(%q) = %q, want %q", tt.input, got, tt.want) + } + }) + } +} + +func TestStripURLs(t *testing.T) { + tests := []struct { + name string + input string + want string + }{ + { + name: "http URL", + input: "visit http://example.com for info", + want: "visit [url removed] for info", + }, + { + name: "https URL", + input: "click https://example.com/page", + want: "click [url removed]", + }, + { + name: "URL with query params", + input: "track https://track.example.com/open?id=abc123&utm_source=email here", + want: "track [url removed] here", + }, + { + name: "multiple URLs", + input: "see https://a.com and http://b.com ok", + want: "see [url removed] and [url removed] ok", + }, + { + name: "no URLs unchanged", + input: "plain text with no links", + want: "plain text with no links", + }, + { + name: "empty string", + input: "", + want: "", + }, + { + name: "URL at start", + input: "https://evil.com/phish is bad", + want: "[url removed] is bad", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := stripURLs(tt.input) + if got != tt.want { + t.Errorf("stripURLs(%q) = %q, want %q", tt.input, got, tt.want) + } + }) + } +} + +func TestSanitizeBodyText(t *testing.T) { + tests := []struct { + name string + body string + isHTML bool + want string + }{ + { + name: "HTML with URL", + body: `

Click here now

`, + isHTML: true, + want: "Click here now", + }, + { + name: "plain text with URL", + body: "Visit https://example.com for details", + isHTML: false, + want: "Visit [url removed] for details", + }, + { + name: "HTML entities decoded then URL stripped", + body: "check https://evil.com/payload here", + isHTML: false, + want: "check [url removed] here", + }, + { + name: "HTML with script and tracking", + body: `

Hello https://phish.com

`, + isHTML: true, + want: "Hello [url removed]", + }, + { + name: "empty body", + body: "", + isHTML: false, + want: "", + }, + { + name: "plain text no URLs", + body: "Just a normal message", + isHTML: false, + want: "Just a normal message", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := sanitizeBodyText(tt.body, tt.isHTML) + if got != tt.want { + t.Errorf("sanitizeBodyText(%q, %v) = %q, want %q", tt.body, tt.isHTML, got, tt.want) + } + }) + } +} + +func TestSanitizeText(t *testing.T) { + tests := []struct { + name string + input string + want string + }{ + { + name: "URL in subject", + input: "Check https://evil.com now", + want: "Check [url removed] now", + }, + { + name: "HTML entity decoded", + input: "a & b", + want: "a & b", + }, + { + name: "no changes needed", + input: "Normal Subject Line", + want: "Normal Subject Line", + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := sanitizeText(tt.input) + if got != tt.want { + t.Errorf("sanitizeText(%q) = %q, want %q", tt.input, got, tt.want) + } + }) + } +} + +func TestClearPayloadBodies(t *testing.T) { + p := &gmail.MessagePart{ + MimeType: "multipart/alternative", + Parts: []*gmail.MessagePart{ + { + MimeType: "text/plain", + Body: &gmail.MessagePartBody{Data: "c29tZSBkYXRh"}, + }, + { + MimeType: "text/html", + Body: &gmail.MessagePartBody{Data: "PHA-aHRtbDwvcD4"}, + }, + { + MimeType: "image/png", + Body: &gmail.MessagePartBody{Data: "imagedata", AttachmentId: "att1"}, + }, + }, + } + clearPayloadBodies(p) + + if p.Parts[0].Body.Data != "" { + t.Errorf("text/plain body should be cleared, got %q", p.Parts[0].Body.Data) + } + if p.Parts[1].Body.Data != "" { + t.Errorf("text/html body should be cleared, got %q", p.Parts[1].Body.Data) + } + if p.Parts[2].Body.Data != "imagedata" { + t.Errorf("image/png body should be preserved, got %q", p.Parts[2].Body.Data) + } +} + func encodeBase64URL(value string) string { return base64.RawURLEncoding.EncodeToString([]byte(value)) }