1717import java .net .http .HttpClient ;
1818import java .net .http .HttpRequest ;
1919import java .net .http .HttpResponse ;
20+ import java .time .Duration ;
2021import java .util .Collection ;
2122import java .util .List ;
2223import java .util .Optional ;
2324import java .util .concurrent .CompletableFuture ;
25+ import java .util .concurrent .TimeUnit ;
2426import java .util .function .Predicate ;
2527import java .util .stream .IntStream ;
2628
@@ -34,7 +36,8 @@ public final class LinkPreviews {
3436 private static final String IMAGE_CONTENT_TYPE_PREFIX = "image" ;
3537 private static final String IMAGE_META_NAME = "image" ;
3638
37- private static final HttpClient CLIENT = HttpClient .newHttpClient ();
39+ private static final HttpClient CLIENT =
40+ HttpClient .newBuilder ().connectTimeout (Duration .ofSeconds (10 )).build ();
3841
3942 private LinkPreviews () {
4043 throw new UnsupportedOperationException ("Utility class" );
@@ -49,10 +52,33 @@ private LinkPreviews() {
4952 public static List <String > extractLinks (String content ) {
5053 return new UrlDetector (content , UrlDetectorOptions .BRACKET_MATCH ).detect ()
5154 .stream ()
52- .map (Url ::getFullUrl )
55+ .map (LinkPreviews ::toLink )
56+ .flatMap (Optional ::stream )
5357 .toList ();
5458 }
5559
60+ private static Optional <String > toLink (Url url ) {
61+ String raw = url .getOriginalUrl ();
62+ if (raw .contains (">" )) {
63+ // URL escapes, such as "<http://example.com>" should be skipped
64+ return Optional .empty ();
65+ }
66+ // Not interested in other schemes, also to filter out matches without scheme.
67+ // It detects a lot of such false-positives in Java snippets
68+ if (!raw .startsWith ("http" )) {
69+ return Optional .empty ();
70+ }
71+
72+ String link = url .getFullUrl ();
73+
74+ if (link .endsWith ("," ) || link .endsWith ("." )) {
75+ // Remove trailing punctuation
76+ link = link .substring (0 , link .length () - 1 );
77+ }
78+
79+ return Optional .of (link );
80+ }
81+
5682 /**
5783 * Attempts to create previews of all given links.
5884 * <p>
@@ -75,7 +101,10 @@ public static CompletableFuture<List<LinkPreview>> createLinkPreviews(List<Strin
75101 .toList ();
76102
77103 var allDoneTask = CompletableFuture .allOf (tasks .toArray (CompletableFuture []::new ));
78- return allDoneTask .thenApply (any -> extractResults (tasks ));
104+ return allDoneTask .thenApply (any -> extractResults (tasks )).exceptionally (e -> {
105+ logger .error ("Unknown error during link preview creation" , e );
106+ return List .of ();
107+ });
79108 }
80109
81110 private static <T > List <T > extractResults (
@@ -103,6 +132,9 @@ private static CompletableFuture<Optional<LinkPreview>> createLinkPreview(String
103132 return parseWebsite (link , attachmentName , content .dataStream );
104133 }
105134 return noResult ();
135+ }).orTimeout (10 , TimeUnit .SECONDS ).exceptionally (e -> {
136+ logger .warn ("Failed to create link preview for {}" , link , e );
137+ return Optional .empty ();
106138 });
107139 }
108140
@@ -142,7 +174,8 @@ private static CompletableFuture<Optional<LinkPreview>> parseWebsite(String link
142174 try {
143175 doc = Jsoup .parse (websiteContent , null , link );
144176 } catch (IOException e ) {
145- logger .warn ("Attempted to create a preview for {}, but the content invalid." , link , e );
177+ logger .warn ("Attempted to create a preview for {}, but the content is invalid." , link ,
178+ e );
146179 return noResult ();
147180 }
148181
@@ -152,7 +185,7 @@ private static CompletableFuture<Optional<LinkPreview>> parseWebsite(String link
152185
153186 LinkPreview textPreview = LinkPreview .ofText (title , link , description );
154187
155- String image = parseOpenGraphMeta (doc , IMAGE_META_NAME ).orElse (null );
188+ String image = parseOpenGraphTwitterMeta (doc , IMAGE_META_NAME , null ).orElse (null );
156189 if (image == null ) {
157190 return result (textPreview );
158191 }
@@ -173,24 +206,27 @@ private static CompletableFuture<Optional<LinkPreview>> parseWebsite(String link
173206
174207 private static Optional <String > parseOpenGraphTwitterMeta (Document doc , String metaProperty ,
175208 @ Nullable String fallback ) {
176- String value = Optional
177- .ofNullable (doc .selectFirst ("meta[property=og:%s]" .formatted (metaProperty )))
178- .or (() -> Optional
179- .ofNullable (doc .selectFirst ("meta[property=twitter:%s]" .formatted (metaProperty ))))
180- .map (element -> element .attr ("content" ))
209+ String value = parseMetaProperty (doc , "og:" + metaProperty )
210+ .or (() -> parseMetaProperty (doc , "twitter:" + metaProperty ))
181211 .orElse (fallback );
212+
182213 if (value == null ) {
183214 return Optional .empty ();
184215 }
185216 return value .isBlank () ? Optional .empty () : Optional .of (value );
186217 }
187218
188- private static Optional <String > parseOpenGraphMeta (Document doc , String metaProperty ) {
189- return Optional . ofNullable (doc . selectFirst ( "meta[ property=og:%s]" . formatted ( metaProperty )) )
190- .map ( element -> element . attr ( "content" ))
219+ private static Optional <String > parseMetaProperty (Document doc , String metaProperty ) {
220+ return selectFirstMetaTag (doc , " property" , metaProperty )
221+ .or (() -> selectFirstMetaTag ( doc , "name" , metaProperty ))
191222 .filter (Predicate .not (String ::isBlank ));
192223 }
193224
225+ private static Optional <String > selectFirstMetaTag (Document doc , String key , String value ) {
226+ return Optional .ofNullable (doc .selectFirst ("meta[%s=%s]" .formatted (key , value )))
227+ .map (element -> element .attr ("content" ));
228+ }
229+
194230 private static <T > CompletableFuture <Optional <T >> noResult () {
195231 return CompletableFuture .completedFuture (Optional .empty ());
196232 }
0 commit comments