-
Notifications
You must be signed in to change notification settings - Fork 5
[#93] Add support for basic html tags #259
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
Sereja313
wants to merge
3
commits into
master
Choose a base branch
from
Sereja313/#93-add-support-for-html-tags
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -22,6 +22,7 @@ import Universum | |
| import CMarkGFM | ||
| (Node (..), NodeType (..), PosInfo (..), commonmarkToNode, extAutolink, optFootnotes) | ||
| import Control.Lens (_Just, makeLenses, makeLensesFor, (.=)) | ||
| import Control.Monad.Trans.RWS.CPS qualified as RWS | ||
| import Control.Monad.Trans.Writer.CPS (Writer, runWriter, tell) | ||
| import Data.Aeson (FromJSON (..), genericParseJSON) | ||
| import Data.ByteString.Lazy qualified as BSL | ||
|
|
@@ -195,6 +196,12 @@ removeIgnored = withIgnoreMode . cataNodeWithParentNodeInfo remove | |
| (IMSLink _, IMAGE {}) -> do | ||
| ssIgnore .= Nothing | ||
| return defNode | ||
| (IMSLink _, HTML_INLINE text) | isLink text -> do | ||
| ssIgnore .= Nothing | ||
| pure defNode | ||
| (IMSLink _, HTML_BLOCK text) | isLink text -> do | ||
| ssIgnore .= Nothing | ||
| pure defNode | ||
| (IMSLink ignoreLinkState, _) -> do | ||
| when (ignoreLinkState == ExpectingLinkInSubnodes) $ | ||
| ssIgnore . _Just . ignoreMode .= IMSLink ParentExpectsLink | ||
|
|
@@ -264,57 +271,59 @@ removeIgnored = withIgnoreMode . cataNodeWithParentNodeInfo remove | |
| pure node | ||
| (node, _) -> pure node | ||
|
|
||
| findAttributes :: [Text] -> [Attribute Text] -> Maybe Text | ||
| findAttributes (map T.toLower -> attrs) = | ||
| fmap snd . find ((`elem` attrs) . T.toLower . fst) | ||
|
|
||
| isLink :: Text -> Bool | ||
| isLink (parseTags -> tags) = case safeHead tags of | ||
| Just (TagOpen tag attrs) -> | ||
| T.toLower tag == "a" && isJust (findAttributes ["href"] attrs) | ||
| || T.toLower tag == "img" && isJust (findAttributes ["src"] attrs) | ||
| _ -> False | ||
|
|
||
| -- | Custom `foldMap` for source tree. | ||
| foldNode :: (Monoid a, Monad m) => (Node -> m a) -> Node -> m a | ||
| foldNode action node@(Node _ _ subs) = do | ||
| a <- action node | ||
| b <- concatForM subs (foldNode action) | ||
| return (a <> b) | ||
|
|
||
| type ExtractorM a = ReaderT MarkdownConfig (Writer [ScanError 'Parse]) a | ||
| type ExtractorM a = RWS.RWS MarkdownConfig [ScanError 'Parse] (Maybe Reference) a | ||
|
|
||
| -- | Extract information from source tree. | ||
| nodeExtractInfo :: Node -> ExtractorM FileInfo | ||
| nodeExtractInfo input@(Node _ _ nSubs) = do | ||
| if checkIgnoreAllFile nSubs | ||
| then return (diffToFileInfo mempty) | ||
| else diffToFileInfo <$> (foldNode extractor =<< lift (removeIgnored input)) | ||
| else diffToFileInfo <$> (foldNode extractor =<< (RWS.writer . runWriter $ removeIgnored input)) | ||
|
|
||
| where | ||
| extractor :: Node -> ExtractorM FileInfoDiff | ||
| extractor node@(Node pos ty _) = | ||
| case ty of | ||
| HTML_BLOCK _ -> do | ||
| return mempty | ||
| extractor node@(Node pos ty _) = do | ||
| reference' <- RWS.get | ||
| -- If current state is not `Nothing`, try extracting associated text | ||
| let fileInfoDiff = case (reference', ty) of | ||
| (Just ref, TEXT text) -> | ||
| mempty & fidReferences .~ DList.singleton ref {rName = text} | ||
| (Just ref, _) -> mempty & fidReferences .~ DList.singleton ref | ||
| _ -> mempty | ||
| RWS.put Nothing | ||
| fmap (fileInfoDiff <>) case ty of | ||
| HTML_BLOCK text | isLink text -> extractHtmlLink text | ||
|
|
||
| HTML_BLOCK text -> extractAnchor text | ||
|
|
||
| HEADING lvl -> do | ||
| flavor <- asks mcFlavor | ||
| flavor <- RWS.asks mcFlavor | ||
| let aType = HeaderAnchor lvl | ||
| let aName = headerToAnchor flavor $ nodeExtractText node | ||
| let aPos = toPosition pos | ||
| return $ FileInfoDiff DList.empty $ DList.singleton $ Anchor {aType, aName, aPos} | ||
|
|
||
| HTML_INLINE text -> do | ||
| let | ||
| mName = do | ||
| tag <- safeHead $ parseTags text | ||
| attributes <- case tag of | ||
| TagOpen a attrs | ||
| | T.toLower a == "a" -> Just attrs | ||
| _ -> Nothing | ||
| (_, name) <- find (\(field, _) -> T.toLower field `elem` ["name", "id"]) attributes | ||
| pure name | ||
|
|
||
| case mName of | ||
| Just aName -> do | ||
| let aType = HandAnchor | ||
| aPos = toPosition pos | ||
| return $ FileInfoDiff | ||
| mempty | ||
| (pure $ Anchor {aType, aName, aPos}) | ||
| HTML_INLINE text | isLink text -> extractHtmlLink text | ||
|
|
||
| Nothing -> do | ||
| return mempty | ||
| HTML_INLINE text -> extractAnchor text | ||
|
|
||
| LINK url _ -> extractLink url | ||
|
|
||
|
|
@@ -328,17 +337,69 @@ nodeExtractInfo input@(Node _ _ nSubs) = do | |
| rPos = toPosition pos | ||
| link = if null url then rName else url | ||
|
|
||
| let (rLink, rAnchor) = case T.splitOn "#" link of | ||
| [t] -> (t, Nothing) | ||
| t : ts -> (t, Just $ T.intercalate "#" ts) | ||
| [] -> error "impossible" | ||
| let (rLink, rAnchor) = splitLink link | ||
|
|
||
| let rInfo = referenceInfo rLink | ||
|
|
||
| return $ FileInfoDiff | ||
| (DList.singleton $ Reference {rName, rPos, rLink, rAnchor, rInfo}) | ||
| DList.empty | ||
|
|
||
| extractAnchor :: Text -> ExtractorM FileInfoDiff | ||
| extractAnchor text = do | ||
| let mName = do | ||
| tag <- safeHead $ parseTags text | ||
| attributes <- case tag of | ||
| TagOpen a attrs | T.toLower a == "a" -> Just attrs | ||
| _ -> Nothing | ||
| findAttributes ["name", "id"] attributes | ||
|
|
||
| case mName of | ||
| Just aName -> do | ||
| let aType = HandAnchor | ||
| aPos = toPosition pos | ||
| return $ FileInfoDiff | ||
| mempty | ||
| (pure $ Anchor {aType, aName, aPos}) | ||
|
|
||
| Nothing -> do | ||
| return mempty | ||
|
|
||
| extractHtmlReference :: [Attribute Text] -> Maybe PosInfo -> DList.DList Reference | ||
| extractHtmlReference attrs tagPos = fromMaybe mempty do | ||
| link <- findAttributes ["href"] attrs | ||
| let (rLink, rAnchor) = splitLink link | ||
| pure . DList.singleton $ Reference "" rLink rAnchor (toPosition tagPos) (referenceInfo rLink) | ||
|
|
||
| splitLink :: Text -> (Text, Maybe Text) | ||
| splitLink link = case T.splitOn "#" link of | ||
| [t] -> (t, Nothing) | ||
| t : ts -> (t, Just $ T.intercalate "#" ts) | ||
| [] -> error "impossible" | ||
|
|
||
| extractHtmlImage :: [Attribute Text] -> Maybe PosInfo -> DList.DList Reference | ||
| extractHtmlImage attrs tagPos = fromMaybe mempty do | ||
| link <- findAttributes ["src"] attrs | ||
| pure . DList.singleton $ Reference "" link Nothing (toPosition tagPos) (referenceInfo link) | ||
|
|
||
| extractHtmlLink :: Text -> ExtractorM FileInfoDiff | ||
| extractHtmlLink text = | ||
| case safeHead $ parseTags text of | ||
| Just (TagOpen tag attrs) | T.toLower tag == "img" -> | ||
| pure $ mempty & fidReferences .~ extractHtmlImage attrs pos | ||
| Just (TagOpen tag attrs) | T.toLower tag == "a" -> do | ||
| let reference = extractHtmlReference attrs pos | ||
| case DList.toList reference of | ||
| [ref] -> do | ||
| -- The `cmark-gfm` package parses the link tag as three separate nodes: | ||
| -- `HTML_INLINE` with an opening tag, a `TEXT` with text in between, | ||
| -- and `HTML_INLINE` with a closing tag. So we keep the extracted link in a state and | ||
| -- try to get associated text in the next node. | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As we have been discussing in recent PRs, this code seems to be more difficult to understand with each new feature that requires to modify it. At least, comments and separating chunks of code to functions help in clarifying it for the moment 👍 |
||
| RWS.put $ Just ref | ||
| pure mempty | ||
| _ -> pure mempty | ||
| _ -> pure mempty | ||
|
|
||
aeqz marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| -- | Check if there is `ignore all` at the beginning of the file, | ||
| -- ignoring preceding comments if there are any. | ||
| checkIgnoreAllFile :: [Node] -> Bool | ||
|
|
@@ -406,11 +467,10 @@ textToMode _ = NotAnAnnotation | |
|
|
||
| parseFileInfo :: MarkdownConfig -> LT.Text -> (FileInfo, [ScanError 'Parse]) | ||
| parseFileInfo config input | ||
| = runWriter | ||
| $ flip runReaderT config | ||
| $ nodeExtractInfo | ||
| = RWS.evalRWS | ||
| (nodeExtractInfo | ||
| $ commonmarkToNode [optFootnotes] [extAutolink] | ||
| $ toStrict input | ||
| $ toStrict input) config Nothing | ||
|
|
||
| markdownScanner :: MarkdownConfig -> ScanAction | ||
| markdownScanner config canonicalFile = | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,67 @@ | ||
| === Repository data === | ||
|
|
||
| check-html.md: | ||
| - references: | ||
| - reference (file-local) at src:13:1-11: | ||
| - text: "One" | ||
| - link: - | ||
| - anchor: one | ||
| - reference (file-local) at src:14:1-11: | ||
| - text: "Two" | ||
| - link: - | ||
| - anchor: two | ||
| - reference (file-local) at src:15:1-15: | ||
| - text: "Three" | ||
| - link: - | ||
| - anchor: three | ||
| - reference (file-local) at src:16:1-13: | ||
| - text: "Four" | ||
| - link: - | ||
| - anchor: four | ||
| - reference (file-local) at src:17:1-13: | ||
| - text: "Five" | ||
| - link: - | ||
| - anchor: five | ||
| - reference (file-local) at src:18:1-11: | ||
| - text: "Six" | ||
| - link: - | ||
| - anchor: six | ||
| - reference (file-local) at src:19:1-15: | ||
| - text: "Seven" | ||
| - link: - | ||
| - anchor: seven | ||
| - reference (external) at src:21:1-144: | ||
| - text: "" | ||
| - link: https://user-images.githubusercontent.com/5394217/70820564-06b06e00-1dea-11ea-9680-27f661ca2a58.png | ||
| - anchor: - | ||
| - reference (external) at src:23:6-149: | ||
| - text: "" | ||
| - link: https://user-images.githubusercontent.com/5394217/70820564-06b06e00-1dea-11ea-9680-27f661ca2a58.png | ||
| - anchor: - | ||
| - reference (external) at src:25:1-29: | ||
| - text: "serokell" | ||
| - link: https://serokell.io/ | ||
| - anchor: - | ||
| - reference (external) at src:27:6-34: | ||
| - text: "serokell" | ||
| - link: https://serokell.io/ | ||
| - anchor: - | ||
| - reference (file-local) at src:29:1-13: | ||
| - text: "Six" | ||
| - link: - | ||
| - anchor: six | ||
| - reference (file-local) at src:31:6-20: | ||
| - text: "Seven" | ||
| - link: - | ||
| - anchor: seven | ||
| - anchors: | ||
| - title1 (header II) at src:7:1-96 | ||
| - one (hand made) at src:7:4-17 | ||
| - two (hand made) at src:7:19-30 | ||
| - three (hand made) at src:7:32-47 | ||
| - four (hand made) at src:7:49-63 | ||
| - five (hand made) at src:7:69-88 | ||
| - six (hand made) at src:9:1-12 | ||
| - seven (hand made) at src:11:6-17 | ||
|
|
||
| All repository links are valid. |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.