diff --git a/CHANGES.md b/CHANGES.md index a4d037eb..5988548b 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -36,11 +36,13 @@ Unreleased + Now we call references to anchors in current file (e.g. `[a](#b)`) as `file-local` references instead of calling them `current file` (which was ambiguous). * [#233](https://github.com/serokell/xrefcheck/pull/233) - + Now xrefxcheck does not follow redirect links by default. It fails for permanent + + Now xrefcheck does not follow redirect links by default. It fails for permanent redirect responses (i.e. 301 and 308) and passes for temporary ones (i.e. 302, 303, 307). * [#231](https://github.com/serokell/xrefcheck/pull/231) + Anchor analysis takes now into account the appropriate case-sensitivity depending on the configured Markdown flavour. +* [240](https://github.com/serokell/xrefcheck/pull/240) + + Now xrefcheck is able to detect possible copy-pastes relying on links and their names. 0.2.2 ========== diff --git a/README.md b/README.md index 73d76c56..9bfd5c35 100644 --- a/README.md +++ b/README.md @@ -45,6 +45,7 @@ Comparing to alternative solutions, this tool tries to achieve the following poi * Supports external links (`http`, `https`, `ftp` and `ftps`). * Detects broken and ambiguous anchors in local links. * Integration with GitHub Actions. +* Detects possible bad copy-pastes of links. ## Dependencies [↑](#xrefcheck) @@ -148,6 +149,21 @@ There are several ways to fix this: * By default, `xrefcheck` will ignore links to localhost. * This behavior can be disabled by removing the corresponding entry from the `ignoreExternalRefsTo` list in the config file. +1. How do I disable copy-paste check for specific links? + * Add a `` annotation before the link: + ```md + + Links with bad copypaste: + [good link](https://good.link.uri/). + [copypasted link](https://good.link.uri/). + ``` + ```md + A [good link](https://good.link.uri/) + followed by an [copypasted intentionally](https://good.link.uri/). + ``` + * You can use a `` annotation to disable copy-paste check in a paragraph. + * You can use a `` annotation to disable copy-paste check within an entire file. + ## Further work [↑](#xrefcheck) - [ ] Support link detection in different languages, not only Markdown. diff --git a/src/Xrefcheck/Command.hs b/src/Xrefcheck/Command.hs index 16620a86..12ee272e 100644 --- a/src/Xrefcheck/Command.hs +++ b/src/Xrefcheck/Command.hs @@ -28,7 +28,7 @@ import Xrefcheck.Scan import Xrefcheck.Scanners.Markdown (markdownSupport) import Xrefcheck.System (askWithinCI) import Xrefcheck.Util -import Xrefcheck.Verify (reportVerifyErrs, verifyErrors, verifyRepo) +import Xrefcheck.Verify (reportCopyPasteErrors, reportVerifyErrs, verifyErrors, verifyRepo) readConfig :: FilePath -> IO Config readConfig path = fmap (normaliseConfigFilePaths . overrideConfig) do @@ -81,11 +81,12 @@ defaultAction Options{..} = do whenJust (nonEmpty $ sortBy (compare `on` seFile) scanErrs) $ reportScanErrs - verifyRes <- allowRewrite showProgressBar $ \rw -> do + (verifyRes, copyPasteErrors) <- allowRewrite showProgressBar $ \rw -> do let fullConfig = config { cNetworking = addNetworkingOptions (cNetworking config) oNetworkingOptions } verifyRepo rw fullConfig oMode oRoot repoInfo + whenJust (nonEmpty copyPasteErrors) reportCopyPasteErrors case verifyErrors verifyRes of Nothing | null scanErrs -> fmtLn "All repository links are valid." Nothing -> exitFailure diff --git a/src/Xrefcheck/Config.hs b/src/Xrefcheck/Config.hs index 1496167b..a0d2ff4b 100644 --- a/src/Xrefcheck/Config.hs +++ b/src/Xrefcheck/Config.hs @@ -73,6 +73,8 @@ data ScannersConfig' f = ScannersConfig , scAnchorSimilarityThreshold :: Field f Double -- ^ On 'anchor not found' error, how much similar anchors should be displayed as -- hint. Number should be between 0 and 1, larger value means stricter filter. + , scCopyPasteCheckEnabled :: Field f Bool + -- ^ Whether copy-paste check is enabled globally. } deriving stock (Generic) makeLensesWith postfixFields ''Config' @@ -94,6 +96,9 @@ overrideConfig config , scAnchorSimilarityThreshold = fromMaybe (scAnchorSimilarityThreshold defScanners) $ scAnchorSimilarityThreshold (cScanners config) + , scCopyPasteCheckEnabled = + fromMaybe (scCopyPasteCheckEnabled defScanners) + $ scCopyPasteCheckEnabled (cScanners config) } } where diff --git a/src/Xrefcheck/Config/Default.hs b/src/Xrefcheck/Config/Default.hs index 72bcd1a2..8550b0bd 100644 --- a/src/Xrefcheck/Config/Default.hs +++ b/src/Xrefcheck/Config/Default.hs @@ -67,6 +67,9 @@ scanners: # # This affects which anchors are generated for headers. flavor: #s{flavor} + + # Whether copy-paste check is enabled globally. + copyPasteCheckEnabled: True |] where ignoreLocalRefsFrom :: NonEmpty Text diff --git a/src/Xrefcheck/Core.hs b/src/Xrefcheck/Core.hs index 9c3ae450..18e02114 100644 --- a/src/Xrefcheck/Core.hs +++ b/src/Xrefcheck/Core.hs @@ -12,6 +12,7 @@ module Xrefcheck.Core where import Universum import Control.Lens (makeLenses) +import Control.Lens.Combinators (makeLensesWith) import Data.Aeson (FromJSON (..), withText) import Data.Char (isAlphaNum) import Data.Char qualified as C @@ -70,14 +71,17 @@ instance Given ColorMode => Buildable Position where -- | Full info about a reference. data Reference = Reference - { rName :: Text + { rName :: Text -- ^ Text displayed as reference. - , rLink :: Text + , rLink :: Text -- ^ File or site reference points to. - , rAnchor :: Maybe Text + , rAnchor :: Maybe Text -- ^ Section or custom anchor tag. - , rPos :: Position + , rPos :: Position + , rCheckCopyPaste :: Bool + -- ^ Whether to check bad copy/paste for this link } deriving stock (Show, Generic) +makeLensesWith postfixFields ''Reference -- | Context of anchor. data AnchorType @@ -102,9 +106,9 @@ data FileInfoDiff = FileInfoDiff } makeLenses ''FileInfoDiff -diffToFileInfo :: FileInfoDiff -> FileInfo -diffToFileInfo (FileInfoDiff refs anchors) = - FileInfo (DList.toList refs) (DList.toList anchors) +diffToFileInfo :: Bool -> FileInfoDiff -> FileInfo +diffToFileInfo ignoreCpcInFile (FileInfoDiff refs anchors) = + FileInfo (DList.toList refs) (DList.toList anchors) ignoreCpcInFile instance Semigroup FileInfoDiff where FileInfoDiff a b <> FileInfoDiff c d = FileInfoDiff (a <> c) (b <> d) @@ -114,13 +118,14 @@ instance Monoid FileInfoDiff where -- | All information regarding a single file we care about. data FileInfo = FileInfo - { _fiReferences :: [Reference] - , _fiAnchors :: [Anchor] + { _fiReferences :: [Reference] + , _fiAnchors :: [Anchor] + , _fiCopyPasteCheck :: Bool } deriving stock (Show, Generic) makeLenses ''FileInfo instance Default FileInfo where - def = diffToFileInfo mempty + def = diffToFileInfo True mempty data ScanPolicy = OnlyTracked diff --git a/src/Xrefcheck/Scan.hs b/src/Xrefcheck/Scan.hs index da312a0e..91270a14 100644 --- a/src/Xrefcheck/Scan.hs +++ b/src/Xrefcheck/Scan.hs @@ -117,18 +117,27 @@ data ScanErrorDescription = LinkErr | FileErr | ParagraphErr Text + | LinkErrCpc + | FileErrCpc + | ParagraphErrCpc Text | UnrecognisedErr Text deriving stock (Show, Eq) instance Buildable ScanErrorDescription where build = \case LinkErr -> [int||Expected a LINK after "ignore link" annotation|] + LinkErrCpc -> [int||Expected a LINK after "no duplication check in link" annotation|] FileErr -> [int||Annotation "ignore all" must be at the top of \ markdown or right after comments at the top|] + FileErrCpc -> [int||Annotation "no duplication check in file" must be at the top of \ + markdown or right after comments at the top|] ParagraphErr txt -> [int||Expected a PARAGRAPH after \ "ignore paragraph" annotation, but found #{txt}|] - UnrecognisedErr txt -> [int||Unrecognised option "#{txt}" perhaps you meant \ - <"ignore link"|"ignore paragraph"|"ignore all">|] + ParagraphErrCpc txt -> [int||Expected a PARAGRAPH after \ + "no duplication check in paragraph" annotation, but found #{txt}|] + UnrecognisedErr txt -> [int||Unrecognised option "#{txt}", perhaps you meant + <"ignore link"|"ignore paragraph"|"ignore all"> + or "no duplication check in "?|] specificFormatsSupport :: [([Extension], ScanAction)] -> FormatsSupport specificFormatsSupport formats = \ext -> M.lookup ext formatsMap diff --git a/src/Xrefcheck/Scanners/Markdown.hs b/src/Xrefcheck/Scanners/Markdown.hs index 8a053cbf..077b38eb 100644 --- a/src/Xrefcheck/Scanners/Markdown.hs +++ b/src/Xrefcheck/Scanners/Markdown.hs @@ -54,6 +54,24 @@ instance Buildable C.Node where build (C.Node _mpos ty mSubs) = nameF (show ty) $ maybe "[]" interpolateBlockListF (nonEmpty mSubs) +data Node a = Node + { _ndPos :: Maybe PosInfo + , _ndType :: NodeType + , _ndInfo :: a + , _ndSubs :: [Node a] + } + +instance Buildable (Node a) where + build (Node _mpos ty _info mSubs) = nameF (show ty) $ + maybe "[]" interpolateBlockListF (nonEmpty mSubs) + +-- Here and below CPC stands for "copy/paste check" +type NodeCPC = Node CopyPasteCheck + +newtype CopyPasteCheck = CopyPasteCheck + { cpcShouldCheck :: Bool + } deriving stock (Show, Eq, Generic) + toPosition :: Maybe PosInfo -> Position toPosition = Position . \case Nothing -> Nothing @@ -68,7 +86,7 @@ toPosition = Position . \case |] -- | Extract text from the topmost node. -nodeExtractText :: (C.Node) -> Text +nodeExtractText :: Node info -> Text nodeExtractText = T.strip . mconcat . map extractText . nodeFlatten where extractText = \case @@ -76,8 +94,8 @@ nodeExtractText = T.strip . mconcat . map extractText . nodeFlatten CODE t -> t _ -> "" - nodeFlatten :: (C.Node) -> [NodeType] - nodeFlatten (C.Node _pos ty subs) = ty : concatMap nodeFlatten subs + nodeFlatten :: Node info -> [NodeType] + nodeFlatten (Node _pos ty _info subs) = ty : concatMap nodeFlatten subs data IgnoreMode @@ -120,6 +138,7 @@ makeLensesFor [("_ignoreMode", "ignoreMode")] 'Ignore data GetAnnotation = IgnoreAnnotation IgnoreMode + | IgnoreCopyPasteCheck IgnoreMode | InvalidAnnotation Text deriving stock (Eq) @@ -127,6 +146,8 @@ data GetAnnotation data ScannerState = ScannerState { _ssIgnore :: Maybe Ignore + , _ssIgnoreCopyPasteCheck :: Maybe Ignore + , _ssParagraphExpectedAfterCpcAnnotation :: Bool , _ssParentNodeType :: Maybe NodeType -- ^ @cataNodeWithParentNodeInfo@ allows to get a @NodeType@ of parent node from this field } @@ -135,7 +156,9 @@ makeLenses ''ScannerState initialScannerState :: ScannerState initialScannerState = ScannerState { _ssIgnore = Nothing + , _ssIgnoreCopyPasteCheck = Nothing , _ssParentNodeType = Nothing + , _ssParagraphExpectedAfterCpcAnnotation = False } type ScannerM a = StateT ScannerState (Writer [ScanError]) a @@ -155,40 +178,49 @@ cataNodeWithParentNodeInfo f node = cataNode f' node map (ssParentNodeType .= Just ty >>) childScanners -- | Find ignore annotations (ignore paragraph and ignore link) --- and remove nodes that should be ignored. -processAnnotations :: FilePath -> C.Node -> Writer [ScanError] C.Node +-- and remove nodes that should be ignored; +-- find copy/paste check annotations (ignore for paragraph and for link) +-- and label nodes with a boolean meaning whether they should be +-- copy/paste checked. +processAnnotations :: FilePath -> C.Node -> Writer [ScanError] NodeCPC processAnnotations fp = withIgnoreMode . cataNodeWithParentNodeInfo process where process :: Maybe PosInfo -> NodeType - -> [ScannerM C.Node] - -> ScannerM C.Node + -> [ScannerM NodeCPC] + -> ScannerM NodeCPC process pos ty subs = do let node = C.Node pos ty [] - use ssIgnore >>= \ign -> do + use ssIgnore >>= \ign -> + use ssIgnoreCopyPasteCheck >>= \ignCPC -> do -- When no `Ignore` state is set check next node for annotation, -- if found then set it as new `IgnoreMode` otherwise skip node. - let mbAnnotation = getAnnotation node - case mbAnnotation of + let shouldCheckCPC = CopyPasteCheck $ isNothing ignCPC + let traverseChildren = Node pos ty shouldCheckCPC <$> sequence subs + case getAnnotation node of Just ann -> handleAnnotation pos ty ann Nothing -> do case ty of - PARAGRAPH -> handleParagraph ign pos ty subs - LINK {} -> handleLink ign pos ty subs - IMAGE {} -> handleLink ign pos ty subs - _ -> handleOther ign pos ty subs + PARAGRAPH -> handleParagraph ign traverseChildren + LINK {} -> handleLink ign ty traverseChildren + IMAGE {} -> handleLink ign ty traverseChildren + _ -> handleOther ign ty traverseChildren handleLink :: Maybe Ignore -> - Maybe PosInfo -> NodeType -> - [ScannerM C.Node] -> - ScannerM C.Node - handleLink ign pos ty subs = do - let traverseChildren = C.Node pos ty <$> sequence subs - -- It can be checked that it's correct for all the cases + ScannerM NodeCPC -> + ScannerM NodeCPC + handleLink ign ty traverseChildren = do + -- It's common for all ignore states ssIgnore .= Nothing + -- If there was a copy/paste ignore annotation that expected link, + -- reset this state + resetCpcIgnoreIfLink + -- If right now there was a copy/paste ignore annotation for paragraph, + -- emit an error and reset these states. + reportExpectedParagraphAfterIgnoreCpcAnnotation ty case ign of Nothing -> traverseChildren @@ -200,73 +232,122 @@ processAnnotations fp = withIgnoreMode . cataNodeWithParentNodeInfo process handleParagraph :: Maybe Ignore -> - Maybe PosInfo -> - NodeType -> - [ScannerM C.Node] -> - ScannerM C.Node - handleParagraph ign pos ty subs = do - let traverseChildren = C.Node pos ty <$> sequence subs + ScannerM NodeCPC -> + ScannerM NodeCPC + handleParagraph ign traverseChildren = do + -- If a new paragraph was expected (this stands for True), now we + -- don't expect paragraphs any more. + ssParagraphExpectedAfterCpcAnnotation .= False node <- case ign of - Nothing -> traverseChildren + Nothing -> + wrapTraverseNodeWithLinkExpectedForCpc traverseChildren Just (Ignore IMSParagraph _) -> do ssIgnore .= Nothing pure defNode Just (Ignore (IMSLink ignoreLinkState) modePos) -> - traverseNodeWithLinkExpected ignoreLinkState modePos pos ty subs + wrapTraverseNodeWithLinkExpected ignoreLinkState modePos $ + wrapTraverseNodeWithLinkExpectedForCpc traverseChildren + + ssIgnoreCopyPasteCheck .= Nothing use ssIgnore >>= \case - Just (Ignore (IMSLink ExpectingLinkInParagraph) pragmaPos) -> + Just (Ignore (IMSLink ExpectingLinkInParagraph) pragmaPos) -> do lift $ tell $ makeError pragmaPos fp LinkErr + ssIgnore .= Nothing + _ -> pass + use ssIgnoreCopyPasteCheck >>= \case + Just (Ignore (IMSLink ExpectingLinkInParagraph) pragmaPos) -> do + lift $ tell $ makeError pragmaPos fp LinkErrCpc + ssIgnoreCopyPasteCheck .= Nothing _ -> pass + pure node handleOther :: Maybe Ignore -> - Maybe PosInfo -> NodeType -> - [ScannerM C.Node] -> - ScannerM C.Node - handleOther ign pos ty subs = do - let traverseChildren = C.Node pos ty <$> sequence subs + ScannerM NodeCPC -> + ScannerM NodeCPC + handleOther ign ty traverseChildren = do + -- If right now there was a copy/paste ignore annotation for paragraph, + -- emit an error and reset these states. + reportExpectedParagraphAfterIgnoreCpcAnnotation ty case ign of - Nothing -> traverseChildren + Nothing -> + wrapTraverseNodeWithLinkExpectedForCpc traverseChildren Just (Ignore IMSParagraph modePos) -> do reportExpectedParagraphAfterIgnoreAnnotation modePos ty ssIgnore .= Nothing - traverseChildren - Just (Ignore (IMSLink ignoreLinkState) modePos) -> do - traverseNodeWithLinkExpected ignoreLinkState modePos pos ty subs + wrapTraverseNodeWithLinkExpectedForCpc traverseChildren + Just (Ignore (IMSLink ignoreLinkState) modePos) -> + wrapTraverseNodeWithLinkExpected ignoreLinkState modePos $ + wrapTraverseNodeWithLinkExpectedForCpc traverseChildren reportExpectedParagraphAfterIgnoreAnnotation :: Maybe PosInfo -> NodeType -> ScannerM () reportExpectedParagraphAfterIgnoreAnnotation modePos ty = lift . tell . makeError modePos fp . ParagraphErr $ prettyType ty - traverseNodeWithLinkExpected :: + resetCpcIgnoreIfLink :: ScannerM () + resetCpcIgnoreIfLink = do + curCpcIgnore <- use ssIgnoreCopyPasteCheck + case _ignoreMode <$> curCpcIgnore of + Just (IMSLink _) -> ssIgnoreCopyPasteCheck .= Nothing + _ -> pass + + reportExpectedParagraphAfterIgnoreCpcAnnotation :: + NodeType -> ScannerM () + reportExpectedParagraphAfterIgnoreCpcAnnotation ty = + use ssIgnoreCopyPasteCheck >>= \case + Just (Ignore IMSParagraph modePos) -> + whenM (use ssParagraphExpectedAfterCpcAnnotation) $ do + lift . tell . makeError modePos fp . ParagraphErrCpc $ prettyType ty + ssParagraphExpectedAfterCpcAnnotation .= False + ssIgnoreCopyPasteCheck .= Nothing + _ -> pass + + wrapTraverseNodeWithLinkExpected :: IgnoreLinkState -> Maybe PosInfo -> - Maybe PosInfo -> - NodeType -> - [ScannerM C.Node] -> - ScannerM C.Node - traverseNodeWithLinkExpected ignoreLinkState modePos pos ty subs = do - when (ignoreLinkState == ExpectingLinkInSubnodes) $ + ScannerM NodeCPC -> + ScannerM NodeCPC + wrapTraverseNodeWithLinkExpected ignoreLinkState modePos = + if ignoreLinkState /= ExpectingLinkInSubnodes + then id + else \traverse' -> do ssIgnore . _Just . ignoreMode .= IMSLink ParentExpectsLink - node' <- C.Node pos ty <$> sequence subs - when (ignoreLinkState == ExpectingLinkInSubnodes) $ do + node' <- traverse' currentIgnore <- use ssIgnore case currentIgnore of Just (Ignore {_ignoreMode = IMSLink ParentExpectsLink}) -> do lift $ tell $ makeError modePos fp LinkErr ssIgnore .= Nothing _ -> pass - return node' + return node' + + wrapTraverseNodeWithLinkExpectedForCpc :: + ScannerM NodeCPC -> + ScannerM NodeCPC + wrapTraverseNodeWithLinkExpectedForCpc traverse' = do + ignoreCpc <- use ssIgnoreCopyPasteCheck + case ignoreCpc of + Just (Ignore (IMSLink ExpectingLinkInSubnodes) modePos) -> do + ssIgnoreCopyPasteCheck . _Just . ignoreMode .= IMSLink ParentExpectsLink + node' <- traverse' + currentIgnore <- use ssIgnoreCopyPasteCheck + case currentIgnore of + Just (Ignore {_ignoreMode = IMSLink ParentExpectsLink}) -> do + lift $ tell $ makeError modePos fp LinkErrCpc + ssIgnoreCopyPasteCheck .= Nothing + _ -> pass + return node' + _ -> traverse' handleAnnotation :: Maybe PosInfo -> NodeType -> GetAnnotation - -> ScannerM C.Node + -> ScannerM NodeCPC handleAnnotation pos nodeType = \case IgnoreAnnotation mode -> do let reportIfThereWasAnnotation :: ScannerM () @@ -300,6 +381,41 @@ processAnnotations fp = withIgnoreMode . cataNodeWithParentNodeInfo process whenJust mbIgnoreModeState $ \ignoreModeState -> (ssIgnore .= Just (Ignore ignoreModeState correctPos)) pure defNode + IgnoreCopyPasteCheck mode -> do + mbIgnoreModeState <- case mode of + IMLink -> use ssParentNodeType <&> Just . IMSLink . \case + Just PARAGRAPH -> ExpectingLinkInParagraph + _ -> ExpectingLinkInSubnodes + + IMParagraph -> do + ssParagraphExpectedAfterCpcAnnotation .= True + pure $ Just IMSParagraph + + -- We don't expect to find an `ignore all` annotation here, + -- since that annotation should be at the top of the file and + -- any correct annotations should be handled in `checkGlobalAnnotations` + -- function. + IMAll -> do + lift . tell $ makeError correctPos fp FileErrCpc + pure Nothing + + whenJust mbIgnoreModeState $ \ignoreModeState -> do + let setupNewCpcState = ssIgnoreCopyPasteCheck .= Just (Ignore ignoreModeState correctPos) + use ssIgnoreCopyPasteCheck >>= \case + Nothing -> setupNewCpcState + Just (Ignore curIgn prevPos) + | IMSLink _ <- curIgn -> do + lift $ tell $ makeError prevPos fp LinkErrCpc + setupNewCpcState + | IMSParagraph <- curIgn -> case ignoreModeState of + IMSParagraph -> do + lift . tell . makeError prevPos fp . ParagraphErrCpc $ prettyType nodeType + setupNewCpcState + -- It's OK to have link annotation when paragraph is ignored + -- because in this case all links and all annotations are ignored. + _ -> pass + pure defNode + InvalidAnnotation msg -> do lift . tell $ makeError correctPos fp $ UnrecognisedErr msg pure defNode @@ -312,8 +428,8 @@ processAnnotations fp = withIgnoreMode . cataNodeWithParentNodeInfo process in fromMaybe "" mType withIgnoreMode - :: ScannerM C.Node - -> Writer [ScanError] C.Node + :: ScannerM (Node info) + -> Writer [ScanError] (Node info) withIgnoreMode action = action `runStateT` initialScannerState >>= \case -- We expect `Ignore` state to be `Nothing` when we reach EOF, -- otherwise that means there was an annotation that didn't match @@ -328,8 +444,8 @@ processAnnotations fp = withIgnoreMode . cataNodeWithParentNodeInfo process (node, _) -> pure node -- | Custom `foldMap` for source tree. -foldNode :: (Monoid a, Monad m) => (C.Node -> m a) -> C.Node -> m a -foldNode action node@(C.Node _ _ subs) = do +foldNode :: (Monoid a, Monad m) => (Node info -> m a) -> Node info -> m a +foldNode action node@(Node _ _ _ subs) = do a <- action node b <- concatForM subs (foldNode action) return (a <> b) @@ -342,16 +458,16 @@ nodeExtractInfo -> C.Node -> ExtractorM FileInfo nodeExtractInfo fp (C.Node nPos nTy nSubs) = do - let (ignoreFile, contentNodes) = checkGlobalAnnotations nSubs + let (ignoreFile, ignoreCpcInFile, contentNodes) = checkGlobalAnnotations nSubs if ignoreFile then return def - else diffToFileInfo <$> + else diffToFileInfo (not ignoreCpcInFile) <$> (lift (processAnnotations fp $ C.Node nPos nTy contentNodes) >>= foldNode extractor) where - extractor :: C.Node -> ExtractorM FileInfoDiff - extractor node@(C.Node pos ty _) = + extractor :: NodeCPC -> ExtractorM FileInfoDiff + extractor node@(Node pos ty info _) = case ty of HTML_BLOCK _ -> do return mempty @@ -401,15 +517,17 @@ nodeExtractInfo fp (C.Node nPos nTy nSubs) = do t : ts -> (t, Just $ T.intercalate "#" ts) [] -> error "impossible" return $ FileInfoDiff - (DList.singleton $ Reference {rName, rPos, rLink, rAnchor}) + (DList.singleton $ + Reference {rName, rPos, rLink, rAnchor, rCheckCopyPaste = cpcShouldCheck info}) DList.empty -- | Check for global annotations, ignoring simple comments if there are any. -checkGlobalAnnotations :: [C.Node] -> (Bool, [C.Node]) +checkGlobalAnnotations :: [C.Node] -> (Bool, Bool, [C.Node]) checkGlobalAnnotations nodes = do let (headerNodes, contentsNodes) = span isHeaderNode nodes ignoreFile = any isIgnoreFile headerNodes - (ignoreFile, contentsNodes) + ignoreCpcInFile = any isIgnoreCpcWithinFile headerNodes + (ignoreFile, ignoreCpcInFile, contentsNodes) where isSimpleComment :: C.Node -> Bool isSimpleComment node = do @@ -420,15 +538,20 @@ checkGlobalAnnotations nodes = do isIgnoreFile :: C.Node -> Bool isIgnoreFile = (Just (IgnoreAnnotation IMAll) ==) . getAnnotation + isIgnoreCpcWithinFile :: C.Node -> Bool + isIgnoreCpcWithinFile = (Just (IgnoreCopyPasteCheck IMAll) ==) . getAnnotation + isHeaderNode :: C.Node -> Bool isHeaderNode node = any ($ node) [ isSimpleComment , isIgnoreFile + , isIgnoreCpcWithinFile ] -defNode :: C.Node -defNode = C.Node Nothing DOCUMENT [] -- hard-coded default Node +-- | Hard-coded default Node +defNode :: NodeCPC +defNode = Node Nothing DOCUMENT (CopyPasteCheck False) [] makeError :: Maybe PosInfo @@ -473,6 +596,8 @@ textToMode :: Text -> GetAnnotation textToMode annText = case wordsList of ("ignore" : [x]) | Just ignMode <- getIgnoreMode x -> IgnoreAnnotation ignMode + ("no" : "duplication" : "check" : "in" : [x]) + | Just ignMode <- getIgnoreMode x -> IgnoreCopyPasteCheck ignMode _ -> InvalidAnnotation annText where wordsList = words annText @@ -482,6 +607,7 @@ getIgnoreMode = \case "link" -> Just IMLink "paragraph" -> Just IMParagraph "all" -> Just IMAll + "file" -> Just IMAll _ -> Nothing parseFileInfo :: MarkdownConfig -> FilePath -> LT.Text -> (FileInfo, [ScanError]) diff --git a/src/Xrefcheck/Verify.hs b/src/Xrefcheck/Verify.hs index 1ed1fdbb..6980b737 100644 --- a/src/Xrefcheck/Verify.hs +++ b/src/Xrefcheck/Verify.hs @@ -28,7 +28,10 @@ module Xrefcheck.Verify -- * URI parsing , parseUri + + -- * Reporting errors , reportVerifyErrs + , reportCopyPasteErrors ) where import Universum @@ -37,9 +40,11 @@ import Control.Concurrent.Async (Async, async, cancel, poll, wait, withAsync) import Control.Exception (AsyncException (..), throwIO) import Control.Monad.Except (MonadError (..)) import Data.ByteString qualified as BS +import Data.Char (isAlphaNum) import Data.List qualified as L import Data.Map qualified as M import Data.Reflection (Given) +import Data.Text qualified as T import Data.Text.Metrics (damerauLevenshteinNorm) import Data.Time (UTCTime, defaultTimeLocale, formatTime, readPTime, rfc822DateFormat) import Data.Time.Clock.POSIX (getPOSIXTime) @@ -255,6 +260,21 @@ instance Given ColorMode => Buildable VerifyError where #{redirectedUrl} |] +data CopyPasteCheckResult = CopyPasteCheckResult + { crFile :: FilePath, + crOriginalRef :: Reference, + crCopiedRef :: Reference + } + +instance (Given ColorMode) => Buildable CopyPasteCheckResult where + build CopyPasteCheckResult {..} = + [int|| + In file #{styleIfNeeded Faint (styleIfNeeded Bold crFile)} + #{crCopiedRef}\ + is possibly a bad copy paste of + #{crOriginalRef} + |] + reportVerifyErrs :: Given ColorMode => NonEmpty (WithReferenceLoc VerifyError) -> IO () reportVerifyErrs errs = fmt @@ -265,6 +285,17 @@ reportVerifyErrs errs = fmt Invalid references dumped, #{length errs} in total. |] +reportCopyPasteErrors + :: Given ColorMode => NonEmpty CopyPasteCheckResult -> IO () +reportCopyPasteErrors errs = fmt + [int|| + === Possible copy/paste errors === + + #{interpolateIndentF 2 (interpolateBlockListF' "➥ " build errs)} + Possible copy/paste errors dumped, #{length errs} in total. + |] + + data RetryAfter = Date UTCTime | Seconds (Time Second) deriving stock (Show, Eq) @@ -355,7 +386,7 @@ verifyRepo -> VerifyMode -> FilePath -> RepoInfo - -> IO (VerifyResult $ WithReferenceLoc VerifyError) + -> IO (VerifyResult $ WithReferenceLoc VerifyError, [CopyPasteCheckResult]) verifyRepo rw config@Config{..} @@ -363,24 +394,32 @@ verifyRepo root repoInfo'@(RepoInfo files _) = do - let toScan = do - (file, fileInfo) <- M.toList files + + let filesToScan = flip mapMaybe (M.toList files) $ \(file, fileInfo) -> do guard . not $ matchesGlobPatterns root (ecIgnoreRefsFrom cExclusions) file case fileInfo of Scanned fi -> do - ref <- _fiReferences fi - return (file, ref) - NotScannable -> empty -- No support for such file, can do nothing. - NotAddedToGit -> empty -- If this file is scannable, we've notified + Just (file, fi) + NotScannable -> Nothing -- No support for such file, can do nothing. + NotAddedToGit -> Nothing -- If this file is scannable, we've notified -- user that we are scanning only files -- added to Git while gathering RepoInfo. + toCheckCopyPaste = map (second _fiReferences) $ filter (_fiCopyPasteCheck . snd) filesToScan + toScan = concatMap (\(file, fileInfo) -> map (file,) $ _fiReferences fileInfo) filesToScan + copyPasteErrors = if scCopyPasteCheckEnabled cScanners + then [ res + | (file, refs) <- toCheckCopyPaste, + res <- checkCopyPaste file refs + ] + else [] + progressRef <- newIORef $ initVerifyProgress (map snd toScan) accumulated <- loopAsyncUntil (printer progressRef) do forConcurrentlyCaching toScan ifExternalThenCache $ \(file, ref) -> verifyReference config mode progressRef repoInfo' root file ref - case accumulated of + (, copyPasteErrors) <$> case accumulated of Right res -> return $ fold res Left (exception, partialRes) -> do -- The user has hit Ctrl+C; display any verification errors we managed to find and exit. @@ -412,6 +451,41 @@ verifyRepo ExternalLoc -> CacheUnderKey rLink _ -> NoCaching +checkCopyPaste :: FilePath -> [Reference] -> [CopyPasteCheckResult] +checkCopyPaste file refs = do + let getLinkAndAnchor x = (rLink x, rAnchor x) + groupedRefs = + L.groupBy ((==) `on` getLinkAndAnchor) $ + sortBy (compare `on` getLinkAndAnchor) $ + filter rCheckCopyPaste refs + concatMap checkGroup groupedRefs + where + checkGroup :: [Reference] -> [CopyPasteCheckResult] + checkGroup refsInGroup = do + let mergeLinkAndAnchor ref = maybe (rLink ref) (rLink ref <>) $ rAnchor ref + let refsInGroup' = flip map refsInGroup $ \ref -> + (ref, (prepareNameForCheck $ rName ref, + prepareNameForCheck $ mergeLinkAndAnchor ref)) + -- most of time this will be Nothing and we won't need `others` + let mbSubstrRef = fst <$> find (textIsLinkSubstr . snd) refsInGroup' + others = fst <$> filter (not . textIsLinkSubstr . snd) refsInGroup' + maybe [] (\substrRef -> map (CopyPasteCheckResult file substrRef) others) mbSubstrRef + + textIsLinkSubstr :: (Text, Text) -> Bool + textIsLinkSubstr (prepName, prepLink) = prepName `isSubSeq` prepLink + + prepareNameForCheck :: Text -> Text + prepareNameForCheck = T.toLower . T.filter isAlphaNum + + isSubSeq :: Text -> Text -> Bool + isSubSeq "" _str = True + isSubSeq _que "" = False + isSubSeq que str + | qhead == shead = isSubSeq qtail stail + | otherwise = isSubSeq que stail + where (qhead, qtail) = T.splitAt 1 que + (shead, stail) = T.splitAt 1 str + shouldCheckLocType :: VerifyMode -> LocationType -> Bool shouldCheckLocType mode locType | isExternal locType = shouldCheckExternal mode diff --git a/tests/Test/Xrefcheck/IgnoreAnnotationsSpec.hs b/tests/Test/Xrefcheck/IgnoreAnnotationsSpec.hs index c905bb5c..a2cb14a6 100644 --- a/tests/Test/Xrefcheck/IgnoreAnnotationsSpec.hs +++ b/tests/Test/Xrefcheck/IgnoreAnnotationsSpec.hs @@ -18,7 +18,8 @@ import Xrefcheck.Scanners.Markdown test_ignoreAnnotations :: [TestTree] test_ignoreAnnotations = - [ testGroup "Parsing failures" + [ testGroup "Parsing failures" $ + [ testGroup "Ignore annotations" [ testCase "Check if broken link annotation produce error" do let file = "tests/markdowns/with-annotations/no_link.md" errs <- getErrs file @@ -31,35 +32,71 @@ test_ignoreAnnotations = let file = "tests/markdowns/with-annotations/unexpected_ignore_file.md" errs <- getErrs file errs @?= makeError (Just $ PosInfo 9 1 9 29) file FileErr - , testCase "Check if broken unrecognised annotation produce error" do + ] + , testGroup "Ignore copypaste check annotations" + [ testCase "Check if broken copypaste link annotation produce error" do + let file = "tests/markdowns/with-annotations/no_link_cpc.md" + errs <- getErrs file + errs @?= makeError (Just $ PosInfo 7 1 7 48) file LinkErrCpc + , testCase "Check if broken copypaste paragraph annotation produce error" do + let file = "tests/markdowns/with-annotations/no_paragraph_cpc.md" + errs <- getErrs file + errs @?= makeError (Just $ PosInfo 7 1 7 53) file (ParagraphErrCpc "HEADING") + , testCase "Check if broken copypaste ignore file annotation produce error" do + let file = "tests/markdowns/with-annotations/unexpected_ignore_file_cpc.md" + errs <- getErrs file + errs @?= makeError (Just $ PosInfo 9 1 9 47) file FileErrCpc + ] + , testCase "Check if broken unrecognised annotation produce error" do let file = "tests/markdowns/with-annotations/unrecognised_option.md" errs <- getErrs file errs @?= makeError (Just $ PosInfo 7 1 7 46) file (UnrecognisedErr "ignore unrecognised-option") - ] - , testGroup "\"ignore link\" mode" - [ testCase "Check \"ignore link\" performance" $ do - let file = "tests/markdowns/with-annotations/ignore_link.md" - (fi, errs) <- parse GitHub file - getRefs fi @?= - ["team", "team", "team", "hire-us", "how-we-work", "privacy", "link2", "link2", "link3"] - errs @?= makeError (Just $ PosInfo 42 1 42 31) file LinkErr - ] - , testGroup "\"ignore paragraph\" mode" - [ testCase "Check \"ignore paragraph\" performance" $ do - (fi, errs) <- parse GitHub "tests/markdowns/with-annotations/ignore_paragraph.md" - getRefs fi @?= ["blog", "contacts"] - errs @?= [] - ] - , testGroup "\"ignore all\" mode" - [ testCase "Check \"ignore all\" performance" $ do - (fi, errs) <- parse GitHub "tests/markdowns/with-annotations/ignore_file.md" - getRefs fi @?= [] - errs @?= [] - ] + ] + , testGroup "Check ignore pragmas" $ + [ testGroup "\"ignore link\" mode" + [ testCase "Check \"ignore link\" performance" $ do + let file = "tests/markdowns/with-annotations/ignore_link.md" + (fi, errs) <- parse GitHub file + getRefs fi @?= + ["team", "team", "team", "hire-us", "how-we-work", "privacy", "link2", "link2", "link3"] + errs @?= makeError (Just $ PosInfo 42 1 42 31) file LinkErr + ] + , testGroup "\"ignore paragraph\" mode" + [ testCase "Check \"ignore paragraph\" performance" $ do + (fi, errs) <- parse GitHub "tests/markdowns/with-annotations/ignore_paragraph.md" + getRefs fi @?= ["blog", "contacts"] + errs @?= [] + ] + , testGroup "\"ignore all\" mode" + [ testCase "Check \"ignore all\" performance" $ do + (fi, errs) <- parse GitHub "tests/markdowns/with-annotations/ignore_file.md" + getRefs fi @?= [] + errs @?= [] + ] + ] + , testGroup "Check ignore copypaste check pragmas" $ + [ testCase "Check ignore duplication check for link pragmas" $ do + let file = "tests/markdowns/with-annotations/ignore_link_cpc.md" + (fi, errs) <- parse GitHub file + getRefsWithCpc fi @?= + ["team", "team", "team", "hire-us", "how-we-work", "privacy", "link2", "link2", "link3"] + errs @?= makeError (Just $ PosInfo 42 1 42 48) file LinkErrCpc + , testCase "Check ignore copypaste check for paragraph pragmas" $ do + (fi, errs) <- parse GitHub "tests/markdowns/with-annotations/ignore_paragraph_cpc.md" + getRefsWithCpc fi @?= ["blog", "contacts"] + errs @?= [] + , testCase "Check ignore copypaste check in file performance" $ do + (fi, errs) <- parse GitHub "tests/markdowns/with-annotations/ignore_file_cpc.md" + fi ^. fiCopyPasteCheck @?= False + errs @?= [] + ] ] where getRefs :: FileInfo -> [Text] getRefs fi = map rName $ fi ^. fiReferences + getRefsWithCpc :: FileInfo -> [Text] + getRefsWithCpc fi = map rName $ filter rCheckCopyPaste $ fi ^. fiReferences + getErrs :: FilePath -> IO [ScanError] getErrs path = snd <$> parse GitHub path diff --git a/tests/Test/Xrefcheck/IgnoreRegexSpec.hs b/tests/Test/Xrefcheck/IgnoreRegexSpec.hs index c3143086..31bd1d53 100644 --- a/tests/Test/Xrefcheck/IgnoreRegexSpec.hs +++ b/tests/Test/Xrefcheck/IgnoreRegexSpec.hs @@ -44,7 +44,7 @@ test_ignoreRegex = give WithoutColors $ verifyRes <- allowRewrite showProgressBar $ \rw -> verifyRepo rw config verifyMode root $ srRepoInfo scanResult - let brokenLinks = pickBrokenLinks verifyRes + let brokenLinks = pickBrokenLinks $ fst verifyRes let matchedLinks = [ "https://bad.referenc/" diff --git a/tests/Test/Xrefcheck/TooManyRequestsSpec.hs b/tests/Test/Xrefcheck/TooManyRequestsSpec.hs index 8b07d490..a61ae50e 100644 --- a/tests/Test/Xrefcheck/TooManyRequestsSpec.hs +++ b/tests/Test/Xrefcheck/TooManyRequestsSpec.hs @@ -63,7 +63,7 @@ test_tooManyRequests = testGroup "429 response tests" } } _ <- verifyReferenceWithProgress - (Reference "" "http://127.0.0.1:5000/429" Nothing (Position Nothing)) + (Reference "" "http://127.0.0.1:5000/429" Nothing (Position Nothing) False) progressRef Progress{..} <- vrExternal <$> readIORef progressRef let ttc = ttTimeToCompletion <$> pTaskTimestamp @@ -88,7 +88,7 @@ test_tooManyRequests = testGroup "429 response tests" } } _ <- verifyReferenceWithProgress - (Reference "" "http://127.0.0.1:5000/429" Nothing (Position Nothing)) + (Reference "" "http://127.0.0.1:5000/429" Nothing (Position Nothing) False) progressRef Progress{..} <- vrExternal <$> readIORef progressRef let ttc = fromMaybe (sec 0) $ ttTimeToCompletion <$> pTaskTimestamp @@ -114,7 +114,7 @@ test_tooManyRequests = testGroup "429 response tests" } } _ <- verifyReferenceWithProgress - (Reference "" "http://127.0.0.1:5000/429" Nothing (Position Nothing)) + (Reference "" "http://127.0.0.1:5000/429" Nothing (Position Nothing) False) progressRef Progress{..} <- vrExternal <$> readIORef progressRef let ttc = ttTimeToCompletion <$> pTaskTimestamp diff --git a/tests/Test/Xrefcheck/UtilRequests.hs b/tests/Test/Xrefcheck/UtilRequests.hs index 8582cd8c..1285a600 100644 --- a/tests/Test/Xrefcheck/UtilRequests.hs +++ b/tests/Test/Xrefcheck/UtilRequests.hs @@ -62,7 +62,7 @@ checkLinkAndProgressWithServer mock link progress vrExpectation = verifyLink :: Text -> IO (VerifyResult VerifyError, Progress Int) verifyLink link = do - let reference = Reference "" link Nothing (Position Nothing) + let reference = Reference "" link Nothing (Position Nothing) False progRef <- newIORef $ initVerifyProgress [reference] result <- verifyReferenceWithProgress reference progRef p <- readIORef progRef diff --git a/tests/configs/github-config.yaml b/tests/configs/github-config.yaml index 35e3a088..c0698528 100644 --- a/tests/configs/github-config.yaml +++ b/tests/configs/github-config.yaml @@ -56,3 +56,6 @@ scanners: # # This affects which anchors are generated for headers. flavor: GitHub + + # Whether copy-paste check is enabled globally. + copyPasteCheckEnabled: True diff --git a/tests/golden/check-copy-paste/check-copy-paste.bats b/tests/golden/check-copy-paste/check-copy-paste.bats new file mode 100644 index 00000000..45971b8a --- /dev/null +++ b/tests/golden/check-copy-paste/check-copy-paste.bats @@ -0,0 +1,17 @@ +#!/usr/bin/env bats + +# SPDX-FileCopyrightText: 2022 Serokell +# +# SPDX-License-Identifier: MPL-2.0 + +load '../helpers/bats-support/load' +load '../helpers/bats-assert/load' +load '../helpers/bats-file/load' +load '../helpers' + + +@test "Check possible copy-paste errors and copy-paste annotations " { + to_temp xrefcheck + + assert_diff expected.gold +} diff --git a/tests/golden/check-copy-paste/expected.gold b/tests/golden/check-copy-paste/expected.gold new file mode 100644 index 00000000..1077dd7d --- /dev/null +++ b/tests/golden/check-copy-paste/expected.gold @@ -0,0 +1,72 @@ +=== Scan errors found === + + ➥ In file second-file.md + scan error at src:35:1-25: + + Unrecognised option "no dh", perhaps you meant + <"ignore link"|"ignore paragraph"|"ignore all"> + or "no duplication check in "? + + ➥ In file second-file.md + scan error at src:40:1-53: + + Expected a PARAGRAPH after "no duplication check in paragraph" annotation, but found HEADING + + ➥ In file second-file.md + scan error at src:46:1-48: + + Expected a LINK after "no duplication check in link" annotation + + ➥ In file second-file.md + scan error at src:51:1-48: + + Annotation "no duplication check in file" must be at the top of markdown or right after comments at the top + +Scan errors dumped, 4 in total. +=== Possible copy/paste errors === + + ➥ In file second-file.md + reference (relative) at src:20:1-29: + - text: "Lol Kek" + - link: ./first-file.md + - anchor: - + is possibly a bad copy paste of + reference (relative) at src:10:1-34: + - text: "First file" + - link: ./first-file.md + - anchor: - + + ➥ In file second-file.md + reference (relative) at src:21:1-30: + - text: "Baz quux" + - link: ./first-file.md + - anchor: - + is possibly a bad copy paste of + reference (relative) at src:10:1-34: + - text: "First file" + - link: ./first-file.md + - anchor: - + + ➥ In file second-file.md + reference (relative) at src:31:1-29: + - text: "fdw" + - link: ./first-file.md + - anchor: chor + is possibly a bad copy paste of + reference (relative) at src:30:1-32: + - text: "ff-cho" + - link: ./first-file.md + - anchor: chor + + ➥ In file second-file.md + reference (external) at src:70:1-28: + - text: "gitlab" + - link: https://github.com + - anchor: - + is possibly a bad copy paste of + reference (external) at src:69:1-28: + - text: "github" + - link: https://github.com + - anchor: - + +Possible copy/paste errors dumped, 4 in total. diff --git a/tests/golden/check-copy-paste/first-file.md b/tests/golden/check-copy-paste/first-file.md new file mode 100644 index 00000000..2b4cf38c --- /dev/null +++ b/tests/golden/check-copy-paste/first-file.md @@ -0,0 +1,18 @@ + + + + + +[ Second - ---file- ](./second-file.md) +[ Link 2](./second-file.md) + +# heading + +# anch + +# chor diff --git a/tests/golden/check-copy-paste/second-file.md b/tests/golden/check-copy-paste/second-file.md new file mode 100644 index 00000000..43297e4a --- /dev/null +++ b/tests/golden/check-copy-paste/second-file.md @@ -0,0 +1,75 @@ + + + + +[ First file ](./first-file.md) + + + +[ Link 2](./first-file.md) + + +[ Link 3](./first-file.md#heading) + + +[ Lol Kek](./first-file.md) +[ Baz quux](./first-file.md) + + + +[ asd](./first-file.md#anch) +[ fdw](./first-file.md#anch) + + + +[ ff-cho](./first-file.md#chor) +[ fdw](./first-file.md#chor) + + + + + + + + + + +# asd + + + + + +# asd + + + + + + + +[ Link 3](./first-file.md) + + + + +hello, how are you, bye + + + +[github](https://github.com) +[gitlab](https://github.com) + + +[github](https://github.com) +[gitlab](https://github.com) + + + +[github](https://github.com) +[gitlab](https://github.com) diff --git a/tests/golden/check-scan-errors/expected.gold b/tests/golden/check-scan-errors/expected.gold index a1933ffe..ca734335 100644 --- a/tests/golden/check-scan-errors/expected.gold +++ b/tests/golden/check-scan-errors/expected.gold @@ -18,7 +18,9 @@ ➥ In file check-scan-errors.md scan error at src:21:1-50: - Unrecognised option "ignore unrecognised-annotation" perhaps you meant <"ignore link"|"ignore paragraph"|"ignore all"> + Unrecognised option "ignore unrecognised-annotation", perhaps you meant + <"ignore link"|"ignore paragraph"|"ignore all"> + or "no duplication check in "? ➥ In file check-second-file.md scan error at src:9:1-29: diff --git a/tests/markdowns/with-annotations/ignore_file_cpc.md b/tests/markdowns/with-annotations/ignore_file_cpc.md new file mode 100644 index 00000000..691eadac --- /dev/null +++ b/tests/markdowns/with-annotations/ignore_file_cpc.md @@ -0,0 +1,19 @@ + + + + + + + +Serokell [web-site](https://serokell.io/) +Serokell [team](https://serokell.io/team) + +Serokell [blog](https://serokell.io/blog) + +Serokell [labs](https://serokell.io/labs) + +Serokell [contacts](https://serokell.io/contacts) diff --git a/tests/markdowns/with-annotations/ignore_link_cpc.md b/tests/markdowns/with-annotations/ignore_link_cpc.md new file mode 100644 index 00000000..b4e6e231 --- /dev/null +++ b/tests/markdowns/with-annotations/ignore_link_cpc.md @@ -0,0 +1,49 @@ + + +### Do not check the first link in the paragraph + + +Serokell [web-site](https://serokell.io/) +Serokell [team](https://serokell.io/team) + + + +Serokell [blog](https://serokell.io/blog) + +Serokell [labs](https://serokell.io/labs) + +Serokell +[contacts](https://serokell.io/contacts) and again +[team](https://serokell.io/team) + +### Do not check not the first link in the paragraph + +[team](https://serokell.io/team) again and [projects](https://serokell.io/projects) + +Also [hire-us](https://serokell.io/hire-us) and +[fintech](https://serokell.io/fintech-development) +development + +Here are [how-we-work](https://serokell.io/how-we-work) and [privacy](https://serokell.io/privacy) +and [ml consulting](https://serokell.io/machine-learning-consulting) + + +Do not check link bug _regression test_ [link1](link1) [link2](link2) + + +Another no duplication check in link bug _some [link1](link1) emphasis_ [link2](link2) + +### Do not check pragma should be followed by + + + +This annotation expects link in paragraph right after it. + +So [link3](link3) is not checked for copypaste. + +Annotation inside paragraph allows +softbreaks and __other *things*__ in paragraph, so [link4](link4) is checked for copypaste. diff --git a/tests/markdowns/with-annotations/ignore_paragraph_cpc.md b/tests/markdowns/with-annotations/ignore_paragraph_cpc.md new file mode 100644 index 00000000..633cc96e --- /dev/null +++ b/tests/markdowns/with-annotations/ignore_paragraph_cpc.md @@ -0,0 +1,16 @@ + + + +Serokell [web-site](https://serokell.io/) +Serokell [team](https://serokell.io/team) + +Serokell [blog](https://serokell.io/blog) + + +Serokell [labs](https://serokell.io/labs) + +Serokell [contacts](https://serokell.io/contacts) diff --git a/tests/markdowns/with-annotations/no_link_cpc.md b/tests/markdowns/with-annotations/no_link_cpc.md new file mode 100644 index 00000000..e1671e52 --- /dev/null +++ b/tests/markdowns/with-annotations/no_link_cpc.md @@ -0,0 +1,8 @@ + + + +not a link diff --git a/tests/markdowns/with-annotations/no_paragraph_cpc.md b/tests/markdowns/with-annotations/no_paragraph_cpc.md new file mode 100644 index 00000000..03967d47 --- /dev/null +++ b/tests/markdowns/with-annotations/no_paragraph_cpc.md @@ -0,0 +1,9 @@ + + + + +# not a paragraph diff --git a/tests/markdowns/with-annotations/unexpected_ignore_file_cpc.md b/tests/markdowns/with-annotations/unexpected_ignore_file_cpc.md new file mode 100644 index 00000000..70981ebe --- /dev/null +++ b/tests/markdowns/with-annotations/unexpected_ignore_file_cpc.md @@ -0,0 +1,11 @@ + + +the first paragraph + + + +the second paragraph