From 34fe8d1e0cd2d34bd5550ec18c0f3c3081174e73 Mon Sep 17 00:00:00 2001 From: Ivan Yudin Date: Thu, 28 Feb 2019 11:35:27 +0000 Subject: [PATCH] Add error messages for incomplete parsing As result of applying > runParser . prs to a boomerang > b :: Boomerang e tok a b and initial position > initalPosition (Nothing :: Maybe e) one gets a list consisting of three type of elements: - those that resulted from errors encoded in primitive parsers of the form > Left e where e contains error message and position where the error occur; - those that produce complete successful parsing of the form > Right ((f,tok), pos) where tok is empty in an appropriate sense; - and those where the parsing stopped before the full input whose exhausted of the form > Right ((f,tok), pos) with non-empty tok and pos been the first place starting from which no parsing was made. In the previous version the elements of last type were silently thrown away, which created incomplete error messages. For example > parseString Text.Boomerang.String.alpha "a(" was given the message This patch fixes the above issue by explicitly handling the results of parsing of the third kind in the function > parse1 This led to the following change of API: - the result of the function "parse" now includes an extra field that contains the position of the first non-parsed element; - the class ErrorPosition got an extra method "setPosition" that permits to add position to the messages generated in "parse1" for incomplete parses; - the instance of "ErrorPosition (ParserError p)" is changed according to the new interface of the class ErrorPosition. --- Text/Boomerang/Error.hs | 1 + Text/Boomerang/Pos.hs | 1 + Text/Boomerang/Prim.hs | 34 +++++++++++++++++++++++++--------- 3 files changed, 27 insertions(+), 9 deletions(-) diff --git a/Text/Boomerang/Error.hs b/Text/Boomerang/Error.hs index d65fd34..fb6848c 100644 --- a/Text/Boomerang/Error.hs +++ b/Text/Boomerang/Error.hs @@ -33,6 +33,7 @@ type instance Pos (ParserError p) = p instance ErrorPosition (ParserError p) where getPosition (ParserError mPos _) = mPos + setPosition mPos (ParserError _ msg) = ParserError (Just mPos) msg {- instance ErrorList ParserError where diff --git a/Text/Boomerang/Pos.hs b/Text/Boomerang/Pos.hs index 3f2cfd3..83134d3 100644 --- a/Text/Boomerang/Pos.hs +++ b/Text/Boomerang/Pos.hs @@ -16,6 +16,7 @@ type family Pos err :: * -- | extract the position information from an error class ErrorPosition err where getPosition :: err -> Maybe (Pos err) + setPosition :: Pos err -> err -> err -- | the initial position for a position type class InitialPosition e where diff --git a/Text/Boomerang/Prim.hs b/Text/Boomerang/Prim.hs index 90f424f..a8e8d0f 100644 --- a/Text/Boomerang/Prim.hs +++ b/Text/Boomerang/Prim.hs @@ -18,6 +18,7 @@ import Control.Monad (MonadPlus(mzero, mplus), ap) import Control.Monad.Error (Error(..)) import Data.Either (partitionEithers) import Data.Function (on) +import Data.List (partition) import Data.Monoid (Monoid(mappend, mempty)) import qualified Data.Semigroup as SG import Text.Boomerang.HStack ((:-)(..), hdMap, hdTraverse) @@ -159,19 +160,34 @@ val rs ss = Boomerang rs' ss' ss' = (\(a :- r) -> map (\f -> (f, r)) (ss a)) -- | Give all possible parses or errors. -parse :: forall e a p tok. (InitialPosition e) => Boomerang e tok () a -> tok -> [Either e (a, tok)] +parse :: forall e a p tok. (InitialPosition e) => Boomerang e tok () a -> tok -> [Either e (a, tok, Pos e)] parse p s = - map (either Left (\((f, tok), _) -> Right (f (), tok))) $ runParser (prs p) s (initialPos (Nothing :: Maybe e)) + map (either Left (\((f, tok), pos) -> Right (f (), tok, pos))) $ runParser (prs p) s (initialPos (Nothing :: Maybe e)) -- | Give the first parse, for Boomerangs with a parser that yields just one value. -- Otherwise return the error (or errors) with the highest error position. -parse1 :: (ErrorPosition e, InitialPosition e, Show e, Ord (Pos e)) => - (tok -> Bool) -> Boomerang e tok () (a :- ()) -> tok -> Either [e] a -parse1 isComplete r paths = - let results = parse r paths - in case [ a | (Right (a,tok)) <- results, isComplete tok ] of - ((u :- ()):_) -> Right u - _ -> Left $ bestErrors [ e | Left e <- results ] +parse1 :: (Error e, ErrorPosition e, InitialPosition e, Show e, Ord (Pos e), Show tok ) => (tok -> Bool) -> Boomerang e tok () (a :- ()) -> tok -> Either [e] a +parse1 isComplete p s = + case partition (either (const False) (\(_,tok,_) -> isComplete tok )) $ + parse p s of + ( [], badParsings ) -> bestMsgs Nothing [] badParsings + ( Right ( u :- (), _, _ ):_, _) -> Right u + where + bestMsgs _ errMsgs [] = Left errMsgs + bestMsgs errPos errMsgs (x:xs) = case x of + Right ( u :- () ,tok, pos) + | justPos > errPos -> bestMsgs justPos (newErrMsg :[]) xs + | otherwise -> bestMsgs errPos errMsgs xs + where + justPos = Just pos + newErrMsg = setPosition pos $ strMsg $ + "no parse starting at " ++ (take 10 $ show tok) + Left e -> let mPos = getPosition e in + case compare mPos errPos of + GT -> bestMsgs mPos [e] xs + EQ -> bestMsgs mPos (e:errMsgs) xs + LT -> bestMsgs errPos errMsgs xs + -- | Give all possible serializations. unparse :: tok -> Boomerang e tok () url -> url -> [tok]