-- |
-- Module    : Replace.Megaparsec.Internal.Text
-- Copyright : ©2019 James Brock
-- License   : BSD2
-- Maintainer: James Brock <jamesbrock@gmail.com>
--
-- This internal module is for 'Data.Text.Text' specializations.
--
-- The functions in this module are intended to be chosen automatically
-- by rewrite rules in the "Replace.Megaparsec" module, so you should never
-- need to import this module.
--
-- Names in this module may change without a major version increment.

{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE TypeOperators #-}

module Replace.Megaparsec.Internal.Text
  (
    -- * Parser combinator
    sepCapText
  , anyTillText
  )
where

import Control.Monad
import qualified Data.Text as T
import Data.Text.Internal (Text(..))
import Text.Megaparsec

{-# INLINE [1] sepCapText #-}
-- | Separate-and-capture specialised to strict 'T.Text' input.
--
-- Scans the remaining input for sections matched by @sep@ and returns,
-- in input order, a list in which every 'Right' carries the result of a
-- successful @sep@ and every 'Left' carries the unmatched text that
-- preceded a match (or that trailed the final match). A 'Left' is only
-- emitted when the unmatched section is non-empty.
--
-- A @sep@ that succeeds without consuming input is treated as a
-- non-match, so zero-width patterns never produce a 'Right'.
--
-- Unmatched sections are produced by re-using the array and offset of the
-- input buffer with a shorter length, rather than by accumulating tokens.
sepCapText
    :: forall e s m a. (MonadParsec e s m, s ~ T.Text)
    => m a -- ^ The pattern matching parser @sep@
    -> m [Either (Tokens s) a]
sepCapText sep = getInput >>= go
  where
    -- The go function will search for the first pattern match,
    -- and then capture the pattern match along with the preceding
    -- unmatched string, and then recurse.
    -- restBegin is the rest of the buffer after the last pattern
    -- match; beginLen is its length field.
    go restBegin@(Text tarray beginIndx beginLen) = do
        (<|>)
            ( do
                -- Length of the input still remaining at the current
                -- position, before attempting sep.
                (Text _ _ thisLen) <- getInput
                -- About 'thisiter':
                -- It looks stupid and introduces a completely unnecessary
                -- Maybe, but when I refactor to eliminate 'thisiter' and
                -- the Maybe then the benchmarks get dramatically worse.
                thisiter <- (<|>)
                    ( do
                        x <- try sep
                        restAfter@(Text _ _ afterLen) <- getInput
                        -- Don't allow a match of a zero-width pattern
                        when (afterLen >= thisLen) empty
                        pure $ Just (x, restAfter)
                    )
                    -- sep did not match here: skip one token.
                    (anySingle >> pure Nothing)
                case thisiter of
                    (Just (x, restAfter)) | thisLen < beginLen -> do
                        -- we've got a match with some preceding unmatched string
                        let unmatched = Text tarray beginIndx (beginLen - thisLen)
                        -- (<$>) is left-associative, so the two sections are
                        -- first composed and then mapped over the recursion.
                        (Left unmatched:) <$> (Right x:) <$> go restAfter
                    (Just (x, restAfter)) -> do
                        -- we've got a match with no preceding unmatched string
                        (Right x:) <$> go restAfter
                    Nothing -> go restBegin -- no match, try again
            )
            ( do
                -- We're at the end of the input, so return
                -- whatever unmatched string we've got since restBegin
                if beginLen > 0 then
                    pure [Left restBegin]
                else pure []
            )

{-# INLINE [1] anyTillText #-}
-- | Skip forward to the first match of @sep@, specialised to strict
-- 'T.Text' input.
--
-- Tries @sep@ at the current position; if it fails, one token is
-- consumed with 'anySingle' and @sep@ is tried again. On success the
-- result is a pair of the text that was skipped before the match and the
-- value returned by @sep@.
--
-- The skipped text is produced by re-using the array and offset of the
-- input buffer with a shorter length, rather than by accumulating tokens.
--
-- There is no fallback when 'anySingle' fails, so the parser fails if
-- @sep@ does not match anywhere before that.
anyTillText
    :: forall e s m a. (MonadParsec e s m, s ~ T.Text)
    => m a -- ^ The pattern matching parser @sep@
    -> m (Tokens s, a)
anyTillText sep = do
    (Text tarray beginIndx beginLen) <- getInput
    (thisLen, x) <- go
    -- The skipped prefix spans from the starting offset up to the position
    -- at which sep matched, i.e. the difference of the two remaining lengths.
    pure (Text tarray beginIndx (beginLen - thisLen), x)
  where
    -- Returns the remaining-input length at the position where sep
    -- matched, together with the result of sep.
    go = do
      (Text _ _ thisLen) <- getInput
      r <- optional $ try sep
      case r of
        Nothing -> anySingle >> go
        Just x -> pure (thisLen, x)