-- | A custom parsing monad, optimized for speed.
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
module Data.ProtoLens.Encoding.Parser
    ( Parser
    , runParser
    , atEnd
    , isolate
    , getWord8
    , getWord32le
    , getBytes
    , getText
    , (<?>)
    ) where

import Data.Bits (shiftL, (.|.))
import Data.Word (Word8, Word32)
import Data.ByteString (ByteString, packCStringLen)
import qualified Data.ByteString.Unsafe as B
import Data.Text (Text)
import Data.Text.Encoding (decodeUtf8')
import Foreign.Ptr
import Foreign.Storable
import System.IO.Unsafe (unsafePerformIO)

import Data.ProtoLens.Encoding.Parser.Internal

-- | Evaluates a parser on the given input.
--
-- If the parser does not consume all of the input, the rest of the
-- input is discarded and the parser still succeeds.  Parsers may use
-- 'atEnd' to detect whether they are at the end of the input.
--
-- Values returned from actions in this monad will not hold onto the original
-- ByteString, but rather make immutable copies of subsets of its bytes.
runParser :: Parser a -> ByteString -> Either String a
runParser :: forall a. Parser a -> ByteString -> Either String a
runParser (Parser Ptr Word8 -> Ptr Word8 -> IO (ParseResult a)
m) ByteString
b =
    case IO (ParseResult a) -> ParseResult a
forall a. IO a -> a
unsafePerformIO (IO (ParseResult a) -> ParseResult a)
-> IO (ParseResult a) -> ParseResult a
forall a b. (a -> b) -> a -> b
$ ByteString
-> (CStringLen -> IO (ParseResult a)) -> IO (ParseResult a)
forall a. ByteString -> (CStringLen -> IO a) -> IO a
B.unsafeUseAsCStringLen ByteString
b
            ((CStringLen -> IO (ParseResult a)) -> IO (ParseResult a))
-> (CStringLen -> IO (ParseResult a)) -> IO (ParseResult a)
forall a b. (a -> b) -> a -> b
$ \(Ptr CChar
p, Int
len) -> Ptr Word8 -> Ptr Word8 -> IO (ParseResult a)
m (Ptr CChar
p Ptr CChar -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
len) (Ptr CChar -> Ptr Word8
forall a b. Ptr a -> Ptr b
castPtr Ptr CChar
p) of
        ParseSuccess Ptr Word8
_ a
x -> a -> Either String a
forall a b. b -> Either a b
Right a
x
        ParseFailure String
s -> String -> Either String a
forall a b. a -> Either a b
Left String
s

-- | Returns True if there is no more input left to consume.
atEnd :: Parser Bool
atEnd :: Parser Bool
atEnd = (Ptr Word8 -> Ptr Word8 -> IO (ParseResult Bool)) -> Parser Bool
forall a.
(Ptr Word8 -> Ptr Word8 -> IO (ParseResult a)) -> Parser a
Parser ((Ptr Word8 -> Ptr Word8 -> IO (ParseResult Bool)) -> Parser Bool)
-> (Ptr Word8 -> Ptr Word8 -> IO (ParseResult Bool)) -> Parser Bool
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
end Ptr Word8
pos -> ParseResult Bool -> IO (ParseResult Bool)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (ParseResult Bool -> IO (ParseResult Bool))
-> ParseResult Bool -> IO (ParseResult Bool)
forall a b. (a -> b) -> a -> b
$ Ptr Word8 -> Bool -> ParseResult Bool
forall a. Ptr Word8 -> a -> ParseResult a
ParseSuccess Ptr Word8
pos (Ptr Word8
pos Ptr Word8 -> Ptr Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Ptr Word8
end)

-- | Parse a one-byte word.
getWord8 :: Parser Word8
getWord8 :: Parser Word8
getWord8 = Int -> String -> (Ptr Word8 -> IO Word8) -> Parser Word8
forall a. Int -> String -> (Ptr Word8 -> IO a) -> Parser a
withSized Int
1 String
"getWord8: Unexpected end of input" Ptr Word8 -> IO Word8
forall a. Storable a => Ptr a -> IO a
peek

-- | Parser a 4-byte word in little-endian order.
getWord32le :: Parser Word32
getWord32le :: Parser Word32
getWord32le = Int -> String -> (Ptr Word8 -> IO Word32) -> Parser Word32
forall a. Int -> String -> (Ptr Word8 -> IO a) -> Parser a
withSized Int
4 String
"getWord32le: Unexpected end of input" ((Ptr Word8 -> IO Word32) -> Parser Word32)
-> (Ptr Word8 -> IO Word32) -> Parser Word32
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
pos -> do
    Word32
b1 <- Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8 -> Word32) -> IO Word8 -> IO Word32
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Ptr Word8 -> IO Word8
forall a. Storable a => Ptr a -> IO a
peek Ptr Word8
pos
    Word32
b2 <- Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8 -> Word32) -> IO Word8 -> IO Word32
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Ptr Word8 -> IO Word8
forall a. Storable a => Ptr a -> IO a
peek (Ptr Word8
pos Ptr Word8 -> Int -> Ptr Word8
forall a. Ptr a -> Int -> Ptr a
`plusPtr'` Int
1)
    Word32
b3 <- Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8 -> Word32) -> IO Word8 -> IO Word32
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Ptr Word8 -> IO Word8
forall a. Storable a => Ptr a -> IO a
peek (Ptr Word8
pos Ptr Word8 -> Int -> Ptr Word8
forall a. Ptr a -> Int -> Ptr a
`plusPtr'` Int
2)
    Word32
b4 <- Word8 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8 -> Word32) -> IO Word8 -> IO Word32
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Ptr Word8 -> IO Word8
forall a. Storable a => Ptr a -> IO a
peek (Ptr Word8
pos Ptr Word8 -> Int -> Ptr Word8
forall a. Ptr a -> Int -> Ptr a
`plusPtr'` Int
3)
    let f :: a -> a -> a
f a
b a
b' = a
b a -> Int -> a
forall a. Bits a => a -> Int -> a
`shiftL` Int
8 a -> a -> a
forall a. Bits a => a -> a -> a
.|. a
b'
    Word32 -> IO Word32
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (Word32 -> IO Word32) -> Word32 -> IO Word32
forall a b. (a -> b) -> a -> b
$! Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
f (Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
f (Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
f Word32
b4 Word32
b3) Word32
b2) Word32
b1

-- | Parse a sequence of zero or more bytes of the given length.
--
-- The new ByteString is an immutable copy of the bytes in the input
-- and will be managed separately on the Haskell heap from the original
-- input 'ByteString'.
--
-- Fails the parse if given a negative length.
getBytes :: Int -> Parser ByteString
getBytes :: Int -> Parser ByteString
getBytes Int
n = Int -> String -> (Ptr Word8 -> IO ByteString) -> Parser ByteString
forall a. Int -> String -> (Ptr Word8 -> IO a) -> Parser a
withSized Int
n String
"getBytes: Unexpected end of input"
                    ((Ptr Word8 -> IO ByteString) -> Parser ByteString)
-> (Ptr Word8 -> IO ByteString) -> Parser ByteString
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
pos -> CStringLen -> IO ByteString
packCStringLen (Ptr Word8 -> Ptr CChar
forall a b. Ptr a -> Ptr b
castPtr Ptr Word8
pos, Int
n)

getText :: Int -> Parser Text
getText :: Int -> Parser Text
getText Int
n = do
  Either UnicodeException Text
r <- Int
-> String
-> (Ptr Word8 -> IO (Either UnicodeException Text))
-> Parser (Either UnicodeException Text)
forall a. Int -> String -> (Ptr Word8 -> IO a) -> Parser a
withSized Int
n String
"getText: Unexpected end of input" ((Ptr Word8 -> IO (Either UnicodeException Text))
 -> Parser (Either UnicodeException Text))
-> (Ptr Word8 -> IO (Either UnicodeException Text))
-> Parser (Either UnicodeException Text)
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
pos ->
          ByteString -> Either UnicodeException Text
decodeUtf8' (ByteString -> Either UnicodeException Text)
-> IO ByteString -> IO (Either UnicodeException Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> CStringLen -> IO ByteString
B.unsafePackCStringLen (Ptr Word8 -> Ptr CChar
forall a b. Ptr a -> Ptr b
castPtr Ptr Word8
pos, Int
n)
  (UnicodeException -> Parser Text)
-> (Text -> Parser Text)
-> Either UnicodeException Text
-> Parser Text
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (String -> Parser Text
forall a. String -> Parser a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String -> Parser Text)
-> (UnicodeException -> String) -> UnicodeException -> Parser Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. UnicodeException -> String
forall a. Show a => a -> String
show) Text -> Parser Text
forall a. a -> Parser a
forall (f :: * -> *) a. Applicative f => a -> f a
pure Either UnicodeException Text
r

-- | Helper function for reading bytes from the current position and
-- advancing the pointer.
--
-- Fails the parse if given a negative length.  (GHC will elide the check
-- if the length is a nonnegative constant.)
--
-- It is only safe for @f@ to peek between its argument @p@ and
-- @p `plusPtr` (len - 1)@, inclusive.
withSized :: Int -> String -> (Ptr Word8 -> IO a) -> Parser a
withSized :: forall a. Int -> String -> (Ptr Word8 -> IO a) -> Parser a
withSized Int
len String
message Ptr Word8 -> IO a
f
    | Int
len Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 = (Ptr Word8 -> Ptr Word8 -> IO (ParseResult a)) -> Parser a
forall a.
(Ptr Word8 -> Ptr Word8 -> IO (ParseResult a)) -> Parser a
Parser ((Ptr Word8 -> Ptr Word8 -> IO (ParseResult a)) -> Parser a)
-> (Ptr Word8 -> Ptr Word8 -> IO (ParseResult a)) -> Parser a
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
end Ptr Word8
pos ->
        let pos' :: Ptr Word8
pos' = Ptr Word8
pos Ptr Word8 -> Int -> Ptr Word8
forall a. Ptr a -> Int -> Ptr a
`plusPtr'` Int
len
        in if Ptr Word8
pos' Ptr Word8 -> Ptr Word8 -> Bool
forall a. Ord a => a -> a -> Bool
> Ptr Word8
end
            then ParseResult a -> IO (ParseResult a)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (ParseResult a -> IO (ParseResult a))
-> ParseResult a -> IO (ParseResult a)
forall a b. (a -> b) -> a -> b
$ String -> ParseResult a
forall a. String -> ParseResult a
ParseFailure String
message
            else Ptr Word8 -> a -> ParseResult a
forall a. Ptr Word8 -> a -> ParseResult a
ParseSuccess Ptr Word8
pos' (a -> ParseResult a) -> IO a -> IO (ParseResult a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Ptr Word8 -> IO a
f Ptr Word8
pos
    | Bool
otherwise = String -> Parser a
forall a. String -> Parser a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"withSized: negative length"
{-# INLINE withSized #-}

-- | Run the given parsing action as if there are only
-- @len@ bytes remaining.  That is, once @len@ bytes have been
-- consumed, 'atEnd' will return 'True' and other actions
-- like 'getWord8' will act like there is no input remaining.
--
-- Fails the parse if given a negative length.
isolate :: Int -> Parser a -> Parser a
isolate :: forall a. Int -> Parser a -> Parser a
isolate Int
len (Parser Ptr Word8 -> Ptr Word8 -> IO (ParseResult a)
m)
    | Int
len Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 = (Ptr Word8 -> Ptr Word8 -> IO (ParseResult a)) -> Parser a
forall a.
(Ptr Word8 -> Ptr Word8 -> IO (ParseResult a)) -> Parser a
Parser ((Ptr Word8 -> Ptr Word8 -> IO (ParseResult a)) -> Parser a)
-> (Ptr Word8 -> Ptr Word8 -> IO (ParseResult a)) -> Parser a
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
end Ptr Word8
pos ->
        let end' :: Ptr b
end' = Ptr Word8
pos Ptr Word8 -> Int -> Ptr b
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
len
        in if Ptr Word8
forall {b}. Ptr b
end' Ptr Word8 -> Ptr Word8 -> Bool
forall a. Ord a => a -> a -> Bool
> Ptr Word8
end
            then ParseResult a -> IO (ParseResult a)
forall a. a -> IO a
forall (m :: * -> *) a. Monad m => a -> m a
return (ParseResult a -> IO (ParseResult a))
-> ParseResult a -> IO (ParseResult a)
forall a b. (a -> b) -> a -> b
$ String -> ParseResult a
forall a. String -> ParseResult a
ParseFailure String
"isolate: unexpected end of input"
            else Ptr Word8 -> Ptr Word8 -> IO (ParseResult a)
m Ptr Word8
forall {b}. Ptr b
end' Ptr Word8
pos
    | Bool
otherwise = String -> Parser a
forall a. String -> Parser a
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"isolate: negative length"

-- | If the parser fails, prepend an error message.
(<?>) :: Parser a -> String -> Parser a
Parser Ptr Word8 -> Ptr Word8 -> IO (ParseResult a)
m <?> :: forall a. Parser a -> String -> Parser a
<?> String
msg = (Ptr Word8 -> Ptr Word8 -> IO (ParseResult a)) -> Parser a
forall a.
(Ptr Word8 -> Ptr Word8 -> IO (ParseResult a)) -> Parser a
Parser ((Ptr Word8 -> Ptr Word8 -> IO (ParseResult a)) -> Parser a)
-> (Ptr Word8 -> Ptr Word8 -> IO (ParseResult a)) -> Parser a
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
end Ptr Word8
p -> ParseResult a -> ParseResult a
forall {a}. ParseResult a -> ParseResult a
wrap (ParseResult a -> ParseResult a)
-> IO (ParseResult a) -> IO (ParseResult a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Ptr Word8 -> Ptr Word8 -> IO (ParseResult a)
m Ptr Word8
end Ptr Word8
p
  where
    wrap :: ParseResult a -> ParseResult a
wrap (ParseFailure String
s) = String -> ParseResult a
forall a. String -> ParseResult a
ParseFailure (String
msg String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
": " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
s)
    wrap ParseResult a
r = ParseResult a
r

-- | Advance a pointer.  Unlike 'plusPtr', preserves the type of the input.
plusPtr' :: Ptr a -> Int -> Ptr a
plusPtr' :: forall a. Ptr a -> Int -> Ptr a
plusPtr' = Ptr a -> Int -> Ptr a
forall a b. Ptr a -> Int -> Ptr b
plusPtr