{-# LANGUAGE BangPatterns        #-}
{-# LANGUAGE DefaultSignatures   #-}
{-# LANGUAGE FlexibleContexts    #-}
{-# LANGUAGE FlexibleInstances   #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies        #-}

{-|
Module      : Hectoparsec.Stream
Copyright   : (c) comp 2020
License     : MIT
Maintainer  : onecomputer00@gmail.com
Stability   : stable
Portability : portable

Typeclass for manipulating input streams.

By default, textual input streams are supported by Hectoparsec. Custom streams, like token lists or lexers can also
made into a @'Stream'@.

@
import Data.Bifunctor (second)
import Data.List (uncons)

data Tok = Tok
    { tokSpan  :: ('Pos', 'Pos') -- Start and end span of the token.
    , tokValue :: String
    }

newtype TokStream = TokStream [Tok]

instance 'Stream' TokStream where
    type 'Token' TokStream = Tok
    type 'Chunk' TokStream = [Tok]

    'streamUncons' (TokStream xs) = second TokStream \<$> uncons xs
    'updatePosToken' _ tok _ = snd (tokSpan tok)
@
-}
module Hectoparsec.Stream
    ( -- * Stream typeclass
      Stream(..)
    ) where

import           Data.Bifunctor
import qualified Data.ByteString as B
import qualified Data.ByteString.Lazy as BL
import           Data.Coerce
import           Data.List (foldl', uncons)
import           Data.Proxy
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import           Data.Word
import           Hectoparsec.Pos

{-|
A 'Stream' represents an input stream. These streams can be acted on one token at a time, or with multiple tokens at
a time in a chunk. Each token should represent a span of text (possibly just a character) in the source code.
-}
class Stream s where
    -- | The token type. This is a single element in your stream.
    type Token s

    -- | The type of chunks. This is a sequence of tokens in your stream.
    type Chunk s

    -- | Take the next token out of the stream. If the stream is empty, return 'Nothing'.
    streamUncons :: s -> Maybe (Token s, s)

    {-|
    Take at most the next /n/ tokens out of the stream. If /n/ is negative, return an empty chunk.

    By default, this repeatedly calls 'streamUncons', which may be ineffecient.
    -}
    streamSplitAt :: Int -> s -> (Chunk s, s)
    streamSplitAt Int
n s
xs = ([Token s] -> Chunk s) -> ([Token s], s) -> (Chunk s, s)
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first (Proxy s -> [Token s] -> Chunk s
forall s (proxy :: * -> *).
Stream s =>
proxy s -> [Token s] -> Chunk s
tokensToChunk Proxy s
proxy) (([Token s], s) -> (Chunk s, s)) -> ([Token s], s) -> (Chunk s, s)
forall a b. (a -> b) -> a -> b
$ Int -> s -> ([Token s], s)
forall {t} {t}.
(Ord t, Num t, Stream t) =>
t -> t -> ([Token t], t)
go Int
n s
xs
        where
            go :: t -> t -> ([Token t], t)
go !t
m t
z | t
m t -> t -> Bool
forall a. Ord a => a -> a -> Bool
<= t
0 = ([], t
z)
            go !t
m t
z = case t -> Maybe (Token t, t)
forall s. Stream s => s -> Maybe (Token s, s)
streamUncons t
z of
                    Maybe (Token t, t)
Nothing -> ([], t
z)
                    Just (Token t
x, t
z') -> ([Token t] -> [Token t]) -> ([Token t], t) -> ([Token t], t)
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first (Token t
x Token t -> [Token t] -> [Token t]
forall a. a -> [a] -> [a]
:) (([Token t], t) -> ([Token t], t))
-> ([Token t], t) -> ([Token t], t)
forall a b. (a -> b) -> a -> b
$ t -> t -> ([Token t], t)
go (t
m t -> t -> t
forall a. Num a => a -> a -> a
- t
1) t
z'

            proxy :: Proxy s
proxy = Proxy s
forall {k} (t :: k). Proxy t
Proxy :: Proxy s

    {-|
    Take tokens as long as a predicate holds.

    By default, this repeatedly calls 'streamUncons', which may be ineffecient.
    -}
    streamSpan :: (Token s -> Bool) -> s -> (Chunk s, s)
    streamSpan Token s -> Bool
p s
xs = ([Token s] -> Chunk s) -> ([Token s], s) -> (Chunk s, s)
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first (Proxy s -> [Token s] -> Chunk s
forall s (proxy :: * -> *).
Stream s =>
proxy s -> [Token s] -> Chunk s
tokensToChunk Proxy s
proxy) (([Token s], s) -> (Chunk s, s)) -> ([Token s], s) -> (Chunk s, s)
forall a b. (a -> b) -> a -> b
$ s -> ([Token s], s)
go s
xs
        where
            go :: s -> ([Token s], s)
go s
z = case s -> Maybe (Token s, s)
forall s. Stream s => s -> Maybe (Token s, s)
streamUncons s
z of
                Just (Token s
x, s
z') | Token s -> Bool
p Token s
x -> ([Token s] -> [Token s]) -> ([Token s], s) -> ([Token s], s)
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first (Token s
x Token s -> [Token s] -> [Token s]
forall a. a -> [a] -> [a]
:) (([Token s], s) -> ([Token s], s))
-> ([Token s], s) -> ([Token s], s)
forall a b. (a -> b) -> a -> b
$ s -> ([Token s], s)
go s
z'
                Maybe (Token s, s)
_ -> ([], s
z)

            proxy :: Proxy s
proxy = Proxy s
forall {k} (t :: k). Proxy t
Proxy :: Proxy s

    {-|
    Converts a chunk to tokens. This should be an isomorphism with 'tokensToChunk'.

    The default implementation is available if the chunk and tokens are coercible.
    -}
    chunkToTokens :: proxy s -> Chunk s -> [Token s]

    default chunkToTokens :: Coercible (Chunk s) [Token s] => proxy s -> Chunk s -> [Token s]
    chunkToTokens proxy s
_ Chunk s
x = Chunk s -> [Token s]
coerce Chunk s
x

    {-|
    Converts tokens to a chunk. This should be an isomorphism with 'chunkToTokens'.

    The default implementation is available if the chunk and tokens are coercible.
    -}
    tokensToChunk :: proxy s -> [Token s] -> Chunk s

    default tokensToChunk :: Coercible (Chunk s) [Token s] => proxy s -> [Token s] -> Chunk s
    tokensToChunk proxy s
_ [Token s]
x = [Token s] -> Chunk s
coerce [Token s]
x

    {-|
    Gets the length of a chunk.

    By default, this converts the chunk to tokens, which may be inefficient.

    @chunkLength proxy xs = length ('chunkToTokens' proxy xs)@
    -}
    chunkLength :: proxy s -> Chunk s -> Int
    chunkLength proxy s
proxy Chunk s
xs = [Token s] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length (proxy s -> Chunk s -> [Token s]
forall s (proxy :: * -> *).
Stream s =>
proxy s -> Chunk s -> [Token s]
chunkToTokens proxy s
proxy Chunk s
xs)
    {-# INLINE chunkLength #-}

    {-|
    Checks whether a chunk is empty.

    By default, this converts the chunk to tokens, which may be inefficient.

    @chunkNull proxy xs = null ('chunkToTokens' proxy xs)@
    -}
    chunkNull :: proxy s -> Chunk s -> Bool
    chunkNull proxy s
proxy Chunk s
xs = [Token s] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null (proxy s -> Chunk s -> [Token s]
forall s (proxy :: * -> *).
Stream s =>
proxy s -> Chunk s -> [Token s]
chunkToTokens proxy s
proxy Chunk s
xs)
    {-# INLINE chunkNull #-}

    {-|
    Performs a fold over a chunk.

    By default, this converts the chunk to tokens then folds over the list. There might be a better performing
    function for your custom stream type.
    -}
    foldChunk :: proxy s -> (b -> Token s -> b) -> b -> Chunk s -> b
    foldChunk proxy s
proxy b -> Token s -> b
f b
z Chunk s
xs = (b -> Token s -> b) -> b -> [Token s] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' b -> Token s -> b
f b
z (proxy s -> Chunk s -> [Token s]
forall s (proxy :: * -> *).
Stream s =>
proxy s -> Chunk s -> [Token s]
chunkToTokens proxy s
proxy Chunk s
xs)
    {-# INLINE foldChunk #-}

    -- | Increments position according to a token.
    updatePosToken :: proxy s -> Token s -> Pos -> Pos

    -- | Increments position according to a chunk.
    updatePosChunk :: proxy s -> Chunk s -> Pos -> Pos
    updatePosChunk proxy s
proxy Chunk s
xs Pos
p = proxy s -> (Pos -> Token s -> Pos) -> Pos -> Chunk s -> Pos
forall s (proxy :: * -> *) b.
Stream s =>
proxy s -> (b -> Token s -> b) -> b -> Chunk s -> b
foldChunk proxy s
proxy ((Token s -> Pos -> Pos) -> Pos -> Token s -> Pos
forall a b c. (a -> b -> c) -> b -> a -> c
flip ((Token s -> Pos -> Pos) -> Pos -> Token s -> Pos)
-> (Token s -> Pos -> Pos) -> Pos -> Token s -> Pos
forall a b. (a -> b) -> a -> b
$ proxy s -> Token s -> Pos -> Pos
forall s (proxy :: * -> *).
Stream s =>
proxy s -> Token s -> Pos -> Pos
updatePosToken proxy s
proxy) Pos
p Chunk s
xs
    {-# INLINE updatePosChunk #-}

    {-# MINIMAL streamUncons, updatePosToken #-}

instance Stream String where
    type Token String = Char
    type Chunk String = String

    streamUncons :: String -> Maybe (Token String, String)
streamUncons String
xs = String -> Maybe (Char, String)
forall a. [a] -> Maybe (a, [a])
uncons String
xs
    streamSplitAt :: Int -> String -> (Chunk String, String)
streamSplitAt Int
n String
xs = Int -> String -> (String, String)
forall a. Int -> [a] -> ([a], [a])
splitAt Int
n String
xs
    streamSpan :: (Token String -> Bool) -> String -> (Chunk String, String)
streamSpan Token String -> Bool
p String
xs = (Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
Token String -> Bool
p String
xs
    chunkToTokens :: forall (proxy :: * -> *).
proxy String -> Chunk String -> [Token String]
chunkToTokens proxy String
_ Chunk String
xs = [Token String]
Chunk String
xs
    tokensToChunk :: forall (proxy :: * -> *).
proxy String -> [Token String] -> Chunk String
tokensToChunk proxy String
_ [Token String]
xs = [Token String]
Chunk String
xs
    chunkLength :: forall (proxy :: * -> *). proxy String -> Chunk String -> Int
chunkLength proxy String
_ Chunk String
xs = String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
Chunk String
xs
    chunkNull :: forall (proxy :: * -> *). proxy String -> Chunk String -> Bool
chunkNull proxy String
_ Chunk String
xs = String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null String
Chunk String
xs
    foldChunk :: forall (proxy :: * -> *) b.
proxy String -> (b -> Token String -> b) -> b -> Chunk String -> b
foldChunk proxy String
_ b -> Token String -> b
f b
z Chunk String
xs = (b -> Char -> b) -> b -> String -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' b -> Char -> b
b -> Token String -> b
f b
z String
Chunk String
xs
    updatePosToken :: forall (proxy :: * -> *).
proxy String -> Token String -> Pos -> Pos
updatePosToken proxy String
_ Token String
t Pos
p = (Char -> Bool) -> Char -> Pos -> Pos
forall a. (a -> Bool) -> a -> Pos -> Pos
updatePos (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\n') Char
Token String
t Pos
p

instance Stream T.Text where
    type Token T.Text = Char
    type Chunk T.Text = T.Text

    streamUncons :: Text -> Maybe (Token Text, Text)
streamUncons Text
xs = Text -> Maybe (Char, Text)
T.uncons Text
xs
    streamSplitAt :: Int -> Text -> (Chunk Text, Text)
streamSplitAt Int
n Text
xs = Int -> Text -> (Text, Text)
T.splitAt Int
n Text
xs
    streamSpan :: (Token Text -> Bool) -> Text -> (Chunk Text, Text)
streamSpan Token Text -> Bool
p Text
xs = (Char -> Bool) -> Text -> (Text, Text)
T.span Char -> Bool
Token Text -> Bool
p Text
xs
    chunkToTokens :: forall (proxy :: * -> *). proxy Text -> Chunk Text -> [Token Text]
chunkToTokens proxy Text
_ Chunk Text
xs = Text -> String
T.unpack Text
Chunk Text
xs
    tokensToChunk :: forall (proxy :: * -> *). proxy Text -> [Token Text] -> Chunk Text
tokensToChunk proxy Text
_ [Token Text]
xs = String -> Text
T.pack String
[Token Text]
xs
    chunkLength :: forall (proxy :: * -> *). proxy Text -> Chunk Text -> Int
chunkLength proxy Text
_ Chunk Text
xs = Text -> Int
T.length Text
Chunk Text
xs
    chunkNull :: forall (proxy :: * -> *). proxy Text -> Chunk Text -> Bool
chunkNull proxy Text
_ Chunk Text
xs = Text -> Bool
T.null Text
Chunk Text
xs
    foldChunk :: forall (proxy :: * -> *) b.
proxy Text -> (b -> Token Text -> b) -> b -> Chunk Text -> b
foldChunk proxy Text
_ b -> Token Text -> b
f b
z Chunk Text
xs = (b -> Char -> b) -> b -> Text -> b
forall a. (a -> Char -> a) -> a -> Text -> a
T.foldl' b -> Char -> b
b -> Token Text -> b
f b
z Text
Chunk Text
xs
    updatePosToken :: forall (proxy :: * -> *). proxy Text -> Token Text -> Pos -> Pos
updatePosToken proxy Text
_ Token Text
t Pos
p = (Char -> Bool) -> Char -> Pos -> Pos
forall a. (a -> Bool) -> a -> Pos -> Pos
updatePos (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\n') Char
Token Text
t Pos
p

instance Stream TL.Text where
    type Token TL.Text = Char
    type Chunk TL.Text = TL.Text

    streamUncons :: Text -> Maybe (Token Text, Text)
streamUncons Text
xs = Text -> Maybe (Char, Text)
TL.uncons Text
xs
    streamSplitAt :: Int -> Text -> (Chunk Text, Text)
streamSplitAt Int
n Text
xs = Int64 -> Text -> (Text, Text)
TL.splitAt (Int -> Int64
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n) Text
xs
    streamSpan :: (Token Text -> Bool) -> Text -> (Chunk Text, Text)
streamSpan Token Text -> Bool
p Text
xs = (Char -> Bool) -> Text -> (Text, Text)
TL.span Char -> Bool
Token Text -> Bool
p Text
xs
    chunkToTokens :: forall (proxy :: * -> *). proxy Text -> Chunk Text -> [Token Text]
chunkToTokens proxy Text
_ Chunk Text
xs = Text -> String
TL.unpack Text
Chunk Text
xs
    tokensToChunk :: forall (proxy :: * -> *). proxy Text -> [Token Text] -> Chunk Text
tokensToChunk proxy Text
_ [Token Text]
xs = String -> Text
TL.pack String
[Token Text]
xs
    chunkLength :: forall (proxy :: * -> *). proxy Text -> Chunk Text -> Int
chunkLength proxy Text
_ Chunk Text
xs = Int64 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int64 -> Int) -> Int64 -> Int
forall a b. (a -> b) -> a -> b
$ Text -> Int64
TL.length Text
Chunk Text
xs
    chunkNull :: forall (proxy :: * -> *). proxy Text -> Chunk Text -> Bool
chunkNull proxy Text
_ Chunk Text
xs = Text -> Bool
TL.null Text
Chunk Text
xs
    foldChunk :: forall (proxy :: * -> *) b.
proxy Text -> (b -> Token Text -> b) -> b -> Chunk Text -> b
foldChunk proxy Text
_ b -> Token Text -> b
f b
z Chunk Text
xs = (b -> Char -> b) -> b -> Text -> b
forall a. (a -> Char -> a) -> a -> Text -> a
TL.foldl' b -> Char -> b
b -> Token Text -> b
f b
z Text
Chunk Text
xs
    updatePosToken :: forall (proxy :: * -> *). proxy Text -> Token Text -> Pos -> Pos
updatePosToken proxy Text
_ Token Text
t Pos
p = (Char -> Bool) -> Char -> Pos -> Pos
forall a. (a -> Bool) -> a -> Pos -> Pos
updatePos (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\n') Char
Token Text
t Pos
p

instance Stream B.ByteString where
    type Token B.ByteString = Word8
    type Chunk B.ByteString = B.ByteString

    streamUncons :: ByteString -> Maybe (Token ByteString, ByteString)
streamUncons ByteString
xs = ByteString -> Maybe (Word8, ByteString)
B.uncons ByteString
xs
    streamSplitAt :: Int -> ByteString -> (Chunk ByteString, ByteString)
streamSplitAt Int
n ByteString
xs = Int -> ByteString -> (ByteString, ByteString)
B.splitAt Int
n ByteString
xs
    streamSpan :: (Token ByteString -> Bool)
-> ByteString -> (Chunk ByteString, ByteString)
streamSpan Token ByteString -> Bool
p ByteString
xs = (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
B.span Word8 -> Bool
Token ByteString -> Bool
p ByteString
xs
    chunkToTokens :: forall (proxy :: * -> *).
proxy ByteString -> Chunk ByteString -> [Token ByteString]
chunkToTokens proxy ByteString
_ Chunk ByteString
xs = ByteString -> [Word8]
B.unpack ByteString
Chunk ByteString
xs
    tokensToChunk :: forall (proxy :: * -> *).
proxy ByteString -> [Token ByteString] -> Chunk ByteString
tokensToChunk proxy ByteString
_ [Token ByteString]
xs = [Word8] -> ByteString
B.pack [Word8]
[Token ByteString]
xs
    chunkLength :: forall (proxy :: * -> *).
proxy ByteString -> Chunk ByteString -> Int
chunkLength proxy ByteString
_ Chunk ByteString
xs = ByteString -> Int
B.length ByteString
Chunk ByteString
xs
    chunkNull :: forall (proxy :: * -> *).
proxy ByteString -> Chunk ByteString -> Bool
chunkNull proxy ByteString
_ Chunk ByteString
xs = ByteString -> Bool
B.null ByteString
Chunk ByteString
xs
    foldChunk :: forall (proxy :: * -> *) b.
proxy ByteString
-> (b -> Token ByteString -> b) -> b -> Chunk ByteString -> b
foldChunk proxy ByteString
_ b -> Token ByteString -> b
f b
z Chunk ByteString
xs = (b -> Word8 -> b) -> b -> ByteString -> b
forall a. (a -> Word8 -> a) -> a -> ByteString -> a
B.foldl' b -> Word8 -> b
b -> Token ByteString -> b
f b
z ByteString
Chunk ByteString
xs
    updatePosToken :: forall (proxy :: * -> *).
proxy ByteString -> Token ByteString -> Pos -> Pos
updatePosToken proxy ByteString
_ Token ByteString
t Pos
p = (Word8 -> Bool) -> Word8 -> Pos -> Pos
forall a. (a -> Bool) -> a -> Pos -> Pos
updatePos (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
10) Word8
Token ByteString
t Pos
p

instance Stream BL.ByteString where
    type Token BL.ByteString = Word8
    type Chunk BL.ByteString = BL.ByteString

    streamUncons :: ByteString -> Maybe (Token ByteString, ByteString)
streamUncons ByteString
xs = ByteString -> Maybe (Word8, ByteString)
BL.uncons ByteString
xs
    streamSplitAt :: Int -> ByteString -> (Chunk ByteString, ByteString)
streamSplitAt Int
n ByteString
xs = Int64 -> ByteString -> (ByteString, ByteString)
BL.splitAt (Int -> Int64
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n) ByteString
xs
    streamSpan :: (Token ByteString -> Bool)
-> ByteString -> (Chunk ByteString, ByteString)
streamSpan Token ByteString -> Bool
p ByteString
xs = (Word8 -> Bool) -> ByteString -> (ByteString, ByteString)
BL.span Word8 -> Bool
Token ByteString -> Bool
p ByteString
xs
    chunkToTokens :: forall (proxy :: * -> *).
proxy ByteString -> Chunk ByteString -> [Token ByteString]
chunkToTokens proxy ByteString
_ Chunk ByteString
xs = ByteString -> [Word8]
BL.unpack ByteString
Chunk ByteString
xs
    tokensToChunk :: forall (proxy :: * -> *).
proxy ByteString -> [Token ByteString] -> Chunk ByteString
tokensToChunk proxy ByteString
_ [Token ByteString]
xs = [Word8] -> ByteString
BL.pack [Word8]
[Token ByteString]
xs
    chunkLength :: forall (proxy :: * -> *).
proxy ByteString -> Chunk ByteString -> Int
chunkLength proxy ByteString
_ Chunk ByteString
xs = Int64 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int64 -> Int) -> Int64 -> Int
forall a b. (a -> b) -> a -> b
$ ByteString -> Int64
BL.length ByteString
Chunk ByteString
xs
    chunkNull :: forall (proxy :: * -> *).
proxy ByteString -> Chunk ByteString -> Bool
chunkNull proxy ByteString
_ Chunk ByteString
xs = ByteString -> Bool
BL.null ByteString
Chunk ByteString
xs
    foldChunk :: forall (proxy :: * -> *) b.
proxy ByteString
-> (b -> Token ByteString -> b) -> b -> Chunk ByteString -> b
foldChunk proxy ByteString
_ b -> Token ByteString -> b
f b
z Chunk ByteString
xs = (b -> Word8 -> b) -> b -> ByteString -> b
forall a. (a -> Word8 -> a) -> a -> ByteString -> a
BL.foldl' b -> Word8 -> b
b -> Token ByteString -> b
f b
z ByteString
Chunk ByteString
xs
    updatePosToken :: forall (proxy :: * -> *).
proxy ByteString -> Token ByteString -> Pos -> Pos
updatePosToken proxy ByteString
_ Token ByteString
t Pos
p = (Word8 -> Bool) -> Word8 -> Pos -> Pos
forall a. (a -> Bool) -> a -> Pos -> Pos
updatePos (Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
10) Word8
Token ByteString
t Pos
p