{-# LANGUAGE Trustworthy, BangPatterns #-}
{-# LANGUAGE CPP, NoImplicitPrelude #-}
{-# OPTIONS_GHC -funbox-strict-fields #-}

-----------------------------------------------------------------------------
-- |
-- Module      :  GHC.IO.Buffer
-- Copyright   :  (c) The University of Glasgow 2008
-- License     :  see libraries/base/LICENSE
--
-- Maintainer  :  [email protected]
-- Stability   :  internal
-- Portability :  non-portable (GHC Extensions)
--
-- Buffers used in the IO system
--
-----------------------------------------------------------------------------

module GHC.IO.Buffer (
    -- * Buffers of any element
    Buffer(..), BufferState(..), CharBuffer, CharBufElem,

    -- ** Creation
    newByteBuffer,
    newCharBuffer,
    newBuffer,
    emptyBuffer,

    -- ** Insertion/removal
    bufferRemove,
    bufferAdd,
    slideContents,
    bufferAdjustL,
    bufferAddOffset,
    bufferAdjustOffset,

    -- ** Inspecting
    isEmptyBuffer,
    isFullBuffer,
    isFullCharBuffer,
    isWriteBuffer,
    bufferElems,
    bufferAvailable,
    bufferOffset,
    summaryBuffer,

    -- ** Operating on the raw buffer as a Ptr
    withBuffer,
    withRawBuffer,

    -- ** Assertions
    checkBuffer,

    -- * Raw buffers
    RawBuffer,
    readWord8Buf,
    writeWord8Buf,
    RawCharBuffer,
    peekCharBuf,
    readCharBuf,
    writeCharBuf,
    readCharBufPtr,
    writeCharBufPtr,
    charSize,
 ) where

import GHC.Base
-- import GHC.IO
import GHC.Num
import GHC.Ptr
import GHC.Word
import GHC.Show
import GHC.Real
import GHC.List
import GHC.ForeignPtr  (unsafeWithForeignPtr)
import Foreign.C.Types
import Foreign.ForeignPtr
import Foreign.Storable

-- Char buffers use either UTF-16 or UTF-32, with the endianness matching
-- the endianness of the host.
--
-- Invariants:
--   * a Char buffer consists of *valid* UTF-16 or UTF-32
--   * only whole characters: no partial surrogate pairs

#define CHARBUF_UTF32

-- #define CHARBUF_UTF16
--
-- NB. it won't work to just change this to CHARBUF_UTF16.  Some of
-- the code to make this work is there, and it has been tested with
-- the Iconv codec, but there are some pieces that are known to be
-- broken.  In particular, the built-in codecs
-- e.g. GHC.IO.Encoding.UTF{8,16,32} need to use isFullCharBuffer or
-- similar in place of the ow >= os comparisons.
--
-- Tamar: We need to do this eventually for Windows, as we have to re-encode
-- the text as UTF-16 anyway, so if we can avoid it it would be great.

-- ---------------------------------------------------------------------------
-- Raw blocks of data

type RawBuffer e = ForeignPtr e

readWord8Buf :: RawBuffer Word8 -> Int -> IO Word8
readWord8Buf fp ix = unsafeWithForeignPtr fp $ \p -> peekByteOff p ix

writeWord8Buf :: RawBuffer Word8 -> Int -> Word8 -> IO ()
writeWord8Buf fp ix w = unsafeWithForeignPtr fp $ \p -> pokeByteOff p ix w

#if defined(CHARBUF_UTF16)
type CharBufElem = Word16
#else
type CharBufElem = Char
#endif

type RawCharBuffer = RawBuffer CharBufElem

peekCharBuf :: RawCharBuffer -> Int -> IO Char
peekCharBuf arr ix = unsafeWithForeignPtr arr $ \p -> do
                        (c,_) <- readCharBufPtr p ix
                        return c

{-# INLINE readCharBuf #-}
readCharBuf :: RawCharBuffer -> Int -> IO (Char, Int)
readCharBuf arr ix = unsafeWithForeignPtr arr $ \p -> readCharBufPtr p ix

{-# INLINE writeCharBuf #-}
writeCharBuf :: RawCharBuffer -> Int -> Char -> IO Int
writeCharBuf arr ix c = unsafeWithForeignPtr arr $ \p -> writeCharBufPtr p ix c

{-# INLINE readCharBufPtr #-}
readCharBufPtr :: Ptr CharBufElem -> Int -> IO (Char, Int)
#if defined(CHARBUF_UTF16)
readCharBufPtr p ix = do
  c1 <- peekElemOff p ix
  if (c1 < 0xd800 || c1 > 0xdbff)
     then return (chr (fromIntegral c1), ix+1)
     else do c2 <- peekElemOff p (ix+1)
             return (unsafeChr ((fromIntegral c1 - 0xd800)*0x400 +
                                (fromIntegral c2 - 0xdc00) + 0x10000), ix+2)
#else
readCharBufPtr p ix = do c <- peekElemOff (castPtr p) ix; return (c, ix+1)
#endif

{-# INLINE writeCharBufPtr #-}
writeCharBufPtr :: Ptr CharBufElem -> Int -> Char -> IO Int
#if defined(CHARBUF_UTF16)
writeCharBufPtr p ix ch
  | c < 0x10000 = do pokeElemOff p ix (fromIntegral c)
                     return (ix+1)
  | otherwise   = do let c' = c - 0x10000
                     pokeElemOff p ix (fromIntegral (c' `div` 0x400 + 0xd800))
                     pokeElemOff p (ix+1) (fromIntegral (c' `mod` 0x400 + 0xdc00))
                     return (ix+2)
  where
    c = ord ch
#else
writeCharBufPtr p ix ch = do pokeElemOff (castPtr p) ix ch; return (ix+1)
#endif

charSize :: Int
#if defined(CHARBUF_UTF16)
charSize = 2
#else
charSize = 4
#endif

-- ---------------------------------------------------------------------------
-- Buffers

-- | A mutable array of bytes that can be passed to foreign functions.
--
-- The buffer is represented by a record, where the record contains
-- the raw buffer and the start/end points of the filled portion.  The
-- buffer contents itself is mutable, but the rest of the record is
-- immutable.  This is a slightly odd mix, but it turns out to be
-- quite practical: by making all the buffer metadata immutable, we
-- can have operations on buffer metadata outside of the IO monad.
--
-- The "live" elements of the buffer are those between the 'bufL' and
-- 'bufR' offsets.  In an empty buffer, 'bufL' is equal to 'bufR', but
-- they might not be zero: for example, the buffer might correspond to
-- a memory-mapped file and in which case 'bufL' will point to the
-- next location to be written, which is not necessarily the beginning
-- of the file.
--
-- On Posix systems the I/O manager has an implicit reliance on doing a file
-- read moving the file pointer.  However on Windows async operations the kernel
-- object representing a file does not use the file pointer offset.  Logically
-- this makes sense since operations can be performed in any arbitrary order.
-- OVERLAPPED operations don't respect the file pointer offset as their
-- intention is to support arbitrary async reads to anywhere at a much lower
-- level.  As such we should explicitly keep track of the file offsets of the
-- target in the buffer.  Any operation to seek should also update this entry.
--
-- In order to keep us sane we try to uphold the invariant that any function
-- being passed a Handle is responsible for updating the handles offset unless
-- other behaviour is documented.
data Buffer e
  = Buffer {
        bufRaw    :: !(RawBuffer e),
        bufState  :: BufferState,
        bufSize   :: !Int,          -- in elements, not bytes
        bufOffset :: !Word64,       -- start location for next read/write
        bufL      :: !Int,          -- offset of first item in the buffer
        bufR      :: !Int           -- offset of last item + 1
  }

#if defined(CHARBUF_UTF16)
type CharBuffer = Buffer Word16
#else
type CharBuffer = Buffer Char
#endif

data BufferState = ReadBuffer | WriteBuffer
  deriving Eq -- ^ @since 4.2.0.0

withBuffer :: Buffer e -> (Ptr e -> IO a) -> IO a
withBuffer Buffer{ bufRaw=raw } f = withForeignPtr (castForeignPtr raw) f

withRawBuffer :: RawBuffer e -> (Ptr e -> IO a) -> IO a
withRawBuffer raw f = withForeignPtr (castForeignPtr raw) f

isEmptyBuffer :: Buffer e -> Bool
isEmptyBuffer Buffer{ bufL=l, bufR=r } = l == r

isFullBuffer :: Buffer e -> Bool
isFullBuffer Buffer{ bufR=w, bufSize=s } = s == w

-- if a Char buffer does not have room for a surrogate pair, it is "full"
isFullCharBuffer :: Buffer e -> Bool
#if defined(CHARBUF_UTF16)
isFullCharBuffer buf = bufferAvailable buf < 2
#else
isFullCharBuffer = isFullBuffer
#endif

isWriteBuffer :: Buffer e -> Bool
isWriteBuffer buf = case bufState buf of
                        WriteBuffer -> True
                        ReadBuffer  -> False

bufferElems :: Buffer e -> Int
bufferElems Buffer{ bufR=w, bufL=r } = w - r

bufferAvailable :: Buffer e -> Int
bufferAvailable Buffer{ bufR=w, bufSize=s } = s - w

bufferRemove :: Int -> Buffer e -> Buffer e
bufferRemove i buf@Buffer{ bufL=r } = bufferAdjustL (r+i) buf

bufferAdjustL :: Int -> Buffer e -> Buffer e
bufferAdjustL l buf@Buffer{ bufR=w }
  | l == w    = buf{ bufL=0, bufR=0 }
  | otherwise = buf{ bufL=l, bufR=w }

bufferAdd :: Int -> Buffer e -> Buffer e
bufferAdd i buf@Buffer{ bufR=w } = buf{ bufR=w+i }

bufferOffset :: Buffer e -> Word64
bufferOffset Buffer{ bufOffset=off } = off

bufferAdjustOffset :: Word64 -> Buffer e -> Buffer e
bufferAdjustOffset offs buf = buf{ bufOffset=offs }

-- The adjustment to the offset can be 32bit int on 32 platforms.
-- This is fine, we only use this after reading into/writing from
-- the buffer so we will never overflow here.
bufferAddOffset :: Int -> Buffer e -> Buffer e
bufferAddOffset offs buf@Buffer{ bufOffset=w } =
  buf{ bufOffset=w+(fromIntegral offs) }

emptyBuffer :: RawBuffer e -> Int -> BufferState -> Buffer e
emptyBuffer raw sz state =
  Buffer{ bufRaw=raw, bufState=state, bufOffset=0, bufR=0, bufL=0, bufSize=sz }

newByteBuffer :: Int -> BufferState -> IO (Buffer Word8)
newByteBuffer c st = newBuffer c c st

newCharBuffer :: Int -> BufferState -> IO CharBuffer
newCharBuffer c st = newBuffer (c * charSize) c st

newBuffer :: Int -> Int -> BufferState -> IO (Buffer e)
newBuffer bytes sz state = do
  fp <- mallocForeignPtrBytes bytes
  return (emptyBuffer fp sz state)

-- | slides the contents of the buffer to the beginning
slideContents :: Buffer Word8 -> IO (Buffer Word8)
slideContents buf@Buffer{ bufL=l, bufR=r, bufRaw=raw } = do
  let elems = r - l
  withRawBuffer raw $ \p ->
      do _ <- memmove p (p `plusPtr` l) (fromIntegral elems)
         return ()
  return buf{ bufL=0, bufR=elems }

foreign import ccall unsafe "memmove"
   memmove :: Ptr a -> Ptr a -> CSize -> IO (Ptr a)

summaryBuffer :: Buffer a -> String
summaryBuffer !buf  -- Strict => slightly better code
   = ppr (show $ bufRaw buf) ++ "@buf" ++ show (bufSize buf)
   ++ "(" ++ show (bufL buf) ++ "-" ++ show (bufR buf) ++ ")"
   ++ " (>=" ++ show (bufOffset buf) ++ ")"
  where ppr :: String -> String
        ppr ('0':'x':xs) = let p = dropWhile (=='0') xs
                           in if null p then "0x0" else '0':'x':p
        ppr x = x

-- Note [INVARIANTS on Buffers]
-- ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
--   * r <= w
--   * if r == w, and the buffer is for reading, then r == 0 && w == 0
--   * a write buffer is never full.  If an operation
--     fills up the buffer, it will always flush it before
--     returning.
--   * a read buffer may be full as a result of hLookAhead.  In normal
--     operation, a read buffer always has at least one character of space.

checkBuffer :: Buffer a -> IO ()
checkBuffer buf@Buffer{ bufState = state, bufL=r, bufR=w, bufSize=size } =
     check buf (
        size > 0
        && r <= w
        && w <= size
        && ( r /= w || state == WriteBuffer || (r == 0 && w == 0) )
        && ( state /= WriteBuffer || w < size ) -- write buffer is never full
      )

check :: Buffer a -> Bool -> IO ()
check _   True  = return ()
check buf False = errorWithoutStackTrace ("buffer invariant violation: " ++ summaryBuffer buf)