{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE CPP, NoImplicitPrelude #-}

-----------------------------------------------------------------------------
-- |
-- Module      :  Foreign.C.String
-- Copyright   :  (c) The FFI task force 2001
-- License     :  BSD-style (see the file libraries/base/LICENSE)
--
-- Maintainer  :  [email protected]
-- Stability   :  provisional
-- Portability :  portable
--
-- Utilities for primitive marshalling of C strings.
--
-- The marshalling converts each Haskell character, representing a Unicode
-- code point, to one or more bytes in a manner that, by default, is
-- determined by the current locale.  As a consequence, no guarantees
-- can be made about the relative length of a Haskell string and its
-- corresponding C string, and therefore all the marshalling routines
-- include memory allocation.  The translation between Unicode and the
-- encoding of the current locale may be lossy.
--
-----------------------------------------------------------------------------

module Foreign.C.String (   -- representation of strings in C
  -- * C strings

  CString,
  CStringLen,

  -- ** Using a locale-dependent encoding

  -- | These functions are different from their @CAString@ counterparts
  -- in that they will use an encoding determined by the current locale,
  -- rather than always assuming ASCII.

  -- conversion of C strings into Haskell strings
  --
  peekCString,
  peekCStringLen,

  -- conversion of Haskell strings into C strings
  --
  newCString,
  newCStringLen,

  -- conversion of Haskell strings into C strings using temporary storage
  --
  withCString,
  withCStringLen,

  charIsRepresentable,

  -- ** Using 8-bit characters

  -- | These variants of the above functions are for use with C libraries
  -- that are ignorant of Unicode.  These functions should be used with
  -- care, as a loss of information can occur.

  castCharToCChar,
  castCCharToChar,

  castCharToCUChar,
  castCUCharToChar,
  castCharToCSChar,
  castCSCharToChar,

  peekCAString,
  peekCAStringLen,
  newCAString,
  newCAStringLen,
  withCAString,
  withCAStringLen,

  -- * C wide strings

  -- | These variants of the above functions are for use with C libraries
  -- that encode Unicode using the C @wchar_t@ type in a system-dependent
  -- way.  The only encodings supported are
  --
  -- * UTF-32 (the C compiler defines @__STDC_ISO_10646__@), or
  --
  -- * UTF-16 (as used on Windows systems).

  CWString,
  CWStringLen,

  peekCWString,
  peekCWStringLen,
  newCWString,
  newCWStringLen,
  withCWString,
  withCWStringLen,

  ) where

import Foreign.Marshal.Array
import Foreign.C.Types
import Foreign.Ptr
import Foreign.Storable

import Data.Word

import GHC.Char
import GHC.List
import GHC.Real
import GHC.Num
import GHC.Base

import {-# SOURCE #-} GHC.IO.Encoding
import qualified GHC.Foreign as GHC

-----------------------------------------------------------------------------
-- Strings

-- representation of strings in C
-- ------------------------------

-- | A C string is a reference to an array of C characters terminated by NUL.
type CString    = Ptr CChar

-- | A string with explicit length information in bytes instead of a
-- terminating NUL (allowing NUL characters in the middle of the string).
type CStringLen = (Ptr CChar, Int)

-- exported functions
-- ------------------
--
-- * the following routines apply the default conversion when converting the
--   C-land character encoding into the Haskell-land character encoding

-- | Marshal a NUL terminated C string into a Haskell string.
--
peekCString    :: CString -> IO String
peekCString :: CString -> IO String
peekCString CString
s = IO TextEncoding
getForeignEncoding forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall a b c. (a -> b -> c) -> b -> a -> c
flip TextEncoding -> CString -> IO String
GHC.peekCString CString
s

-- | Marshal a C string with explicit length into a Haskell string.
--
peekCStringLen           :: CStringLen -> IO String
peekCStringLen :: CStringLen -> IO String
peekCStringLen CStringLen
s = IO TextEncoding
getForeignEncoding forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall a b c. (a -> b -> c) -> b -> a -> c
flip TextEncoding -> CStringLen -> IO String
GHC.peekCStringLen CStringLen
s

-- | Marshal a Haskell string into a NUL terminated C string.
--
-- * the Haskell string may /not/ contain any NUL characters
--
-- * new storage is allocated for the C string and must be
--   explicitly freed using 'Foreign.Marshal.Alloc.free' or
--   'Foreign.Marshal.Alloc.finalizerFree'.
--
newCString :: String -> IO CString
newCString :: String -> IO CString
newCString String
s = IO TextEncoding
getForeignEncoding forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall a b c. (a -> b -> c) -> b -> a -> c
flip TextEncoding -> String -> IO CString
GHC.newCString String
s

-- | Marshal a Haskell string into a C string (ie, character array) with
-- explicit length information.
--
-- * new storage is allocated for the C string and must be
--   explicitly freed using 'Foreign.Marshal.Alloc.free' or
--   'Foreign.Marshal.Alloc.finalizerFree'.
--
newCStringLen     :: String -> IO CStringLen
newCStringLen :: String -> IO CStringLen
newCStringLen String
s = IO TextEncoding
getForeignEncoding forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall a b c. (a -> b -> c) -> b -> a -> c
flip TextEncoding -> String -> IO CStringLen
GHC.newCStringLen String
s

-- | Marshal a Haskell string into a NUL terminated C string using temporary
-- storage.
--
-- * the Haskell string may /not/ contain any NUL characters
--
-- * the memory is freed when the subcomputation terminates (either
--   normally or via an exception), so the pointer to the temporary
--   storage must /not/ be used after this.
--
withCString :: String -> (CString -> IO a) -> IO a
withCString :: forall a. String -> (CString -> IO a) -> IO a
withCString String
s CString -> IO a
f = IO TextEncoding
getForeignEncoding forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \TextEncoding
enc -> forall a. TextEncoding -> String -> (CString -> IO a) -> IO a
GHC.withCString TextEncoding
enc String
s CString -> IO a
f

-- | Marshal a Haskell string into a C string (ie, character array)
-- in temporary storage, with explicit length information.
--
-- * the memory is freed when the subcomputation terminates (either
--   normally or via an exception), so the pointer to the temporary
--   storage must /not/ be used after this.
--
withCStringLen         :: String -> (CStringLen -> IO a) -> IO a
withCStringLen :: forall a. String -> (CStringLen -> IO a) -> IO a
withCStringLen String
s CStringLen -> IO a
f = IO TextEncoding
getForeignEncoding forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \TextEncoding
enc -> forall a. TextEncoding -> String -> (CStringLen -> IO a) -> IO a
GHC.withCStringLen TextEncoding
enc String
s CStringLen -> IO a
f

-- -- | Determines whether a character can be accurately encoded in a 'CString'.
-- -- Unrepresentable characters are converted to '?' or their nearest visual equivalent.
charIsRepresentable :: Char -> IO Bool
charIsRepresentable :: Char -> IO Bool
charIsRepresentable Char
c = IO TextEncoding
getForeignEncoding forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall a b c. (a -> b -> c) -> b -> a -> c
flip TextEncoding -> Char -> IO Bool
GHC.charIsRepresentable Char
c

-- single byte characters
-- ----------------------
--
--   ** NOTE: These routines don't handle conversions! **

-- | Convert a C byte, representing a Latin-1 character, to the corresponding
-- Haskell character.
castCCharToChar :: CChar -> Char
castCCharToChar :: CChar -> Char
castCCharToChar CChar
ch = Int -> Char
unsafeChr (forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall a b. (Integral a, Num b) => a -> b
fromIntegral CChar
ch :: Word8))

-- | Convert a Haskell character to a C character.
-- This function is only safe on the first 256 characters.
castCharToCChar :: Char -> CChar
castCharToCChar :: Char -> CChar
castCharToCChar Char
ch = forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
ch)

-- | Convert a C @unsigned char@, representing a Latin-1 character, to
-- the corresponding Haskell character.
castCUCharToChar :: CUChar -> Char
castCUCharToChar :: CUChar -> Char
castCUCharToChar CUChar
ch = Int -> Char
unsafeChr (forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall a b. (Integral a, Num b) => a -> b
fromIntegral CUChar
ch :: Word8))

-- | Convert a Haskell character to a C @unsigned char@.
-- This function is only safe on the first 256 characters.
castCharToCUChar :: Char -> CUChar
castCharToCUChar :: Char -> CUChar
castCharToCUChar Char
ch = forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
ch)

-- | Convert a C @signed char@, representing a Latin-1 character, to the
-- corresponding Haskell character.
castCSCharToChar :: CSChar -> Char
castCSCharToChar :: CSChar -> Char
castCSCharToChar CSChar
ch = Int -> Char
unsafeChr (forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall a b. (Integral a, Num b) => a -> b
fromIntegral CSChar
ch :: Word8))

-- | Convert a Haskell character to a C @signed char@.
-- This function is only safe on the first 256 characters.
castCharToCSChar :: Char -> CSChar
castCharToCSChar :: Char -> CSChar
castCharToCSChar Char
ch = forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
ch)

-- | Marshal a NUL terminated C string into a Haskell string.
--
peekCAString    :: CString -> IO String
peekCAString :: CString -> IO String
peekCAString CString
cp = do
  Int
l <- forall a. (Storable a, Eq a) => a -> Ptr a -> IO Int
lengthArray0 CChar
nUL CString
cp
  if Int
l forall a. Ord a => a -> a -> Bool
<= Int
0 then forall (m :: * -> *) a. Monad m => a -> m a
return String
"" else String -> Int -> IO String
loop String
"" (Int
lforall a. Num a => a -> a -> a
-Int
1)
  where
    loop :: String -> Int -> IO String
loop String
s Int
i = do
        CChar
xval <- forall a. Storable a => Ptr a -> Int -> IO a
peekElemOff CString
cp Int
i
        let val :: Char
val = CChar -> Char
castCCharToChar CChar
xval
        Char
val seq :: forall a b. a -> b -> b
`seq` if Int
i forall a. Ord a => a -> a -> Bool
<= Int
0 then forall (m :: * -> *) a. Monad m => a -> m a
return (Char
valforall a. a -> [a] -> [a]
:String
s) else String -> Int -> IO String
loop (Char
valforall a. a -> [a] -> [a]
:String
s) (Int
iforall a. Num a => a -> a -> a
-Int
1)

-- | Marshal a C string with explicit length into a Haskell string.
--
peekCAStringLen           :: CStringLen -> IO String
peekCAStringLen :: CStringLen -> IO String
peekCAStringLen (CString
cp, Int
len)
  | Int
len forall a. Ord a => a -> a -> Bool
<= Int
0  = forall (m :: * -> *) a. Monad m => a -> m a
return String
"" -- being (too?) nice.
  | Bool
otherwise = String -> Int -> IO String
loop [] (Int
lenforall a. Num a => a -> a -> a
-Int
1)
  where
    loop :: String -> Int -> IO String
loop String
acc Int
i = do
         CChar
xval <- forall a. Storable a => Ptr a -> Int -> IO a
peekElemOff CString
cp Int
i
         let val :: Char
val = CChar -> Char
castCCharToChar CChar
xval
           -- blow away the coercion ASAP.
         if (Char
val seq :: forall a b. a -> b -> b
`seq` (Int
i forall a. Eq a => a -> a -> Bool
== Int
0))
          then forall (m :: * -> *) a. Monad m => a -> m a
return (Char
valforall a. a -> [a] -> [a]
:String
acc)
          else String -> Int -> IO String
loop (Char
valforall a. a -> [a] -> [a]
:String
acc) (Int
iforall a. Num a => a -> a -> a
-Int
1)

-- | Marshal a Haskell string into a NUL terminated C string.
--
-- * the Haskell string may /not/ contain any NUL characters
--
-- * new storage is allocated for the C string and must be
--   explicitly freed using 'Foreign.Marshal.Alloc.free' or
--   'Foreign.Marshal.Alloc.finalizerFree'.
--
newCAString :: String -> IO CString
newCAString :: String -> IO CString
newCAString String
str = do
  CString
ptr <- forall a. Storable a => Int -> IO (Ptr a)
mallocArray0 (forall a. [a] -> Int
length String
str)
  let
        go :: String -> Int -> IO ()
go [] Int
n     = forall a. Storable a => Ptr a -> Int -> a -> IO ()
pokeElemOff CString
ptr Int
n CChar
nUL
        go (Char
c:String
cs) Int
n = do forall a. Storable a => Ptr a -> Int -> a -> IO ()
pokeElemOff CString
ptr Int
n (Char -> CChar
castCharToCChar Char
c); String -> Int -> IO ()
go String
cs (Int
nforall a. Num a => a -> a -> a
+Int
1)
  String -> Int -> IO ()
go String
str Int
0
  forall (m :: * -> *) a. Monad m => a -> m a
return CString
ptr

-- | Marshal a Haskell string into a C string (ie, character array) with
-- explicit length information.
--
-- * new storage is allocated for the C string and must be
--   explicitly freed using 'Foreign.Marshal.Alloc.free' or
--   'Foreign.Marshal.Alloc.finalizerFree'.
--
newCAStringLen     :: String -> IO CStringLen
newCAStringLen :: String -> IO CStringLen
newCAStringLen String
str = do
  CString
ptr <- forall a. Storable a => Int -> IO (Ptr a)
mallocArray0 Int
len
  let
        go :: String -> Int -> IO ()
go [] Int
n     = Int
n seq :: forall a b. a -> b -> b
`seq` forall (m :: * -> *) a. Monad m => a -> m a
return () -- make it strict in n
        go (Char
c:String
cs) Int
n = do forall a. Storable a => Ptr a -> Int -> a -> IO ()
pokeElemOff CString
ptr Int
n (Char -> CChar
castCharToCChar Char
c); String -> Int -> IO ()
go String
cs (Int
nforall a. Num a => a -> a -> a
+Int
1)
  String -> Int -> IO ()
go String
str Int
0
  forall (m :: * -> *) a. Monad m => a -> m a
return (CString
ptr, Int
len)
  where
    len :: Int
len = forall a. [a] -> Int
length String
str

-- | Marshal a Haskell string into a NUL terminated C string using temporary
-- storage.
--
-- * the Haskell string may /not/ contain any NUL characters
--
-- * the memory is freed when the subcomputation terminates (either
--   normally or via an exception), so the pointer to the temporary
--   storage must /not/ be used after this.
--
withCAString :: String -> (CString -> IO a) -> IO a
withCAString :: forall a. String -> (CString -> IO a) -> IO a
withCAString String
str CString -> IO a
f =
  forall a b. Storable a => Int -> (Ptr a -> IO b) -> IO b
allocaArray0 (forall a. [a] -> Int
length String
str) forall a b. (a -> b) -> a -> b
$ \CString
ptr ->
      let
        go :: String -> Int -> IO ()
go [] Int
n     = forall a. Storable a => Ptr a -> Int -> a -> IO ()
pokeElemOff CString
ptr Int
n CChar
nUL
        go (Char
c:String
cs) Int
n = do forall a. Storable a => Ptr a -> Int -> a -> IO ()
pokeElemOff CString
ptr Int
n (Char -> CChar
castCharToCChar Char
c); String -> Int -> IO ()
go String
cs (Int
nforall a. Num a => a -> a -> a
+Int
1)
      in do
      String -> Int -> IO ()
go String
str Int
0
      CString -> IO a
f CString
ptr

-- | Marshal a Haskell string into a C string (ie, character array)
-- in temporary storage, with explicit length information.
--
-- * the memory is freed when the subcomputation terminates (either
--   normally or via an exception), so the pointer to the temporary
--   storage must /not/ be used after this.
--
withCAStringLen         :: String -> (CStringLen -> IO a) -> IO a
withCAStringLen :: forall a. String -> (CStringLen -> IO a) -> IO a
withCAStringLen String
str CStringLen -> IO a
f    =
  forall a b. Storable a => Int -> (Ptr a -> IO b) -> IO b
allocaArray Int
len forall a b. (a -> b) -> a -> b
$ \CString
ptr ->
      let
        go :: String -> Int -> IO ()
go [] Int
n     = Int
n seq :: forall a b. a -> b -> b
`seq` forall (m :: * -> *) a. Monad m => a -> m a
return () -- make it strict in n
        go (Char
c:String
cs) Int
n = do forall a. Storable a => Ptr a -> Int -> a -> IO ()
pokeElemOff CString
ptr Int
n (Char -> CChar
castCharToCChar Char
c); String -> Int -> IO ()
go String
cs (Int
nforall a. Num a => a -> a -> a
+Int
1)
      in do
      String -> Int -> IO ()
go String
str Int
0
      CStringLen -> IO a
f (CString
ptr,Int
len)
  where
    len :: Int
len = forall a. [a] -> Int
length String
str

-- auxiliary definitions
-- ----------------------

-- C's end of string character
--
nUL :: CChar
nUL :: CChar
nUL  = CChar
0

-- allocate an array to hold the list and pair it with the number of elements
newArrayLen        :: Storable a => [a] -> IO (Ptr a, Int)
newArrayLen :: forall a. Storable a => [a] -> IO (Ptr a, Int)
newArrayLen [a]
xs      = do
  Ptr a
a <- forall a. Storable a => [a] -> IO (Ptr a)
newArray [a]
xs
  forall (m :: * -> *) a. Monad m => a -> m a
return (Ptr a
a, forall a. [a] -> Int
length [a]
xs)

-----------------------------------------------------------------------------
-- Wide strings

-- representation of wide strings in C
-- -----------------------------------

-- | A C wide string is a reference to an array of C wide characters
-- terminated by NUL.
type CWString    = Ptr CWchar

-- | A wide character string with explicit length information in 'CWchar's
-- instead of a terminating NUL (allowing NUL characters in the middle
-- of the string).
type CWStringLen = (Ptr CWchar, Int)

-- | Marshal a NUL terminated C wide string into a Haskell string.
--
peekCWString    :: CWString -> IO String
peekCWString :: CWString -> IO String
peekCWString CWString
cp  = do
  [CWchar]
cs <- forall a. (Storable a, Eq a) => a -> Ptr a -> IO [a]
peekArray0 CWchar
wNUL CWString
cp
  forall (m :: * -> *) a. Monad m => a -> m a
return ([CWchar] -> String
cWcharsToChars [CWchar]
cs)

-- | Marshal a C wide string with explicit length into a Haskell string.
--
peekCWStringLen           :: CWStringLen -> IO String
peekCWStringLen :: CWStringLen -> IO String
peekCWStringLen (CWString
cp, Int
len)  = do
  [CWchar]
cs <- forall a. Storable a => Int -> Ptr a -> IO [a]
peekArray Int
len CWString
cp
  forall (m :: * -> *) a. Monad m => a -> m a
return ([CWchar] -> String
cWcharsToChars [CWchar]
cs)

-- | Marshal a Haskell string into a NUL terminated C wide string.
--
-- * the Haskell string may /not/ contain any NUL characters
--
-- * new storage is allocated for the C wide string and must
--   be explicitly freed using 'Foreign.Marshal.Alloc.free' or
--   'Foreign.Marshal.Alloc.finalizerFree'.
--
newCWString :: String -> IO CWString
newCWString :: String -> IO CWString
newCWString  = forall a. Storable a => a -> [a] -> IO (Ptr a)
newArray0 CWchar
wNUL forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [CWchar]
charsToCWchars

-- | Marshal a Haskell string into a C wide string (ie, wide character array)
-- with explicit length information.
--
-- * new storage is allocated for the C wide string and must
--   be explicitly freed using 'Foreign.Marshal.Alloc.free' or
--   'Foreign.Marshal.Alloc.finalizerFree'.
--
newCWStringLen     :: String -> IO CWStringLen
newCWStringLen :: String -> IO CWStringLen
newCWStringLen String
str  = forall a. Storable a => [a] -> IO (Ptr a, Int)
newArrayLen (String -> [CWchar]
charsToCWchars String
str)

-- | Marshal a Haskell string into a NUL terminated C wide string using
-- temporary storage.
--
-- * the Haskell string may /not/ contain any NUL characters
--
-- * the memory is freed when the subcomputation terminates (either
--   normally or via an exception), so the pointer to the temporary
--   storage must /not/ be used after this.
--
withCWString :: String -> (CWString -> IO a) -> IO a
withCWString :: forall a. String -> (CWString -> IO a) -> IO a
withCWString  = forall a b. Storable a => a -> [a] -> (Ptr a -> IO b) -> IO b
withArray0 CWchar
wNUL forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> [CWchar]
charsToCWchars

-- | Marshal a Haskell string into a C wide string (i.e. wide
-- character array) in temporary storage, with explicit length
-- information.
--
-- * the memory is freed when the subcomputation terminates (either
--   normally or via an exception), so the pointer to the temporary
--   storage must /not/ be used after this.
--
withCWStringLen         :: String -> (CWStringLen -> IO a) -> IO a
withCWStringLen :: forall a. String -> (CWStringLen -> IO a) -> IO a
withCWStringLen String
str CWStringLen -> IO a
f    =
  forall a b. Storable a => [a] -> (Int -> Ptr a -> IO b) -> IO b
withArrayLen (String -> [CWchar]
charsToCWchars String
str) forall a b. (a -> b) -> a -> b
$ \ Int
len CWString
ptr -> CWStringLen -> IO a
f (CWString
ptr, Int
len)

-- auxiliary definitions
-- ----------------------

wNUL :: CWchar
wNUL :: CWchar
wNUL = CWchar
0

cWcharsToChars :: [CWchar] -> [Char]
charsToCWchars :: [Char] -> [CWchar]

#if defined(mingw32_HOST_OS)

-- On Windows, wchar_t is 16 bits wide and CWString uses the UTF-16 encoding.

-- coding errors generate Chars in the surrogate range
cWcharsToChars = map chr . fromUTF16 . map fromIntegral
 where
  fromUTF16 (c1:c2:wcs)
    | 0xd800 <= c1 && c1 <= 0xdbff && 0xdc00 <= c2 && c2 <= 0xdfff =
      ((c1 - 0xd800)*0x400 + (c2 - 0xdc00) + 0x10000) : fromUTF16 wcs
  fromUTF16 (c:wcs) = c : fromUTF16 wcs
  fromUTF16 [] = []

charsToCWchars = foldr utf16Char [] . map ord
 where
  utf16Char c wcs
    | c < 0x10000 = fromIntegral c : wcs
    | otherwise   = let c' = c - 0x10000 in
                    fromIntegral (c' `div` 0x400 + 0xd800) :
                    fromIntegral (c' `mod` 0x400 + 0xdc00) : wcs

#else /* !mingw32_HOST_OS */

cWcharsToChars :: [CWchar] -> String
cWcharsToChars [CWchar]
xs  = forall a b. (a -> b) -> [a] -> [b]
map CWchar -> Char
castCWcharToChar [CWchar]
xs
charsToCWchars :: String -> [CWchar]
charsToCWchars String
xs  = forall a b. (a -> b) -> [a] -> [b]
map Char -> CWchar
castCharToCWchar String
xs

-- These conversions only make sense if __STDC_ISO_10646__ is defined
-- (meaning that wchar_t is ISO 10646, aka Unicode)

castCWcharToChar :: CWchar -> Char
castCWcharToChar :: CWchar -> Char
castCWcharToChar CWchar
ch = Int -> Char
chr (forall a b. (Integral a, Num b) => a -> b
fromIntegral CWchar
ch )

castCharToCWchar :: Char -> CWchar
castCharToCWchar :: Char -> CWchar
castCharToCWchar Char
ch = forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
ord Char
ch)

#endif /* !mingw32_HOST_OS */