{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE CPP
           , NoImplicitPrelude
           , NondecreasingIndentation
  #-}
{-# OPTIONS_HADDOCK not-home #-}

-----------------------------------------------------------------------------
-- |
-- Module      :  GHC.IO.Encoding.Iconv
-- Copyright   :  (c) The University of Glasgow, 2008-2009
-- License     :  see libraries/base/LICENSE
--
-- Maintainer  :  [email protected]
-- Stability   :  internal
-- Portability :  non-portable
--
-- This module provides text encoding/decoding using iconv
--
-----------------------------------------------------------------------------

module GHC.IO.Encoding.Iconv (
#if !defined(mingw32_HOST_OS)
   iconvEncoding, mkIconvEncoding,
   localeEncodingName
#endif
 ) where

#include "MachDeps.h"
#include "HsBaseConfig.h"

#if defined(mingw32_HOST_OS)
import GHC.Base () -- For build ordering
#else

import Foreign
import Foreign.C hiding (charIsRepresentable)
import Data.Maybe
import GHC.Base
import GHC.Foreign (charIsRepresentable)
import GHC.IO.Buffer
import GHC.IO.Encoding.Failure
import GHC.IO.Encoding.Types
import GHC.List (span)
import GHC.Num
import GHC.Show
import GHC.Real
import System.IO.Unsafe (unsafePerformIO)
import System.Posix.Internals

c_DEBUG_DUMP :: Bool
c_DEBUG_DUMP :: Bool
c_DEBUG_DUMP = Bool
False

iconv_trace :: String -> IO ()
iconv_trace :: String -> IO ()
iconv_trace String
s
 | Bool
c_DEBUG_DUMP = String -> IO ()
puts String
s
 | Bool
otherwise    = forall (m :: * -> *) a. Monad m => a -> m a
return ()

-- -----------------------------------------------------------------------------
-- iconv encoders/decoders

{-# NOINLINE localeEncodingName #-}
localeEncodingName :: String
localeEncodingName :: String
localeEncodingName = forall a. IO a -> a
unsafePerformIO forall a b. (a -> b) -> a -> b
$ do
   -- Use locale_charset() or nl_langinfo(CODESET) to get the encoding
   -- if we have either of them.
   CString
cstr <- IO CString
c_localeEncoding
   CString -> IO String
peekCAString CString
cstr -- Assume charset names are ASCII

-- We hope iconv_t is a storable type.  It should be, since it has at least the
-- value -1, which is a possible return value from iconv_open.
type IConv = CLong -- ToDo: (#type iconv_t)

foreign import ccall unsafe "hs_iconv_open"
    hs_iconv_open :: CString -> CString -> IO IConv

foreign import ccall unsafe "hs_iconv_close"
    hs_iconv_close :: IConv -> IO CInt

foreign import ccall unsafe "hs_iconv"
    hs_iconv :: IConv -> Ptr CString -> Ptr CSize -> Ptr CString -> Ptr CSize
          -> IO CSize

foreign import ccall unsafe "localeEncoding"
    c_localeEncoding :: IO CString

haskellChar :: String
#if defined(WORDS_BIGENDIAN)
haskellChar | charSize == 2 = "UTF-16BE"
            | otherwise     = "UTF-32BE"
#else
haskellChar :: String
haskellChar | Int
charSize forall a. Eq a => a -> a -> Bool
== Int
2 = String
"UTF-16LE"
            | Bool
otherwise     = String
"UTF-32LE"
#endif

char_shift :: Int
char_shift :: Int
char_shift | Int
charSize forall a. Eq a => a -> a -> Bool
== Int
2 = Int
1
           | Bool
otherwise     = Int
2

iconvEncoding :: String -> IO (Maybe TextEncoding)
iconvEncoding :: String -> IO (Maybe TextEncoding)
iconvEncoding = CodingFailureMode -> String -> IO (Maybe TextEncoding)
mkIconvEncoding CodingFailureMode
ErrorOnCodingFailure

-- | Construct an iconv-based 'TextEncoding' for the given character set and
-- 'CodingFailureMode'.
--
-- As iconv is missing in some minimal environments (e.g. #10298), this
-- checks to ensure that iconv is working properly before returning the
-- encoding, returning 'Nothing' if not.
mkIconvEncoding :: CodingFailureMode -> String -> IO (Maybe TextEncoding)
mkIconvEncoding :: CodingFailureMode -> String -> IO (Maybe TextEncoding)
mkIconvEncoding CodingFailureMode
cfm String
charset = do
    let enc :: TextEncoding
enc = TextEncoding {
                  textEncodingName :: String
textEncodingName = String
charset,
                  mkTextDecoder :: IO (TextDecoder ())
mkTextDecoder = forall a b.
String
-> String
-> (Buffer a -> Buffer b -> IO (Buffer a, Buffer b))
-> (IConv
    -> Buffer a -> Buffer b -> IO (CodingProgress, Buffer a, Buffer b))
-> IO (BufferCodec a b ())
newIConv String
raw_charset (String
haskellChar forall a. [a] -> [a] -> [a]
++ String
suffix)
                                           (CodingFailureMode
-> Buffer Word8 -> Buffer Char -> IO (Buffer Word8, Buffer Char)
recoverDecode CodingFailureMode
cfm) IConv -> DecodeBuffer
iconvDecode,
                  mkTextEncoder :: IO (TextEncoder ())
mkTextEncoder = forall a b.
String
-> String
-> (Buffer a -> Buffer b -> IO (Buffer a, Buffer b))
-> (IConv
    -> Buffer a -> Buffer b -> IO (CodingProgress, Buffer a, Buffer b))
-> IO (BufferCodec a b ())
newIConv String
haskellChar String
charset
                                           (CodingFailureMode
-> Buffer Char -> Buffer Word8 -> IO (Buffer Char, Buffer Word8)
recoverEncode CodingFailureMode
cfm) IConv -> EncodeBuffer
iconvEncode}
    Bool
good <- TextEncoding -> Char -> IO Bool
charIsRepresentable TextEncoding
enc Char
'a'
    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ if Bool
good
               then forall a. a -> Maybe a
Just TextEncoding
enc
               else forall a. Maybe a
Nothing
  where
    -- An annoying feature of GNU iconv is that the //PREFIXES only take
    -- effect when they appear on the tocode parameter to iconv_open:
    (String
raw_charset, String
suffix) = forall a. (a -> Bool) -> [a] -> ([a], [a])
span (forall a. Eq a => a -> a -> Bool
/= Char
'/') String
charset

newIConv :: String -> String
   -> (Buffer a -> Buffer b -> IO (Buffer a, Buffer b))
   -> (IConv -> Buffer a -> Buffer b -> IO (CodingProgress, Buffer a, Buffer b))
   -> IO (BufferCodec a b ())
newIConv :: forall a b.
String
-> String
-> (Buffer a -> Buffer b -> IO (Buffer a, Buffer b))
-> (IConv
    -> Buffer a -> Buffer b -> IO (CodingProgress, Buffer a, Buffer b))
-> IO (BufferCodec a b ())
newIConv String
from String
to Buffer a -> Buffer b -> IO (Buffer a, Buffer b)
rec IConv
-> Buffer a -> Buffer b -> IO (CodingProgress, Buffer a, Buffer b)
fn =
  -- Assume charset names are ASCII
  forall a. String -> (CString -> IO a) -> IO a
withCAString String
from forall a b. (a -> b) -> a -> b
$ \ CString
from_str ->
  forall a. String -> (CString -> IO a) -> IO a
withCAString String
to   forall a b. (a -> b) -> a -> b
$ \ CString
to_str -> do
    IConv
iconvt <- forall a. (Eq a, Num a) => String -> IO a -> IO a
throwErrnoIfMinus1 String
"mkTextEncoding" forall a b. (a -> b) -> a -> b
$ CString -> CString -> IO IConv
hs_iconv_open CString
to_str CString
from_str
    let iclose :: IO ()
iclose = forall a. (Eq a, Num a) => String -> IO a -> IO ()
throwErrnoIfMinus1_ String
"Iconv.close" forall a b. (a -> b) -> a -> b
$ IConv -> IO CInt
hs_iconv_close IConv
iconvt
    forall (m :: * -> *) a. Monad m => a -> m a
return BufferCodec{
                encode :: Buffer a -> Buffer b -> IO (CodingProgress, Buffer a, Buffer b)
encode = IConv
-> Buffer a -> Buffer b -> IO (CodingProgress, Buffer a, Buffer b)
fn IConv
iconvt,
                recover :: Buffer a -> Buffer b -> IO (Buffer a, Buffer b)
recover = Buffer a -> Buffer b -> IO (Buffer a, Buffer b)
rec,
                close :: IO ()
close  = IO ()
iclose,
                -- iconv doesn't supply a way to save/restore the state
                getState :: IO ()
getState = forall (m :: * -> *) a. Monad m => a -> m a
return (),
                setState :: () -> IO ()
setState = forall a b. a -> b -> a
const forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a. Monad m => a -> m a
return ()
                }

iconvDecode :: IConv -> DecodeBuffer
iconvDecode :: IConv -> DecodeBuffer
iconvDecode IConv
iconv_t Buffer Word8
ibuf Buffer Char
obuf = forall a b.
IConv
-> Buffer a
-> Int
-> Buffer b
-> Int
-> IO (CodingProgress, Buffer a, Buffer b)
iconvRecode IConv
iconv_t Buffer Word8
ibuf Int
0 Buffer Char
obuf Int
char_shift

iconvEncode :: IConv -> EncodeBuffer
iconvEncode :: IConv -> EncodeBuffer
iconvEncode IConv
iconv_t Buffer Char
ibuf Buffer Word8
obuf = forall a b.
IConv
-> Buffer a
-> Int
-> Buffer b
-> Int
-> IO (CodingProgress, Buffer a, Buffer b)
iconvRecode IConv
iconv_t Buffer Char
ibuf Int
char_shift Buffer Word8
obuf Int
0

iconvRecode :: IConv -> Buffer a -> Int -> Buffer b -> Int
            -> IO (CodingProgress, Buffer a, Buffer b)
iconvRecode :: forall a b.
IConv
-> Buffer a
-> Int
-> Buffer b
-> Int
-> IO (CodingProgress, Buffer a, Buffer b)
iconvRecode IConv
iconv_t
  input :: Buffer a
input@Buffer{  bufRaw :: forall e. Buffer e -> RawBuffer e
bufRaw=RawBuffer a
iraw, bufL :: forall e. Buffer e -> Int
bufL=Int
ir, bufR :: forall e. Buffer e -> Int
bufR=Int
iw, bufSize :: forall e. Buffer e -> Int
bufSize=Int
_  }  Int
iscale
  output :: Buffer b
output@Buffer{ bufRaw :: forall e. Buffer e -> RawBuffer e
bufRaw=RawBuffer b
oraw, bufL :: forall e. Buffer e -> Int
bufL=Int
_,  bufR :: forall e. Buffer e -> Int
bufR=Int
ow, bufSize :: forall e. Buffer e -> Int
bufSize=Int
os }  Int
oscale
  = do
    String -> IO ()
iconv_trace (String
"haskellChar=" forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show String
haskellChar)
    String -> IO ()
iconv_trace (String
"iconvRecode before, input=" forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show (forall a. Buffer a -> String
summaryBuffer Buffer a
input))
    String -> IO ()
iconv_trace (String
"iconvRecode before, output=" forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show (forall a. Buffer a -> String
summaryBuffer Buffer b
output))
    forall e a. RawBuffer e -> (Ptr e -> IO a) -> IO a
withRawBuffer RawBuffer a
iraw forall a b. (a -> b) -> a -> b
$ \ Ptr a
piraw -> do
    forall e a. RawBuffer e -> (Ptr e -> IO a) -> IO a
withRawBuffer RawBuffer b
oraw forall a b. (a -> b) -> a -> b
$ \ Ptr b
poraw -> do
    forall a b. Storable a => a -> (Ptr a -> IO b) -> IO b
with (Ptr a
piraw forall a b. Ptr a -> Int -> Ptr b
`plusPtr` (Int
ir forall a. Bits a => a -> Int -> a
`shiftL` Int
iscale)) forall a b. (a -> b) -> a -> b
$ \ Ptr CString
p_inbuf -> do
    forall a b. Storable a => a -> (Ptr a -> IO b) -> IO b
with (Ptr b
poraw forall a b. Ptr a -> Int -> Ptr b
`plusPtr` (Int
ow forall a. Bits a => a -> Int -> a
`shiftL` Int
oscale)) forall a b. (a -> b) -> a -> b
$ \ Ptr CString
p_outbuf -> do
    forall a b. Storable a => a -> (Ptr a -> IO b) -> IO b
with (forall a b. (Integral a, Num b) => a -> b
fromIntegral ((Int
iwforall a. Num a => a -> a -> a
-Int
ir) forall a. Bits a => a -> Int -> a
`shiftL` Int
iscale)) forall a b. (a -> b) -> a -> b
$ \ Ptr CSize
p_inleft -> do
    forall a b. Storable a => a -> (Ptr a -> IO b) -> IO b
with (forall a b. (Integral a, Num b) => a -> b
fromIntegral ((Int
osforall a. Num a => a -> a -> a
-Int
ow) forall a. Bits a => a -> Int -> a
`shiftL` Int
oscale)) forall a b. (a -> b) -> a -> b
$ \ Ptr CSize
p_outleft -> do
      CSize
res <- IConv
-> Ptr CString -> Ptr CSize -> Ptr CString -> Ptr CSize -> IO CSize
hs_iconv IConv
iconv_t Ptr CString
p_inbuf Ptr CSize
p_inleft Ptr CString
p_outbuf Ptr CSize
p_outleft
      CSize
new_inleft  <- forall a. Storable a => Ptr a -> IO a
peek Ptr CSize
p_inleft
      CSize
new_outleft <- forall a. Storable a => Ptr a -> IO a
peek Ptr CSize
p_outleft
      let
          new_inleft' :: Int
new_inleft'  = forall a b. (Integral a, Num b) => a -> b
fromIntegral CSize
new_inleft forall a. Bits a => a -> Int -> a
`shiftR` Int
iscale
          new_outleft' :: Int
new_outleft' = forall a b. (Integral a, Num b) => a -> b
fromIntegral CSize
new_outleft forall a. Bits a => a -> Int -> a
`shiftR` Int
oscale
          new_input :: Buffer a
new_input
            | CSize
new_inleft forall a. Eq a => a -> a -> Bool
== CSize
0  = Buffer a
input { bufL :: Int
bufL = Int
0, bufR :: Int
bufR = Int
0 }
            | Bool
otherwise        = Buffer a
input { bufL :: Int
bufL = Int
iw forall a. Num a => a -> a -> a
- Int
new_inleft' }
          new_output :: Buffer b
new_output = Buffer b
output{ bufR :: Int
bufR = Int
os forall a. Num a => a -> a -> a
- Int
new_outleft' }
      String -> IO ()
iconv_trace (String
"iconv res=" forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show CSize
res)
      String -> IO ()
iconv_trace (String
"iconvRecode after,  input=" forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show (forall a. Buffer a -> String
summaryBuffer Buffer a
new_input))
      String -> IO ()
iconv_trace (String
"iconvRecode after,  output=" forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show (forall a. Buffer a -> String
summaryBuffer Buffer b
new_output))
      if (CSize
res forall a. Eq a => a -> a -> Bool
/= -CSize
1)
        then -- all input translated
           forall (m :: * -> *) a. Monad m => a -> m a
return (CodingProgress
InputUnderflow, Buffer a
new_input, Buffer b
new_output)
        else do
      Errno
errno <- IO Errno
getErrno
      case Errno
errno of
        Errno
e | Errno
e forall a. Eq a => a -> a -> Bool
== Errno
e2BIG  -> forall (m :: * -> *) a. Monad m => a -> m a
return (CodingProgress
OutputUnderflow, Buffer a
new_input, Buffer b
new_output)
          | Errno
e forall a. Eq a => a -> a -> Bool
== Errno
eINVAL -> forall (m :: * -> *) a. Monad m => a -> m a
return (CodingProgress
InputUnderflow, Buffer a
new_input, Buffer b
new_output)
           -- Sometimes iconv reports EILSEQ for a
           -- character in the input even when there is no room
           -- in the output; in this case we might be about to
           -- change the encoding anyway, so the following bytes
           -- could very well be in a different encoding.
           --
           -- Because we can only say InvalidSequence if there is at least
           -- one element left in the output, we have to special case this.
          | Errno
e forall a. Eq a => a -> a -> Bool
== Errno
eILSEQ -> forall (m :: * -> *) a. Monad m => a -> m a
return (if Int
new_outleft' forall a. Eq a => a -> a -> Bool
== Int
0 then CodingProgress
OutputUnderflow else CodingProgress
InvalidSequence, Buffer a
new_input, Buffer b
new_output)
          | Bool
otherwise -> do
              String -> IO ()
iconv_trace (String
"iconv returned error: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show (String -> Errno -> Maybe Handle -> Maybe String -> IOError
errnoToIOError String
"iconv" Errno
e forall a. Maybe a
Nothing forall a. Maybe a
Nothing))
              forall a. String -> IO a
throwErrno String
"iconvRecoder"

#endif /* !mingw32_HOST_OS */