|
2 | 2 | {-# LANGUAGE DefaultSignatures #-} |
3 | 3 | {-# LANGUAGE GADTs #-} |
4 | 4 | {-# LANGUAGE InstanceSigs #-} |
| 5 | +{-# LANGUAGE MultiWayIf #-} |
5 | 6 | {-# LANGUAGE ScopedTypeVariables #-} |
6 | 7 | {-# LANGUAGE PatternGuards #-} |
7 | 8 | {-# LANGUAGE PolyKinds #-} |
@@ -57,6 +58,7 @@ module Data.Binary.Class ( |
57 | 58 | import Prelude hiding (Foldable(..)) |
58 | 59 | import Data.Foldable (Foldable(..)) |
59 | 60 |
|
| 61 | +import Data.Char (chr) |
60 | 62 | import Data.Word |
61 | 63 | import Data.Bits |
62 | 64 | import Data.Int |
@@ -499,35 +501,44 @@ instance Binary a => Binary (Complex a) where |
499 | 501 |
|
500 | 502 | ------------------------------------------------------------------------ |
501 | 503 |
|
502 | | --- Char is serialised as UTF-8 |
| 504 | +-- | Uses WTF-8 (like UTF-8, but surrogates are allowed). |
503 | 505 | instance Binary Char where |
504 | 506 | put = putCharUtf8 |
505 | 507 | putList str = put (length str) <> putStringUtf8 str |
506 | 508 | get = do |
507 | | - let getByte = liftM (fromIntegral :: Word8 -> Int) get |
508 | | - shiftL6 = flip shiftL 6 :: Int -> Int |
509 | | - w <- getByte |
510 | | - r <- case () of |
511 | | - _ | w < 0x80 -> return w |
512 | | - | w < 0xe0 -> do |
513 | | - x <- liftM (xor 0x80) getByte |
514 | | - return (x .|. shiftL6 (xor 0xc0 w)) |
515 | | - | w < 0xf0 -> do |
516 | | - x <- liftM (xor 0x80) getByte |
517 | | - y <- liftM (xor 0x80) getByte |
518 | | - return (y .|. shiftL6 (x .|. shiftL6 |
519 | | - (xor 0xe0 w))) |
520 | | - | otherwise -> do |
521 | | - x <- liftM (xor 0x80) getByte |
522 | | - y <- liftM (xor 0x80) getByte |
523 | | - z <- liftM (xor 0x80) getByte |
524 | | - return (z .|. shiftL6 (y .|. shiftL6 |
525 | | - (x .|. shiftL6 (xor 0xf0 w)))) |
526 | | - getChr r |
| 509 | + w <- fmap fromIntegral getWord8 |
| 510 | + if |
| 511 | + | w < 0x80 -> getChr w |
| 512 | + | w < 0xc2 -> invalid -- continuation byte or overlong encoding |
| 513 | + | w < 0xe0 -> do |
| 514 | + x <- getWord8 >>= continuationByte |
| 515 | + getChr ((x .&. 0x3f) .|. ((w .&. 0x1f) `shiftL` 6)) |
| 516 | + | w < 0xf0 -> do |
| 517 | + x <- getWord8 >>= continuationByte |
| 518 | + when (w == 0xe0 && x < 0xa0) invalid -- overlong encoding |
| 519 | + y <- getWord8 >>= continuationByte |
| 520 | + getChr ((y .&. 0x3f) .|. ((x .&. 0x3f) `shiftL` 6) .|. ((w .&. 0x0f) `shiftL` 12)) |
| 521 | + | w < 0xf5 -> do |
| 522 | + x <- getWord8 >>= continuationByte |
| 523 | + when (w == 0xf0 && x < 0x90) invalid -- overlong encoding |
| 524 | + when (w == 0xf4 && x > 0x8f) invalid -- outside of Unicode range |
| 525 | + y <- getWord8 >>= continuationByte |
| 526 | + z <- getWord8 >>= continuationByte |
| 527 | + getChr ((z .&. 0x3f) .|. ((y .&. 0x3f) `shiftL` 6) .|. ((x .&. 0x3f) `shiftL` 12) .|. ((w .&. 0x07) `shiftL` 18)) |
| 528 | + | otherwise -> invalid |
527 | 529 | where |
528 | | - getChr w |
529 | | - | w <= 0x10ffff = return $! toEnum $ fromEnum w |
530 | | - | otherwise = fail "Not a valid Unicode code point!" |
| 530 | + invalid :: Get a |
| 531 | + invalid = fail "invalid Char encoding" |
| 532 | + |
| 533 | + continuationByte :: Word8 -> Get Int |
| 534 | + continuationByte x = |
| 535 | + let x' = x .&. 0xc0 |
| 536 | + in if x' == 0x80 |
| 537 | + then pure (fromIntegral x) |
| 538 | + else invalid -- no continuation byte |
| 539 | + |
| 540 | + getChr :: Int -> Get Char |
| 541 | + getChr i = pure $! chr i |
531 | 542 |
|
532 | 543 | ------------------------------------------------------------------------ |
533 | 544 | -- Instances for the first few tuples |
|
0 commit comments