-- | Multihash encoding/decoding.
--
-- Multihash is a self-describing hash format: <hash-function-code><digest-size><digest>
-- Used for Peer ID derivation and content addressing.
module Network.LibP2P.Core.Multihash
  ( HashFunction (..)
  , encodeMultihash
  , decodeMultihash
  , validateMultihash
  ) where

import Crypto.Hash (Digest, SHA256, hash)
import Data.ByteArray (convert)
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import Data.Word (Word64)
import Numeric (showHex)
import Network.LibP2P.Core.Varint (decodeUvarint, encodeUvarint)

-- | Supported hash functions for multihash encoding.
--
-- Each constructor corresponds to exactly one multicodec code, given in the
-- per-constructor documentation below.
data HashFunction
  = Identity -- ^ Code 0x00: raw bytes, no hashing
  | SHA256   -- ^ Code 0x12: SHA-256 (32-byte digest)
  deriving (Show, Eq)

-- | Multicodec code for each hash function.
--
-- 'Identity' maps to @0x00@ and 'SHA256' maps to @0x12@. This is the value
-- that 'encodeMultihash' writes as the leading varint of the encoding.
hashCode :: HashFunction -> Word64
hashCode Identity = 0x00
hashCode SHA256 = 0x12

-- | Look up the hash function for a multicodec code.
--
-- Returns 'Right' for the two supported codes (@0x00@ and @0x12@). Any other
-- code yields 'Left' with a message that includes the code in hexadecimal.
fromHashCode :: Word64 -> Either String HashFunction
fromHashCode 0x00 = Right Identity
fromHashCode 0x12 = Right SHA256
fromHashCode c = Left $ "decodeMultihash: unknown hash function code 0x" <> showHex c ""

-- | Encode data as a multihash.
--
-- For 'Identity' the input bytes are stored unchanged; for 'SHA256' the input
-- is hashed first and the resulting digest is stored.
--
-- The output layout is @varint(code) <> varint(length digest) <> digest@.
encodeMultihash :: HashFunction -> ByteString -> ByteString
encodeMultihash hf input =
  encodeUvarint (hashCode hf)
    <> encodeUvarint (fromIntegral (BS.length digest))
    <> digest
  where
    -- The payload that follows the two varint headers.
    digest :: ByteString
    digest = case hf of
      Identity -> input
      SHA256 -> convert (hash input :: Digest SHA256)

-- | Decode a multihash into its hash function and digest/data.
--
-- Reads the hash-function code varint, then the digest-length varint, then
-- takes that many bytes as the digest.
--
-- Returns 'Left' when either varint fails to decode, when the code is not a
-- supported hash function, or when fewer bytes remain than the declared
-- digest length.
--
-- Note: does not reject trailing bytes after the digest.
decodeMultihash :: ByteString -> Either String (HashFunction, ByteString)
decodeMultihash bs = do
  (code, rest1) <- decodeUvarint bs
  hf <- fromHashCode code
  (len, rest2) <- decodeUvarint rest1
  let available = BS.length rest2
  -- Compare in the Word64 domain: narrowing a huge declared length to Int
  -- first could wrap to a negative number, which would slip past the bounds
  -- check and make BS.take return an empty digest.
  if (fromIntegral available :: Word64) < len
    then Left $ "decodeMultihash: expected " <> show len <> " bytes but got " <> show available
    -- len <= available here, so the conversion to Int cannot overflow.
    else Right (hf, BS.take (fromIntegral len) rest2)

-- | Validate a multihash strictly: decode, check digest length constraints,
-- and reject trailing bytes.
--
-- Checks are applied in this order:
--
-- 1. both varints decode and the code is a supported hash function;
-- 2. at least as many bytes remain as the declared digest length;
-- 3. no bytes follow the digest;
-- 4. a 'SHA256' digest is exactly 32 bytes, and an 'Identity' payload is at
--    most 42 bytes.
--
-- On success returns the hash function together with the digest bytes.
validateMultihash :: ByteString -> Either String (HashFunction, ByteString)
validateMultihash bs = do
  (code, rest1) <- decodeUvarint bs
  hf <- fromHashCode code
  (len, rest2) <- decodeUvarint rest1
  let available = BS.length rest2
  -- Compare in the Word64 domain: narrowing a huge declared length to Int
  -- first could wrap to a negative number, which would pass the bounds check
  -- and also pass the "Identity <= 42" check below.
  if (fromIntegral available :: Word64) < len
    then Left $ "validateMultihash: expected " <> show len <> " bytes but got " <> show available
    else do
      -- len <= available here, so the conversion to Int cannot overflow.
      let digestLen = fromIntegral len :: Int
          (digest, trailing) = BS.splitAt digestLen rest2
      if not (BS.null trailing)
        then Left $ "validateMultihash: " <> show (BS.length trailing) <> " trailing bytes"
        else case hf of
          SHA256
            | digestLen /= 32 ->
                Left $ "validateMultihash: SHA-256 digest must be 32 bytes, got " <> show digestLen
          Identity
            | digestLen > 42 ->
                Left $ "validateMultihash: Identity digest must be ≤42 bytes, got " <> show digestLen
          _ -> Right (hf, digest)