As part of my training as an electronic engineer, my lecturer has asked me to develop an FPGA solution, based on the original XTEA, that performs cryptography with a 128-bit block size and a 128-bit key size. I have decided to solve this by dividing the 128-bit block into four 32-bit blocks and running it as a double encryption. I wanted to make sure the mathematics was correct first, so I developed the Python code below. However, the decryption process is not correct. Could anyone help me with that?
Python code:
def xtea_encipher_128_modified(num_rounds, v, k):
    """Encrypt four words with two interleaved XTEA-style word pairs.

    The block is treated as two pairs, ``(y, z)`` and ``(w, x)``, that share
    a single running sum.  That sum is advanced by the XTEA constant twice
    in every round: once between the ``y`` and ``z`` updates and once
    between the ``w`` and ``x`` updates.

    Args:
        num_rounds: Number of loop iterations to run.
        v: Sequence of four input words, unpacked as ``(y, z, w, x)``.
        k: Key words; indices 0..3 are read.

    Returns:
        Tuple ``(y, z, w, x)``; every word updated in the loop is masked
        to 32 bits.
    """
    word_mask = 0xffffffff
    golden = 0x9e3779b9  # (sqrt(5) - 1) * 2**31

    def mix(word, running, key_word):
        # XTEA mixing term: shifted/xored word plus itself, xored with the
        # running sum plus the selected key word.
        return (((word << 4) ^ (word >> 5)) + word) ^ (running + key_word)

    y, z, w, x = v
    running = 0
    for _ in range(num_rounds):
        # First pair: y uses the low two bits of the sum as key index.
        y = (y + mix(z, running, k[running & 3])) & word_mask
        running = (running + golden) & word_mask
        # z and w both see the sum after the first increment.
        z = (z + mix(y, running, k[(running >> 11) & 3])) & word_mask
        w = (w + mix(x, running, k[running & 3])) & word_mask
        running = (running + golden) & word_mask
        # x sees the sum after the second increment.
        x = (x + mix(w, running, k[(running >> 11) & 3])) & word_mask

    return y, z, w, x
def xtea_decipher_128_modified(num_rounds, v, k):
    """Decrypt four words produced by ``xtea_encipher_128_modified``.

    The encipher steps are undone in exact reverse order: ``x``, then
    ``w`` and ``z`` (which share the same sum value), then ``y``.  The
    running sum is stepped back by DELTA at the same points where the
    encipher loop stepped it forward.

    Args:
        num_rounds: Number of rounds; must equal the value used to encrypt.
        v: Sequence of four ciphertext words, unpacked as ``(y, z, w, x)``.
        k: Key words; indices 0..3 are read.

    Returns:
        Tuple ``(y, z, w, x)`` of recovered words; every word updated in
        the loop is masked to 32 bits.
    """
    MASK = 0xffffffff
    y, z, w, x = v
    DELTA = 0x9e3779b9  # (sqrt(5) - 1) * 2**31
    # The encipher loop adds DELTA to its sum TWICE per round, so its final
    # sum is 2 * DELTA * num_rounds.  Starting from DELTA * num_rounds (as in
    # plain XTEA) makes every sum/key term disagree with encryption.
    total = (2 * DELTA * num_rounds) & MASK
    for _ in range(num_rounds):
        # Undo the last encipher step (x), which used the fully advanced sum.
        x -= ((w << 4 ^ w >> 5) + w) ^ (total + k[(total >> 11) & 3])
        x = x & MASK  # Ensure x stays within 32-bit bounds
        total = (total - DELTA) & MASK
        # w and z were both updated with the sum after the first increment.
        w -= ((x << 4 ^ x >> 5) + x) ^ (total + k[total & 3])
        w = w & MASK  # Ensure w stays within 32-bit bounds
        z -= ((y << 4 ^ y >> 5) + y) ^ (total + k[(total >> 11) & 3])
        z = z & MASK  # Ensure z stays within 32-bit bounds
        total = (total - DELTA) & MASK
        # y was the first word updated, using the sum at the start of the round.
        y -= ((z << 4 ^ z >> 5) + z) ^ (total + k[total & 3])
        y = y & MASK  # Ensure y stays within 32-bit bounds
    return y, z, w, x
# usage example: encrypt one 128-bit block, decrypt it again, and report
# whether the round trip reproduces the original input.
plaintext = (0xA5A5A5A5, 0x01234567, 0xFEDCBA98, 0x5A5A5A5A)
key = (0xDEADBEEF, 0x01234567, 0x89ABCDEF, 0xDEADBEEF)
num_rounds = 32  # number of rounds
ciphertext = xtea_encipher_128_modified(num_rounds, plaintext, key)
print(ciphertext)
# Keep the decrypted words under their own name so the original plaintext
# is still available for comparison.
recovered = xtea_decipher_128_modified(num_rounds, ciphertext, key)
print(recovered)
print("round trip OK:", recovered == plaintext)