Textbook-HMAC

MD5, SHA256과 같은 해시에 사용 가능한 공격인 Length extension attack 에 대한 문제

1. 코드 분석

import hashlib
import os

K = os.urandom(500)
flag = open("flag.txt", "r").read()

def HMAC(M):
    return hashlib.md5(K + M).digest()

m1 = b"Dreamhack"
h1 = HMAC(m1)

print(f"HMAC(\"Dreamhack\") = {bytes.hex(h1)}")

m2 = bytes.fromhex(input("Your message: "))
h2 = bytes.fromhex(input("Your hash: "))

if m2 != m1 and h2 == HMAC(m2):
    print(flag)

2. MD5 의사코드 (Pseudocode)

MD5 해시 알고리즘의 개요

MD5(Message Digest Algorithm 5)는 128비트 해시 값을 생성하는 암호화 해시 함수입니다. 이 알고리즘은 네 개의 주요 스테이지로 구성되며, 각 스테이지는 16개의 연산을 포함합니다. 각 연산에서 특정 비트 시프트(rotate left) 양을 사용하여 데이터를 처리합니다.

// : All variables are unsigned 32 bit and wrap modulo 2^32 when calculating
var int s[64], K[64]
var int i

// s specifies the per-round shift amounts
s[ 0..15] := { 7, 12, 17, 22,  7, 12, 17, 22,  7, 12, 17, 22,  7, 12, 17, 22 }
s[16..31] := { 5,  9, 14, 20,  5,  9, 14, 20,  5,  9, 14, 20,  5,  9, 14, 20 }
s[32..47] := { 4, 11, 16, 23,  4, 11, 16, 23,  4, 11, 16, 23,  4, 11, 16, 23 }
s[48..63] := { 6, 10, 15, 21,  6, 10, 15, 21,  6, 10, 15, 21,  6, 10, 15, 21 }

// Use binary integer part of the sines of integers (Radians) as constants:
for i from 0 to 63 do
    K[i] := floor(232 × abs(sin(i + 1)))
end for
// (Or just use the following precomputed table):
K[ 0.. 3] := { 0xd76aa478, 0xe8c7b756, 0x242070db, 0xc1bdceee }
...생략...
K[60..63] := { 0xf7537e82, 0xbd3af235, 0x2ad7d2bb, 0xeb86d391 }

// Initialize variables:
//MD5 128 비트의 각 영역이 될 32 비트 4영역 초기화
var int a0 := 0x67452301   // A
var int b0 := 0xefcdab89   // B
var int c0 := 0x98badcfe   // C
var int d0 := 0x10325476   // D

// Pre-processing: adding a single 1 bit
//첫 단일 비트, 1을 메시지 끝부분에 추가함.
append "1" bit to message<    
 // Notice: the input bytes are considered as bit strings,
 //  where the first bit is the most significant bit of the byte.[53]

// Pre-processing: padding with zeros
//메세지 길이가 513 일때 (514비트) 첫번째 블록 512 + 2비트 + 446비트를 0으로 채우고 
// 마지막 64비트는 513에 대한 16진수 입력 (0x0000000000000201)
// 메세지 길이가 63 (64비트)일때, 384 만큼 0으로 채움
// 마지막 64비트 0x000000000000003F
append "0" bit until message length in bits ≡ 448 (mod 512)

// Notice: the two padding steps above are implemented in a simpler way
 //  in implementations that only work with complete bytes: append 0x80
 //  and pad with 0x00 bytes so that the message length in bytes ≡ 56 (mod 64).

append original length in bits mod 264 to message

// Process the message in successive 512-bit chunks:
for each 512-bit chunk of padded message do
    break chunk into sixteen 32-bit words M[j], 0 ≤ j ≤ 15
    // Initialize hash value for this chunk:
    var int A := a0
    var int B := b0
    var int C := c0
    var int D := d0
    //M,A,B,C,D 모두 32비트
    //Mi는 입력 메시지의 32-비트 블록
    // Main loop:
    for i from 0 to 63 do
        var int F, g
        if 0 ≤ i ≤ 15 then
            F := (B and C) or ((not B) and D)
            g := i
        else if 16 ≤ i ≤ 31 then
            F := (D and B) or ((not D) and C)
            g := (5×i + 1) mod 16
        else if 32 ≤ i ≤ 47 then
            F := B xor C xor D
            g := (3×i + 5) mod 16
        else if 48 ≤ i ≤ 63 then
            F := C xor (B or (not D))
            g := (7×i) mod 16
        // Be wary of the below definitions of a,b,c,d
        F := F + A + K[i] + M[g]  // M[g] must be a 32-bit block
        A := D
        D := C
        C := B
        B := B + leftrotate(F, s[i])
    end for
    // Add this chunk's hash to result so far:
    a0 := a0 + A
    b0 := b0 + B
    c0 := c0 + C
    d0 := d0 + D
    // 32비트 더하기 연산에서 33비트 이상이 될 경우가 있어 &0XFFFFFFFF 수행
    //MD5 알고리즘의 의사코드는 다음과 같은 단계로 구성됩니다:
    //1 상수와 초기 변수 설정
    //1 메시지 패딩
    //1 512비트 블록 단위로 메시지 처리
    //1 최종 해시 값 생성
    
end for

var char digest[16] := a0 append b0 append c0 append d0 // (Output is in little-endian)

Python으로 변환

import math


def leftrotate(x, c):
    return (x << c) or (x >> (32 - c))


# All variables are unsigned 32 bit and wrap modulo 2^32 when calculating
s = [None] * 64
K = [None] * 64

# s specifies the per-round shift amounts
s[0:16] = [7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22]
s[16:32] = [5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20, 5, 9, 14, 20]
s[32:48] = [4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23]
s[48:64] = [6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21]

# Use binary integer part of the sines of integers (Radians) as constants:
K = [math.floor(math.pow(2, 32) * math.fabs(math.sin(i + 1))) for i in range(64)]

# Initialize variables
a0 = 0x67452301
b0 = 0xefcdab89
c0 = 0x98badcfe
d0 = 0x10325476

msg0 = "The quick brown fox jumps over the lazy dog"
# Converting String to binary
msg = ''.join(format(ord(i), 'b') for i in msg0)

# Pre-processing: adding a single bit
#   The input bytes are considered as bits strings,
#   where the first bit is the most significant bit of the byte
message = list(msg)
message.append("1")

# Pre-processing: padding with zeros
for i in range(447 - len(msg)):
    message.append("0")
for i in range(64):
    message.append("64")

msg = "".join(message)

M = []
for i in range(0, 512, 32):
    M.append("".join(message[i:i + 32]))
# Initialize hash value for this chunk
A = a0
B = b0
C = c0
D = d0

for i in range(64):
    F = 0
    g = 0
    if 0 <= i <= 15:
        F = D ^ (B and (C ^ D))
        g = i
    elif 16 <= i <= 31:
        F = C ^ (D and (B ^ C))
        g = (5 * i + 1) % 16
    elif 32 <= i <= 47:
        F = B ^ C ^ D
        g = (3 * i + 5) % 16
    elif 48 <= i <= 63:
        F = C ^ (B or (not D))
        g = (7 * i) % 16
    # Be wary of the below definitions of a, b, c, d
    F = F + A + K[i] + int(M[g])
    A = D
    D = C
    C = B
    B = B + leftrotate(F, s[i])

a0 += A
b0 += B
c0 += C
d0 += D

print(a0 + b0 + c0 + d0)

3. Length extension attack

# 정의

MD 방식 Hash 함수(MD5, SHA-1, SHA-2) 방식은 블록 단위로 나누어 hash함수를 돌리고 압축함수를 돌린 다음 블록과 이전 작업을 반복하는 특징이 있음.

이때 블록크기를 맞춰야 하기 때문에 Padding을 추가하여 작동함

이점을 이용하여 key 에 대해 몰라도 이미 완성된 hash 함수 sig 와 원본 msg 만 알고 있어도 msg에 패딩이 추가된 새로운 hasg 함수 sig 를 만들 수 있다.

# 이문제 활용 방안

우선,

pad, process, digest, state 의 개념을 명확히 이해하느라 너무 힘들었다.

pad : msg를 512비트 블록화 하는 것

process : 블록화된 pad를 가지고 초기값(state) md5를 구하는 것

==> 이것을 너무 잘 표현한 코드가 이것이다.

for block in blocks:
	state = process(state,block)

512비트 하나하나가 block 이고 (맨앞에 1 값 (비트) 붙이고, 448 (512-64) 비트까지 0으로 채우고, 나머지 64비트에 문자의 길이를 입력한 값) block 들이 모인게 blocks

md5 최초 계산 시 state 는 주어지며, process (16라운드 중 1라운드) 가 끝나면 해당 결과값이 다음라운드의 state 가 됨

그럼 msg2, h2를 구해보자

h1 = md5(msg)

msg1 = bytes(500) + m1(b"Dreamhack")

padding1 = pad(msg1)[len(msg1):] (msg1 의 패딩에서 msg1의 길이를 제외한것이 padding1)

msg2 = pad(msg1)

padding2= pad(msg2)[len(msg2):] (위와 같은 논리)

state2diest(h1)

blocks = split_block(padding2)

//split_block 함수는 패딩이 완료된 64바이트 배수인 바이트 스트링을 64 바이트 블록으로 나누어 리스트 하는 함수

for block in blocks:

state= process(state, block)

h2 = state2digest(state)

state 4개를 리틀엔디언으로 append 하여 md5 해시 값 생성

(m1+padding1) 이 msg2 문제에서 m2가 되고

h2 가 이에 대한 해시값이 된다.

4. Exploit

from pwn import *
from md5 import *

def digest2state(digest):
    return [int.from_bytes(digest[i:i + 4], byteorder='little') for i in range(0, 16, 4)]

def split_block(msg):
    assert len(msg) % 64 == 0
    return [msg[i:i + 64] for i in range(0, len(msg), 64)]

io = remote("host3.dreamhack.games", 24212)

m1 = b"Dreamhack"
io.recvuntil(b"= ")
h1 = bytes.fromhex(io.recvline()[:-1].decode())

msg1 = bytes(500) + m1
padding1 = pad(msg1)[len(msg1):]
# M      = m1                             : 9 bytes
# msg1   = K + m1                         : 509 bytes
# blocks = split_block(K + m1 + padding1) : 576 = 64 * 9 bytes
# padding1                                : 67 bytes

msg2 = pad(msg1)
padding2 = pad(msg2)[len(msg2):]
# M      = m1 + padding1                             : 76 bytes
# msg2   = K + m1 + padding1                         : 576 bytes
# blocks = split_block(K + m1 + padding1 + padding2) : 640 = 64 * 10 bytes
# padding2                                           : 64 = 64 * 1 bytes

blocks = split_block(padding2)

state = digest2state(h1)
for block in blocks:
    state = process(state, block)

h2 = state2digest(state)

io.sendlineafter(b": ", (m1 + padding1).hex().encode())
io.sendlineafter(b": ", h2.hex().encode())

io.interactive()

md5.py

import math

rotate_by = [7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22, 7, 12, 17, 22,
             5,  9, 14, 20, 5,  9, 14, 20, 5,  9, 14, 20, 5,  9, 14, 20,
             4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23, 4, 11, 16, 23,
             6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21, 6, 10, 15, 21]


constants = [int(abs(math.sin(i+1)) * 4294967296) & 0xFFFFFFFF for i in range(64)]

initial_state = [0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476]


def pad(msg):
    msg_len_in_bits = (8*len(msg)) & 0xffffffffffffffff
    msg += bytes([0x80])

    while len(msg)%64 != 56:
        msg += bytes([0])

    msg += msg_len_in_bits.to_bytes(8, byteorder='little')
    
    return msg


def process(state, block):
    A, B, C, D = state

    for i in range(64):
        if i < 16:
            func = lambda b, c, d: (b & c) | (~b & d)
            index_func = lambda i: i

        elif i >= 16 and i < 32:
            func = lambda b, c, d: (d & b) | (~d & c)
            index_func = lambda i: (5*i + 1)%16

        elif i >= 32 and i < 48:
            func = lambda b, c, d: b ^ c ^ d
            index_func = lambda i: (3*i + 5)%16
        
        elif i >= 48 and i < 64:
            func = lambda b, c, d: c ^ (b | ~d)
            index_func = lambda i: (7*i)%16

        F = func(B, C, D)
        G = index_func(i)

        to_rotate = (A + F + constants[i] + int.from_bytes(block[4*G : 4*G + 4], byteorder='little')) & 0xFFFFFFFF
        newB = (B + (to_rotate << rotate_by[i] | to_rotate >> (32-rotate_by[i]))) & 0xFFFFFFFF
            
        A, B, C, D = D, newB, B, C

    for i, val in enumerate([A, B, C, D]):
        state[i] += val
        state[i] &= 0xFFFFFFFF

    return state


def state2digest(state):
    digest = b""

    for i in range(4):
        digest += state[i].to_bytes(4, byteorder='little')

    return digest


def md5(msg):
    # 1. Padding
    msg = pad(msg)
    assert len(msg) % 64 == 0


    # 2. Process
    state = initial_state[:]
    blocks = [msg[i:i + 64] for i in range(0, len(msg), 64)]

    for block in blocks:
        state = process(state, block)


    # 3. Finalization
    return state2digest(state)


if __name__ == '__main__':
    import os
    import hashlib

    for _ in range(100):
        msg = os.urandom(1337)
        assert hashlib.md5(msg).digest() == md5(msg)

'Hacking > DreamHack' 카테고리의 다른 글

Master Canary (0)	2024.08.13
md5 password (0)	2024.07.31
Stupid GCC (0)	2024.05.22
textbook-des__crypto (1)	2024.05.07
arm traning v2 (0)	2024.05.07

내 블로그 - 관리자 홈 전환	`Q` `Q`
새 글 쓰기	`W` `W`

글 수정 (권한 있는 경우)	`E` `E`
댓글 영역으로 이동	`C` `C`

이 페이지의 URL 복사	`S` `S`
맨 위로 이동	`T` `T`
티스토리 홈 이동	`H` `H`
단축키 안내	`Shift` + `/` `⇧` + `/`

SurTesterS

Textbook-HMAC

1. 코드 분석

2. MD5 의사코드 (Pseudocode)

3. Length extension attack

4. Exploit

'Hacking > DreamHack' 카테고리의 다른 글

티스토리툴바

단축키

내 블로그

블로그 게시글

모든 영역

Textbook-HMAC

1. 코드 분석

2. MD5 의사코드 (Pseudocode)

3. Length extension attack

4. Exploit

'Hacking > DreamHack' 카테고리의 다른 글

'Hacking/DreamHack' Related Articles

티스토리툴바

단축키

내 블로그

블로그 게시글

모든 영역