AVALON 零散的一些题目 一道油田ctf比赛的算法逆向题解

一道油田ctf比赛的算法逆向题解

环境配置

系统 : Linux ubuntu 4.4.0-116-generic \ win10 64bit
程序 : encry_algo
要求 : 输入口令
使用工具 :ida \ pysm4

开始分析

静态分析

使用ida载入程序:

signed int __cdecl sub_804934A(int a1)
{
  int v1; // eax
  //省略部分代码
  qmemcpy(&v8, "Please input your flag: ", 0x190u);
  v1 = sub_805C180(&v8);
  printf_806E0A0(1, (int)&v8, v1);
  scanf_806E030(0, (int)&my_str, 16);
  sub_8049076(&v7, (int)&v12);
  sub_804911A((int)&v7, 1, 16, (char *)&my_str, (char *)&v48);
  for ( index = 0; index <= 15; ++index )
  {
    if ( *((_BYTE *)&v48 + index) != *(&v32 + index) )
    {
      v2 = sub_805C180(&v10);
      printf_806E0A0(1, (int)&v10, v2);
      result = 1;
      goto LABEL_7;
    }
  }
  v4 = sub_805C180(&v9);
  printf_806E0A0(1, (int)&v9, v4);
  v5 = sub_805C180(&v11);
  printf_806E0A0(1, (int)&v11, v5);
  result = 0;
LABEL_7:
  if ( __readgsdword(0x14u) != v52 )
    sub_806FFB0();
  return result;
}

可知sub_804911A函数是处理my_str的关键函数,进入其中看一下:

int __cdecl sub_804911A(int a1, int a2, int a3, char *my_str, char *buf)
{
  int result; // eax

  while ( a3 > 0 )
  {
    result = sub_8048D31(a1 + 4, my_str, buf);
    my_str += 16;
    buf += 16;
    a3 -= 0x10;
  }
  return result;
}

程序结构有考虑到较长数据的处理,但这里a3为0x10,所以循环体只执行一次。

还原算法

根据动态调试的结果,可以将算法还原如下:

from libnum import n2s,s2n

f = open("./table_o")
table = f.read()
f.close()

f = open("./0x100data_o")
large_data = f.read()
f.close()

def circular_shift_left (int_value,k,bit = 8):
    bit_string = '{:0%db}' % bit
    bin_value = bit_string.format(int_value) # 8 bit binary
    bin_value = bin_value[k:] + bin_value[:k]
    int_value = int(bin_value,2)
    return int_value

# right circular shift

def circular_shift_right (int_value,k,bit = 8):
    bit_string = '{:0%db}' % bit
    bin_value = bit_string.format(int_value) # 8 bit binary
    bin_value = bin_value[-k:] + bin_value[:-k]
    int_value = int(bin_value,2)
    return int_value

def sub_80488A4(a1):
    i2 = a1 >> 0x18 & 0xFF
    v2 = ord(large_data[i2])
    i3 = a1 >> 0x10 & 0xFF
    v3 = ord(large_data[i3])
    i4 = a1 >> 0x8 & 0xFF
    v4 = ord(large_data[i4])
    v5 = (v4 << 8) | (v3 << 16) | (v2 << 24) | ord(large_data[a1 & 0xFF])
    res = circular_shift_right(v5,0xE,32) ^ v5 ^ circular_shift_left(v5,2,32) ^ circular_shift_left(v5,10,32) ^ circular_shift_right(v5,8,32)
    return res

def magic_fun(a1,a2,a3,a4,a5):
    return a1 ^ sub_80488A4(a5 ^ a4 ^ a3 ^ a2)
'''
flag = [BitVec("flag%d"%i,32) for i in range(4)]

a1 = flag[0]
a2 = flag[1]
a3 = flag[2]
a4 = flag[3]
'''
a1 = s2n("0123")
a2 = s2n("4567")
a3 = s2n("89ab")
a4 = s2n("cdef")

res_list = []

for index in range(0,32):
    a5 = int(s2n(str(table[index*4:index*4+4][::-1])))
    tmp = magic_fun(a1,a2,a3,a4,a5)
    res_list.append(tmp)
    a1 = a2
    a2 = a3
    a3 = a4
    a4 = tmp

y1 = 0x86972826
y2 = 0x86241DB9
y3 = 0x2649E50D
y4 = 0x676E9072

print hex(res_list[-1])
print hex(res_list[-2])
print hex(res_list[-3])
print hex(res_list[-4])

这一步就是把算法抄下来,熟悉一下结构顺便看看有没有什么突破点可以写出逆算法。但实际做题中不一定要这么做,调试一下熟悉算法步骤即可。

识别算法

把算法抄了一遍也没发现什么破绽,只好乖乖再看看程序其他的部分:

  printf_806E0A0(1, (int)&v8, v1);
  scanf_806E030(0, (int)&my_str, 16);
  sub_8049076(&v7, (int)&v12);
  sub_804911A((int)&v7, 1, 16, (char *)&my_str, (char *)&v48);

进入scanf和加密函数之间的那个函数:

unsigned int __cdecl sub_8049076(_DWORD *a1, int a2)
{
  *a1 = 1;
  return sub_8048AA3((int)(a1 + 1), (unsigned int *)a2);
}

继续跟:

unsigned int __cdecl sub_8048AA3(int a1, unsigned int *a2)
{
  unsigned int v2; // ST30_4
  unsigned int v3; // ST34_4
  unsigned int v4; // ST38_4
  int v5; // esi
  unsigned int result; // eax
  unsigned int v7; // et1
  unsigned int v8; // [esp+18h] [ebp-B0h]
  unsigned int v9; // [esp+2Ch] [ebp-9Ch]
  unsigned int v10; // [esp+30h] [ebp-98h]
  unsigned int v11; // [esp+34h] [ebp-94h]
  unsigned int v12; // [esp+38h] [ebp-90h]
  unsigned int v13; // [esp+BCh] [ebp-Ch]

  v13 = __readgsdword(0x14u);
  v8 = 0;
  v2 = _byteswap_ulong(a2[1]);
  v3 = _byteswap_ulong(a2[2]);
  v4 = _byteswap_ulong(a2[3]);
  v9 = _byteswap_ulong(*a2) ^ 0xA3B1BAC6;
  v10 = v2 ^ 0x56AA3350;
  v11 = v3 ^ 0x677D9197;
  v12 = v4 ^ 0xB27022DC;
  while ( v8 <= 0x1F )
  {
    v5 = *(&v9 + v8);
    *(&v9 + v8 + 4) = v5 ^ sub_80489BD(*(&v9 + v8 + 3) ^ *(&v9 + v8 + 2) ^ *(&v9 + v8 + 1) ^ dword_80BC060[v8]);
    *(_DWORD *)(a1 + 4 * v8) = *(&v9 + v8 + 4);
    ++v8;
  }
  v7 = __readgsdword(0x14u);
  result = v7 ^ v13;
  if ( v7 != v13 )
    sub_806FFB0();
  return result;
}

搜索迷之数字0xA3B1BAC6,发现是sm4算法的常数。

逻辑推理

找一找现成的sm4算法,提取程序中的密码和密文,就按照步骤可以解密数据。

编写脚本

按照以上逻辑,编写脚本如下:

➜  playground python
Python 2.7.12 (default, Nov 12 2018, 14:36:49) 
[GCC 5.4.0 20160609] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> from pysm4 import encrypt, decrypt
>>> clear_num = 0x0123456789abcdeffedcba9876543210
>>> mk = 0x0123456789abcdeffedcba9876543210
>>> cipher_num = encrypt(clear_num, mk)
>>> hex(cipher_num)[2:].replace('L', '')
'681edf34d206965e86b3e94f536e4246'
>>> clear_num == decrypt(cipher_num, mk)
True
>>> print 'now decrypt the data.'
now decrypt the data.
>>> print  str(" 26 28 97 86 B9 1D 24 86 0D E5 49 26  72 90 6E 67").replace(" ","")
26289786B91D24860DE5492672906E67
>>> cipher_num = 0x26289786B91D24860DE5492672906E67
>>> clear_num = decrypt(cipher_num, mk)
>>> from libnum import n2s
>>> n2s(clear_num)
'aAa_bBb_cCc_dddd'
>>> exit()

夺旗成功

输入flag,程序提示成功:

➜  playground ./encry_algo 
Please input your flag: aAa_bBb_cCc_dddd
Yes, you got it!
flag{ 'your input' }

参考链接

  1. SM4算法的c++实现 https://www.cnblogs.com/elpsycongroo/p/7819814.html