×
AI > 人工智能 > 详情

mysqlbinlog闪回数据

发布时间:2020-05-22 发布时间:
|

  MySQL利用binlog恢复误操作数据

  在人工手动进行一些数据库写操作的时候(比方说数据订正),尤其是一些不可控的批量更新或删除,通常都建议备份后操作。不过不怕万一,就怕一万,有备无患总是好的。在线上或者测试环境误操作导致数据被删除或者更新后,想要恢复,一般有两种方法。

  方法一、利用最近的全量备份+增量binlog备份,恢复到误操作之前的状态,但是随着数据量的增大,binlog的增多,恢复起来很费时。

  方法二、如果binlog的格式为row,那么就可以将binlog解析出来生成反向的原始SQL

  以下是利用方法二写的一个python脚本binlog_rollback.py,可利用此脚本生成反向的原始SQL。

  说明:

  0、前提是binlog的格式为row

  1、要恢复的表操作前后表结构没有发生变更,否则脚本无法解析

  2、只生成DML(insert/update/delete)的rollback语句

  3、最终生成的SQL是逆序的,所以最新的DML会生成在输入文件的最前面,并且带上了时间戳和偏移点,方便查找目标

  4、需要提供一个连接MySQL的只读用户,主要是为了获取表结构

  5、如果binlog过大,建议带上时间范围,也可以指定只恢复某个库的SQL

  6、SQL生成后,请务必在测试环境上测试恢复后再应用到线上

  脚本代码

  #!/bin/env python

  # -*- coding:utf-8 -*-

  import os,sys,re,getopt

  import MySQLdb

  host = ‘127.0.0.1’

  user = ‘’

  password = ‘’

  port = 3306

  start_datetime = ‘1971-01-01 00:00:00’

  stop_datetime = ‘2037-01-01 00:00:00’

  start_position = ‘4’

  stop_position = ‘18446744073709551615’

  database = ‘’

  mysqlbinlog_bin = ‘mysqlbinlog -v’

  binlog = ‘’

  fileContent = ‘’

  output=‘rollback.sql’

  only_primary = 0

  # ----------------------------------------------------------------------------------------

  # 功能:获取参数,生成相应的binlog解析文件

  # ----------------------------------------------------------------------------------------

  def getopts_parse_binlog():

  global host

  global user

  global password

  global port

  global fileContent

  global output

  global binlog

  global start_datetime

  global stop_datetime

  global start_position

  global stop_position

  global database

  global only_primary

  try:

  options, args = getopt.getopt(sys.argv[1:], “f:o:h:u:p:P:d:”, [“help”,“binlog=”,“output=”,“host=”,“user=”,“password=”,“port=”,“start-datetime=”, \

  “stop-datetime=”,“start-position=”,“stop-position=”,“database=”,“only-primary=”])

  except getopt.GetoptError:

  print “参数输入有误!!!!!”

  options = []

  if options == [] or options[0][0] in (“--help”):

  usage()

  sys.exit()

  print “正在获取参数。..。.”

  for name, value in options:

  if name == “-f” or name == “--binlog”:

  binlog = value

  if name == “-o” or name == “--output”:

  output = value

  if name == “-h” or name == “--host”:

  host = value

  if name == “-u” or name == “--user”:

  user = value

  if name == “-p” or name == “--password”:

  password = value

  if name == “-P” or name == “--port”:

  port = value

  if name == “--start-datetime”:

  start_datetime = value

  if name == “--stop-datetime”:

  stop_datetime = value

  if name == “--start-position”:

  start_position = value

  if name == “--stop-position”:

  stop_position = value

  if name == “-d” or name == “--database”:

  database = value

  if name == “--only-primary” :

  only_primary = value

  if binlog == ‘’ :

  print “错误:请指定binlog文件名!”

  usage()

  if user == ‘’ :

  print “错误:请指定用户名!”

  usage()

  if password == ‘’ :

  print “错误:请指定密码!”

  usage()

  if database 《》 ‘’ :

  condition_database = “--database=” + “‘” + database + “’”

  else:

  condition_database = ‘’

  print “正在解析binlog.。..。”

  fileContent=os.popen(“%s %s --base64-output=DECODE-ROWS --start-datetime=‘%s’ --stop-datetime=‘%s’ --start-position=‘%s’ --stop-position=‘%s’ %s\

  |grep ‘###’ -B 2|sed -e ‘s/### //g’ -e ‘s/^INSERT/##INSERT/g’ -e ‘s/^UPDATE/##UPDATE/g’ -e ‘s/^DELETE/##DELETE/g’ ” \

  %(mysqlbinlog_bin,binlog,start_datetime,stop_datetime,start_position,stop_position,condition_database)).read()

  #print fileContent

  # ----------------------------------------------------------------------------------------

  # 功能:初始化binlog里的所有表名和列名,用全局字典result_dict来储存每个表有哪些列

  # ----------------------------------------------------------------------------------------

  def init_col_name():

  global result_dict

  global pri_dict

  global fileContent

  result_dict = {}

  pri_dict = {}

  table_list = re.findall(‘`.*`\\。`.*`’,fileContent)

  table_list = list(set(table_list))

  #table_list 为所有在这段binlog里出现过的表

  print “正在初始化列名。..。.”

  for table in table_list:

  sname = table.split(‘。’)[0].replace(‘`’,‘’)

  tname = table.split(‘。’)[1].replace(‘`’,‘’)

  #连接数据库获取列和列id

  try:

  conn = MySQLdb.connect(host=host,user=user,passwd=password,port=int(port))

  cursor = conn.cursor()

  cursor.execute(“select ordinal_position,column_name \

  from information_schema.columns \

  where table_schema=‘%s’ and table_name=‘%s’ ” %(sname,tname))

  result=cursor.fetchall()

  if result == () :

  print ‘Warning:’+sname+‘。’+tname+‘已删除’

  #sys.exit()

  result_dict[sname+‘。’+tname]=result

  cursor.execute(“select ordinal_position,column_name \

  from information_schema.columns \

  where table_schema=‘%s’ and table_name=‘%s’ and column_key=‘PRI’ ” %(sname,tname))

  pri=cursor.fetchall()

  #print pri

  pri_dict[sname+‘。’+tname]=pri

  cursor.close()

  conn.close()

  except MySQLdb.Error, e:

  try:

  print “Error %d:%s” % (e.args[0], e.args[1])

  except IndexError:

  print “MySQL Error:%s” % str(e)

  sys.exit()

  #print result_dict

  #print pri_dict

  # ----------------------------------------------------------------------------------------

  # 功能:拼凑回滚sql,逆序

  # ----------------------------------------------------------------------------------------

  def gen_rollback_sql():

  global only_primary

  fileOutput = open(output, ‘w’)

  #先将文件根据‘--’分块,每块代表一个sql

  area_list=fileContent.split(‘--\n’)

  #逆序读取分块

  print “正在开始拼凑sql.。..。”

  for area in area_list[::-1]:

  #由于一条sql可能影响多行,每个sql又可以分成多个逐条执行的sql

  sql_list = area.split(‘##’)

  #先将pos点和timestamp传入输出文件中

  for sql_head in sql_list[0].splitlines():

  sql_head = ‘#’+sql_head+‘\n’

  fileOutput.write(sql_head)

  #逐条sql进行替换更新,逆序

  for sql in sql_list[::-1][0:-1]:

  try:

  if sql.split()[0] == ‘INSERT’:

  rollback_sql = re.sub(‘^INSERT INTO’, ‘DELETE FROM’, sql, 1)

  rollback_sql = re.sub(‘SET\n’, ‘WHERE\n’, rollback_sql, 1)

  tablename_pos = 2

  table_name = rollback_sql.split()[tablename_pos].replace(‘`’, ‘’)

  # 获取该sql中的所有列

  col_list = sorted(list(set(re.findall(‘@\d+’, rollback_sql))))

  # 因为第一个列前面没有逗号或者and,所以单独替换

  rollback_sql = rollback_sql.replace(‘@1=’, result_dict[table_name][0][1]+‘=’)

  for col in col_list[1:]:

  i = int(col[1:]) - 1

  rollback_sql = rollback_sql.replace(col+‘=’, ‘AND ’ + result_dict[table_name][i][1]+‘=’,1)

  # 如果only_primary开启且存在主键,where条件里就只列出主键字段

  if int(only_primary) == 1 and pri_dict[table_name] 《》 ():

  sub_where = ‘’

  for primary in pri_dict[table_name]:

  primary_name = primary[1]

  for condition in rollback_sql.split(‘WHERE’, 1)[1].splitlines():

  if re.compile(‘^\s*’+primary_name).match(condition) or re.compile(‘^\s*AND\s*’+primary_name).match(condition):

  sub_where = sub_where + condition + ‘\n’

  sub_where = re.sub(‘^\s*AND’, ‘’, sub_where, 1)

  rollback_sql = rollback_sql.split(‘WHERE’, 1)[0] + ‘WHERE\n’ + sub_where

  if sql.split()[0] == ‘UPDATE’:

  rollback_sql = re.sub(‘SET\n’, ‘#SET#\n’, sql, 1)

  rollback_sql = re.sub(‘WHERE\n’, ‘SET\n’, rollback_sql, 1)

  rollback_sql = re.sub(‘#SET#\n’, ‘WHERE\n’, rollback_sql, 1)

  tablename_pos = 1

  table_name = rollback_sql.split()[tablename_pos].replace(‘`’, ‘’)

  # 获取该sql中的所有列

  col_list = sorted(list(set(re.findall(‘@\d+’, rollback_sql))))

  # 因为第一个列前面没有逗号或者and,所以单独替换

  rollback_sql = rollback_sql.replace(‘@1=’, result_dict[table_name][0][1] + ‘=’)

  for col in col_list[1:]:

  i = int(col[1:]) - 1

  rollback_sql = rollback_sql.replace(col+‘=’, ‘,’ + result_dict[table_name][i][1]+‘=’, 1).replace(col+‘=’,‘AND ’ +result_dict[table_name][i][1]+‘=’)

  # 如果only_primary开启且存在主键,where条件里就只列出主键字段

  if int(only_primary) == 1 and pri_dict[table_name] 《》 ():

  sub_where = ‘’

  for primary in pri_dict[table_name]:

  primary_name = primary[1]

  for condition in rollback_sql.split(‘WHERE’, 1)[1].splitlines():

  if re.compile(‘^\s*’ + primary_name).match(condition) or re.compile(‘^\s*AND\s*’+primary_name).match(condition):

  sub_where = sub_where + condition + ‘\n’

  sub_where = re.sub(‘^\s*AND’, ‘’, sub_where, 1)

  rollback_sql = rollback_sql.split(‘WHERE’, 1)[0] + ‘WHERE\n’ + sub_where

  if sql.split()[0] == ‘DELETE’:

  rollback_sql = re.sub(‘^DELETE FROM’, ‘INSERT INTO’, sql, 1)

  rollback_sql = re.sub(‘WHERE\n’, ‘SET\n’, rollback_sql, 1)

  tablename_pos = 2

  table_name = rollback_sql.split()[tablename_pos].replace(‘`’, ‘’)

  # 获取该sql中的所有列

  col_list = sorted(list(set(re.findall(‘@\d+’, rollback_sql))))

  # 因为第一个列前面没有逗号或者and,所以单独替换

  rollback_sql = rollback_sql.replace(‘@1=’, result_dict[table_name][0][1] + ‘=’)

  for col in col_list[1:]:

  i = int(col[1:]) - 1

  rollback_sql = rollback_sql.replace(col+‘=’, ‘,’ + result_dict[table_name][i][1]+‘=’,1)

  rollback_sql = re.sub(‘\n$’,‘;\n’,rollback_sql)

  #print rollback_sql

  fileOutput.write(rollback_sql)

  except IndexError,e:

  print “Error:%s” % str(e)

  sys.exit()

  print “done!”

  def usage():

  help_info=“”“==========================================================================================

  Command line options :

  --help # OUT : print help info

  -f, --binlog # IN : binlog file. (required)

  -o, --outfile # OUT : output rollback sql file. (default ‘rollback.sql’)

  -h, --host # IN : host. (default ‘127.0.0.1’)

  -u, --user # IN : user. (required)

  -p, --password # IN : password. (required)

  -P, --port # IN : port. (default 3306)

  --start-datetime # IN : start datetime. (default ‘1970-01-01 00:00:00’)

  --stop-datetime # IN : stop datetime. default ‘2070-01-01 00:00:00’

  --start-position # IN : start position. (default ‘4’)

  --stop-position # IN : stop position. (default ‘18446744073709551615’)

  -d, --database # IN : List entries for just this database (No default value)。

  --only-primary # IN : Only list primary key in where condition (default 0)

  Sample :

  shell》 python binlog_rollback.py -f ‘mysql-bin.000001’ -o ‘/tmp/rollback.sql’ -h 192.168.0.1 -u ‘user’ -p ‘pwd’ -P 3307 -d dbname

  ==========================================================================================”“”

  print help_info

  sys.exit()

  if __name__ == ‘__main__’:

  getopts_parse_binlog()

  init_col_name()

  gen_rollback_sql()

  演示

  

  #首先创建一个只读账号

  root:test》 grant select on *.* to ‘query’@‘127.0.0.1’ identified by ‘123456’;

  Query OK, 0 rows affected, 1 warning (0.01 sec)

  #测试表结构如下

  root:test》 CREATE TABLE `table1` (

  -》 `id` int(11) NOT NULL AUTO_INCREMENT,

  -》 `c1` int(11) DEFAULT NULL,

  -》 `c2` varchar(20) DEFAULT NULL,

  -》 `c3` int(11) DEFAULT NULL,

  -》 PRIMARY KEY (`id`)

  -》 );

  Query OK, 0 rows affected (0.09 sec)

  #插入三条数据

  root:test》 insert into table1(c1,c2,c3) values (1,‘a’,1),(2,‘b’,2),(3,‘c’,3);

  Query OK, 3 rows affected (0.01 sec)

  Records: 3 Duplicates: 0 Warnings: 0

  root:test》 select * from table1;

  +----+------+------+------+

  | id | c1 | c2 | c3 |

  +----+------+------+------+

  | 1 | 1 | a | 1 |

  | 2 | 2 | b | 2 |

  | 3 | 3 | c | 3 |

  +----+------+------+------+

  3 rows in set (0.00 sec)

  #更新一条数据

  root:test》 update table1 set c3=10 where id=3;

  Query OK, 1 row affected (0.01 sec)

  Rows matched: 1 Changed: 1 Warnings: 0

  root:test》 select * from table1;

  +----+------+------+------+

  | id | c1 | c2 | c3 |

  +----+------+------+------+

  | 1 | 1 | a | 1 |

  | 2 | 2 | b | 2 |

  | 3 | 3 | c | 10 |

  +----+------+------+------+

  3 rows in set (0.00 sec)

  #删除一条数据

  root:test》 delete from table1 where id=1;

  Query OK, 1 row affected (0.01 sec)

  root:test》 select * from table1;

  +----+------+------+------+

  | id | c1 | c2 | c3 |

  +----+------+------+------+

  | 2 | 2 | b | 2 |

  | 3 | 3 | c | 10 |

  +----+------+------+------+

  2 rows in set (0.00 sec)

  

  接下来利用脚本来生成反向SQL

  [root@diandi ~]# python binlog_rollback.py -f /log/mysql/bin/mysql-bin.000002 -o rollback.sql -u query -p 123456 --start-datetime=‘2016-10-28 00:00:00’ -d test

  正在获取参数。..。.

  正在解析binlog.。..。

  正在初始化列名。..。.

  正在开始拼凑sql.。..。

  done!

  

  查看反向SQL,最新的DML会生成在输入文件的最前面

  [root@diandi ~]# cat rollback.sql

  ## at 155848

  ##161028 17:07:10 server id 22100 end_log_pos 155898 CRC32 0x5000bca7 Delete_rows: table id 351 flags: STMT_END_F

  INSERT INTO `test`。`table1`

  SET

  id=1

  ,c1=1

  ,c2=‘a’

  ,c3=1;

  ## at 155560

  ##161028 17:04:56 server id 22100 end_log_pos 155626 CRC32 0x11d91e2d Update_rows: table id 351 flags: STMT_END_F

  UPDATE `test`。`table1`

  SET

  id=3

  ,c1=3

  ,c2=‘c’

  ,c3=3

  WHERE

  id=3

  AND c1=3

  AND c2=‘c’

  AND c3=10;

  ## at 155258

  ##161028 16:59:31 server id 22100 end_log_pos 155338 CRC32 0x3978c1c1 Write_rows: table id 351 flags: STMT_END_F

  DELETE FROM `test`。`table1`

  WHERE

  id=3

  AND c1=3

  AND c2=‘c’

  AND c3=3;

  DELETE FROM `test`。`table1`

  WHERE

  id=2

  AND c1=2

  AND c2=‘b’

  AND c3=2;

  DELETE FROM `test`。`table1`

  WHERE

  id=1

  AND c1=1

  AND c2=‘a’

  AND c3=1;

  

  执行回滚操作

  #直接source整个文件,table1将恢复到原来的空表状态(实际情况,在测试环境上按需索取,然后再恢复线上)

  root:test》 source /root/rollback.sql

  Query OK, 1 row affected (0.01 sec)

  Query OK, 1 row affected (0.01 sec)

  Rows matched: 1 Changed: 1 Warnings: 0

  Query OK, 1 row affected (0.01 sec)

  Query OK, 1 row affected (0.01 sec)

  Query OK, 1 row affected (0.01 sec)

  root:test》 select * from table1;

  Empty set (0.00 sec)

  

  具体的参数使用方法如下:

  [root@diandi ~]# python binlog_rollback.py

  ==========================================================================================

  Command line options :

  --help # OUT : print help info

  -f, --binlog # IN : binlog file. (required)

  -o, --outfile # OUT : output rollback sql file. (default ‘rollback.sql’)

  -h, --host # IN : host. (default ‘127.0.0.1’)

  -u, --user # IN : user. (required)

  -p, --password # IN : password. (required)

  -P, --port # IN : port. (default 3306)

  --start-datetime # IN : start datetime. (default ‘1970-01-01 00:00:00’)

  --stop-datetime # IN : stop datetime. default ‘2070-01-01 00:00:00’

  --start-position # IN : start position. (default ‘4’)

  --stop-position # IN : stop position. (default ‘18446744073709551615’)

  -d, --database # IN : List entries for just this database (No default value)。

  --only-primary # IN : Only list primary key in where condition (default 0)

  Sample :

  shell》 python binlog_rollback.py -f ‘mysql-bin.000001’ -o ‘/tmp/rollback.sql’ -h 192.168.0.1 -u ‘user’ -p ‘pwd’ -P 3307 -d dbname


『本文转载自网络,版权归原作者所有,如有侵权请联系删除』

热门文章 更多
假如用上AI出狱追踪技术.江西连环杀人案疑凶曾春亮会不会重新犯罪?