概述
分享下最近做的一个mysql大表归档方案,仅供参考。
整体思路
一、明确哪些大表需做归档
1、数据库表概要信息统计
SELECT
t1.table_schema,
t1.table_name,
`ENGINE`,
table_rows,
CAST( data_length / 1024.0 / 1024.0 AS DECIMAL ( 10, 2 ) ) `data_size(M)`,
CAST( index_length / 1024.0 / 1024.0 AS DECIMAL ( 10, 2 ) ) `index_size(M)`,
t2.ct col_count,
t3.ct idx_count,
create_time,
table_comment
FROM
information_schema.TABLES t1
LEFT JOIN -- 字段总数
( SELECT table_name, COUNT( 1 ) ct FROM information_schema.COLUMNS GROUP BY table_name ) t2 ON t1.table_name = t2.table_name
LEFT JOIN -- 索引总数
( SELECT table_name, COUNT( DISTINCT index_name ) ct FROM information_schema.STATISTICS GROUP BY table_name ) t3 ON t1.table_name = t3.table_name
WHERE
t1.table_schema NOT IN ( 'mysql', 'information_schema', 'performance_schema' )
ORDER BY
t1.data_length DESC;
2、整理需备份的大表
二、备份大表
1、表定义
或者用navicat导出表结构
show create table 表名;
2、表主键、外键
--查看表主键信息
SELECT
t.TABLE_NAME,
t.CONSTRAINT_TYPE,
c.COLUMN_NAME,
c.ORDINAL_POSITION
FROM
INFORMATION_SCHEMA.TABLE_CONSTRAINTS AS t,
INFORMATION_SCHEMA.KEY_COLUMN_USAGE AS c
WHERE
t.TABLE_NAME = c.TABLE_NAME
AND t.CONSTRAINT_TYPE = 'PRIMARY KEY'
AND t.TABLE_NAME = '<TABLE_NAME>'
AND t.TABLE_SCHEMA = '<TABLE_SCHEMA>';
--外键定义
SELECT
C.TABLE_SCHEMA,
C.REFERENCED_TABLE_NAME,
C.REFERENCED_COLUMN_NAME,
C.TABLE_NAME,
C.COLUMN_NAME,
C.CONSTRAINT_NAME,
T.TABLE_COMMENT,
R.UPDATE_RULE,
R.DELETE_RULE
FROM
INFORMATION_SCHEMA.KEY_COLUMN_USAGE C
JOIN INFORMATION_SCHEMA.TABLES T ON T.TABLE_NAME = C.TABLE_NAME
JOIN INFORMATION_SCHEMA.REFERENTIAL_CONSTRAINTS R ON R.TABLE_NAME = C.TABLE_NAME
AND R.CONSTRAINT_NAME = C.CONSTRAINT_NAME
AND R.REFERENCED_TABLE_NAME = C.REFERENCED_TABLE_NAME
WHERE
C.REFERENCED_TABLE_NAME = 'TABLE_NAME' or C.TABLE_NAME='TABLE_NAME';
3、表触发器
SELECT * FROM information_schema.triggers where event_object_table='table_name';
show create trigger trigger_schema.trigger_name;
4、表索引
show index from 表名;
三、历史数据归档
1、建立归档库及在归档库中创建归档表
create database pas_arch default character set utf8mb4;
create table pas_arch.t_att_work_hour_report_dmp1 as select * from pas_prod.t_att_work_hour_report where 1=2;
2、归档历史数据
将历史数据插入到归档库中的归档表
insert into pas_arch.t_att_work_hour_report_dmp1 select * from
pas_prod.t_att_work_hour_report where work_date<'2020-01-01 00:00:00'
3、确认数据一致
select count(*) from pas_prod.t_att_work_hour_report where work_date<'2020-01-01 00:00:00';
select count(*) from pas_arch.t_att_work_hour_report_dmp1;
四、表切换
1、在原始库创建中间表
create table pas_prod.t_att_work_hour_report_tmp as select * from pas_prod.t_att_work_hour_report where 1=2;
2、分段insert
insert into pas_prod.t_att_work_hour_report_tmp select * from pas_prod.t_att_work_hour_report where work_date>='2020-01-01 00:00:00' and work_date<'2020-03-13 00:00:00';
insert into pas_prod.t_att_work_hour_report_tmp select * from pas_prod.t_att_work_hour_report where work_date>='2020-03-13 00:00:00' and work_date<'2020-07-13 00:00:00';
commit;
3、切换表
理论上会影响业务一秒钟,建议在业务空闲时间段进行
alter table pas_prod.t_att_work_hour_report rename to pas_prod.t_att_work_hour_report_arch;
alter table pas_prod.t_att_work_hour_report_tmp rename to pas_prod.t_att_work_hour_report;
4、数据补录
insert into t_att_work_hour_report select * from t_att_work_hour_report_arch where work_date>='2020-07-13 00:00:00';
5、依次添加主外键、触发器、约束、索引及授权
6、业务测试
业务测试反馈正常。
上面的添加主外键、触发器、约束、索引及授权步骤可以考虑在切换表之前,理论上影响业务一秒钟。
后面会分享更多devops和DBA方面内容,感兴趣的朋友可以关注下~