PostgreSQL的分区表建立

12年前

当数据库日渐庞大时,按时间、按地区等维度去统计数据,往往会因为基数过于庞大而多有不便。为了方便对数据的管理,很多商业数据库都提供了分区的概念,按不同的维度分别存放数据,便于后期的管理,PG也不例外。下面是分区表的创建步骤:

1.建立主表

-- Parent (master) table of the partition set.
-- It is schema-qualified with "test" because the trigger function, the
-- trigger and the test queries in this article all refer to
-- test.parent_table; an unqualified CREATE TABLE would land in whatever
-- schema comes first in search_path (usually "public").
CREATE SCHEMA IF NOT EXISTS test;

CREATE TABLE test.parent_table (
    id          integer,
    name        character varying(20),
    create_time timestamp without time zone  -- partition key: children are split by month on this column
);

2.建立子表,继承于主表

-- Monthly child tables for 2012, one per calendar month, each inheriting all
-- columns from test.parent_table.
-- Every child carries a CHECK constraint with a half-open range
-- [first day of month, first day of next month), so the ranges do not overlap
-- and together cover the whole of 2012.
-- Tables are schema-qualified with "test" to match the trigger function,
-- which inserts into test.parent_table_2012_NN.
CREATE TABLE test.parent_table_2012_01 (
    CONSTRAINT parent_table_2012_01_create_time_check
        CHECK (create_time >= DATE '2012-01-01' AND create_time < DATE '2012-02-01')
) INHERITS (test.parent_table);

CREATE TABLE test.parent_table_2012_02 (
    CONSTRAINT parent_table_2012_02_create_time_check
        CHECK (create_time >= DATE '2012-02-01' AND create_time < DATE '2012-03-01')
) INHERITS (test.parent_table);

CREATE TABLE test.parent_table_2012_03 (
    CONSTRAINT parent_table_2012_03_create_time_check
        CHECK (create_time >= DATE '2012-03-01' AND create_time < DATE '2012-04-01')
) INHERITS (test.parent_table);

CREATE TABLE test.parent_table_2012_04 (
    CONSTRAINT parent_table_2012_04_create_time_check
        CHECK (create_time >= DATE '2012-04-01' AND create_time < DATE '2012-05-01')
) INHERITS (test.parent_table);

CREATE TABLE test.parent_table_2012_05 (
    CONSTRAINT parent_table_2012_05_create_time_check
        CHECK (create_time >= DATE '2012-05-01' AND create_time < DATE '2012-06-01')
) INHERITS (test.parent_table);

CREATE TABLE test.parent_table_2012_06 (
    CONSTRAINT parent_table_2012_06_create_time_check
        CHECK (create_time >= DATE '2012-06-01' AND create_time < DATE '2012-07-01')
) INHERITS (test.parent_table);

CREATE TABLE test.parent_table_2012_07 (
    CONSTRAINT parent_table_2012_07_create_time_check
        CHECK (create_time >= DATE '2012-07-01' AND create_time < DATE '2012-08-01')
) INHERITS (test.parent_table);

CREATE TABLE test.parent_table_2012_08 (
    CONSTRAINT parent_table_2012_08_create_time_check
        CHECK (create_time >= DATE '2012-08-01' AND create_time < DATE '2012-09-01')
) INHERITS (test.parent_table);

CREATE TABLE test.parent_table_2012_09 (
    CONSTRAINT parent_table_2012_09_create_time_check
        CHECK (create_time >= DATE '2012-09-01' AND create_time < DATE '2012-10-01')
) INHERITS (test.parent_table);

CREATE TABLE test.parent_table_2012_10 (
    CONSTRAINT parent_table_2012_10_create_time_check
        CHECK (create_time >= DATE '2012-10-01' AND create_time < DATE '2012-11-01')
) INHERITS (test.parent_table);

CREATE TABLE test.parent_table_2012_11 (
    CONSTRAINT parent_table_2012_11_create_time_check
        CHECK (create_time >= DATE '2012-11-01' AND create_time < DATE '2012-12-01')
) INHERITS (test.parent_table);

CREATE TABLE test.parent_table_2012_12 (
    CONSTRAINT parent_table_2012_12_create_time_check
        CHECK (create_time >= DATE '2012-12-01' AND create_time < DATE '2013-01-01')
) INHERITS (test.parent_table);

3.创建触发器函数

-- Row-level trigger function that routes inserts on test.parent_table to the
-- monthly child table whose date range contains NEW.create_time.
--
-- Returns: NULL, so that the row is stored only in the child table and the
--          original insert into the parent is skipped.
-- Raises:  an exception when create_time matches none of the 2012 ranges
--          (this includes a NULL create_time, since every comparison with
--          NULL is not true and control falls through to the ELSE branch).
CREATE OR REPLACE FUNCTION test.tri_parent_tab_insert()
RETURNS TRIGGER AS $$
-- author: kenyon
-- created: 2012-05-24
BEGIN
    -- Each branch uses a half-open range [month start, next month start).
    -- Column lists are spelled out so the inserts do not depend on the
    -- physical column order of the child tables.
    IF (NEW.create_time >= DATE '2012-01-01' AND
        NEW.create_time <  DATE '2012-02-01') THEN
        INSERT INTO test.parent_table_2012_01 (id, name, create_time)
        VALUES (NEW.id, NEW.name, NEW.create_time);
    ELSIF (NEW.create_time >= DATE '2012-02-01' AND
           NEW.create_time <  DATE '2012-03-01') THEN
        INSERT INTO test.parent_table_2012_02 (id, name, create_time)
        VALUES (NEW.id, NEW.name, NEW.create_time);
    ELSIF (NEW.create_time >= DATE '2012-03-01' AND
           NEW.create_time <  DATE '2012-04-01') THEN
        INSERT INTO test.parent_table_2012_03 (id, name, create_time)
        VALUES (NEW.id, NEW.name, NEW.create_time);
    ELSIF (NEW.create_time >= DATE '2012-04-01' AND
           NEW.create_time <  DATE '2012-05-01') THEN
        INSERT INTO test.parent_table_2012_04 (id, name, create_time)
        VALUES (NEW.id, NEW.name, NEW.create_time);
    ELSIF (NEW.create_time >= DATE '2012-05-01' AND
           NEW.create_time <  DATE '2012-06-01') THEN
        INSERT INTO test.parent_table_2012_05 (id, name, create_time)
        VALUES (NEW.id, NEW.name, NEW.create_time);
    ELSIF (NEW.create_time >= DATE '2012-06-01' AND
           NEW.create_time <  DATE '2012-07-01') THEN
        INSERT INTO test.parent_table_2012_06 (id, name, create_time)
        VALUES (NEW.id, NEW.name, NEW.create_time);
    ELSIF (NEW.create_time >= DATE '2012-07-01' AND
           NEW.create_time <  DATE '2012-08-01') THEN
        INSERT INTO test.parent_table_2012_07 (id, name, create_time)
        VALUES (NEW.id, NEW.name, NEW.create_time);
    ELSIF (NEW.create_time >= DATE '2012-08-01' AND
           NEW.create_time <  DATE '2012-09-01') THEN
        INSERT INTO test.parent_table_2012_08 (id, name, create_time)
        VALUES (NEW.id, NEW.name, NEW.create_time);
    ELSIF (NEW.create_time >= DATE '2012-09-01' AND
           NEW.create_time <  DATE '2012-10-01') THEN
        INSERT INTO test.parent_table_2012_09 (id, name, create_time)
        VALUES (NEW.id, NEW.name, NEW.create_time);
    ELSIF (NEW.create_time >= DATE '2012-10-01' AND
           NEW.create_time <  DATE '2012-11-01') THEN
        INSERT INTO test.parent_table_2012_10 (id, name, create_time)
        VALUES (NEW.id, NEW.name, NEW.create_time);
    ELSIF (NEW.create_time >= DATE '2012-11-01' AND
           NEW.create_time <  DATE '2012-12-01') THEN
        INSERT INTO test.parent_table_2012_11 (id, name, create_time)
        VALUES (NEW.id, NEW.name, NEW.create_time);
    ELSIF (NEW.create_time >= DATE '2012-12-01' AND
           NEW.create_time <  DATE '2013-01-01') THEN
        INSERT INTO test.parent_table_2012_12 (id, name, create_time)
        VALUES (NEW.id, NEW.name, NEW.create_time);
    ELSE
        -- No child table covers this value; the message names this function
        -- so the reader knows where to add the missing range.
        RAISE EXCEPTION 'Date out of range. Fix the test.tri_parent_tab_insert() function!';
    END IF;

    -- NULL tells PostgreSQL to skip the insert into the parent table itself.
    RETURN NULL;
END;
$$
LANGUAGE plpgsql;

4.创建触发器

-- Attach the routing function to the parent table: it runs once per inserted
-- row, before the row would be stored in test.parent_table.
CREATE TRIGGER tri_insert_parent_table
BEFORE INSERT
ON test.parent_table
FOR EACH ROW
EXECUTE PROCEDURE test.tri_parent_tab_insert();

5.测试
至此就OK了。前端插入时只要插入主表就可以自动将数据按时间分类分插到子表里去。
插入一定的测试数据,来看看效果

kenyon=# select count(1) from test.parent_table_2012_03;      count      ---------     2293760    (1 row)

kenyon=# select count(1) from test.parent_table;   count  ---------  2293761 (1 row)

kenyon=# select pg_size_pretty(pg_relation_size('test.parent_table_2012_03'));  pg_size_pretty ----------------  106 MB (1 row)

kenyon=# select pg_size_pretty(pg_relation_size('test.parent_table'));  pg_size_pretty ----------------  8192 bytes (1 row)

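PS:可以看到实际的数据是存放在子表里去了,父表本身基本不存放数据(仅占 8192 bytes)。注意对父表执行 count 时默认会把所有子表的数据一并统计进去,若只想统计父表自身的数据,可以使用 select count(1) from only test.parent_table。这么做前端开发会省去不少工作,但是后端DB会增加不少压力;也可以在后端建好分区表后,由前端按时间直接插入对应的子表,以减少因触发器带来的DB压力。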
另外,可以单独对某个子表(分区)进行DML或者DDL操作,例如对某个月份的子表执行 truncate。