MySql Rand 随机

建立用户及关联表

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
-- 用户表
CREATE TABLE `user` (
`id` int(10) unsigned NOT NULL AUTO_INCREMENT,
`type` int(10) unsigned NOT NULL,
`name` varchar(255) NOT NULL DEFAULT '',
PRIMARY KEY (`id`),
KEY `type` (`type`) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

-- 用户映射表
CREATE TABLE `user_map` (
`id` int(10) unsigned NOT NULL AUTO_INCREMENT,
`user_id` int(10) unsigned NOT NULL DEFAULT '0',
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;

生成测试数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
-- 新建存储过程
DELIMITER $$
CREATE PROCEDURE `user_create` (IN total INT)
BEGIN
DECLARE num int;
DECLARE str VARCHAR(100) DEFAULT 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz1234567890';
DECLARE len int;
declare nickname VARCHAR(20);
set num = 1;

while num <= total DO
set len = 20;
set nickname = '';
while len > 0 DO
set nickname = CONCAT(nickname,SUBSTRING(str,FLOOR(1 + RAND() * 62), 1));
set len = len - 1;
END WHILE;
insert into `user`(type, name)value(FLOOR(RAND() * 4), nickname);
set num = num+1;
end WHILE;
END
$$
DELIMITER;
-- 生成500W条数据
CALL user_create(5000000);

500万数量中随机获取10条

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#第一种 耗时6秒左右
select * from `user` order by rand() limit 100;

#第二种 耗时0.03秒左右,但是id如果是不连续的话,不一定会取出10条数据
select * from `user` u inner JOIN ( select ROUND(RAND() * ((select max(id) - min(id) from `user`) + (select min(id) from `user`))) as rnd ) uu on u.id >= uu.rnd limit 100;

#第三种 建立中间表,中间表id必须连续,耗时0.025左右
SELECT * FROM `user` u
INNER JOIN (
SELECT m.* FROM map m
INNER JOIN (
SELECT ROUND(RAND( ) * ( SELECT MAX( id ) - MIN( id ) FROM map ) + ( SELECT MIN( id ) FROM map ) ) AS rnd FROM map LIMIT 100
) um ON m.id = um.rnd LIMIT 100
) umm ON umm.user_id = u.id;

#第四种 由第二演变而来,执行效率0.025左右, limit 9, 1 保证了id不连续时,尾部确保有10条数据
SELECT * FROM `user` u
INNER JOIN (
SELECT ROUND(
RAND( ) * ( SELECT ( SELECT id FROM `user` ORDER BY id DESC LIMIT 99, 1 ) - min( id ) FROM `user` ) + ( SELECT min( id ) FROM `user` )
) AS rnd
) AS uu ON u.id >= uu.rnd
LIMIT 100;

总结

第一种方法数据量大耗时长,不推荐
第二种方法不一定可以获取10条,不推荐
第三种方法可以确保获取10条,且耗时低,推荐
第四种方法需要建中间表,如果user增加一个type,需要还要不同类型各建立一个map表,灵活性差,不推荐。