Mysql分片后分页排序拉取数据的方法
高并发大流量的互联网架构,一般通过服务层来访问数据库,随着数据量的增大,数据库需要进行水平切分,分库后将数据分布到不同的数据库实例(甚至物理机器)上,以达到降低数据量,增加实例数的扩容目的。
一旦涉及分库,逃不开“分库依据”patition key的概念,使用哪一个字段来水平切分数据库呢:大部分的业务场景,会使用业务主键id。
确定了分库依据patition key后,接下来要确定的是分库算法:大部分的业务场景,会使用业务主键id取模的算法来分库,这样即能够保证每个库的数据分布是均匀的,又能够保证每个库的请求分布是均匀的,实在是简单实现负载均衡的好方法,此法在互联网架构中应用颇多。
一般有四种方式来进行拉取数据。
第一:
假如:“SELECT * FROM USER ORDER BY AGE LIMIT 10,5;”这个意思是拉取第3页的数据。我们一般不分库的时候是按照这个方式去进行分页拉取数据的操作。但是分片以后,这样肯定拉取到的是不准确的数据。
DB1:“SELECT * FROM USER ORDER BY AGE LIMIT 0,15;” DB2:“SELECT * FROM USER ORDER BY AGE LIMIT 0,15;” ..... DBn:“SELECT * FROM USER ORDER BY AGE LIMIT 0,15;”.然后把所有的前3页的数据都拉出来,再内存排序,取第11~15条数据。
第二:
如果我们分页是禁止跳页的
还是第三页 “SELECT * FROM USER ORDER BY AGE LIMIT 10,5;”
DB1:“SELECT * FROM USER ORDER BY AGE LIMIT 0,5;” DB2:“SELECT * FROM USER ORDER BY AGE LIMIT 0,5;” ..... DBn:“SELECT * FROM USER ORDER BY AGE LIMIT 0,5;”.然后把所有的前3页的数据都拉出来,再内存排序,取第0~5条数据。这时我们知道第一页最大的是多少了maxN,这时,我们下一页的sql语句就变成DB1:“SELECT * FROM USER WHERE AGE>maxN ORDER BY AGE LIMIT 5,5;” DB2:“SELECT * FROM USER WHERE AGE>maxN ORDER BY AGE LIMIT 5,5;” ..... DBn:“SELECT * FROM USER WHERE AGE>maxN ORDER BY AGE LIMIT 5,5;”。第三页就根据第二页的最大值去进行查询排序就不用跟第一种一样把前面所有的数据拉出来排序。
第三种就是概率问题了,要5条数据,每个库拉取5/n取整的数据当做最后的数据。
重点来了,第四种:二次查询法。
还是第三页 “SELECT * FROM USER ORDER BY AGE LIMIT 10,5;”
n标识n个库分了。
改写sql语句:DB1:“SELECT * FROM USER ORDER BY AGE LIMIT 10/n,5;” DB2:“SELECT * FROM USER ORDER BY AGE LIMIT 10/n,5;” ..... DBn:“SELECT * FROM USER ORDER BY AGE LIMIT 10/n,5;”
查到以后:获取n个库里面最小的记录minAge;
获取到最小minAge以后,再查询一次数据库:
maxAgeDBn,表示的是第n次查询出来的“SELECT * FROM USER ORDER BY AGE LIMIT 10/n,5;的数据里面的最大值。
DB1:“SELECT * FROM USER WHERE AGE BETWEEN minAge AND maxAgeDB1 ORDER BY AGE LIMIT 10/n,5;” DB2:“SELECT * FROM USER WHERE AGE BETWEEN minAge AND maxAgeDB2 ORDER BY AGE LIMIT 10/n,5;” ..... DBn:“SELECT * FROM USER WHERE AGE BETWEEN minAge AND maxAgeDBn ORDER BY AGE LIMIT 10/n,5;”
把第二次查询的数据按照Age排序,以后可以看出比第一次查询会多出m条数据。
然后我们获取第m+1~m+6条数据就是第3页数据。
list1,list2,list3,假如出来的是3个list。list1没有多出数据,list2多出来2条,list3多出3条。那其实在list1比minAge小的为3条,list2比minAge小的是1条,list3是0条,那么整个数据比minAge小的有3+1+0为4条,那其实minAge在整个的数据中排名第5位。然后我们根据第二次查出的数据排序以后获取第6-11数据就好了。
最后看一下实现的代码:
public static PageResult<T> QueryPage<T>(string sql) { if (sql == null || sql.Length == 0) { throw new Exception("参数Sql为空!!"); } var connections = ShardingConnUtils.GetAllConnection(); if (connections == null || connections.Count == 0) { throw new Exception("请先设置连接字符串!!"); } var resut = new PageResult<T>(); //SELECT * FROM TSTravelInfo ti WHERE 1=1 ORDER BY ti.TICreateTime ASC LIMIT 5,10; var offset = 0; var pageSize = 0; var orderBy = string.Empty; var upperSql = sql.ToUpper(); var isDesc = upperSql.IndexOf(" DESC ")<0?false:true; Regex r = new Regex(" LIMIT "); var splits = r.Split(upperSql); if (splits != null && splits.Length > 1) { var number = splits[1]; var sizes = number.Trim().TrimEnd(';').Split(','); if (sizes.Length == 1) { pageSize = Convert.ToInt32(sizes[0]); } else { offset = Convert.ToInt32(sizes[0]); pageSize = Convert.ToInt32(sizes[1]); } var sqlNoPage = splits[0]; var newOffset = offset / connections.Count; var listData = new List<T>[connections.Count]; var allCount = 0; for (int i = 0; i < connections.Count; i++) { var connection = connections[i]; var sqlNew = sqlNoPage + " LIMIT " + newOffset + " ," + pageSize + ";"; var rCount = new Regex(" FROM "); var sqlCount = "select count(1) FROM " + rCount.Split(sqlNoPage)[1]; var data = connection.Query<T>(sqlNew); allCount += Convert.ToInt32(connection.ExecuteScalar(sqlCount)); listData[i] = data.AsList<T>(); } var minsList = listData.Select(p => p.FirstOrDefault()).ToList(); orderBy = GetOrderBy(sql); SortT(minsList, orderBy,isDesc); var dataMin = minsList.FirstOrDefault(); var value = GetProporyValue<T>(dataMin, orderBy); var newListData = new List<T>[connections.Count]; var addCount = 0; if (!string.IsNullOrEmpty(orderBy)) { for (int i = 0; i < listData.Length; i++) { if (listData[i] == null || !listData[i].Any()) { newListData[i] = (null); continue; } var maxValue = GetProporyValue<T>(listData[i].Last(), orderBy); var secendNewSql = string.Empty; var hasWhere = upperSql.IndexOf(" WHERE ")<0?false:true ; var sqlNoOrder = string.Empty; Regex rOrderBy = new Regex("ORDER\\s+BY"); var sqlNoOrderList = rOrderBy.Split(sql.ToUpper()); if (hasWhere) { sqlNoOrder = sqlNoOrderList[0]; } else { sqlNoOrder= sqlNoOrderList[0]+" where 1=1 "; } var sqlSort =r.Split(sqlNoOrderList[1])[0]; if (isDesc) { secendNewSql = sqlNoOrder + string.Format(" and {0} Between '{1}' and '{2}' Order by {3}", orderBy, maxValue, value,sqlSort); } else { secendNewSql = sqlNoPage + string.Format(" and {0} Between '{1}' and '{2}' Order by {3}", orderBy, value, maxValue,sqlSort); } var secData = connections[i].Query<T>(secendNewSql); var count = secData.Count() - listData[i].Count(); addCount += count; newListData[i] = secData.ToList(); } } var allOffect = offset * connections.Count - addCount; var allData = new List<T>(); foreach (var item in newListData) { allData.AddRange(item); } SortT(allData,orderBy,isDesc); resut.Result = allData.GetRange(addCount, pageSize); resut.TotalCount = allCount; } return resut; } private static string GetOrderBy(string sql) { var orderBy = string.Empty; Regex r = new Regex(" LIMIT "); var splits = r.Split(sql.ToUpper()); if (splits != null && splits.Length > 1) { var r1 = new Regex("ORDER\\s+BY"); var newstr = r1.Split(splits[0]); var b = string.Empty; if (newstr[1].EndsWith("DESC")) { b = newstr[1].TrimEnd(new char[] { 'D', 'E', 'S', 'C' }); } if (newstr[1].EndsWith("ASC")) { b = newstr[1].TrimEnd(new char[] { 'A', 'S', 'C' }); } var index = b.IndexOf('.'); if (index == -1) { var indexLastNo= sql.ToUpper().LastIndexOf(b); return sql.Substring(indexLastNo, b.Length).Trim(); } orderBy = b.Substring(index + 1); } var indexLast=sql.ToUpper().LastIndexOf(orderBy); return sql.Substring(indexLast, orderBy.Length).Trim(); } public static object GetProporyValue<T>(T t, string proName) { var type = t.GetType(); return type.GetProperty(proName).GetValue(t); } private static void SortT<T>(List<T> list, string orderByName,bool isDesc) { var newList = new List<T>(); int lastSwapPos = 0, lastSwapPosTemp = 0, size = list.Count; for (int i = 0; i < size - 1; i++) { lastSwapPos = lastSwapPosTemp; for (int j = size - 1; j > lastSwapPos; j--) { var type = list[j - 1].GetType(); var pro = type.GetProperty(orderByName); bool isBig = false; if (pro.PropertyType.IsValueType) { var value1 = Convert.ToInt64(pro.GetValue(list[j - 1])); var value2 = Convert.ToInt64(pro.GetValue(list[j])); isBig = value1 - value2 > 0 ? true : false; } else { var value1 = pro.GetValue(list[j - 1]).ToString(); var value2 = pro.GetValue(list[j]).ToString(); isBig = value1.CompareTo(value1) > 0 ? true : false; } if (isDesc) { isBig = !isBig; } if (isBig) { T temp = list[j - 1]; list[j - 1] = list[j]; list[j] = temp; lastSwapPosTemp = j; } } if (lastSwapPos == lastSwapPosTemp) break; } }
测试代码:
//注册数据库连接 IDbConnection db0 = new MySqlConnection(ConfigurationManager.ConnectionStrings["db0"].ConnectionString); IDbConnection db1 = new MySqlConnection(ConfigurationManager.ConnectionStrings["db1"].ConnectionString); IDbConnection db2 = new MySqlConnection(ConfigurationManager.ConnectionStrings["db2"].ConnectionString); Dictionary<string, IDbConnection> connectionDic = new Dictionary<string, IDbConnection>(); connectionDic.Add("0", db0); connectionDic.Add("1", db1); connectionDic.Add("2", db2); ShardingConnUtils.RegisConnGroup(connectionDic); var pages=ShardingCore.QueryPage<User>("select * from User order by Age desc limit 10,5;");
最后说一下,这个是看了沈剑沈老师的公众号知道了,具体的可以看下面的链接:http://mp.weixin.qq.com/s/h99sXP4mvVFsJw6Oh3aU5A。
讲的不好的,大家可以去看这篇文章~_~!!。

浙公网安备 33010602011771号