【HAVENT原创】使用 K-Means 算法对用户的活动区域进行分组
获取用户的历史派件地址记录集合,使用 K-Means 算法按经纬度将这些地址聚类成手动设置的组数量,再把每个组内去重后的 suburb 保存到数据库(注释掉的代码是使用 geocluster 进行分组,它通过 bias 因子控制分组粒度,无法手动设置组的数量),代码如下:
var globalConfig = require('../config.json'); //var config = require('./config.json'); const kmeans = require('node-kmeans'); //const geocluster = require("geocluster"); var mysql = require('mysql'); var log4js = require('log4js'); log4js.configure({ // configure to use all types in different files. appenders: [ { type: 'file', filename: globalConfig.mapzone.log, category: 'service', maxLogSize: 20480000, backups: 10 } ] }); var logger = log4js.getLogger('service'); var pool = mysql.createPool({ connectionLimit: 10, host: globalConfig.mysql.host, user: globalConfig.mysql.user, password: globalConfig.mysql.password, database: globalConfig.mysql.db }); var getAddressList = function(runnerId) { return new Promise(function (resolve, reject) { pool.getConnection(function (err, connection) { if(err){ logger.error(err); pool.releaseConnection(connection); return reject(err); } var sqlStr = 'SELECT a.id, a.suburb, a.postcode, a.lat, a.lng FROM job j'; sqlStr += ' INNER JOIN address a ON a.id = j.arrival_address_id'; sqlStr += ' WHERE j.is_deleted = 0 AND j.is_group = 0 AND j.is_fake_job = 0 AND j.arrival_address_id IS NOT NULL'; sqlStr += ' AND a.is_deleted = 0 AND a.lat IS NOT NULL AND a.lng IS NOT NULL AND a.lat <> 0 AND a.lng <> 0'; sqlStr += ' AND j.runner_id = ?'; var params = []; params.push(runnerId); connection.query(sqlStr, params, function (err, rows) { if(err){ logger.error(err); pool.releaseConnection(connection); return reject(err); } resolve(rows); }); }); }); }; var kmeanCluster = function (vectors, numberOfClusters, callback) { kmeans.clusterize(vectors, {k: numberOfClusters}, function(err, res) { if(callback) callback(err, res); }); }; var insertData = function (rows, result, runnerId) { var zoneNum = 0; var values = []; result.forEach(function (cluster, key) { cluster.clusterInd.forEach(function (val, key) { var item = rows[val]; var isDuplicate = false; values.forEach(function (value) { if(item.suburb == value[2]){ isDuplicate = true; } }); 
if(!isDuplicate) { values.push([runnerId, zoneNum, item.suburb, item.postcode]); } }); zoneNum++; }); //console.log(JSON.stringify(values)); return new Promise(function (resolve, reject) { pool.getConnection(function (err, connection) { if(err){ logger.error(err); pool.releaseConnection(connection); return reject(err); } var sqlStr = 'DELETE FROM user_zone WHERE user_id = ?'; connection.query(sqlStr, [runnerId], function (err, rows) { if(err){ logger.error(err); pool.releaseConnection(connection); return reject(err); } var sqlStr = 'INSERT INTO user_zone (user_id, zone_num, suburb, postcode) VALUES ?'; connection.query(sqlStr, [values], function (err, rows) { if(err){ logger.error(err); pool.releaseConnection(connection); return reject(err); } resolve(rows); }); }); }); }); }; // var geoCluster = function (coordinates, res) { // var bias = 1.5; // multiply stdev with this factor, the smaller the more clusters // var result = geocluster(coordinates, bias); // // res.send(result); // }; module.exports = { groupJobAddressBySuburb: function (req, res) { var runnerId = req.query.userId; var numOfClusters = req.query.num; getAddressList(runnerId).then(function (rows) { var vectors = []; rows.forEach(function (val, key) { vectors.push([val.lat, val.lng]); }); //geoCluster(vectors, res); kmeanCluster(vectors, numOfClusters, function (err, result) { if(err) { res.send(err); return; } insertData(rows, result, runnerId).then(function (rows) { res.send(result); }, function (err) { res.send(err); }); }); }, function (error) { res.send(error); }); } };

浙公网安备 33010602011771号