【HAVENT原创】使用 K-Means 算法对用户的活动区域进行分组

获取用户的历史派件地址记录集合,按照手动设置的分组数量,用 K-Means 对这些地址的经纬度进行聚类,再将每个分组内的 suburb 去重后保存。(注释掉的代码使用 geocluster 进行分组,它只能通过 bias 因子间接控制分组结果,无法手动指定组的数量。)代码如下:

var globalConfig = require('../config.json');
//var config = require('./config.json');

const kmeans = require('node-kmeans');
//const geocluster = require("geocluster");

var mysql = require('mysql');
var log4js = require('log4js');
// Single file appender bound to the 'service' category; the log path comes
// from the shared config, and maxLogSize/backups are handed to log4js as-is.
var serviceFileAppender = {
    type: 'file',
    filename: globalConfig.mapzone.log,
    category: 'service',
    maxLogSize: 20480000,
    backups: 10
};

log4js.configure({ appenders: [serviceFileAppender] });

// Module-wide logger used by the database helpers below.
var logger = log4js.getLogger('service');

// Shared MySQL connection pool (at most 10 connections); host and credentials
// are read from the `mysql` section of the global config file.
var mysqlSettings = globalConfig.mysql;

var pool = mysql.createPool({
    connectionLimit: 10,
    host: mysqlSettings.host,
    user: mysqlSettings.user,
    password: mysqlSettings.password,
    database: mysqlSettings.db
});


/**
 * Load the arrival addresses of every real (non-deleted, non-group, non-fake)
 * job handled by one runner. Only addresses with a usable, non-zero lat/lng
 * are returned, one row per job (so an address may appear several times).
 *
 * @param {number|string} runnerId - value matched against job.runner_id.
 * @returns {Promise<Array<{id: *, suburb: *, postcode: *, lat: *, lng: *}>>}
 *          resolves with the result rows; rejects with the driver error.
 */
var getAddressList = function (runnerId) {
    var sqlStr = [
        'SELECT a.id, a.suburb, a.postcode, a.lat, a.lng FROM job j',
        'INNER JOIN address a ON a.id = j.arrival_address_id',
        'WHERE j.is_deleted = 0 AND j.is_group = 0 AND j.is_fake_job = 0 AND j.arrival_address_id IS NOT NULL',
        'AND a.is_deleted = 0 AND a.lat IS NOT NULL AND a.lng IS NOT NULL AND a.lat <> 0 AND a.lng <> 0',
        'AND j.runner_id = ?'
    ].join(' ');

    return new Promise(function (resolve, reject) {
        // pool.query() acquires a connection, runs the statement and releases
        // the connection again on both success and failure. Managing the
        // connection by hand previously leaked it on success and tried to
        // release an undefined connection when acquisition failed.
        pool.query(sqlStr, [runnerId], function (err, rows) {
            if (err) {
                logger.error(err);
                return reject(err);
            }

            resolve(rows);
        });
    });
};

/**
 * Run node-kmeans over the given vectors and hand the outcome to `callback`.
 *
 * @param {Array<Array<number>>} vectors - points to cluster.
 * @param {number} numberOfClusters - forwarded to node-kmeans as option `k`.
 * @param {function(*, *)=} callback - receives (err, res) from node-kmeans;
 *        when omitted the outcome is discarded.
 */
var kmeanCluster = function (vectors, numberOfClusters, callback) {
    var options = {k: numberOfClusters};

    var forwardOutcome = function (err, res) {
        if (!callback) {
            return;
        }
        callback(err, res);
    };

    kmeans.clusterize(vectors, options, forwardOutcome);
};

/**
 * Replace the stored zones of one runner with the suburbs found in a k-means
 * result.
 *
 * Each cluster becomes one zone, numbered by its position in `result`. A
 * suburb is stored only once: the first cluster that contains it wins.
 *
 * The DELETE and INSERT run inside one transaction, so a failed INSERT no
 * longer leaves the runner without any zones. The connection is released on
 * every path.
 *
 * @param {Array<{suburb: *, postcode: *}>} rows - address rows that were
 *        clustered; `clusterInd` values index into this array.
 * @param {Array<{clusterInd: Array<number>}>} result - clusters as reported
 *        by node-kmeans.
 * @param {number|string} runnerId - stored as user_zone.user_id.
 * @returns {Promise<*>} resolves with the driver result of the INSERT (or of
 *          the DELETE when there is nothing to insert); rejects with the
 *          driver error.
 */
var insertData = function (rows, result, runnerId) {
    var seenSuburbs = new Set();
    var values = [];

    result.forEach(function (cluster, zoneNum) {
        cluster.clusterInd.forEach(function (rowIndex) {
            var item = rows[rowIndex];

            if (seenSuburbs.has(item.suburb)) {
                return;
            }
            seenSuburbs.add(item.suburb);
            values.push([runnerId, zoneNum, item.suburb, item.postcode]);
        });
    });

    return new Promise(function (resolve, reject) {
        pool.getConnection(function (err, connection) {
            if (err) {
                // No connection was handed out, so there is nothing to release.
                logger.error(err);
                return reject(err);
            }

            // Undo any partial work, give the connection back, then report.
            var fail = function (error) {
                logger.error(error);
                connection.rollback(function () {
                    connection.release();
                    reject(error);
                });
            };

            var commit = function (outcome) {
                connection.commit(function (err) {
                    if (err) {
                        return fail(err);
                    }
                    connection.release();
                    resolve(outcome);
                });
            };

            connection.beginTransaction(function (err) {
                if (err) {
                    logger.error(err);
                    connection.release();
                    return reject(err);
                }

                var deleteSql = 'DELETE FROM user_zone WHERE user_id = ?';
                connection.query(deleteSql, [runnerId], function (err, deleteResult) {
                    if (err) {
                        return fail(err);
                    }

                    // A bulk "VALUES ?" with an empty list is invalid SQL.
                    if (values.length === 0) {
                        return commit(deleteResult);
                    }

                    var insertSql = 'INSERT INTO user_zone (user_id, zone_num, suburb, postcode) VALUES ?';
                    connection.query(insertSql, [values], function (err, insertResult) {
                        if (err) {
                            return fail(err);
                        }
                        commit(insertResult);
                    });
                });
            });
        });
    });
};

// var geoCluster = function (coordinates, res) {
//     var bias = 1.5; // multiply stdev with this factor, the smaller the more clusters
//     var result = geocluster(coordinates, bias);
//
//     res.send(result);
// };

module.exports = {
    groupJobAddressBySuburb: function (req, res) {
        var runnerId = req.query.userId;
        var numOfClusters = req.query.num;

        getAddressList(runnerId).then(function (rows) {
            var vectors = [];
            rows.forEach(function (val, key) {
                vectors.push([val.lat, val.lng]);
            });

            //geoCluster(vectors, res);

            kmeanCluster(vectors, numOfClusters, function (err, result) {
                if(err) {
                    res.send(err);
                    return;
                }

                insertData(rows, result, runnerId).then(function (rows) {
                    res.send(result);
                }, function (err) {
                    res.send(err);
                });
            });
        }, function (error) {
            res.send(error);
        });
    }
};

 

posted @ 2017-07-12 14:40  夜雨流星℡ℑ  Views(236)  Comments(0)    收藏  举报