wzc570738205 / smartparsepro Goto Github PK
View Code? Open in Web Editor NEW🔥地址智能识别Pro(支持省市区街道/电话/邮编/姓名识别)
Home Page: https://wangzc.wang/smartParsePro/
License: Apache License 2.0
🔥地址智能识别Pro(支持省市区街道/电话/邮编/姓名识别)
Home Page: https://wangzc.wang/smartParsePro/
License: Apache License 2.0
address: 字段信息丢失内容
北京市海淀区西三旗建材城东二里13-2-502,李国强 1388888888
address: "西三旗建材城东二里13"
city: "北京市"
cityCode: "1101"
county: "海淀区"
countyCode: "110108"
name: "李国强"
phone: "1388888888"
province: "北京市"
provinceCode: "11"
13-2-502 内容不全,是不是把“-”当分隔符了?
比如无锡梁溪区
所以对应邮编数据也是有无效的。。
(function() {
'use strict';
// Crawl state restored from localStorage, so progress survives page reloads
// and is shared with the pages this script opens on the same origin.
// Each value is stored as JSON; a missing or empty entry falls back to its default.
// citydata: tree of { code, name, children } entries built up by the process_* functions.
let citydata = JSON.parse(localStorage.citydata || '[]');
// Queues of page URLs collected at each level, consumed by loop().
const province_url = JSON.parse(localStorage.province_url || '[]');
const city_url = JSON.parse(localStorage.city_url || '[]');
const town_url = JSON.parse(localStorage.town_url || '[]');
// Current phase of the crawl (1, 2 or 3) as used by the index-page dispatch.
let step = JSON.parse(localStorage.step || '1');
/**
 * Tell whether `v` occurs in `array`, compared with loose equality (==).
 *
 * @param {*} v - value to look for.
 * @param {Array|Object} array - collection whose enumerable keys are scanned.
 * @param {string|false} [k=false] - when truthy, compare `v` against the
 *   property `k` of each entry instead of the entry itself.
 * @returns {boolean} true as soon as a match is found, false otherwise.
 */
function in_array(v, array, k = false) {
  for (const key in array) {
    const candidate = k ? array[key][k] : array[key];
    if (candidate == v) {
      return true;
    }
  }
  return false;
}
/**
 * Find the key of the first entry of `array` that loosely equals `v`.
 *
 * @param {*} v - value to look for.
 * @param {Array|Object} array - collection whose enumerable keys are scanned.
 * @param {string|false} [k=false] - when truthy, compare `v` against the
 *   property `k` of each entry instead of the entry itself.
 * @returns {string|undefined} the matching key exactly as produced by
 *   `for...in` (a string such as "0", not a number), or undefined when
 *   nothing matches.
 */
function find_index(v, array, k = false) {
  for (const key in array) {
    const candidate = k ? array[key][k] : array[key];
    if (candidate == v) {
      return key;
    }
  }
}
/**
 * Remove every element of `array` that loosely equals `v`, in place.
 *
 * Elements are removed with splice so the array stays dense: its length
 * shrinks and no holes are left behind (the `delete` operator would leave
 * empty slots that serialise as null in JSON.stringify).
 *
 * @param {*} v - value to remove (compared with ==).
 * @param {Array} array - array to modify.
 * @returns {Array} the same array instance, after removal.
 */
function delete_in_array(v, array) {
  // Walk backwards so removing an element does not shift the ones still to be visited.
  for (let idx = array.length - 1; idx >= 0; idx -= 1) {
    if (array[idx] == v) {
      array.splice(idx, 1);
    }
  }
  return array;
}
/**
 * Pick a pseudo-random integer in the half-open range [ceil(min), floor(max)).
 *
 * @param {number} min - lower bound, rounded up; inclusive.
 * @param {number} max - upper bound, rounded down; exclusive.
 * @returns {number} an integer >= ceil(min) and < floor(max).
 */
function getRandomInt(min, max) {
  const lower = Math.ceil(min);
  const upper = Math.floor(max);
  const offset = Math.floor(Math.random() * (upper - lower));
  return offset + lower;
}
/**
 * Collect the top-level entries from the index page.
 *
 * Reads every link inside the cells of the third <table> of the document.
 * For each link with a non-empty href, the code is whatever follows the first
 * 54 characters of the href, minus ".html" (this assumes the fixed URL prefix
 * shown in the example below). New codes are appended to `citydata` and new
 * hrefs to `province_url`; both are then written back to localStorage.
 */
function process_province() {
  const links = document.querySelectorAll('table')[2].querySelectorAll('td a');
  for (const link of links) {
    const href = link.href;
    if (!href) {
      continue;
    }
    //http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2019/22.html
    const code = href.substr(54).replace('.html', '');
    if (code == '') {
      continue;
    }
    if (!in_array(code, citydata, 'code')) {
      citydata.push({ code: code, name: link.text, children: [] });
    }
    if (!in_array(href, province_url)) {
      province_url.push(href);
    }
  }
  localStorage.setItem('province_url', JSON.stringify(province_url));
  localStorage.setItem('citydata', JSON.stringify(citydata));
}
/**
 * Collect the second-level entries from a two-digit page (".../2019/NN.html").
 *
 * The two characters before ".html" in the current URL identify the parent
 * entry in `citydata`. `citydata` is re-read from localStorage first, so
 * changes stored by other pages are picked up. Only links whose text contains
 * a non-digit character are used (the regex skips purely numeric link
 * texts). New codes are appended to the parent's `children`, new hrefs to
 * `city_url`, and both structures are written back to localStorage.
 */
function process_city() {
  const parentCode = location.href.substr(-7, 2);
  citydata = JSON.parse(localStorage.citydata);
  const parentIdx = find_index(parentCode, citydata, 'code');
  const parent = citydata[parentIdx];
  const links = document.querySelectorAll('table')[2].querySelectorAll('td a');
  for (const link of links) {
    const label = link.text;
    if (!label || !label.match(/(\D|\W)/)) {
      continue;
    }
    // Code is whatever follows the first 57 characters of the href, minus ".html".
    const code = link.href.substr(57).replace('.html', '');
    if (!in_array(code, parent.children, 'code')) {
      parent.children.push({ code: code, name: label, children: [] });
    }
    if (!in_array(link.href, city_url)) {
      city_url.push(link.href);
    }
  }
  citydata[parentIdx] = parent;
  localStorage.setItem('city_url', JSON.stringify(city_url));
  localStorage.setItem('citydata', JSON.stringify(citydata));
}
/**
 * Collect the third-level entries from a page of the form ".../2019/NN/NNNN.html".
 *
 * The current URL yields two codes: the two characters starting 12 from the
 * end (first-level code) and the four characters starting 9 from the end
 * (second-level code). These locate the parent entry inside `citydata`, which
 * is re-read from localStorage first. Links whose text contains a non-digit
 * character are added to the parent's `children` as { code, name } (no
 * `children` array at this level) and their hrefs are added to `town_url`.
 * Both structures are then written back to localStorage.
 */
function process_town() {
  const pageUrl = location.href;
  const topCode = pageUrl.substr(-12, 2);
  const parentCode = pageUrl.substr(-9, 4);
  citydata = JSON.parse(localStorage.citydata);
  const topIdx = find_index(topCode, citydata, 'code');
  const parentIdx = find_index(parentCode, citydata[topIdx].children, 'code');
  const parent = citydata[topIdx].children[parentIdx];
  const links = document.querySelectorAll('table')[2].querySelectorAll('td a');
  for (const link of links) {
    const label = link.text;
    if (!label || !label.match(/(\D|\W)/)) {
      continue;
    }
    // Code is whatever follows the first 60 characters of the href, minus ".html".
    const code = link.href.substr(60).replace('.html', '');
    if (!in_array(code, parent.children, 'code')) {
      parent.children.push({ code: code, name: label });
    }
    if (!in_array(link.href, town_url)) {
      town_url.push(link.href);
    }
  }
  citydata[topIdx].children[parentIdx] = parent;
  localStorage.setItem('town_url', JSON.stringify(town_url));
  localStorage.setItem('citydata', JSON.stringify(citydata));
}
/**
 * Open the URLs in `arr` one at a time, each after a random 1.5-5 s delay.
 *
 * URLs are taken from the end of `arr` (the array is consumed in place).
 * Each opened window is stored in the shared `wo` variable and gets
 * `process_message` registered as its "message" listener.
 *
 * Fixes over the previous version:
 *  - `save` is forwarded on the recursive call, so the remaining queue is
 *    persisted after every URL rather than only after the first one;
 *  - the queue is persisted after the last URL too, so a finished queue is
 *    stored as empty and is not replayed on the next load;
 *  - an empty `arr` finishes immediately instead of opening `undefined`;
 *  - a null result from window.open (e.g. popup blocked) no longer throws.
 *
 * @param {string[]} arr - queue of URLs; mutated (popped) as it is processed.
 * @param {string|false} [save=false] - localStorage key under which the
 *   remaining queue is stored as JSON after each pop; false disables saving.
 */
function loop(arr, save = false) {
  if (arr.length === 0) {
    console.log('loop done');
    return;
  }
  setTimeout(function () {
    var v = arr.pop();
    wo = window.open(v, '_blank');
    if (wo) {
      wo.addEventListener("message", process_message, false);
    }
    if (save) {
      localStorage.setItem(save, JSON.stringify(arr));
    }
    if (arr.length > 0) {
      loop(arr, save);
    } else {
      console.log('loop done');
    }
  }, getRandomInt(1500, 5000));
}
/**
 * "message" listener: close a crawled window once it reports 'close'.
 *
 * The window that sent the message (`msg.source`) is closed when available.
 * The shared `wo` variable only holds the most recently opened window, which
 * may already be a different one when a slow page reports back; it is kept as
 * a fallback for events that carry no usable source.
 *
 * @param {MessageEvent|{data: *, source: ?Window}} msg - the received message event.
 */
function process_message(msg) {
  if (msg.data != 'close') {
    return;
  }
  var sender = msg.source;
  var target = (sender && typeof sender.close === 'function') ? sender : wo;
  if (target) {
    target.close();
  }
}
//http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2019/index.html
// Dispatch on the current page URL. The index page drives the crawl; the
// pages it opens extract their own data and then post 'close'.
if(document.location.href == 'http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2019/index.html'){
// Most recently opened window: assigned in loop(), closed in process_message().
// `var` is function-scoped, so this is visible throughout the enclosing function.
var wo;
// These are independent `if`s, not an else-chain: a run that starts at
// step 1 falls through steps 2 and 3 during the same page load.
if(step == 1){
// Step 1: read the first-level entries and their URLs from this page.
process_province();
step = 2;
localStorage.setItem('step', 2);
}
if(step == 2){
// Step 2: open each collected first-level page; process_city() runs there.
loop(province_url, 'province_url');
step = 3;
localStorage.setItem('step', 3);
}
if(step == 3){
// Step 3: open each collected second-level page; process_town() runs there.
// NOTE(review): city_url here is the array loaded when this script started;
// entries stored by the opened pages are presumably only seen after a reload — confirm.
loop(city_url, 'city_url');
// for(var c in city_url){
// wo = window.open(city_url[c], '_blank');
// }
}
}
// First-level page (".../2019/NN.html"): collect its entries, then post 'close'.
// NOTE(review): the "." before "html" is unescaped, so it matches any character.
if(document.location.href.match(/tjyqhdmhcxhfdm\/2019\/[0-9]{2}.html/)){
process_city();
// Posted to this page's own window; loop() registers process_message as a
// "message" listener on the window object it opens.
window.postMessage('close', "http://www.stats.gov.cn");
}
// Second-level page (".../2019/NN/NNNN.html"): collect its entries, then post 'close'.
if(document.location.href.match(/tjyqhdmhcxhfdm\/2019\/[0-9]{2}\/[0-9]{4}.html/)){
process_town();
window.postMessage('close', "http://www.stats.gov.cn");
}
})();
// Scrape postal codes from Wikipedia (run in the browser console; requires jQuery as `$`).
//https://zh.wikipedia.org/wiki/%E4%B8%AD%E5%8D%8E%E4%BA%BA%E6%B0%91%E5%85%B1%E5%92%8C%E5%9B%BD%E5%A2%83%E5%86%85%E5%9C%B0%E5%8C%BA%E9%82%AE%E6%94%BF%E7%BC%96%E7%A0%81%E5%88%97%E8%A1%A8
// Place names can be duplicated; duplicates of full names are reported with console.error.
//
// Result: `postcode` maps a place name to the text of the table cell that
// follows it, and is printed as JSON at the end.
//
// Fixes over the previous version:
//  - unwanted nodes are removed BEFORE the cells are collected, so removed
//    cells are no longer part of the processed list;
//  - a name cell with no following cell is skipped (the old `if(!td[index]);`
//    was a no-op and the next line threw a TypeError);
//  - all whitespace is stripped from the name, so a trailing newline neither
//    ends up in the key nor stops the suffix test from matching;
//  - cells are walked by numeric index instead of `for...in` over a jQuery object.
$('s').remove(); // struck-through (deleted) entries
$('td[colspan=8]').remove();
var td = $('.wikitable').find('td');
var postcode = {};
for (var i = 0; i < td.length; i++) {
  var cell = td[i];
  // A name cell has text and contains no ASCII word character (\w);
  // cells holding digits or Latin letters are not names.
  if (!cell || !cell.textContent || cell.textContent.match(/\w/)) continue;
  var area = cell.textContent.replace(/\s+/g, '');
  if (area.length == 0) continue;
  // The code is taken from the cell immediately after the name.
  var next = td[i + 1];
  if (!next || !next.textContent) continue;
  var code = next.textContent.trim();
  var temp = area;
  if (!area.match(/(市|县|区)$/)) {
    // Short name without an administrative suffix: register the bare name
    // plus its common suffixed and suffix-swapped variants.
    postcode[area] = code;
    area = temp + '市';
    postcode[area] = code;
    area = temp + '县';
    postcode[area] = code;
    area = temp + '区';
    postcode[area] = code;
    area = temp.replace('县', '市');
    postcode[area] = code;
    area = temp.replace('县', '');
    postcode[area] = code;
  } else {
    if (postcode[area]) {
      // Full name already seen: keep the first code and report the clash.
      console.error(area);
    } else {
      postcode[area] = code;
    }
  }
}
console.log(JSON.stringify(postcode));
这是我写的油猴脚本,自己用可以。。
一个窗口抓链接,然后循环弹新窗去抓新链接
邮编除了维基里的,其他只能列出所有为空的地名,再去百度。。
广东省汕尾市城区香洲街道奎山乐园运营中心(东方花园对面)
无法识别台港澳收件人信息
葛亚,13515842218,浙江省 宁波市 镇海区 蛟川街道临江小区30幢306室,315200
{
address: ""
city: "宁波市"
cityCode: "3302"
county: "镇海区"
countyCode: "330211"
name: "蛟川街道临江小区30幢306室"
phone: "13515842218"
province: "浙江省"
provinceCode: "33"
zipCode: "315200"
}
例如 天津市河北区三岔河口永乐桥上 ,上海市浦东新区陆家嘴世纪大道1号,北京市海淀区中关村街道金隅嘉华大厦
输入‘广东省深圳市南区 ’,识别出的county:潮南区;countyCode:440514
vue引入也有类似问题 :
输入‘广东省深圳市南山区 ’,识别出city: "深圳市"
cityCode: "4403"
county: "南山区"
countyCode: "230404" //与广东深圳不关联
province: "广东省"
provinceCode: "44"
山西省运城市绛县政府小区(林业局北) 李小 电话13933333333 这个地址识别会为 新绛县
本地处理方法将pcas-code.json,绛县放新绛县前面
这个地址:黑龙江省 齐齐哈尔市 富裕县 富裕镇铁西区兴达街鑫春雷超市
识别出来后是:
address:镇铁西区兴达街鑫春雷超市
待解析字符串:
张好[2568]
18411111111
广东省广州市南沙区东涌镇一建集团[2568]
实际解析结果
姓名:张好
电话:18411111111
邮编:
省:广东省
市:广州市
区:南沙区
街道:东涌镇
详细地址:2568东涌镇一建集团
预期解析结果:
姓名:张好[2568]
电话:18411111111
邮编:
省:广东省
市:广州市
区:南沙区
街道:东涌镇
详细地址:东涌镇一建集团[2568]
期望待解析字符串2568放到详细地址和姓名末尾
待解析字符串:
深圳市罗湖区金湖路,张xx,15012345678
实际解析结果
姓名:金湖路
电话:15012345678
邮编:
省:广东省
市:深圳市
区:罗湖区
街道:undefined
详细地址:张xx
期望待解析字符串姓名跟详细地址置换过来
姓名:张xx
电话:15012345678
邮编:
省:广东省
市:深圳市
区:罗湖区
街道:undefined
详细地址:金湖路
有没有golang 版本,golang 版本的话性能要好的多。
福建省厦门市集美区后溪镇万科广场2号楼2601室 吴兴基 180******393
例如不支持
“湖北黄石市牧羊湖水机路华瑞南岸星城一栋二单元2502。刘红1898611114”
“
收货人: 杨艳
手机号码: 13682222221
所在地区: 广东省深圳市龙岗区龙岗街道
详细地址: 格水村三巷十号楼
”
pcasCode.js在小程序里面占用空间过大,解析模块总体占约2m,小程序每个包最大2m,无法上传发布
项目中引入pcasCode.js、zipCode.js、address_parse.js后在页面中使用smart(“陕西省西安市雁塔区丈八沟街道高新四路高新大都荟710061 刘国良 13593464918 211381198512096810”),,仅返回{"zipCode":"710061","name":"刘国良","phone":"13593464918","idCard":"211381198512096810"},没有返回地址解析结果。
地址:广东省中山市石岐区街道办事处豪程北路666号
期望:
省:广东省
市:中山市
区:石岐区街道办事处
详细地址:豪程北路666号
结果:
省:广东省
市:中山市
区:
详细地址:石岐区街道办事处豪程北路666号
通过你们的接口却可以识别正确
smartParsePro/js/address_parse.js
Lines 369 to 379 in f21de3b
这段代码是在干嘛?
解析地址(孙某某 15023332333 内蒙古自治区乌兰察布市察哈尔右翼中旗科布尔镇西域商城)时,会将“右翼中旗”解析成“右翼前旗”,就是这段代码导致的。这段代码的核心原理、思路是什么?
马云,陕西省西安市雁塔区丈八沟街道高新四路南江国际 13593464918
广东省珠海市香洲区盘山路28号幸福茶庄,陈景勇,13593464918
这个都识别不出来,看示例是能识别出来的
A declarative, efficient, and flexible JavaScript library for building user interfaces.
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
An Open Source Machine Learning Framework for Everyone
The Web framework for perfectionists with deadlines.
A PHP framework for web artisans
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
Something interesting about the web. A new door to the world.
A server is a program made to process requests and deliver data to clients.
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
Some thing interesting about visualization, use data art
Some thing interesting about game, make everyone happy.
We are working to build community through open source technology. NB: members must have two-factor auth.
Open source projects and samples from Microsoft.
Google ❤️ Open Source for everyone.
Alibaba Open Source for everyone
Data-Driven Documents codes.
China tencent open source team.