Giter Site home page Giter Site logo

wzc570738205 / smartparsepro Goto Github PK

View Code? Open in Web Editor NEW
873.0 11.0 168.0 2.16 MB

🔥地址智能识别Pro(支持省市区街道/电话/邮编/姓名识别)

Home Page: https://wangzc.wang/smartParsePro/

License: Apache License 2.0

JavaScript 100.00%
javascript

smartparsepro's People

Stargazers

 avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar

Watchers

 avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar  avatar

smartparsepro's Issues

address: 字段信息丢失内容

address: 字段信息丢失内容

北京市海淀区西三旗建材城东二里13-2-502,李国强 1388888888

address: "西三旗建材城东二里13"
city: "北京市"
cityCode: "1101"
county: "海淀区"
countyCode: "110108"
name: "李国强"
phone: "1388888888"
province: "北京市"
provinceCode: "11"

13-2-502 内容不全,是不是把’-‘ 当分隔符了?

省市县数据过期了。。有些区县被撤,或者改名字

比如无锡梁溪区
所以对应邮编数据也是有无效的。。

(function() {
    'use strict';
    // Crawl state restored from localStorage; each entry falls back to a
    // default when nothing (or an empty string) has been stored yet.
    function read_saved(key, fallback){
        var raw = localStorage[key];
        if(!raw){
            return fallback;
        }
        return JSON.parse(raw);
    }
    let citydata = read_saved('citydata', []);          // scraped region tree
    const province_url = read_saved('province_url', []); // queue of province page URLs
    const city_url = read_saved('city_url', []);         // queue of city page URLs
    const town_url = read_saved('town_url', []);         // URLs collected by process_town
    let step = read_saved('step', 1);                    // current stage of the index-page workflow
    /**
     * Membership test using loose equality (==).
     *
     * @param {*} v - value to look for.
     * @param {Array|Object} array - collection whose enumerable keys are scanned.
     * @param {string|false} [k=false] - when truthy, compare against the `k`
     *        property of every element instead of the element itself.
     * @returns {boolean} true as soon as one element matches, otherwise false.
     */
    function in_array(v, array, k = false){
        for(var key in array){
            var candidate = k ? array[key][k] : array[key];
            if(candidate == v){
                return true;
            }
        }
        return false;
    }
    /**
     * Locate the first element that loosely equals `v`.
     *
     * @param {*} v - value to look for.
     * @param {Array|Object} array - collection whose enumerable keys are scanned.
     * @param {string|false} [k=false] - when truthy, compare against the `k`
     *        property of every element instead of the element itself.
     * @returns {string|undefined} the matching key as produced by for...in
     *          (a string, even for arrays), or undefined when nothing matches.
     */
    function find_index(v, array, k =false){
        for(var key in array){
            var entry = array[key];
            if((k ? entry[k] : entry) == v){
                return key;
            }
        }
    }
    /**
     * Remove every element that loosely equals `v`, in place.
     *
     * For real arrays the matching elements are spliced out, so the array
     * shrinks and no holes are left behind (holes would be serialised as
     * `null` by JSON.stringify). For plain objects the matching keys are
     * deleted.
     *
     * @param {*} v - value to remove.
     * @param {Array|Object} array - collection to modify.
     * @returns {Array|Object} the same collection that was passed in.
     */
    function delete_in_array(v, array){
        if(Array.isArray(array)){
            // Walk backwards so splicing does not shift the indexes still to be visited.
            for(var i = array.length - 1; i >= 0; i--){
                // `i in array` skips holes of an already-sparse array.
                if(i in array && array[i] == v){
                    array.splice(i, 1);
                }
            }
            return array;
        }
        for(var key in array){
            if(array[key] == v){
                delete array[key];
            }
        }
        return array;
    }
    /**
     * Random integer in the half-open range [ceil(min), floor(max)).
     *
     * @param {number} min - lower bound (inclusive after rounding up).
     * @param {number} max - upper bound (exclusive after rounding down).
     * @returns {number} a pseudo-random integer drawn with Math.random().
     */
    function getRandomInt(min, max) {
        var lower = Math.ceil(min);
        var upper = Math.floor(max);
        var offset = Math.floor(Math.random() * (upper - lower));
        return offset + lower;
    }
    /**
     * Scrape the province list from the index page.
     *
     * Reads every `td a` link of the third table on the page, records each
     * province in `citydata` ({code, name, children: []}) and queues its URL
     * in `province_url`, skipping entries that are already present. Both
     * collections are then persisted to localStorage.
     *
     * The province code is taken from the trailing "/<digits>.html" of the
     * link, e.g. http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2019/22.html
     * yields "22". Links that do not end that way are ignored.
     */
    function process_province(){
        var anchors = document.querySelectorAll('table')[2].querySelectorAll('td a');
        // Array.from gives a plain list of the elements only (for...in over a
        // NodeList also visits keys such as "length" and "item").
        Array.from(anchors).forEach(function(link){
            if(!link.href){
                return;
            }
            // Parse the code from the end of the URL instead of a fixed character
            // offset, so it does not depend on the scheme/host/year prefix length.
            var matched = link.href.match(/\/(\d+)\.html$/);
            if(!matched){
                return;
            }
            var one = {
                code: matched[1],
                name: link.text,
                children: []
            };
            if(!in_array(one.code, citydata, 'code')){
                citydata.push(one);
            }
            if(!in_array(link.href, province_url)){
                province_url.push(link.href);
            }
        });
        localStorage.setItem('province_url', JSON.stringify(province_url));
        localStorage.setItem('citydata', JSON.stringify(citydata));
    }

    /**
     * Scrape a province page (".../<2 digits>.html").
     *
     * Reloads `citydata` from localStorage, finds the entry whose code equals
     * the two-digit code in the current URL, and appends every named link of
     * the page's third table to that entry's `children` as
     * {code, name, children: []}. Each link URL is also queued in `city_url`.
     * Both collections are persisted to localStorage afterwards.
     *
     * Nothing is written when the URL does not carry a two-digit page code or
     * when no matching entry exists in `citydata`; an error is logged instead.
     */
    function process_city(){
        // Read the code from the URL pattern rather than a fixed offset from the end.
        var page = location.href.match(/\/(\d{2})\.html/);
        if(!page){
            console.error('process_city: no province code in ' + location.href);
            return;
        }
        citydata = JSON.parse(localStorage.citydata);
        var city = citydata[find_index(page[1], citydata, 'code')];
        if(!city){
            console.error('process_city: province ' + page[1] + ' is not in citydata');
            return;
        }
        var anchors = document.querySelectorAll('table')[2].querySelectorAll('td a');
        Array.from(anchors).forEach(function(link){
            // Keep only links whose text contains a non-digit character;
            // links whose text is purely numeric are skipped.
            if(!link.text || !/\D/.test(link.text)){
                return;
            }
            // Code = trailing "/<digits>.html" of the link, independent of prefix length.
            var matched = (link.href || '').match(/\/(\d+)\.html$/);
            if(!matched){
                return;
            }
            var one = {
                code: matched[1],
                name: link.text,
                children: []
            };
            if(!in_array(one.code, city.children, 'code')){
                city.children.push(one);
            }
            if(!in_array(link.href, city_url)){
                city_url.push(link.href);
            }
        });
        localStorage.setItem('city_url', JSON.stringify(city_url));
        localStorage.setItem('citydata', JSON.stringify(citydata));
    }
    /**
     * Scrape a city page (".../<2 digits>/<4 digits>.html").
     *
     * Reloads `citydata` from localStorage, walks to the entry addressed by
     * the two codes in the current URL (top-level code, then child code) and
     * appends every named link of the page's third table to that entry's
     * `children` as {code, name}. Each link URL is also collected in
     * `town_url`. Both collections are persisted to localStorage afterwards.
     *
     * Nothing is written when the URL does not match or when the parent
     * entry cannot be found in `citydata`; an error is logged instead.
     */
    function process_town(){
        // Read both codes from the URL pattern rather than fixed offsets from the end.
        var page = location.href.match(/\/(\d{2})\/(\d{4})\.html/);
        if(!page){
            console.error('process_town: no province/city code in ' + location.href);
            return;
        }
        citydata = JSON.parse(localStorage.citydata);
        var province = citydata[find_index(page[1], citydata, 'code')];
        var town = province ? province.children[find_index(page[2], province.children, 'code')] : undefined;
        if(!town){
            console.error('process_town: ' + page[1] + '/' + page[2] + ' is not in citydata');
            return;
        }
        var anchors = document.querySelectorAll('table')[2].querySelectorAll('td a');
        Array.from(anchors).forEach(function(link){
            // Keep only links whose text contains a non-digit character;
            // links whose text is purely numeric are skipped.
            if(!link.text || !/\D/.test(link.text)){
                return;
            }
            // Code = trailing "/<digits>.html" of the link, independent of prefix length.
            var matched = (link.href || '').match(/\/(\d+)\.html$/);
            if(!matched){
                return;
            }
            var one = {
                code: matched[1],
                name: link.text
            };
            if(!in_array(one.code, town.children, 'code')){
                town.children.push(one);
            }
            if(!in_array(link.href, town_url)){
                town_url.push(link.href);
            }
        });
        localStorage.setItem('town_url', JSON.stringify(town_url));
        localStorage.setItem('citydata', JSON.stringify(citydata));
    }

    /**
     * Open the URLs in `arr` one at a time, each in a new tab, waiting a
     * random 1.5-5 s (getRandomInt(1500, 5000)) before every open.
     *
     * URLs are taken from the end of `arr` (the array is consumed). The
     * opened window is stored in the shared `wo` variable and gets
     * `process_message` attached as its "message" listener.
     *
     * @param {string[]} arr - queue of URLs; emptied as the loop progresses.
     * @param {string|false} [save=false] - localStorage key under which the
     *        remaining queue is persisted after every open, or false to skip
     *        persisting.
     */
    function loop(arr, save = false){
        // Nothing queued: do not schedule a timer that would open `undefined`.
        if(arr.length === 0){
            console.log('loop done');
            return;
        }
        setTimeout(function(){
            var v = arr.pop();
            wo = window.open(v, '_blank');
            if(wo){
                wo.addEventListener("message", process_message, false);
            }else{
                // window.open returns null when the popup is blocked; keep going.
                console.warn('loop: could not open ' + v);
            }
            if(save){
                // Persist after every pop (including the last one) so the stored
                // queue always matches what is still left to visit.
                localStorage.setItem(save, JSON.stringify(arr));
            }
            if(arr.length > 0){
                // Forward `save`; without it only the first iteration would persist.
                loop(arr, save);
            }else{
                console.log('loop done');
            }
        }, getRandomInt(1500, 5000));
    }

    /**
     * "message" listener attached to every window opened by loop().
     *
     * When the payload is 'close', closes the window that sent the message
     * (msg.source). The shared `wo` variable only remembers the most recently
     * opened window, so it is used just as a fallback when the event carries
     * no source.
     *
     * @param {MessageEvent} msg - the received message event.
     */
    function process_message(msg){
        if(msg.data != 'close'){
            return;
        }
        var target = msg.source || wo;
        if(target){
            target.close();
        }
    }
    // Entry point: decide what to do from the URL of the page this script runs on.
    //
    // 1) Index page — drives the crawl according to `step`:
    //http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2019/index.html
    if(document.location.href == 'http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2019/index.html'){
        // Shared handle to the most recently opened window; assigned in loop()
        // and read in process_message(). `var` hoists it to the enclosing function.
        var wo;
        // The three checks below are separate `if`s (not `else if`), so a run
        // that starts at step 1 continues through steps 2 and 3 in the same pass.
        if(step == 1){
            // Collect the province list from this page.
            process_province();
            step = 2;
            localStorage.setItem('step', 2);
        }
        if(step == 2){
            // Open every province page; the remaining queue is persisted under 'province_url'.
            loop(province_url, 'province_url');
            step = 3;
            localStorage.setItem('step', 3);
        }
        if(step == 3){
            // NOTE(review): loop() only schedules its work with setTimeout, so when
            // this is reached right after step 2 the province pages have not been
            // opened yet and `city_url` still holds whatever was in memory — confirm
            // that running step 3 in the same pass is intended.
            loop(city_url, 'city_url');
            // for(var c in city_url){
            //     wo = window.open(city_url[c], '_blank');
            // }
        }
    }

    // 2) Province page (".../2019/NN.html"): scrape it, then post 'close' to this
    //    same window — the message process_message() reacts to.
    if(document.location.href.match(/tjyqhdmhcxhfdm\/2019\/[0-9]{2}.html/)){
        process_city();
        window.postMessage('close', "http://www.stats.gov.cn");
    }

    // 3) City page (".../2019/NN/NNNN.html"): scrape it, then post 'close' to this
    //    same window.
    if(document.location.href.match(/tjyqhdmhcxhfdm\/2019\/[0-9]{2}\/[0-9]{4}.html/)){
        process_town();
        window.postMessage('close', "http://www.stats.gov.cn");
    }
})();
// Scrape postcodes from Wikipedia (run on the page below; requires jQuery as `$`).
//https://zh.wikipedia.org/wiki/%E4%B8%AD%E5%8D%8E%E4%BA%BA%E6%B0%91%E5%85%B1%E5%92%8C%E5%9B%BD%E5%A2%83%E5%86%85%E5%9C%B0%E5%8C%BA%E9%82%AE%E6%94%BF%E7%BC%96%E7%A0%81%E5%88%97%E8%A1%A8
// Place names are not unique, so duplicates can occur.
//
// Builds `postcode`, a map from area name to the text of the table cell that
// follows the name cell, and prints it as JSON.
$('s').remove(); // struck-through (deleted) entries
$('td[colspan=8]').remove();
// Select the cells only after the clean-up above; a collection taken earlier
// would still contain the removed td[colspan=8] cells.
var td = $('.wikitable').find('td');
var postcode = {};
for (let i = 0; i < td.length; i++) {
    const raw = td[i].textContent;
    // An area cell has text and contains no ASCII word character (letters/digits/_).
    if (!raw || raw.match(/\w/)) {
        continue;
    }
    // Strip every whitespace character, not just the first space/newline.
    const area = raw.replace(/\s+/g, '');
    if (area.length === 0) {
        continue;
    }
    // The code is read from the next cell; skip a trailing area cell that has none.
    const next = td[i + 1];
    if (!next) {
        continue;
    }
    const code = next.textContent.trim();
    if (!area.match(/(市|县|区)$/)) {
        // Name without an administrative suffix: register it under every
        // likely spelling so later look-ups succeed whichever suffix is used.
        postcode[area] = code;
        postcode[area + '市'] = code;
        postcode[area + '县'] = code;
        postcode[area + '区'] = code;
        postcode[area.replace('县', '市')] = code;
        postcode[area.replace('县', '')] = code;
    } else if (postcode[area]) {
        // Duplicate name: keep the first code and report the clash.
        console.error(area);
    } else {
        postcode[area] = code;
    }
}
console.log(JSON.stringify(postcode));

这是我写的油猴脚本,自己用可以。。
一个窗口抓链接,然后循环弹新窗去抓新链接
邮编除了维基里的,其他只能列出所有为空的地名,再去百度。。

识别结果有误

葛亚,13515842218,浙江省 宁波市 镇海区 蛟川街道临江小区30幢306室,315200

{
    address: ""
    city: "宁波市"
    cityCode: "3302"
    county: "镇海区"
    countyCode: "330211"
    name: "蛟川街道临江小区30幢306室"
    phone: "13515842218"
    province: "浙江省"
    provinceCode: "33"
    zipCode: "315200"
}

无法通过npm 安装使用

  • 通过npm 下载的 "version": "2.0.2", 入口index.js文件没有export 任何方法
  • 看到您使用window.smart = smart; 暴露全局方法,但我这边使用服务端渲染,我的js运行时没有window对象。所以无法在服务端使用

直辖市的街道识别不出来

例如 天津市河北区三岔河口永乐桥上 ,上海市浦东新区陆家嘴世纪大道1号,北京市海淀区中关村街道金隅嘉华大厦

识别关联问题

输入‘广东省深圳市南区 ’,识别出的county:潮南区;countyCode:440514
vue引入也有类似问题 :
输入‘广东省深圳市南山区 ’,识别出city: "深圳市"
cityCode: "4403"
county: "南山区"
countyCode: "230404" //与广东深圳不关联
province: "广东省"
provinceCode: "44"

识别有误

山西省运城市绛县政府小区(林业局北) 李小 电话13933333333 这个地址识别会为 新绛县
本地处理方法将pcas-code.json,绛县放新绛县前面

识别问题

这个地址:黑龙江省 齐齐哈尔市 富裕县 富裕镇铁西区兴达街鑫春雷超市
识别出来后是:
address:镇铁西区兴达街鑫春雷超市

地址识别有误

待解析字符串:

张好[2568]
18411111111
广东省广州市南沙区东涌镇一建集团[2568]

实际解析结果

姓名:张好

电话:18411111111

邮编:

省:广东省

市:广州市

区:南沙区

街道:东涌镇

详细地址:2568东涌镇一建集团

预期解析结果:

姓名:张好[2568]

电话:18411111111

邮编:

省:广东省

市:广州市

区:南沙区

街道:东涌镇

详细地址:东涌镇一建集团[2568]

期望待解析字符串2568放到详细地址和姓名末尾

地址识别有问题

待解析字符串:

深圳市罗湖区金湖路,张xx,15012345678

实际解析结果

姓名:金湖路

电话:15012345678

邮编:

省:广东省

市:深圳市

区:罗湖区

街道:undefined

详细地址:张xx

期望待解析字符串姓名跟详细地址置换过来

姓名:张xx

电话:15012345678

邮编:

省:广东省

市:深圳市

区:罗湖区

街道:undefined

详细地址:金湖路

地址识别异常

福建省厦门市集美区后溪镇万科广场2号楼2601室 吴兴基 180******393

无法解析出地址,只能解析出姓名、号码、邮编、身份证

项目中引入pcasCode.js、zipCode.js、address_parse.js后在页面中使用smart("陕西省西安市雁塔区丈八沟街道高新四路高新大都荟710061 刘国良 13593464918 211381198512096810"),仅返回{"zipCode":"710061","name":"刘国良","phone":"13593464918","idCard":"211381198512096810"},没有返回地址解析结果。

js识别中山地址无法识别到区

地址:广东省中山市石岐区街道办事处豪程北路666号

期望:
省:广东省
市:中山市
区:石岐区街道办事处
详细地址:豪程北路666号

结果:
省:广东省
市:中山市
区:
详细地址:石岐区街道办事处豪程北路666号

通过你们的接口却可以识别正确

地址解析有问题,右翼中旗解析成右翼前旗,能否说明下如下代码作用是啥?

// Pairwise pass over matchStreet: every entry (res) is compared with every
// entry (el), itself included, and entries are grouped by `city` only.
matchStreet.forEach((res) => {
// Reset this entry's counter at the start of its own outer turn; increments
// applied to it by earlier outer turns are discarded here.
res.index = 0;
matchStreet.forEach((el) => {
// Loose equality on `city` is the only grouping criterion.
if (res.city == el.city) {
// Count one hit on `el` for the current `res`. Combined with the reset above,
// an entry ends with 1 (itself) plus the number of same-city entries that
// come after it in the array.
el.index++;
// Copy the strictly longer matchValue onto `el`, so same-city entries converge
// on the longest matchValue seen.
// NOTE(review): because only `city` is compared, entries that differ below the
// city level still share one matchValue afterwards — confirm this is intended.
if (res.matchValue.length > el.matchValue.length) {
el.matchValue = res.matchValue;
}
}
});
});

这段代码是在干嘛?
解析地址(孙某某 15023332333 内蒙古自治区乌兰察布市察哈尔右翼中旗科布尔镇西域商城)时,会将 右翼中旗 解析成右翼前旗,就是这段代码导致的。这段代码的核心原理、思想是什么?

Recommend Projects

  • React photo React

    A declarative, efficient, and flexible JavaScript library for building user interfaces.

  • Vue.js photo Vue.js

    🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.

  • Typescript photo Typescript

    TypeScript is a superset of JavaScript that compiles to clean JavaScript output.

  • TensorFlow photo TensorFlow

    An Open Source Machine Learning Framework for Everyone

  • Django photo Django

    The Web framework for perfectionists with deadlines.

  • D3 photo D3

    Bring data to life with SVG, Canvas and HTML. 📊📈🎉

Recommend Topics

  • javascript

    JavaScript (JS) is a lightweight interpreted programming language with first-class functions.

  • web

    Something interesting about the web: a new door to the world.

  • server

    A server is a program made to process requests and deliver data to clients.

  • Machine learning

    Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.

  • Game

    Something interesting about games that makes everyone happy.

Recommend Org

  • Facebook photo Facebook

    We are working to build community through open source technology. NB: members must have two-factor auth.

  • Microsoft photo Microsoft

    Open source projects and samples from Microsoft.

  • Google photo Google

    Google ❤️ Open Source for everyone.

  • D3 photo D3

    Data-Driven Documents codes.