Answer the question
In order to leave comments, you need to log in
Where can I find a suitable address parser?
There is a huge list of addresses as strings in the database.
Each string needs to be parsed.
The key task is to break every address down into its components (to put everything "on the shelves").
I suspect there are already ready-made solutions or libraries for this.
Example list:
[
{
"reg_ex": "бульвар\\s+([А-яёъь 0-9\\.\\-]+)(,|$)",
"type": "бульвар",
"group": 1
},
{
"reg_ex": ".*?,([А-яёъь 0-9\\.\\-]+)бульвар",
"type": "бульвар",
"group": 1
},
{
"reg_ex": ".*?,([А-яёъь 0-9\\.\\-]+)пер($|,)",
"type": "переулок",
"group": 1
},
{
"reg_ex": "улица\\s+([А-яёъь 0-9\\.\\-]+)(,|$)",
"type": "улица",
"group": 1
},
{
"reg_ex": "ул\\.\\s+([А-яёъь 0-9\\.\\-]+)(,|$)",
"type": "улица",
"group": 1
},
{
"reg_ex": "(ул).\\s+([А-яёъь 0-9\\.\\-]+)($|,|д\\.)",
"type": "улица",
"group": 2
},
{
"reg_ex": ".*?,([А-яёъь 0-9\\.\\-]+)улица($|,)",
"type": "улица",
"group": 1
},
{
"reg_ex": ".*?,([А-яёъь 0-9\\.\\-]+)ул\\.($|,)",
"type": "улица",
"group": 1
},
{
"reg_ex": "ул\\.([А-яёъь 0-9\\.\\-]+)",
"type": "улица",
"group": 1
},
{
"reg_ex": ",\\s+улица([A-zА-яёъь0-9\\.\\- ]+)",
"type": "улица",
"group": 1
},
{
"reg_ex": "переулок\\s+(.*?)($|,)",
"type": "переулок",
"group": 1
},
{
"reg_ex": ".*?,([A-zА-яёъь 0-9\\.\\-]+)переулок",
"type": "переулок",
"group": 1
},
{
"reg_ex": "площадь\\s+([A-zА-яёъь 0-9\\.\\-]+)($|,)",
"type": "площадь",
"group": 1
},
{
"reg_ex": ".*?,([А-яёъь 0-9\\.\\-]+)площадь",
"type": "площадь",
"group": 1
},
{
"reg_ex": "проезд\\s+(.*?)($|,)",
"type": "проезд",
"group": 1
},
{
"reg_ex": ",([А-яёъь 0-9\\-]+)проезд",
"type": "проезд",
"group": 1
},
{
"reg_ex": ".*?,([A-zА-яёъь 0-9 \\-]+)пр.*?д",
"type": "проезд",
"group": 1
},
{
"reg_ex": ".*?,([A-zА-яёъь0-9\\.\\-]+)проезд",
"type": "проезд",
"group": 1
},
{
"reg_ex": ".*?,([A-zА-яёъь 0-9 \\-]+)переезд",
"type": "переезд",
"group": 1
},
{
"reg_ex": "шоссе\\s+(.*?)($|,)",
"type": "шоссе",
"group": 1
},
{
"reg_ex": ",\\s+([А-яA-z0-9 \\.\\-]+)\\s+ш.",
"type": "шоссе",
"group": 1
},
{
"reg_ex": ".*?,([A-zА-яёъь 0-9\\.\\-]+)шоссе($|,)",
"type": "шоссе",
"group": 1
},
{
"reg_ex": "проспект\\s+(.*?)($|,)",
"type": "проспект",
"group": 1
},
{
"reg_ex": ",\\s+([A-zА-я.0-9\\.\\- ]+)\\s+просп.",
"type": "проспект",
"group": 1
},
{
"reg_ex": ",\\s+пр.*?т\\s+([A-zА-я.0-9\\.\\- ]+)",
"type": "проспект",
"group": 1
},
{
"reg_ex": ".*?,([А-яёъь 0-9\\.\\- ]+)проспект($|,)",
"type": "проспект",
"group": 1
},
{
"reg_ex": ",\\s+просп.\\s+([A-zА-я.0-9\\.\\- ]+)",
"type": "проспект",
"group": 1
},
{
"reg_ex": ",\\s+([A-zА-яёъь0-9 \\-\\.]+)\\s+пр\\.",
"type": "проспект",
"group": 1
},
{
"reg_ex": "дорога\\s+(.*?)($|,)",
"type": "дорога",
"group": 1
},
{
"reg_ex": ",\\s+([A-zА-я.0-9\\.\\- ]+)\\s+дорога",
"type": "дорога",
"group": 1
},
{
"reg_ex": "набережная\\s+(.*?)($|,)",
"type": "набережная",
"group": 1
},
{
"reg_ex": ".*?,([А-яёъь 0-9\\.\\- ]+)набережная($|,)",
"type": "набережная",
"group": 1
},
{
"reg_ex": ".*?,([А-яёъь 0-9\\.\\- ]+)магистраль($|,)",
"type": "магистраль",
"group": 1
},
{
"reg_ex": "квартал\\s+([А-яёъь 0-9\\.\\- ]+)($|,)",
"type": "квартал",
"group": 1
},
{
"reg_ex": ".*?аллея([А-яёъь 0-9\\.\\- ]+)",
"type": "аллея",
"group": 1
},
{
"reg_ex": ",\\s+аллея([А-я0-9\\.\\- ]+)",
"type": "аллея",
"group": 1
},
{
"reg_ex": ",\\s+([A-zА-я.0-9 ]+)\\s+аллея",
"type": "аллея",
"group": 1
},
{
"reg_ex": ".*?,([А-яёъь 0-9 \\-]+)тупик",
"type": "тупик",
"group": 1
},
{
"reg_ex": ".*?,([А-яёъь 0-9 \\-]+)парк",
"type": "парк",
"group": 1
},
{
"reg_ex": ",\\s+([A-zА-я0-9 ]+)\\s+просек",
"type": "просек",
"group": 1
},
{
"reg_ex": ",\\s+([A-zА-я.0-9 \\-\\.]+)\\s+тракт",
"type": "тракт",
"group": 1
},
{
"reg_ex": ",\\s+([A-zА-яёъь0-9 \\-\\.]+)\\s+сквер",
"type": "сквер",
"group": 1
},
{
"reg_ex": ",\\s+([A-zА-яёъь0-9 \\-\\.]+)\\s+пер\\.",
"type": "пер",
"group": 1
},
{
"reg_ex": ",\\s+(.*(линия)[А-я ]+)",
"type": "линия",
"group": 1
},
{
"reg_ex": ",\\s+([A-zА-яёъь0-9\\-\\. ]+)\\s+линия",
"type": "линия",
"group": 1
},
{
"reg_ex": ",\\s+пр\\s+([A-zА-яёъь0-9\\-\\. ]+)",
"type": "проезд",
"group": 1
},
{
"reg_ex": ",\\s+посёлок\\s+([A-zА-яёъь0-9\\-\\. ]+)",
"type": "посёлок",
"group": 1
},
{
"reg_ex": ",\\s+пл\\.\\s+([A-zА-яёъь0-9\\-\\. ]+)",
"type": "площадь",
"group": 1
},
{
"reg_ex": ",\\s+([A-zА-яёъь0-9\\-\\. ]+)спуск",
"type": "спуск",
"group": 1
},
{
"reg_ex": ",\\s+сквер\\s+([A-zА-яёъь0-9\\-\\. ]+)",
"type": "сквер",
"group": 1
},
{
"reg_ex": ",\\s+станция\\s+([A-zА-яёъь0-9\\-\\. ]+)",
"type": "станция",
"group": 1
},
{
"reg_ex": ",\\s+([A-zА-яёъь0-9\\-\\. ]+)(К|к)вартал",
"type": "квартал",
"group": 1
}
]
Answer the question
In order to leave comments, you need to log in
In your example, you have very clean neat addresses.
It is not very clear what you meant by "put everything on the shelves" and what kind of shelves you need.
The general approach for processing such arrays of loosely structured data is as follows.
Search GitHub for the term "FIAS", for example:
https://github.com/zabralex85/fias.parser
On the plus side, you get an absolutely exact address; on the minus side, even the optimized database takes 10 GB.
That said, I shrank the data to 100 kilobytes, though I only needed regions and cities.
Didn't find what you were looking for?
Ask your question
731 491 924 answers to any question