Abstract
This paper describes an approach for performing recognition and resolution of place names mentioned over the descriptive metadata records of typical digital libraries. Our approach exploits evidence provided by the existing structured attributes within the metadata records to support the place name recognition and resolution, in order to achieve better results than by just using lexical evidence from the textual values of these attributes. In metadata records, lexical evidence is very often insufficient for this task, since short sentences and simple expressions are predominant. Our implementation uses a dictionary based technique for recognition of place names (with names provided by Geonames), and machine learning for reasoning on the evidences and choosing a possible resolution candidate. The evaluation of our approach was performed in data sets with a metadata schema
rich in Dublin Core elements. Two evaluation methods were used. First, we used cross-validation, which showed that our solution is able to achieve a very high precision of 0,99 at 0,55 recall, or a recall of 0,79 at 0,86 precision. Second, we used a comparative evaluation with an existing commercial service, where our solution performed better on any confidence level (p<0,001).
Links
BibTeX (Download)
@inproceedings{freire2011metadata, title = {A metadata geoparsing system for place name recognition and resolution in metadata records}, author = { Nuno Freire and José Borbinha and Pável Calado and Bruno Martins}, url = {http://dl.acm.org/citation.cfm?id=1998140}, year = {2011}, date = {2011-01-01}, booktitle = {Proceedings of the 11th annual international ACM/IEEE joint conference on Digital libraries}, pages = {339--348}, organization = {ACM}, abstract = {This paper describes an approach for performing recognition and resolution of place names mentioned over the descriptive metadata records of typical digital libraries. Our approach exploits evidence provided by the existing structured attributes within the metadata records to support the place name recognition and resolution, in order to achieve better results than by just using lexical evidence from the textual values of these attributes. In metadata records, lexical evidence is very often insufficient for this task, since short sentences and simple expressions are predominant. Our implementation uses a dictionary based technique for recognition of place names (with names provided by Geonames), and machine learning for reasoning on the evidences and choosing a possible resolution candidate. The evaluation of our approach was performed in data sets with a metadata schema rich in Dublin Core elements. Two evaluation methods were used. First, we used cross-validation, which showed that our solution is able to achieve a very high precision of 0,99 at 0,55 recall, or a recall of 0,79 at 0,86 precision. Second, we used a comparative evaluation with an existing commercial service, where our solution performed better on any confidence level (p<0,001). }, keywords = {Entity Recognition, Entity Resolution, Geographic Information, Information extraction, Metadata}, pubstate = {published}, tppubtype = {inproceedings} }