Abstract
This paper presents a machine learning method for disambiguating place references in text. Solving this task can have important applications in
the digital humanities and computational social sciences, by supporting the geospatial analysis of large document collections. We combine multiple
features that capture the similarity between candidate disambiguations, the place references, and the context where the place references occur, in order to rank and
choose from a set of candidate disambiguations, obtained from a knowledge base containing geospatial coordinates and textual descriptions for different places from all around the world. The proposed method was evaluated through English corpora used in previous work in this area, and also with a subset of the English Wikipedia. Experimental results demonstrate that the proposed method is indeed effective, showing that out-of-the-box learning algorithms and
relatively simple features can obtain a high accuracy in this task.
Links
BibTeX (Download)
@article{santos2014using,
  author    = {Santos, João and Anastácio, Ivo and Martins, Bruno},
  title     = {Using machine learning methods for disambiguating place references in textual documents},
  journal   = {GeoJournal},
  year      = {2014},
  pages     = {1--18},
  publisher = {Springer},
  issn      = {0343-2521},
  doi       = {10.1007/s10708-014-9553-y},
  abstract  = {This paper presents a machine learning method for disambiguating place references in text. Solving this task can have important applications in the digital humanities and computational social sciences, by supporting the geospatial analysis of large document collections. We combine multiple features that capture the similarity between candidate disambiguations, the place references, and the context where the place references occur, in order to rank and choose from a set of candidate disambiguations, obtained from a knowledge base containing geospatial coordinates and textual descriptions for different places from all around the world. The proposed method was evaluated through English corpora used in previous work in this area, and also with a subset of the English Wikipedia. Experimental results demonstrate that the proposed method is indeed effective, showing that out-of-the-box learning algorithms and relatively simple features can obtain a high accuracy in this task.},
  keywords  = {Entity linking in text, Geographic Text Mining and Retrieval, Learning to Rank, Place Reference Disambiguation},
  pubstate  = {published},
  tppubtype = {article},
}