@comment{
Free-text references that have not been converted to BibTeX entries.
They carry no citation key and are kept here for reference only.

Segerer, Guillaume and Flavier, Sébastien. 2011-2016. RefLex: Reference Lexicon of Africa, Version 1.1. Paris, Lyon. http://reflex.cnrs.fr/

Borin, Lars, Shafqat Mumtaz Virk & Anju Saxena. 2017. Language technology for digital linguistics: turning the Linguistic Survey of India into a rich source of linguistic information. Paper presented at the 18th International Conference on Computational Linguistics and Intelligent Text Processing (CICLing), April 17 to 23, 2017, Budapest, Hungary.
}

% ---------------------------------------------------------------------
% Entries keyed with the "dream:" prefix
% ---------------------------------------------------------------------

@mastersthesis{dream:Foster,
  author = {Daniel Foster},
  title = {Automatic Frame-Semantic Parsing for Linguistic Descriptions: Extracting typological linguistic information from unstructured text},
  school = {University of Gothenburg},
  year = 2019,
}

@mastersthesis{dream:Aslam,
  author = {Aslam, Muhammad Irfan},
  title = {Semantic frame based automatic extraction of typological information from descriptive grammars},
  school = {University of Skövde},
  year = 2019,
  url = {http://his.diva-portal.org/smash/record.jsf?pid=diva2%3A1371627&dswid=-9783},
}

@incollection{dream:Virk:Frame-Extraction,
  author = {Shafqat Mumtaz Virk and Azam Sheikh Muhammad and Lars Borin and Muhammad Irfan Aslam and Saania Iqbal and Nazia Khurram},
  title = {Exploiting Frame-Semantics and Frame-Semantic Parsing for Automatic Extraction of Typological Information from Descriptive Grammars of Natural Languages},
  booktitle = {Proceedings of RANLP 2019},
  year = 2019,
}

@incollection{dream:Virk,
  author = {Shafqat Virk and Per Malm and Lars Borin and Anju Saxena},
  title = {LingFN: A FrameNet for the Linguistics Domain},
  booktitle = {Proceedings of CICLing 2019},
  year = 2019,
}

@misc{dream:WichmannRama,
  author = {S{\o}ren Wichmann and Taraka Rama},
  title = {Towards unsupervised extraction of linguistic typological features from language descriptions},
  howpublished = {First Workshop on Typology for Polyglot NLP, Florence, Aug. 1, 2019 (Co-located with ACL, July 28-Aug. 2, 2019)},
  year = 2019,
}

@misc{dream:Wichmann:UFMG,
  author = {S{\o}ren Wichmann and Harald Hammarström and Shafqat Virk},
  title = {Information extraction of linguistic typological information from grammatical descriptions},
  howpublished = {Presentation at the Universidade Federal de Minas Gerais, 4 Nov 2019},
  year = 2019,
}

@misc{dream:Wichmann:Dream,
  author = {S{\o}ren Wichmann},
  title = {The DReaM Project: A dictionary/grammar reading machine},
  howpublished = {Presentation at the Kazan University philological faculty},
  year = 2017,
}

@misc{dream:Hammarstrom:Extensa,
  author = {Harald Hammarstr{\"o}m},
  title = {¿Cuál es la gramática mas extensa? Ideas computacionales para medir la cantidad de una descripción gramatical de una lengua},
  howpublished = {Presentation at the Pontificia Universidad Cat{\'o}lica de Per{\'u}, 9 May 2019, Lima},
  year = 2019,
}

@misc{dream:Virk:LingFN-CLT,
  author = {Shafqat Virk and Lars Borin and Per Malm and Anju Saxena and Markus Forsberg and Harald Hammarstr{\"o}m and M. Azam and M. Irfan},
  title = {LingFN: a FrameNet for the Linguistics Domain},
  howpublished = {Presentation at the CLT Retreat, 8 May 2019},
  year = 2019,
}

@misc{dream:Hammarstrom:Text-Mining,
  author = {Harald Hammarstr{\"o}m and Shafqat Virk and Markus Forsberg},
  title = {Text Mining on Grammatical Descriptions of the Languages of the World},
  howpublished = {Presentation at the Infrastructural Tensions workshop, Uppsala, 29-30 Aug 2019},
  year = 2019,
}

% ---------------------------------------------------------------------
% Further presentations and posters (no key prefix)
% ---------------------------------------------------------------------

@misc{Hammarstrom:Grammar-Grammars,
  author = {Harald Hammarstr{\"o}m and Shafqat Virk and Markus Forsberg},
  title = {Extracting Grammar from Grammars: From Raw-Text Descriptions to Grammatical Characteristics of the Languages of the World},
  howpublished = {Presentation at the Computational Linguistics Seminar, Uppsala},
  year = 2017,
}

@misc{Hammarstrom:Waiheke2017,
  author = {Harald Hammarstr{\"o}m and Shafqat Virk and Markus Forsberg},
  title = {Automatically Filling in Grambank},
  howpublished = {Presentation at the Glottobank meeting, Waiheke},
  year = 2017,
}

@misc{Macklin-Cordes,
  author = {Jayden L. Macklin-Cordes and Nathaniel L. Blackbourne and Thomas J. Bott and Jacqueline Cook and T. Mark Ellison and Jordan Hollis and Edith E. Kirlew and Genevieve C. Richards and Sanle Zhao and Erich R. Round},
  title = {Robots who read grammars},
  howpublished = {Poster presented at CoEDL Fest 2017, Alexandra Park Conference Centre, Alexandra Headlands, QLD},
  year = 2017,
}

% ---------------------------------------------------------------------
% General references
% ---------------------------------------------------------------------

@book{el:Tsunoda:LELR,
  author = {Tasaku Tsunoda},
  title = {Language Endangerment and Language Revitalization},
  publisher = {Berlin: Mouton de Gruyter},
  series = {Trends in Linguistics: Studies and Monographs},
  volume = {148},
  pages = {307},
  year = {2005},
  gbid = {t3MphHy-2g0C},
  glottolog_ref_id = {4370},
  hhtype = {socling},
  inlg = {English [eng]},
  isbn = {9783110184297},
  oclc = {56614349},
}

@article{typ:Plank:WALS,
  author = {Frank Plank},
  title = {WALS values evaluated},
  journal = {Linguistic Typology},
  volume = {13},
  number = {1},
  pages = {41--75},
  year = {2009},
  glottolog_ref_id = {454891},
  hhtype = {specific_feature},
  inlg = {English [eng]},
}

@article{typ:Polyakovetal:WALS-JM,
  author = {Vladimir N. Polyakov and Valery D. Solovyev and S{\o}ren Wichmann and Oleg Belyaev},
  title = {Using WALS and Jazyki Mira},
  journal = {Linguistic Typology},
  volume = {13},
  pages = {137--167},
  year = {2009},
  glottolog_ref_id = {153004},
  hhtype = {specific_feature},
  inlg = {English [eng]},
}

@inproceedings{W17-0119,
  author = {Littell, Patrick and Pine, Aidan and Davis, Henry},
  title = {Waldayu and Waldayu Mobile: Modern digital dictionary interfaces for endangered languages},
  booktitle = {Proceedings of the 2nd Workshop on the Use of Computational Methods in the Study of Endangered Languages},
  year = {2017},
  publisher = {Association for Computational Linguistics},
  pages = {141--150},
  location = {Honolulu},
  url = {http://www.aclweb.org/anthology/W17-0119},
}

@inproceedings{cl:Kamholz:PanLex,
  author = {David Kamholz and Jonathan Pool and Susan Colowick},
  title = {PanLex: Building a Resource for Panlingual Lexical Translation},
  booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14)},
  year = {2014},
  month = may,
  date = {26-31},
  address = {Reykjavik, Iceland},
  editor = {Nicoletta Calzolari and Khalid Choukri and Thierry Declerck and Hrafn Loftsson and Bente Maegaard and Joseph Mariani and Asuncion Moreno and Jan Odijk and Stelios Piperidis},
  publisher = {European Language Resources Association (ELRA)},
  isbn = {978-2-9517408-8-4},
  language = {english},
}

@inproceedings{lkl:Bender:From-IGT,
  author = {Bender, Emily M. and Crowgey, Joshua and Goodman, Michael Wayne and Xia, Fei},
  title = {Learning Grammar Specifications from IGT: A Case Study of Chintang},
  booktitle = {Proceedings of the 2014 Workshop on the Use of Computational Methods in the Study of Endangered Languages},
  publisher = {Association for Computational Linguistics},
  address = {Baltimore, Maryland, USA},
  pages = {43--53},
  year = {2014},
  url = {http://www.aclweb.org/anthology/W/W14/W14-2206},
  month = jun,
}

@incollection{ling:Bickel:Distributional-Typology,
  author = {Bickel, Balthasar},
  editor = {Bernd Heine and Heiko Narrog},
  title = {Distributional typology: statistical inquiries into the dynamics of linguistic diversity},
  booktitle = {The Oxford Handbook of Linguistic Analysis},
  publisher = {Oxford: Oxford University Press},
  pages = {901--923},
  year = {2015},
  edition = {2},
}

@book{ir:Manning:IR,
  author = {Christopher D. Manning and Prabhakar Raghavan and Hinrich Schütze},
  title = {Introduction to Information Retrieval},
  publisher = {Cambridge: Cambridge University Press},
  year = {2008},
}

@misc{ling:Cysouw:Typology-Types,
  author = {Cysouw, Michael},
  title = {Typology without Types: Quantitatively inducing a Numeral Typology},
  year = {2011},
  url = {http://cysouw.de/home/presentations_files/cysouwALT9numerals.pdf},
  howpublished = {Poster presented at the 9th biannual meeting of the Association for Linguistic Typology, ALT9, Hong Kong, China},
}

@misc{ling:Cooper:Warehouse,
  author = {Doug Cooper},
  title = {Logistics of the Asia-Pacific Linguistic Data Warehouse},
  year = {2014},
  howpublished = {Paper presented at the Language Comparison with Linguistic Databases: RefLex and Typological Databases, 7-8 Oct 2014},
}

@incollection{ling:Dryer:Descriptive,
  author = {Dryer, Matthew S.},
  editor = {Felix Ameka and Alan Dench and Nicholas Evans},
  title = {Descriptive theories, explanatory theories, and basic linguistic theory},
  booktitle = {Catching Language: Issues in Grammar Writing},
  publisher = {Berlin: Mouton de Gruyter},
  pages = {207--234},
  year = {2006},
}

@incollection{hv:Guldemann:Africa:Macro-Areas,
  author = {Güldemann, Tom},
  editor = {Lameli, Alfred and Kehrein, Roland and Rabanus, Stefan},
  title = {"Sprachraum" and geography: Linguistic macro-areas in Africa},
  booktitle = {Language and Space: An International Handbook of Linguistic Variation Volume 2: Language Mapping},
  publisher = {Berlin: Mouton de Gruyter},
  series = {Handbooks of Linguistics and Communication Science},
  volume = {30/2},
  pages = {561--585},
  year = {2010},
  fn = {africa\guldemann_sprachraum2010.pdf, africa\guldemann_sprachraum-africa2010.zip},
  glottolog_ref_id = {22213},
  hhtype = {overview;comparative},
  inlg = {English [eng]},
  macro_area = {Africa},
}

@incollection{ocr:Hammarstrom,
  author = {Harald Hammarström and Shafqat Mumtaz Virk and Markus Forsberg},
  title = {Poor Man's OCR Post-Correction: Unsupervised Recognition of Variant Spelling Applied to a Multilingual Document Collection},
  booktitle = {Proceedings of the Digital Access to Textual Cultural Heritage (DATeCH) conference},
  publisher = {Göttingen: ACM},
  pages = {71--75},
  year = {2017},
}

@misc{typ:Hammarstrom:Three-Approaches,
  author = {Harald Hammarström},
  title = {Three Approaches to Prefix and Suffix Statistics in the Languages of the World},
  year = {2013},
  howpublished = {Paper presented at the Workshop on Corpus-based Quantitative Typology (CoQuaT 2013)},
}

@book{ling:Harris:Structural,
  author = {Harris, Zellig S.},
  title = {Methods in structural linguistics},
  publisher = {Chicago: University of Chicago Press},
  pages = {xv+384},
  year = {1951},
}

@book{ling:Dryer:Word-Order,
  author = {Matthew Dryer},
  title = {World Atlas of Word Order in Language},
  publisher = {Oxford: Oxford University Press},
  year = {forthcoming},
}

@article{typ:EvansLevinson:Universals,
  author = {Nicholas Evans and Stephen Levinson},
  title = {The Myth of Language Universals: Language diversity and its importance for cognitive science},
  journal = {Behavioral and Brain Sciences},
  volume = {32},
  number = {5},
  pages = {429--492},
  year = {2009},
  glottolog_ref_id = {40416},
  hhtype = {specific_feature},
  inlg = {English [eng]},
}

@article{ling:Himmelmann:Suffixing:2014,
  author = {Nikolaus Himmelmann},
  title = {Asymmetries in the prosodic phrasing of function words: Another look at the suffixing preference},
  journal = {Language},
  volume = {90},
  number = {4},
  pages = {927--960},
  year = {2014},
}

@incollection{cl:Virk:Automatic-Extraction,
  author = {Virk, Shafqat Mumtaz and Lars Borin and Anju Saxena and Harald Hammarstr{\"o}m},
  editor = {Kamil Ek{\v{s}}tein and V{\'a}clav Matou{\v{s}}ek},
  title = {Automatic Extraction of Typological Linguistic Features from Descriptive Grammars},
  booktitle = {Text, Speech, and Dialogue: 20th International Conference, TSD 2017, Prague, Czech Republic, August 27-31, 2017, Proceedings},
  publisher = {Berlin: Springer},
  year = 2017,
  series = {Lecture Notes in Computer Science},
  volume = 10415,
  pages = {111--119},
}

@inproceedings{Borin-Lars2016-253952,
  title = {Towards a Big Data View on South Asian Linguistic Diversity},
  abstract = {South Asia with its rich and diverse linguistic tapestry of hundreds of languages, including many from four major language families, and a long history of intensive language contact, provides rich empirical data for studies of linguistic genealogy, linguistic typology, and language contact. South Asia is often referred to as a linguistic area, a region where, due to close contact and widespread multilingualism, languages have influenced one another to the extent that both related and unrelated languages are more similar on many linguistic levels than we would expect. However, with some rare exceptions, most studies are largely impressionistic, drawing examples from a few languages. In this paper we present our ongoing work aiming at turning the linguistic material available in Grierson’s Linguistic Survey of India (LSI) into a digital language resource, a database suitable for a broad array of linguistic investigations of the languages of South Asia. In addition to this, we aim to contribute to the methodological development of large-scale comparative linguistics drawing on digital language resources, by exploring NLP techniques for extracting linguistic information from free-text language descriptions of the kind found in the LSI.},
  booktitle = {WILDRE-3 – 3rd Workshop on Indian Language Data: Resources and Evaluation},
  author = {Borin, Lars and Virk, Shafqat and Saxena, Anju},
  year = {2016},
  publisher = {ELRA},
  address = {Paris},
  pages = {87--92},
}

@misc{cl:Nivre:UD20,
  author = {Nivre, Joakim and Agi{\'c}, {\v{Z}}eljko and Ahrenberg, Lars and Aranzabe, Maria Jesus},
  title = {Universal Dependencies 2.0},
  year = {2017},
  url = {http://hdl.handle.net/11234/1-1983},
  howpublished = {{LINDAT}/{CLARIN} digital library at the Institute of Formal and Applied Linguistics, Charles University in Prague},
}

@article{v:Segerer:RefLex,
  author = {Segerer, Guillaume},
  title = {RefLex: la reconstruction sans peine},
  journal = {Faits de Langues},
  volume = {47},
  pages = {201--214},
  year = {2016},
  fn = {africa\segerer_reflex2016.pdf},
  glottolog_ref_id = {552072},
  hhtype = {comparative},
  macro_area = {Africa},
}

@misc{cl:Virk:TextCat,
  author = {Shafqat Virk and Markus Forsberg and Harald Hammarström},
  title = {TextCat for Language Profiling},
  year = {2017},
  howpublished = {Submitted},
}

@article{cl:Xia:ODIN,
  author = {Fei Xia and William D. Lewis and Michael Wayne Goodman and Glenn Slayden and Ryan Georgi and Joshua Crowgey and Emily M. Bender},
  title = {Enriching a massively multilingual database of interlinear glossed text},
  journal = {Language Resources and Evaluation},
  year = 2016,
  volume = 50,
  number = 2,
  pages = {1--29},
}

@incollection{cl:Mikolov:Words-Phrases,
  author = {Tomas Mikolov and Ilya Sutskever and Kai Chen and Gregory S. Corrado and Jeffrey Dean},
  editor = {Christopher J. C. Burges and L{é}on Bottou and Zoubin Ghahramani and Kilian Q. Weinberger},
  title = {Distributed Representations of Words and Phrases and their Compositionality},
  booktitle = {Advances in Neural Information Processing Systems 26 (NIPS 2013)},
  publisher = {Neural Information Processing Systems},
  address = {Lake Tahoe, Nevada},
  pages = {3111--3119},
  year = {2013},
  url = {http://papers.nips.cc/paper/5021-distributed-representations-of-words-and-phrases-and-their-compositionality},
  bibsource = {dblp computer science bibliography, http://dblp.org},
  biburl = {http://dblp.uni-trier.de/rec/bib/conf/nips/MikolovSCCD13},
  timestamp = {Thu, 07 May 2015 20:02:01 +0200},
}

@book{ling:deSaussure:Generale,
  author = {de Saussure, Ferdinand},
  title = {Cours de linguistique générale},
  publisher = {Paris: Payot},
  year = {1916},
}