Welcome! I have an interest in science and technologies related to human intelligence and natural languages. I have been working in the field of natural language processing and information retrieval, with a focus on multilingual and semantic processing of different forms of text (articles, queries, tweets, etc.).
Currently, I am a principal research engineer at Apple, doing research and development work on Siri Search and Assistant. Also, in spring 2024, I am teaching UC Berkeley's NLP course. Prior to this, I was a manager of data science at Ask.com, a research scientist at the International Computer Science Institute (ICSI), and visiting faculty at Carnegie Mellon University in Qatar.
I received my PhD from the Intelligent Systems Program at the University of Pittsburgh, USA, where I worked with Rebecca Hwa. My dissertation was titled "Locating and Reducing Translation Difficulty". It was a statistical framework for finding the problematic outputs of a machine translation system and adapting the MT system to reduce those problems.
In my leisure time, I follow independent cinema as well as classical and film music.
Please feel free to reach me at behrangm AT berkeley DOT edu
@incollection{mohit-SemiticNERChapter,
  author    = {Behrang Mohit},
  title     = {Named Entity Recognition},
  booktitle = {Natural Language Processing of Semitic Languages},
  editor    = {Imed Zitouni},
  publisher = {Springer, USA},
  year      = {2014},
  isbn      = {978-3642453571},
}
@inproceedings{SALAMA14.558.L14-1456,
  author    = {Salama, Ahmed and Bouamor, Houda and Mohit, Behrang and Oflazer, Kemal},
  title     = {{YouDACC}: the {Youtube Dialectal Arabic Comment Corpus}},
  booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14)},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Declerck, Thierry and Loftsson, Hrafn and Maegaard, Bente and Mariani, Joseph and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Reykjavik, Iceland},
  year      = {2014},
  month     = may,
  eventdate = {2014-05-26/2014-05-31},
  pages     = {1246--1251},
  isbn      = {978-2-9517408-8-4},
  language  = {english},
  url       = {http://www.lrec-conf.org/proceedings/lrec2014/pdf/558_Paper.pdf},
  note      = {ACL Anthology Identifier: L14-1456},
}
@inproceedings{ZAGHOUANI14.956.L14-1721,
  author    = {Zaghouani, Wajdi and Mohit, Behrang and Habash, Nizar and Obeid, Ossama and Tomeh, Nadi and Rozovskaya, Alla and Farra, Noura and Alkuhlani, Sarah and Oflazer, Kemal},
  title     = {Large Scale {Arabic} Error Annotation: Guidelines and Framework},
  booktitle = {Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC'14)},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Declerck, Thierry and Loftsson, Hrafn and Maegaard, Bente and Mariani, Joseph and Moreno, Asuncion and Odijk, Jan and Piperidis, Stelios},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Reykjavik, Iceland},
  year      = {2014},
  month     = may,
  eventdate = {2014-05-26/2014-05-31},
  isbn      = {978-2-9517408-8-4},
  language  = {english},
  url       = {http://www.lrec-conf.org/proceedings/lrec2014/pdf/956_Paper.pdf},
  note      = {ACL Anthology Identifier: L14-1721},
}
@inproceedings{almannai-EtAl:2014:SemEval,
  author    = {Al-Mannai, Kamla and Alshikhabobakr, Hanan and Bin Wasi, Sabih and Neyaz, Rukhsar and Bouamor, Houda and Mohit, Behrang},
  title     = {{CMUQ-Hybrid}: Sentiment Classification By Feature Engineering and Parameter Tuning},
  booktitle = {Proceedings of the 8th International Workshop on Semantic Evaluation (SemEval 2014)},
  publisher = {Association for Computational Linguistics and Dublin City University},
  address   = {Dublin, Ireland},
  year      = {2014},
  month     = aug,
  pages     = {181--185},
  url       = {http://www.aclweb.org/anthology/S14-2028},
}
@inproceedings{bouamor-mohit-oflazer:2013:IJCNLP,
  author    = {Bouamor, Houda and Mohit, Behrang and Oflazer, Kemal},
  title     = {{SuMT}: A Framework of Summarization and {MT}},
  booktitle = {Proceedings of the Sixth International Joint Conference on Natural Language Processing},
  publisher = {Asian Federation of Natural Language Processing},
  address   = {Nagoya, Japan},
  year      = {2013},
  month     = oct,
  pages     = {270--278},
  url       = {http://www.aclweb.org/anthology/I13-1031},
}
@inproceedings{obeid-EtAl:2013:IJCNLP2013-Demos,
  author    = {Obeid, Ossama and Zaghouani, Wajdi and Mohit, Behrang and Habash, Nizar and Oflazer, Kemal and Tomeh, Nadi},
  title     = {A Web-based Annotation Framework For Large-Scale Text Correction},
  booktitle = {The Companion Volume of the Proceedings of IJCNLP 2013: System Demonstrations},
  publisher = {Asian Federation of Natural Language Processing},
  address   = {Nagoya, Japan},
  year      = {2013},
  month     = oct,
  pages     = {1--4},
  url       = {http://www.aclweb.org/anthology/I13-2001},
}
@inproceedings{Azab2013-1,
  author    = {Azab, Mahmoud and Bouamor, Houda and Mohit, Behrang and Oflazer, Kemal},
  title     = {{Dudley North} visits {North London}: Learning When to Transliterate to {Arabic}},
  booktitle = {Proceedings of NAACL 2013},
  publisher = {ACL},
  year      = {2013},
  month     = jun,
}
@inproceedings{Schneider2013-1,
  author    = {Schneider, Nathan and Mohit, Behrang and Dyer, Chris and Oflazer, Kemal and Smith, Noah A.},
  title     = {Supersense Tagging for {Arabic}: the {MT-in-the-Middle} Attack},
  booktitle = {Proceedings of NAACL 2013},
  publisher = {ACL},
  year      = {2013},
  month     = jun,
}
@inproceedings{mohitEtAlEacl2012,
  author    = {Mohit, Behrang and Schneider, Nathan and Bhowmick, Rishav and Oflazer, Kemal and Smith, Noah A.},
  title     = {Recall-Oriented Learning of Named Entities in {Arabic} {Wikipedia}},
  booktitle = {Proceedings of the 13th Conference of the European Chapter of the Association for Computational Linguistics --- EACL},
  year      = {2012},
}
@inproceedings{MOHAMED12.465,
  author    = {Mohamed, Emad and Mohit, Behrang and Oflazer, Kemal},
  title     = {Annotating and Learning Morphological Segmentation of {Egyptian Colloquial Arabic}},
  booktitle = {Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC'12)},
  editor    = {Calzolari, Nicoletta and Choukri, Khalid and Declerck, Thierry and Do{\u{g}}an, Mehmet U{\u{g}}ur and Maegaard, Bente and Mariani, Joseph and Odijk, Jan and Piperidis, Stelios},
  publisher = {European Language Resources Association (ELRA)},
  address   = {Istanbul, Turkey},
  year      = {2012},
  month     = may,
}
@inproceedings{schneider-EtAl:2012:ACL2012short,
  author    = {Schneider, Nathan and Mohit, Behrang and Oflazer, Kemal and Smith, Noah A.},
  title     = {Coarse Lexical Semantic Annotation with Supersenses: An {Arabic} Case Study},
  booktitle = {Proceedings of the 50th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  publisher = {Association for Computational Linguistics},
  address   = {Jeju Island, Korea},
  year      = {2012},
  month     = jul,
  pages     = {253--258},
}
@inproceedings{mohamed-mohit-oflazer:2012:ACL2012short,
  author    = {Mohamed, Emad and Mohit, Behrang and Oflazer, Kemal},
  title     = {Transforming {Standard Arabic} to {Colloquial Arabic}},
  booktitle = {Proceedings of the 50th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
  publisher = {Association for Computational Linguistics},
  address   = {Jeju Island, Korea},
  year      = {2012},
  month     = jul,
  pages     = {176--180},
}
@inproceedings{liberato-mohit-hwa:2010:NAACLHLT,
  author    = {Liberato, Frank and Mohit, Behrang and Hwa, Rebecca},
  title     = {Improving Phrase-Based Translation with Prototypes of Short Phrases},
  booktitle = {Human Language Technologies: The 2010 Annual Conference of the North American Chapter of the Association for Computational Linguistics},
  publisher = {Association for Computational Linguistics},
  address   = {Los Angeles, California},
  year      = {2010},
  month     = jun,
  pages     = {301--304},
}
@inproceedings{Mohit:2007:LDP:1626355.1626392,
  author    = {Mohit, Behrang and Hwa, Rebecca},
  title     = {Localization of difficult-to-translate phrases},
  booktitle = {Proceedings of the Second Workshop on Statistical Machine Translation},
  series    = {StatMT '07},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA, USA},
  location  = {Prague, Czech Republic},
  year      = {2007},
  pages     = {248--255},
  numpages  = {8},
  acmid     = {1626392},
  url       = {http://dl.acm.org/citation.cfm?id=1626355.1626392},
}
@inproceedings{riesa2006building,
  author    = {Riesa, Jason and Mohit, Behrang and Knight, Kevin and Marcu, Daniel},
  title     = {Building an {English-Iraqi} {Arabic} machine translation system for spoken utterances with limited resources},
  booktitle = {INTERSPEECH},
  year      = {2006},
}
@inproceedings{Mohit:2005:SSN:1225753.1225768,
  author    = {Mohit, Behrang and Hwa, Rebecca},
  title     = {Syntax-based semi-supervised named entity tagging},
  booktitle = {Proceedings of the ACL 2005 on Interactive poster and demonstration sessions},
  series    = {ACLdemo '05},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA, USA},
  location  = {Ann Arbor, Michigan},
  year      = {2005},
  pages     = {57--60},
  numpages  = {4},
  doi       = {10.3115/1225753.1225768},
  acmid     = {1225768},
  url       = {http://dx.doi.org/10.3115/1225753.1225768},
}
@inproceedings{Mohit:2003:SEW:1073483.1073505,
  author    = {Mohit, Behrang and Narayanan, Srini},
  title     = {Semantic extraction with wide-coverage lexical resources},
  booktitle = {Proceedings of the 2003 Conference of the North American Chapter of the Association for Computational Linguistics on Human Language Technology: companion volume of the Proceedings of HLT-NAACL 2003--short papers - Volume 2},
  series    = {NAACL-Short '03},
  publisher = {Association for Computational Linguistics},
  address   = {Stroudsburg, PA, USA},
  location  = {Edmonton, Canada},
  year      = {2003},
  pages     = {64--66},
  numpages  = {3},
  doi       = {10.3115/1073483.1073505},
  acmid     = {1073505},
  url       = {http://dx.doi.org/10.3115/1073483.1073505},
}
@inproceedings{Kantrowitz:2000:SET:345508.345650,
  author    = {Kantrowitz, Mark and Mohit, Behrang and Mittal, Vibhu},
  title     = {Stemming and its effects on {TFIDF} ranking (poster session)},
  booktitle = {Proceedings of the 23rd annual international ACM SIGIR conference on Research and development in information retrieval},
  series    = {SIGIR '00},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Athens, Greece},
  year      = {2000},
  pages     = {357--359},
  numpages  = {3},
  isbn      = {1-58113-226-3},
  doi       = {10.1145/345508.345650},
  acmid     = {345650},
  url       = {http://doi.acm.org/10.1145/345508.345650},
}