% Conference paper (2015) by Zheng, Zhang and Yu on detecting collective
% anomalies across multiple spatio-temporal datasets.
% NOTE(review): the venue is recorded only as "ACM SIGSPATIAL 2015"; the full
% proceedings title, page range and DOI are not in this record -- TODO add
% them once confirmed against the publisher's listing.
@inproceedings{detecting-collective-anomalies-from-multiple-spatio-temporal-datasets-across-different-domains,
  author    = {Zheng, Yu and Zhang, Huichu and Yu, Yong},
  title     = {Detecting Collective Anomalies from Multiple Spatio-Temporal Datasets across Different Domains},
  booktitle = {{ACM} {SIGSPATIAL} 2015},
  publisher = {ACM},
  year      = {2015},
  month     = nov,
  url       = {https://www.microsoft.com/en-us/research/publication/detecting-collective-anomalies-from-multiple-spatio-temporal-datasets-across-different-domains/},
  abstract  = {The collective anomaly denotes a collection of nearby locations that are anomalous during a few consecutive time intervals in terms of phenomena collectively witnessed by multiple datasets. The collective anomalies suggest there are underlying problems that may not be identified based on a single data source or in a single location. It also associates individual locations and time intervals, formulating a panoramic view of an event. To detect a collective anomaly is very challenging, however, as different datasets have different densities, distributions, and scales. Additionally, to find the spatio-temporal scope of a collective anomaly is very time consuming as there are many ways to combine regions and time slots. Our method consists of three components: Multiple-Source Latent-Topic (MSLT) model, Spatio-Temporal Likelihood Ratio Test (ST\_LRT) model, and a candidate generation algorithm. MSLT combines multiple datasets to infer the latent functions of a geographic region in the framework of a topic model. In turn, a region's latent functions help estimate the underlying distribution of a sparse dataset generated in the region. ST\_LRT learns a proper underlying distribution for different datasets, and calculates an anomalous degree for each dataset based on a likelihood ratio test (LRT). It then aggregates the anomalous degrees of different datasets, using a skyline detection algorithm.
    We evaluate our method using five datasets related to New York City (NYC): 311 complaints, taxicab data, bike rental data, points of interest, and road network data, finding the anomalies that cannot be identified (or earlier than those detected) by a single dataset. Results show the advantages beyond six baseline methods.},
}