2021 |
Mitigating the Position Bias of Transformer Models in Passage Re-RankingInproceedingsHofstätter, Sebastian; Lipani, Aldo; Althammer, Sophia; Zlabinger, Markus; Hanbury, Allan 2021. @inproceedings{hofstaetter21, title = {Mitigating the Position Bias of Transformer Models in Passage Re-Ranking}, author = {Sebastian Hofst\"{a}tter and Aldo Lipani and Sophia Althammer and Markus Zlabinger and Allan Hanbury}, url = {https://www.researchgate.net/publication/348589683_Mitigating_the_Position_Bias_of_Transformer_Models_in_Passage_Re-Ranking}, year = {2021}, date = {2021-03-28}, series = {ECIR}, abstract = {Supervised machine learning models and their evaluation strongly depends on the quality of the underlying dataset. When we search for a relevant piece of information it may appear anywhere in a given passage. However, we observe a bias in the position of the correct answer in the text in two popular Question Answering datasets used for passage re-ranking. The excessive favoring of earlier positions inside passages is an unwanted artefact. This leads to three common Transformer-based re-ranking models to ignore relevant parts in unseen passages. More concerningly, as the evaluation set is taken from the same biased distribution, the models overfitting to that bias overestimate their true effectiveness. In this work we analyze position bias on datasets, the contextualized representations, and their effect on retrieval results. We propose a debiasing method for retrieval datasets. Our results show that a model trained on a position-biased dataset exhibits a significant decrease in re-ranking effectiveness when evaluated on a debiased dataset. 
We demonstrate that by mitigating the position bias, Transformer-based re-ranking models are equally effective on a biased and debiased dataset, as well as more effective in a transfer-learning setting between two differently biased datasets.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Supervised machine learning models and their evaluation strongly depends on the quality of the underlying dataset. When we search for a relevant piece of information it may appear anywhere in a given passage. However, we observe a bias in the position of the correct answer in the text in two popular Question Answering datasets used for passage re-ranking. The excessive favoring of earlier positions inside passages is an unwanted artefact. This leads to three common Transformer-based re-ranking models to ignore relevant parts in unseen passages. More concerningly, as the evaluation set is taken from the same biased distribution, the models overfitting to that bias overestimate their true effectiveness. In this work we analyze position bias on datasets, the contextualized representations, and their effect on retrieval results. We propose a debiasing method for retrieval datasets. Our results show that a model trained on a position-biased dataset exhibits a significant decrease in re-ranking effectiveness when evaluated on a debiased dataset. We demonstrate that by mitigating the position bias, Transformer-based re-ranking models are equally effective on a biased and debiased dataset, as well as more effective in a transfer-learning setting between two differently biased datasets. |
Predicting Non-Residential Building Fire Risk Using Geospatial Information and Convolutional Neural NetworksJournal ArticleAnderson-Bell, Jake; Schillaci, Calogero; Lipani, Aldo Remote Sensing Applications: Society and Environment, 2021. @article{AndersonBell2021, title = {Predicting Non-Residential Building Fire Risk Using Geospatial Information and Convolutional Neural Networks}, author = {Jake Anderson-Bell and Calogero Schillaci and Aldo Lipani}, year = {2021}, date = {2021-01-01}, journal = {Remote Sensing Applications: Society and Environment}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Evaluation metrics for measuring bias in search engine resultsJournal ArticleGezici, Gizem; Lipani, Aldo; Saygin, Yucel; Yilmaz, Emine Information Retrieval Journal, pp. 1–29, 2021, ISSN: 1386-4564. @article{Gezici2021, title = {Evaluation metrics for measuring bias in search engine results}, author = {Gizem Gezici and Aldo Lipani and Yucel Saygin and Emine Yilmaz}, url = {http://link.springer.com/10.1007/s10791-020-09386-w}, doi = {10.1007/s10791-020-09386-w}, issn = {1386-4564}, year = {2021}, date = {2021-01-01}, journal = {Information Retrieval Journal}, pages = {1--29}, publisher = {Springer}, abstract = {Search engines decide what we see for a given search query. Since many people are exposed to information through search engines, it is fair to expect that search engines are neutral. However, search engine results do not necessarily cover all the viewpoints of a search query topic, and they can be biased towards a specific view since search engine results are returned based on relevance, which is calculated using many features and sophisticated algorithms where search neutrality is not necessarily the focal point. Therefore, it is important to evaluate the search engine results with respect to bias. In this work we propose novel web search bias evaluation measures which take into account the rank and relevance. We also propose a framework to evaluate web search bias using the proposed measures and test our framework on two popular search engines based on 57 controversial query topics such as abortion, medical marijuana, and gay marriage. We measure the stance bias (in support or against), as well as the ideological bias (conservative or liberal). We observe that the stance does not necessarily correlate with the ideological leaning, e.g. a positive stance on abortion indicates a liberal leaning but a positive stance on Cuba embargo indicates a conservative leaning. Our experiments show that neither of the search engines suffers from stance bias. 
However, both search engines suffer from ideological bias, both favouring one ideological leaning to the other, which is more significant from the perspective of polarisation in our society.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Search engines decide what we see for a given search query. Since many people are exposed to information through search engines, it is fair to expect that search engines are neutral. However, search engine results do not necessarily cover all the viewpoints of a search query topic, and they can be biased towards a specific view since search engine results are returned based on relevance, which is calculated using many features and sophisticated algorithms where search neutrality is not necessarily the focal point. Therefore, it is important to evaluate the search engine results with respect to bias. In this work we propose novel web search bias evaluation measures which take into account the rank and relevance. We also propose a framework to evaluate web search bias using the proposed measures and test our framework on two popular search engines based on 57 controversial query topics such as abortion, medical marijuana, and gay marriage. We measure the stance bias (in support or against), as well as the ideological bias (conservative or liberal). We observe that the stance does not necessarily correlate with the ideological leaning, e.g. a positive stance on abortion indicates a liberal leaning but a positive stance on Cuba embargo indicates a conservative leaning. Our experiments show that neither of the search engines suffers from stance bias. However, both search engines suffer from ideological bias, both favouring one ideological leaning to the other, which is more significant from the perspective of polarisation in our society. |
Variational-LSTM autoencoder to forecast the spread of coronavirus across the globeJournal ArticleIbrahim, Mohamed R; Haworth, James; Lipani, Aldo; Aslam, Nilufer; Cheng, Tao; Christie, Nicola PLOS ONE, 16 (1), pp. 1–22, 2021. @article{10.1371/journal.pone.0246120, title = {Variational-LSTM autoencoder to forecast the spread of coronavirus across the globe}, author = {Mohamed R Ibrahim and James Haworth and Aldo Lipani and Nilufer Aslam and Tao Cheng and Nicola Christie}, url = {https://doi.org/10.1371/journal.pone.0246120 https://www.researchgate.net/publication/348855260_Variational-LSTM_autoencoder_to_forecast_the_spread_of_coronavirus_across_the_globe}, doi = {10.1371/journal.pone.0246120}, year = {2021}, date = {2021-01-01}, journal = {PLOS ONE}, volume = {16}, number = {1}, pages = {1--22}, publisher = {Public Library of Science}, abstract = {Modelling the spread of coronavirus globally while learning trends at global and country levels remains crucial for tackling the pandemic. We introduce a novel variational-LSTM Autoencoder model to predict the spread of coronavirus for each country across the globe. This deep Spatio-temporal model does not only rely on historical data of the virus spread but also includes factors related to urban characteristics represented in locational and demographic data (such as population density, urban population, and fertility rate), an index that represents the governmental measures and response amid toward mitigating the outbreak (includes 13 measures such as: 1) school closing, 2) workplace closing, 3) cancelling public events, 4) close public transport, 5) public information campaigns, 6) restrictions on internal movements, 7) international travel controls, 8) fiscal measures, 9) monetary measures, 10) emergency investment in health care, 11) investment in vaccines, 12) virus testing framework, and 13) contact tracing). 
In addition, the introduced method learns to generate a graph to adjust the spatial dependences among different countries while forecasting the spread. We trained two models for short and long-term forecasts. The first one is trained to output one step in future with three previous timestamps of all features across the globe, whereas the second model is trained to output 10 steps in future. Overall, the trained models show high validation for forecasting the spread for each country for short and long-term forecasts, which makes the introduce method a useful tool to assist decision and policymaking for the different corners of the globe.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Modelling the spread of coronavirus globally while learning trends at global and country levels remains crucial for tackling the pandemic. We introduce a novel variational-LSTM Autoencoder model to predict the spread of coronavirus for each country across the globe. This deep Spatio-temporal model does not only rely on historical data of the virus spread but also includes factors related to urban characteristics represented in locational and demographic data (such as population density, urban population, and fertility rate), an index that represents the governmental measures and response amid toward mitigating the outbreak (includes 13 measures such as: 1) school closing, 2) workplace closing, 3) cancelling public events, 4) close public transport, 5) public information campaigns, 6) restrictions on internal movements, 7) international travel controls, 8) fiscal measures, 9) monetary measures, 10) emergency investment in health care, 11) investment in vaccines, 12) virus testing framework, and 13) contact tracing). In addition, the introduced method learns to generate a graph to adjust the spatial dependences among different countries while forecasting the spread. We trained two models for short and long-term forecasts. 
The first one is trained to output one step in future with three previous timestamps of all features across the globe, whereas the second model is trained to output 10 steps in future. Overall, the trained models show high validation for forecasting the spread for each country for short and long-term forecasts, which makes the introduce method a useful tool to assist decision and policymaking for the different corners of the globe. |
Learning Neural Point Processes with Latent GraphsInproceedingsZhang, Qiang; Lipani, Aldo; Yilmaz, Emine Proceedings of the World Wide Web Conference, 2021. @inproceedings{Zhang2021, title = {Learning Neural Point Processes with Latent Graphs}, author = {Qiang Zhang and Aldo Lipani and Emine Yilmaz}, url = {https://www.researchgate.net/publication/349380768_Learning_Neural_Point_Processes_with_Latent_Graphs}, doi = {10.1145/3442381.3450135}, year = {2021}, date = {2021-01-01}, booktitle = {Proceedings of the World Wide Web Conference}, series = {WWW '21}, abstract = {Neural point processes (NPPs) employ neural networks to capture complicated dynamics of asynchronous event sequences. Existing NPPs feed all history events into neural networks, assuming that all event types contribute to the prediction of the target type. However, this assumption can be problematic because in reality some event types do not contribute to the predictions of another type. To correct this defect, we learn to omit those types of events that do not contribute to the prediction of one target type during the formulation of NPPs. Towards this end, we simultaneously consider the tasks of (1) finding event types that contribute to predictions of the target types and (2) learning a NPP model from event sequences. For the former, we formulate a latent graph, with event types being vertices and non-zero contributing relationships being directed edges; then we propose a probabilistic graph generator, from which we sample a latent graph. For the latter, the sampled graph can be readily used as a plug-in to modify an existing NPP model. Because these two tasks are nested, we propose to optimize the model parameters through bilevel programming, and develop an efficient solution based on truncated gradient back-propagation. Experimental results on both synthetic and real-world datasets show the improved performance against state-of-the-art baselines. 
This work removes disturbance of non-contributing event types with the aid of a validation procedure, similar to the practice to mitigate overfitting used when training machine learning models.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Neural point processes (NPPs) employ neural networks to capture complicated dynamics of asynchronous event sequences. Existing NPPs feed all history events into neural networks, assuming that all event types contribute to the prediction of the target type. However, this assumption can be problematic because in reality some event types do not contribute to the predictions of another type. To correct this defect, we learn to omit those types of events that do not contribute to the prediction of one target type during the formulation of NPPs. Towards this end, we simultaneously consider the tasks of (1) finding event types that contribute to predictions of the target types and (2) learning a NPP model from event sequences. For the former, we formulate a latent graph, with event types being vertices and non-zero contributing relationships being directed edges; then we propose a probabilistic graph generator, from which we sample a latent graph. For the latter, the sampled graph can be readily used as a plug-in to modify an existing NPP model. Because these two tasks are nested, we propose to optimize the model parameters through bilevel programming, and develop an efficient solution based on truncated gradient back-propagation. Experimental results on both synthetic and real-world datasets show the improved performance against state-of-the-art baselines. This work removes disturbance of non-contributing event types with the aid of a validation procedure, similar to the practice to mitigate overfitting used when training machine learning models. |
New pedotransfer approaches to predict soil bulk density using WoSIS soil data and environmental covariates in Mediterranean agro-ecosystemsJournal ArticleSchillaci, Calogero; Perego, Alessia; Valkama, Elena; Märker, Michael; Saia, Sergio; Veronesi, Fabio; Lipani, Aldo; Lombardo, Luigi; Tadiello, Tommaso; Gamper, Hannes A; Tedone, Luigi; Moss, Cami; Pareja-Serrano, Elena; Amato, Gabriele; Kühl, Kersten; Dămătîrcă, Claudia; Cogato, Alessia; Mzid, Nada; Eeswaran, Rasu; Rabelo, Marya; Sperandio, Giorgio; Bosino, Alberto; Bufalini, Margherita; Tunçay, Tülay; Ding, Jianqi; Fiorentini, Marco; Tiscornia, Guadalupe; Conradt, Sarah; Botta, Marco; Acutis, Marco Science of The Total Environment, 780, pp. 146609, 2021, ISSN: 0048-9697. @article{SCHILLACI2021146609, title = {New pedotransfer approaches to predict soil bulk density using WoSIS soil data and environmental covariates in Mediterranean agro-ecosystems}, author = {Calogero Schillaci and Alessia Perego and Elena Valkama and Michael M\"{a}rker and Sergio Saia and Fabio Veronesi and Aldo Lipani and Luigi Lombardo and Tommaso Tadiello and Hannes A Gamper and Luigi Tedone and Cami Moss and Elena Pareja-Serrano and Gabriele Amato and Kersten K\"{u}hl and Claudia D\u{a}m\u{a}t\^{i}rc\u{a} and Alessia Cogato and Nada Mzid and Rasu Eeswaran and Marya Rabelo and Giorgio Sperandio and Alberto Bosino and Margherita Bufalini and T\"{u}lay Tun\c{c}ay and Jianqi Ding and Marco Fiorentini and Guadalupe Tiscornia and Sarah Conradt and Marco Botta and Marco Acutis}, url = {https://www.sciencedirect.com/science/article/pii/S0048969721016776}, doi = {10.1016/j.scitotenv.2021.146609}, issn = {0048-9697}, year = {2021}, date = {2021-01-01}, journal = {Science of The Total Environment}, volume = {780}, pages = {146609}, abstract = {For the estimation of the soil organic carbon stocks, bulk density (BD) is a fundamental parameter but measured data are usually not available especially when dealing with legacy soil data. 
It is possible to estimate BD by applying pedotransfer function (PTF). We applied different estimation methods with the aim to define a suitable PTF for BD of arable land for the Mediterranean Basin, which has peculiar climate features that may influence the soil carbon sequestration. To improve the existing BD estimation methods, we used a set of public climatic and topographic data along with the soil texture and organic carbon data. The present work consisted of the following steps: i) development of three PTFs models separately for top (0\textendash0.4 m) and subsoil (0.4\textendash1.2 m), ii) a 10-fold cross-validation, iii) model transferability using an external dataset derived from published data. The development of the new PTFs was based on the training dataset consisting of World Soil Information Service (WoSIS) soil profile data, climatic data from WorldClim at 1 km spatial resolution and Shuttle Radar Topography Mission (SRTM) digital elevation model at 30 m spatial resolution. The three PTFs models were developed using: Multiple Linear Regression stepwise (MLR-S), Multiple Linear Regression backward stepwise (MLR-BS), and Artificial Neural Network (ANN). The predictions of the newly developed PTFs were compared with the BD calculated using the PTF proposed by Manrique and Jones (MJ) and the modelled BD derived from the global SoilGrids dataset. For the topsoil training dataset (N = 129), MLR-S, MLR-BS and ANN had a R2 0.35, 0.58 and 0.86, respectively. For the model transferability, the three PTFs applied to the external topsoil dataset (N = 59), achieved R2 values of 0.06, 0.03 and 0.41. For the subsoil training dataset (N = 180), MLR-S, MLR-BS and ANN the R2 values were 0.36, 0.46 and 0.83, respectively. When applied to the external subsoil dataset (N = 29), the R2 values were 0.05, 0.06 and 0.41. 
The cross-validation for both top and subsoil dataset, resulted in an intermediate performance compared to calibration and validation with the external dataset. The new ANN PTF outperformed MLR-S, MLR-BS, MJ and SoilGrids approaches for estimating BD. Further improvements may be achieved by additionally considering the time of sampling, agricultural soil management and cultivation practices in predictive models.}, keywords = {}, pubstate = {published}, tppubtype = {article} } For the estimation of the soil organic carbon stocks, bulk density (BD) is a fundamental parameter but measured data are usually not available especially when dealing with legacy soil data. It is possible to estimate BD by applying pedotransfer function (PTF). We applied different estimation methods with the aim to define a suitable PTF for BD of arable land for the Mediterranean Basin, which has peculiar climate features that may influence the soil carbon sequestration. To improve the existing BD estimation methods, we used a set of public climatic and topographic data along with the soil texture and organic carbon data. The present work consisted of the following steps: i) development of three PTFs models separately for top (0–0.4 m) and subsoil (0.4–1.2 m), ii) a 10-fold cross-validation, iii) model transferability using an external dataset derived from published data. The development of the new PTFs was based on the training dataset consisting of World Soil Information Service (WoSIS) soil profile data, climatic data from WorldClim at 1 km spatial resolution and Shuttle Radar Topography Mission (SRTM) digital elevation model at 30 m spatial resolution. The three PTFs models were developed using: Multiple Linear Regression stepwise (MLR-S), Multiple Linear Regression backward stepwise (MLR-BS), and Artificial Neural Network (ANN). 
The predictions of the newly developed PTFs were compared with the BD calculated using the PTF proposed by Manrique and Jones (MJ) and the modelled BD derived from the global SoilGrids dataset. For the topsoil training dataset (N = 129), MLR-S, MLR-BS and ANN had a R2 0.35, 0.58 and 0.86, respectively. For the model transferability, the three PTFs applied to the external topsoil dataset (N = 59), achieved R2 values of 0.06, 0.03 and 0.41. For the subsoil training dataset (N = 180), MLR-S, MLR-BS and ANN the R2 values were 0.36, 0.46 and 0.83, respectively. When applied to the external subsoil dataset (N = 29), the R2 values were 0.05, 0.06 and 0.41. The cross-validation for both top and subsoil dataset, resulted in an intermediate performance compared to calibration and validation with the external dataset. The new ANN PTF outperformed MLR-S, MLR-BS, MJ and SoilGrids approaches for estimating BD. Further improvements may be achieved by additionally considering the time of sampling, agricultural soil management and cultivation practices in predictive models. |
How Am I Doing?: Evaluating Conversational Search Systems OfflineJournal ArticleLipani, Aldo; Carterette, Ben; Yilmaz, Emine ACM Transactions on Information Systems (TOIS), 2021. @article{Lipani2021TOIS, title = {How Am I Doing?: Evaluating Conversational Search Systems Offline}, author = {Aldo Lipani and Ben Carterette and Emine Yilmaz}, url = {https://www.researchgate.net/publication/350640565_How_Am_I_Doing_Evaluating_Conversational_Search_Systems_Offline https://aldolipani.com/wp-content/uploads/2021/04/How_Am_I_Doing-Evaluating_Conversational_Search_Systems_Offline.pdf}, year = {2021}, date = {2021-01-01}, journal = {ACM Transactions on Information Systems (TOIS)}, abstract = {As conversational agents like Siri and Alexa gain in popularity and use, conversation is becoming a more and more important mode of interaction for search. Conversational search shares some features with traditional search, but differs in some important respects: conversational search systems are less likely to return ranked lists of results (a SERP), more likely to involve iterated interactions, and more likely to feature longer, well-formed user queries in the form of natural language questions. Because of these differences, traditional methods for search evaluation (such as the Cranfield paradigm) do not translate easily to conversational search. In this work, we propose a framework for offline evaluation of conversational search, which includes a methodology for creating test collections with relevance judgments, an evaluation measure based on a user interaction model, and an approach to collecting user interaction data to train the model. The framework is based on the idea of “subtopics”, often used to model novelty and diversity in search and recommendation, and the user model is similar to the geometric browsing model introduced by RBP and used in ERR. 
As far as we know, this is the first work to combine these ideas into a comprehensive framework for offline evaluation of conversational search.}, keywords = {}, pubstate = {published}, tppubtype = {article} } As conversational agents like Siri and Alexa gain in popularity and use, conversation is becoming a more and more important mode of interaction for search. Conversational search shares some features with traditional search, but differs in some important respects: conversational search systems are less likely to return ranked lists of results (a SERP), more likely to involve iterated interactions, and more likely to feature longer, well-formed user queries in the form of natural language questions. Because of these differences, traditional methods for search evaluation (such as the Cranfield paradigm) do not translate easily to conversational search. In this work, we propose a framework for offline evaluation of conversational search, which includes a methodology for creating test collections with relevance judgments, an evaluation measure based on a user interaction model, and an approach to collecting user interaction data to train the model. The framework is based on the idea of “subtopics”, often used to model novelty and diversity in search and recommendation, and the user model is similar to the geometric browsing model introduced by RBP and used in ERR. As far as we know, this is the first work to combine these ideas into a comprehensive framework for offline evaluation of conversational search. |
2020 |
A Multilinear Sampling Algorithm to Estimate Shapley ValuesInproceedingsOkhrati, Ramin; Lipani, Aldo Proc. of ICPR, 2020. @inproceedings{okhrati2020multilinear, title = {A Multilinear Sampling Algorithm to Estimate Shapley Values}, author = {Ramin Okhrati and Aldo Lipani}, url = {https://www.researchgate.net/publication/344825957_A_Multilinear_Sampling_Algorithm_to_Estimate_Shapley_Values}, year = {2020}, date = {2020-10-25}, booktitle = {Proc.~of ICPR}, series = {ICPR}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
Predicting Engagement in Video LecturesInproceedingsBulathwela, Sahan; Perez-Ortiz, Maria; Lipani, Aldo; Yilmaz, Emine; Shawe-Taylor, John Proc. of EDM, 2020. @inproceedings{bulathwela2020predictingb, title = {Predicting Engagement in Video Lectures}, author = {Sahan Bulathwela and Maria Perez-Ortiz and Aldo Lipani and Emine Yilmaz and John Shawe-Taylor}, url = {https://www.researchgate.net/publication/344832087_Predicting_Engagement_in_Video_Lectures}, year = {2020}, date = {2020-01-01}, booktitle = {Proc.~of EDM}, series = {EDM '20}, abstract = {The explosion of Open Educational Resources (OERs) in the recent years creates the demand for scalable, automatic approaches to process and evaluate OERs, with the end goal of identifying and recommending the most suitable educational materials for learners. We focus on building models to find the characteristics and features involved in context-agnostic engagement (i.e. population-based), a seldom researched topic compared to other contextualised and personalised approaches that focus more on individual learner engagement. Learner engagement, is arguably a more reliable measure than popularity/number of views, is more abundant than user ratings and has also been shown to be a crucial component in achieving learning outcomes. In this work, we explore the idea of building a predictive model for population-based engagement in education. We introduce a novel, large dataset of video lectures for predicting context-agnostic engagement and propose both cross-modal and modality specific feature sets to achieve this task. We further test different strategies for quantifying learner engagement signals. We demonstrate the use of our approach in the case of data scarcity. 
Additionally, we perform a sensitivity analysis of the best performing model, which shows promising performance and can be easily integrated into an educational recommender system for OERs.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } The explosion of Open Educational Resources (OERs) in the recent years creates the demand for scalable, automatic approaches to process and evaluate OERs, with the end goal of identifying and recommending the most suitable educational materials for learners. We focus on building models to find the characteristics and features involved in context-agnostic engagement (i.e. population-based), a seldom researched topic compared to other contextualised and personalised approaches that focus more on individual learner engagement. Learner engagement, is arguably a more reliable measure than popularity/number of views, is more abundant than user ratings and has also been shown to be a crucial component in achieving learning outcomes. In this work, we explore the idea of building a predictive model for population-based engagement in education. We introduce a novel, large dataset of video lectures for predicting context-agnostic engagement and propose both cross-modal and modality specific feature sets to achieve this task. We further test different strategies for quantifying learner engagement signals. We demonstrate the use of our approach in the case of data scarcity. Additionally, we perform a sensitivity analysis of the best performing model, which shows promising performance and can be easily integrated into an educational recommender system for OERs. |
Self-Attentive Hawkes ProcessesInproceedingsZhang, Qiang; Lipani, Aldo; Kirnap, Omer; Yilmaz, Emine Proc. of ICML, 2020. @inproceedings{zhang2020selfattentiveb, title = {Self-Attentive Hawkes Processes}, author = {Qiang Zhang and Aldo Lipani and Omer Kirnap and Emine Yilmaz}, url = {https://www.researchgate.net/publication/344832377_Self-Attentive_Hawkes_Process}, year = {2020}, date = {2020-01-01}, booktitle = {Proc.~of ICML}, series = {ICML '20}, abstract = {Capturing the occurrence dynamics is crucial to predicting which type of events will happen next and when. A common method to do this is through Hawkes processes. To enhance their capacity, recurrent neural networks (RNNs) have been incorporated due to RNNs’ successes in processing sequential data such as languages. Recent evidence suggests that self-attention is more competent than RNNs in dealing with languages. However, we are unaware of the effectiveness of self-attention in the context of Hawkes processes. This study aims to fill the gap by designing a self-attentive Hawkes process (SAHP). SAHP employs self-attention to summarise the influence of history events and compute the probability of the next event. One deficit of the conventional self-attention when applied to event sequences is that its positional encoding only considers the order of a sequence ignoring the time intervals between events. To overcome this deficit, we modify its encoding by translating time intervals into phase shifts of sinusoidal functions. Experiments on goodness-of-fit and prediction tasks show the improved capability of SAHP. Furthermore, SAHP is more interpretable than RNN-based counterparts because the learnt attention weights reveal contributions of one event type to the happening of another type. 
To the best of our knowledge, this is the first work that studies the effectiveness of self-attention in Hawkes processes.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } Capturing the occurrence dynamics is crucial to predicting which type of events will happen next and when. A common method to do this is through Hawkes processes. To enhance their capacity, recurrent neural networks (RNNs) have been incorporated due to RNNs’ successes in processing sequential data such as languages. Recent evidence suggests that self-attention is more competent than RNNs in dealing with languages. However, we are unaware of the effectiveness of self-attention in the context of Hawkes processes. This study aims to fill the gap by designing a self-attentive Hawkes process (SAHP). SAHP employs self-attention to summarise the influence of history events and compute the probability of the next event. One deficit of the conventional self-attention when applied to event sequences is that its positional encoding only considers the order of a sequence ignoring the time intervals between events. To overcome this deficit, we modify its encoding by translating time intervals into phase shifts of sinusoidal functions. Experiments on goodness-of-fit and prediction tasks show the improved capability of SAHP. Furthermore, SAHP is more interpretable than RNN-based counterparts because the learnt attention weights reveal contributions of one event type to the happening of another type. To the best of our knowledge, this is the first work that studies the effectiveness of self-attention in Hawkes processes. |
Easing Legal News Monitoring with Learning to Rank and BERTInproceedingsSanchez, Luis; He, Jiyin; Manotumruksa, Jarana; Albakour, Dyaa; Martinez, Miguel; Lipani, Aldo Proc. of ECIR, pp. 336–343, Springer International Publishing, Cham, 2020. @inproceedings{10.1007/978-3-030-45442-5_42, title = {Easing Legal News Monitoring with Learning to Rank and BERT}, author = {Luis Sanchez and Jiyin He and Jarana Manotumruksa and Dyaa Albakour and Miguel Martinez and Aldo Lipani}, url = {https://www.researchgate.net/publication/338825714_Easing_Legal_News_Monitoring_with_Learning_to_Rank_and_BERT}, year = {2020}, date = {2020-01-01}, booktitle = {Proc.~of ECIR}, pages = {336--343}, publisher = {Springer International Publishing}, address = {Cham}, series = {ECIR '20}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
Learning to Re-Rank with Contextualized StopwordsInproceedingsHofstätter, Sebastian; Lipani, Aldo; Zlabinger, Markus; Hanbury, Allan Proc. of CIKM, 2020. @inproceedings{hofstaetter2020learning, title = {Learning to Re-Rank with Contextualized Stopwords}, author = {Sebastian Hofst\"{a}tter and Aldo Lipani and Markus Zlabinger and Allan Hanbury}, url = {https://www.researchgate.net/publication/344832244_Learning_to_Re-Rank_with_Contextualized_Stopwords}, year = {2020}, date = {2020-01-01}, booktitle = {Proc.~of CIKM}, series = {CIKM '20}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
2019 |
FACTS-IR: Fairness, Accountability, Confidentiality, Transparency, and Safety in Information RetrievalJournal ArticleRoegiest, Adam; Lipani, Aldo; Beutel, Alex; Olteanu, Alexandra; Lucic, Ana; Stoica, Ana-Andreea; Das, Anubrata; Biega, Asia; Voorn, Bart; Hauff, Claudia; Spina, Damiano; Lewis, David; Oard, Douglas W; Yilmaz, Emine; Hasibi, Faegheh; Kazai, Gabriella; McDonald, Graham; Haned, Hinda; Ounis, Iadh; van der Linden, Ilse; Garcia-Gathright, Jean; Baan, Joris; Lau, Kamuela N; Balog, Krisztian; de Rijke, Maarten; Sayed, Mahmoud; Panteli, Maria; Sanderson, Mark; Lease, Matthew; Ekstrand, Michael D; Lahoti, Preethi; Kamishima, Toshihiro SIGIR Forum, 53 (2), pp. 20–43, 2019. @article{roegiest-2019-facts-ir, title = {FACTS-IR: Fairness, Accountability, Confidentiality, Transparency, and Safety in Information Retrieval}, author = {Adam Roegiest and Aldo Lipani and Alex Beutel and Alexandra Olteanu and Ana Lucic and Ana-Andreea Stoica and Anubrata Das and Asia Biega and Bart Voorn and Claudia Hauff and Damiano Spina and David Lewis and Douglas W Oard and Emine Yilmaz and Faegheh Hasibi and Gabriella Kazai and Graham McDonald and Hinda Haned and Iadh Ounis and Ilse van der Linden and Jean Garcia-Gathright and Joris Baan and Kamuela N Lau and Krisztian Balog and Maarten de Rijke and Mahmoud Sayed and Maria Panteli and Mark Sanderson and Matthew Lease and Michael D Ekstrand and Preethi Lahoti and Toshihiro Kamishima}, url = {https://www.researchgate.net/publication/337933046_FACTS-IR_Fairness_Accountability_Confidentiality_Transparency_and_Safety_in_Information_Retrieval}, year = {2019}, date = {2019-12-01}, journal = {SIGIR Forum}, volume = {53}, number = {2}, pages = {20--43}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
Fixed-Cost Pooling StrategiesJournal ArticleLipani, Aldo; Losada, David E; Zuccon, Guido; Lupu, Mihai IEEE Transactions on Knowledge and Data Engineering (TKDE), 2019. @article{Lipani2019TKDE, title = {Fixed-Cost Pooling Strategies}, author = {Aldo Lipani and David E. Losada and Guido Zuccon and Mihai Lupu}, url = {https://www.researchgate.net/publication/336369355_Fixed-Cost_Pooling_Strategies}, year = {2019}, date = {2019-10-06}, journal = {IEEE Transactions on Knowledge and Data Engineering (TKDE)}, abstract = {The empirical nature of Information Retrieval (IR) mandates strong experimental practices. A keystone of such experimental practices is the Cranfield evaluation paradigm. Within this paradigm, the collection of relevance judgments has been the subject of intense scientific investigation. This is because, on one hand, consistent, precise, and numerous judgements are keys to reducing evaluation uncertainty and test collection bias; on the other hand, however, relevance judgements are costly to collect. The selection of which documents to judge for relevance, known as pooling method, has therefore a great impact on IR evaluation. In this paper we focus on the bias introduced by the pooling method, known as pool bias, which affects the reusability of test collections, in particular when building test collections with a limited budget. In this paper we formalize and evaluate a set of 22 pooling strategies based on: traditional strategies, voting systems, retrieval fusion methods, evaluation measures, and multi-armed bandit models. To do this we run a large-scale evaluation by considering a set of 9 standard TREC test collections, in which we show that the choice of the pooling strategy has significant effects on the cost needed to obtain an unbiased test collection. 
We also identify the least biased pooling strategy in terms of pool bias according to three IR evaluation measures: AP, NDCG, and P@10.}, keywords = {}, pubstate = {published}, tppubtype = {article} } The empirical nature of Information Retrieval (IR) mandates strong experimental practices. A keystone of such experimental practices is the Cranfield evaluation paradigm. Within this paradigm, the collection of relevance judgments has been the subject of intense scientific investigation. This is because, on one hand, consistent, precise, and numerous judgements are keys to reducing evaluation uncertainty and test collection bias; on the other hand, however, relevance judgements are costly to collect. The selection of which documents to judge for relevance, known as pooling method, has therefore a great impact on IR evaluation. In this paper we focus on the bias introduced by the pooling method, known as pool bias, which affects the reusability of test collections, in particular when building test collections with a limited budget. In this paper we formalize and evaluate a set of 22 pooling strategies based on: traditional strategies, voting systems, retrieval fusion methods, evaluation measures, and multi-armed bandit models. To do this we run a large-scale evaluation by considering a set of 9 standard TREC test collections, in which we show that the choice of the pooling strategy has significant effects on the cost needed to obtain an unbiased test collection. We also identify the least biased pooling strategy in terms of pool bias according to three IR evaluation measures: AP, NDCG, and P@10. |
From a User Model for Query Sessions to Session Rank Biased Precision (sRBP)InproceedingsLipani, Aldo; Carterette, Ben; Yilmaz, Emine Proc. of ICTIR, 2019. @inproceedings{Lipani2019, title = {From a User Model for Query Sessions to Session Rank Biased Precision (sRBP)}, author = {Aldo Lipani and Ben Carterette and Emine Yilmaz}, url = {https://www.researchgate.net/publication/334725760_From_a_User_Model_for_Query_Sessions_to_Session_Rank_Biased_Precision_sRBP}, doi = {10.1145/3341981.3344216}, year = {2019}, date = {2019-10-02}, booktitle = {Proc.~of ICTIR}, journal = {Proc.~of ICTIR}, abstract = {To satisfy their information needs, users usually carry out searches on retrieval systems by continuously trading off between the examination of search results retrieved by under-specified queries and the refinement of these queries through reformulation. In Information Retrieval (IR), a series of query reformulations is known as a query-session. Research in IR evaluation has traditionally been focused on the development of measures for the ad hoc task, for which a retrieval system aims to retrieve the best documents for a single query. Thus, most IR evaluation measures, with a few exceptions, are not suitable to evaluate retrieval scenarios that call for multiple refinements over a query-session. In this paper, by formally modeling a user's expected behaviour over query-sessions, we derive a session-based evaluation measure, which results in a generalization of the evaluation measure Rank Biased Precision (RBP). 
We demonstrate the quality of this new session-based evaluation measure, named Session RBP (sRBP), by evaluating its user model against the observed user behaviour over the query-sessions of the 2014 TREC Session track.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } To satisfy their information needs, users usually carry out searches on retrieval systems by continuously trading off between the examination of search results retrieved by under-specified queries and the refinement of these queries through reformulation. In Information Retrieval (IR), a series of query reformulations is known as a query-session. Research in IR evaluation has traditionally been focused on the development of measures for the ad hoc task, for which a retrieval system aims to retrieve the best documents for a single query. Thus, most IR evaluation measures, with a few exceptions, are not suitable to evaluate retrieval scenarios that call for multiple refinements over a query-session. In this paper, by formally modeling a user's expected behaviour over query-sessions, we derive a session-based evaluation measure, which results in a generalization of the evaluation measure Rank Biased Precision (RBP). We demonstrate the quality of this new session-based evaluation measure, named Session RBP (sRBP), by evaluating its user model against the observed user behaviour over the query-sessions of the 2014 TREC Session track. |
Reply-aided Detection of Misinformation via Bayesian Deep LearningInproceedingsZhang, Qiang; Lipani, Aldo; Liang, Shangsong; Yilmaz, Emine Proc. of WWW, 2019. @inproceedings{Zhang2019b, title = {Reply-aided Detection of Misinformation via Bayesian Deep Learning}, author = {Qiang Zhang and Aldo Lipani and Shangsong Liang and Emine Yilmaz}, doi = {10.1145/3308558.3313718}, year = {2019}, date = {2019-05-13}, booktitle = {Proc.~of WWW}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
From Stances’ Imbalance to Their Hierarchical Representation and DetectionInproceedingsZhang, Qiang; Liang, Shangsong; Lipani, Aldo; Ren, Zhaochun; Yilmaz, Emine Proc. of WWW, 2019. @inproceedings{Zhang2019c, title = {From Stances’ Imbalance to Their Hierarchical Representation and Detection}, author = {Qiang Zhang and Shangsong Liang and Aldo Lipani and Zhaochun Ren and Emine Yilmaz}, doi = {10.1145/3308558.3313724}, year = {2019}, date = {2019-05-13}, booktitle = {Proc.~of WWW}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
On Biases in Information Retrieval Models and EvaluationJournal ArticleLipani, Aldo SIGIR Forum, 52 (2), 2019. @article{Lipani:2019:BIR:3308774.3308804, title = {On Biases in Information Retrieval Models and Evaluation}, author = {Aldo Lipani}, doi = {10.1145/3308774.3308804}, year = {2019}, date = {2019-01-01}, journal = {SIGIR Forum}, volume = {52}, number = {2}, publisher = {ACM}, address = {New York, NY, USA}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
On the Learnability of Software Router Performance via CPU MeasurementsInproceedingsShelbourne, Charles; Linguaglossa, Leonardo; Lipani, Aldo; Zhang, Tianzhu; Geyer, Fabien Proc. of CoNEXT, pp. 23–25, Association for Computing Machinery, Orlando, FL, USA, 2019, ISBN: 9781450370066. @inproceedings{10.1145/3360468.3366776, title = {On the Learnability of Software Router Performance via CPU Measurements}, author = {Charles Shelbourne and Leonardo Linguaglossa and Aldo Lipani and Tianzhu Zhang and Fabien Geyer}, url = {https://www.researchgate.net/publication/337580746_On_the_Learnability_of_Software_Router_Performance_via_CPU_Measurements}, doi = {10.1145/3360468.3366776}, isbn = {9781450370066}, year = {2019}, date = {2019-01-01}, booktitle = {Proc.~of CoNEXT}, pages = {23--25}, publisher = {Association for Computing Machinery}, address = {Orlando, FL, USA}, series = {CoNEXT '19}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
2018 |
On Biases in Information Retrieval Models and EvaluationPhD ThesisLipani, Aldo TU Wien, 2018. @phdthesis{PhDLipani2018, title = {On Biases in Information Retrieval Models and Evaluation}, author = {Aldo Lipani}, url = {http://aldolipani.com/wp-content/uploads/2018/09/phd_thesis.pdf}, doi = {10.13140/RG.2.2.28623.74400}, year = {2018}, date = {2018-09-21}, school = {TU Wien}, abstract = {The advent of the modern information technology has benefited society as the digitisation of content increased over the last half-century. While the processing capability of our species has remained unchanged, the information available to us has been notably increasing. In this overload of information, Information Retrieval (IR) has been playing a prominent role by developing systems capable of separating relevant information from the rest. This separation, however, is a difficult task rooted in the complexity of understanding of what is and what is not relevant. To manage this complexity, IR has developed a strong empirical nature, which has led to the development of grounded retrieval models, resulting in the development of retrieval systems empirically designed to be biased towards relevant information. However, other biases have been observed, which counteract retrieval performance. In this thesis, the reduction of retrieval systems to filters of information, or sampling processes, has allowed us to systematically investigate these biases. We study biases manifesting in two aspects of IR research: retrieval models and retrieval evaluation. We start by identifying retrieval biases in probabilistic IR models and then develop new document priors to improve retrieval performance. Next, we discuss the accessibility bias of retrieval models, and for Boolean retrieval models we develop a mathematical framework of retrievability. For retrieval evaluation biases, we study how test collections are built using the pooling method and how this method introduces bias. 
Then, to improve the reliability of the evaluation, we first develop new pooling strategies to mitigate this bias at test collection build time and then, for two IR evaluation measures, Precision and Recall at cut-off (P@n and R@n), we develop new pool bias estimators to mitigate it at evaluation time. Through a large scale experimentation involving up to 15 test collections, four IR evaluation measures and three bias measures, we demonstrate that including document priors based on verboseness improves the performance of probabilistic retrieval models; that the accessibility bias of Boolean retrieval models quickly worsens for conjunctive queries with the increase of the query length (while slightly improving for disjunctive queries); that the test collection bias can be lowered at test collection build time by pooling strategies inspired by a well-known problem in reinforcement learning, the multi-armed bandit problem; and that this bias can also be improved at evaluation time by analysing the runs participating in the pool. For this last point in particular, we show that for P@n, bias reduction is done by quantifying the potential of the new system against the pooled runs, and for R@n, this is done instead by simulating the absence of a pooled run from the set of pooled runs. This thesis contributes to the IR field by giving a better understanding of relevance through the lens of biases in retrieval models and retrieval evaluation. The identification of these biases, and their exploitation or mitigation, leads to the development of better performing IR models and the improvement of the current IR evaluation practice.}, keywords = {}, pubstate = {published}, tppubtype = {phdthesis} } The advent of the modern information technology has benefited society as the digitisation of content increased over the last half-century. While the processing capability of our species has remained unchanged, the information available to us has been notably increasing. 
In this overload of information, Information Retrieval (IR) has been playing a prominent role by developing systems capable of separating relevant information from the rest. This separation, however, is a difficult task rooted in the complexity of understanding of what is and what is not relevant. To manage this complexity, IR has developed a strong empirical nature, which has led to the development of grounded retrieval models, resulting in the development of retrieval systems empirically designed to be biased towards relevant information. However, other biases have been observed, which counteract retrieval performance. In this thesis, the reduction of retrieval systems to filters of information, or sampling processes, has allowed us to systematically investigate these biases. We study biases manifesting in two aspects of IR research: retrieval models and retrieval evaluation. We start by identifying retrieval biases in probabilistic IR models and then develop new document priors to improve retrieval performance. Next, we discuss the accessibility bias of retrieval models, and for Boolean retrieval models we develop a mathematical framework of retrievability. For retrieval evaluation biases, we study how test collections are built using the pooling method and how this method introduces bias. Then, to improve the reliability of the evaluation, we first develop new pooling strategies to mitigate this bias at test collection build time and then, for two IR evaluation measures, Precision and Recall at cut-off (P@n and R@n), we develop new pool bias estimators to mitigate it at evaluation time. 
Through a large scale experimentation involving up to 15 test collections, four IR evaluation measures and three bias measures, we demonstrate that including document priors based on verboseness improves the performance of probabilistic retrieval models; that the accessibility bias of Boolean retrieval models quickly worsens for conjunctive queries with the increase of the query length (while slightly improving for disjunctive queries); that the test collection bias can be lowered at test collection build time by pooling strategies inspired by a well-known problem in reinforcement learning, the multi-armed bandit problem; and that this bias can also be improved at evaluation time by analysing the runs participating in the pool. For this last point in particular, we show that for P@n, bias reduction is done by quantifying the potential of the new system against the pooled runs, and for R@n, this is done instead by simulating the absence of a pooled run from the set of pooled runs. This thesis contributes to the IR field by giving a better understanding of relevance through the lens of biases in retrieval models and retrieval evaluation. The identification of these biases, and their exploitation or mitigation, leads to the development of better performing IR models and the improvement of the current IR evaluation practice. |
A Systematic Approach to Normalization in Probabilistic ModelsJournal ArticleLipani, Aldo; Roelleke, Thomas; Lupu, Mihai; Hanbury, Allan Information Retrieval Journal, 2018. @article{Lipani2018, title = {A Systematic Approach to Normalization in Probabilistic Models}, author = {Aldo Lipani and Thomas Roelleke and Mihai Lupu and Allan Hanbury}, doi = {10.1007/s10791-018-9334-1}, year = {2018}, date = {2018-06-30}, journal = {Information Retrieval Journal}, abstract = {Every information retrieval (IR) model embeds in its scoring function a form of term frequency (TF) quantification. The contribution of the term frequency is determined by the properties of the function of the chosen TF quantification, and by its TF normalization. The first defines how independent the occurrences of multiple terms are, while the second acts on mitigating the a priori probability of having a high term frequency in a document (estimation usually based on the document length). New test collections, coming from different domains (e.g. medical, legal), give evidence that not only document length, but in addition, verboseness of documents should be explicitly considered. Therefore we propose and investigate a systematic combination of document verboseness and length. To theoretically justify the combination, we show the duality between document verboseness and length. In addition, we investigate the duality between verboseness and other components of IR models. We test these new TF normalizations on four suitable test collections. We do this on a well defined spectrum of TF quantifications. Finally, based on the theoretical and experimental observations, we show how the two components of this new normalization, document verboseness and length, interact with each other. 
Our experiments demonstrate that the new models never underperform existing models, while sometimes introducing statistically significantly better results, at no additional computational cost.}, keywords = {}, pubstate = {published}, tppubtype = {article} } Every information retrieval (IR) model embeds in its scoring function a form of term frequency (TF) quantification. The contribution of the term frequency is determined by the properties of the function of the chosen TF quantification, and by its TF normalization. The first defines how independent the occurrences of multiple terms are, while the second acts on mitigating the a priori probability of having a high term frequency in a document (estimation usually based on the document length). New test collections, coming from different domains (e.g. medical, legal), give evidence that not only document length, but in addition, verboseness of documents should be explicitly considered. Therefore we propose and investigate a systematic combination of document verboseness and length. To theoretically justify the combination, we show the duality between document verboseness and length. In addition, we investigate the duality between verboseness and other components of IR models. We test these new TF normalizations on four suitable test collections. We do this on a well defined spectrum of TF quantifications. Finally, based on the theoretical and experimental observations, we show how the two components of this new normalization, document verboseness and length, interact with each other. Our experiments demonstrate that the new models never underperform existing models, while sometimes introducing statistically significantly better results, at no additional computational cost. |
2017 |
Fixed-Cost Pooling Strategies Based on IR Evaluation MeasuresInproceedingsLipani, Aldo; Palotti, Joao; Lupu, Mihai; Piroi, Florina; Zuccon, Guido; Hanbury, Allan Proc. of ECIR, 2017. @inproceedings{Lipani2017, title = {Fixed-Cost Pooling Strategies Based on IR Evaluation Measures}, author = {Aldo Lipani and Joao Palotti and Mihai Lupu and Florina Piroi and Guido Zuccon and Allan Hanbury}, doi = {10.1007/978-3-319-56608-5_28}, year = {2017}, date = {2017-01-01}, booktitle = {Proc.~of ECIR}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
Visual Pool: A Tool to Visualize and Interact with the Pooling MethodInproceedingsLipani, Aldo; Lupu, Mihai; Hanbury, Allan Proc. of SIGIR, 2017. @inproceedings{Lipani:2017:VPT:3077136.3084146, title = {Visual Pool: A Tool to Visualize and Interact with the Pooling Method}, author = {Aldo Lipani and Mihai Lupu and Allan Hanbury}, doi = {10.1145/3077136.3084146}, year = {2017}, date = {2017-01-01}, booktitle = {Proc.~of SIGIR}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
Fixed Budget Pooling Strategies Based on Fusion MethodsInproceedingsLipani, Aldo; Lupu, Mihai; Palotti, Joao; Zuccon, Guido; Hanbury, Allan Proc. of SAC, 2017. @inproceedings{Lipani:2017:FBP:3019612.3019692, title = {Fixed Budget Pooling Strategies Based on Fusion Methods}, author = {Aldo Lipani and Mihai Lupu and Joao Palotti and Guido Zuccon and Allan Hanbury}, doi = {10.1145/3019612.3019692}, year = {2017}, date = {2017-01-01}, booktitle = {Proc.~of SAC}, abstract = {The empirical nature of Information Retrieval (IR) mandates strong experimental practices. The Cranfield/TREC evaluation paradigm represents a keystone of such experimental practices. Within this paradigm, the generation of relevance judgments has been the subject of intense scientific investigation. This is because, on one hand, consistent, precise and numerous judgements are key to reduce evaluation uncertainty and test collection bias; on the other hand, however, relevance judgements are costly to collect. The selection of which documents to judge for relevance (known as pooling) has therefore great impact in IR evaluation. In this paper, we contribute a set of 8 novel pooling strategies based on retrieval fusion methods. We show that the choice of the pooling strategy has significant effects on the cost needed to obtain an unbiased test collection; we also identify the best performing pooling strategy according to three evaluation measures.}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } The empirical nature of Information Retrieval (IR) mandates strong experimental practices. The Cranfield/TREC evaluation paradigm represents a keystone of such experimental practices. Within this paradigm, the generation of relevance judgments has been the subject of intense scientific investigation. 
This is because, on one hand, consistent, precise and numerous judgements are key to reduce evaluation uncertainty and test collection bias; on the other hand, however, relevance judgements are costly to collect. The selection of which documents to judge for relevance (known as pooling) has therefore great impact in IR evaluation. In this paper, we contribute a set of 8 novel pooling strategies based on retrieval fusion methods. We show that the choice of the pooling strategy has significant effects on the cost needed to obtain an unbiased test collection; we also identify the best performing pooling strategy according to three evaluation measures. |
Spatio-temporal topsoil organic carbon mapping of a semi-arid Mediterranean region: The role of land use, soil texture, topographic indices and the influence of remote sensing data to modellingJournal ArticleSchillaci, Calogero; Acutis, Marco; Lombardo, Luigi; Lipani, Aldo; Fantappiè, Maria; Märker, Michael; Saia, Sergio Science of The Total Environment, 601-602, 2017. @article{SCHILLACI2017821, title = {Spatio-temporal topsoil organic carbon mapping of a semi-arid Mediterranean region: The role of land use, soil texture, topographic indices and the influence of remote sensing data to modelling}, author = {Calogero Schillaci and Marco Acutis and Luigi Lombardo and Aldo Lipani and Maria Fantappi\`{e} and Michael M\"{a}rker and Sergio Saia}, doi = {10.1016/j.scitotenv.2017.05.239}, year = {2017}, date = {2017-01-01}, journal = {Science of The Total Environment}, volume = {601-602}, keywords = {}, pubstate = {published}, tppubtype = {article} } |
2016 |
Fairness in Information RetrievalInproceedingsLipani, Aldo Proc. of SIGIR, 2016. @inproceedings{Lipani:2016:FIR:2911451.2911473, title = {Fairness in Information Retrieval}, author = {Aldo Lipani}, doi = {10.1145/2911451.2911473}, year = {2016}, date = {2016-01-01}, booktitle = {Proc.~of SIGIR}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
The Solitude of Relevant Documents in the PoolInproceedingsLipani, Aldo; Lupu, Mihai; Kanoulas, Evangelos; Hanbury, Allan Proc. of CIKM, 2016. @inproceedings{Lipani:2016:SRD:2983323.2983891, title = {The Solitude of Relevant Documents in the Pool}, author = {Aldo Lipani and Mihai Lupu and Evangelos Kanoulas and Allan Hanbury}, doi = {10.1145/2983323.2983891}, year = {2016}, date = {2016-01-01}, booktitle = {Proc.~of CIKM}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
The Curious Incidence of Bias Corrections in the PoolInproceedingsLipani, Aldo; Lupu, Mihai; Hanbury, Allan Proc. of ECIR, 2016. @inproceedings{Lipani2016, title = {The Curious Incidence of Bias Corrections in the Pool}, author = {Aldo Lipani and Mihai Lupu and Allan Hanbury}, doi = {10.1007/978-3-319-30671-1_20}, year = {2016}, date = {2016-01-01}, booktitle = {Proc.~of ECIR}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
The Impact of Fixed-Cost Pooling Strategies on Test Collection BiasInproceedingsLipani, Aldo; Zuccon, Guido; Lupu, Mihai; Koopman, Bevan; Hanbury, Allan Proc. of ICTIR, 2016. @inproceedings{Lipani:2016:IFP:2970398.2970429, title = {The Impact of Fixed-Cost Pooling Strategies on Test Collection Bias}, author = {Aldo Lipani and Guido Zuccon and Mihai Lupu and Bevan Koopman and Allan Hanbury}, doi = {10.1145/2970398.2970429}, year = {2016}, date = {2016-01-01}, booktitle = {Proc.~of ICTIR}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
2015 |
DASyR(IR) - Document Analysis System for Systematic Reviews (in Information Retrieval)InproceedingsPiroi, Florina; Lipani, Aldo; Lupu, Mihai; Hanbury, Allan Proc. of ICDAR, 2015. @inproceedings{7333830, title = {DASyR(IR) - Document Analysis System for Systematic Reviews (in Information Retrieval)}, author = {Florina Piroi and Aldo Lipani and Mihai Lupu and Allan Hanbury}, doi = {10.1109/ICDAR.2015.7333830}, year = {2015}, date = {2015-08-01}, booktitle = {Proc.~of ICDAR}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
Verboseness Fission for BM25 Document Length NormalizationInproceedingsLipani, Aldo; Lupu, Mihai; Hanbury, Allan; Aizawa, Akiko Proc. of ICTIR, 2015. @inproceedings{Lipani:2015:VFB:2808194.2809486, title = {Verboseness Fission for BM25 Document Length Normalization}, author = {Aldo Lipani and Mihai Lupu and Allan Hanbury and Akiko Aizawa}, doi = {10.1145/2808194.2809486}, year = {2015}, date = {2015-01-01}, booktitle = {Proc.~of ICTIR}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
An Initial Analytical Exploration of RetrievabilityInproceedingsLipani, Aldo; Lupu, Mihai; Aizawa, Akiko; Hanbury, Allan Proc. of ICTIR, 2015. @inproceedings{Lipani:2015:IAE:2808194.2809495, title = {An Initial Analytical Exploration of Retrievability}, author = {Aldo Lipani and Mihai Lupu and Akiko Aizawa and Allan Hanbury}, doi = {10.1145/2808194.2809495}, year = {2015}, date = {2015-01-01}, booktitle = {Proc.~of ICTIR}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
Splitting Water: Precision and Anti-Precision to Reduce Pool BiasInproceedingsLipani, Aldo; Lupu, Mihai; Hanbury, Allan Proc. of SIGIR, 2015. @inproceedings{Lipani:2015:SWP:2766462.2767749, title = {Splitting Water: Precision and Anti-Precision to Reduce Pool Bias}, author = {Aldo Lipani and Mihai Lupu and Allan Hanbury}, doi = {10.1145/2766462.2767749}, year = {2015}, date = {2015-01-01}, booktitle = {Proc.~of SIGIR}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
2014 |
TUW-IMP at the NTCIR-11 Math-2InproceedingsLipani, Aldo; Andersson, Linda; Piroi, Florina; Lupu, Mihai; Hanbury, Allan Proc. of NTCIR, 2014. @inproceedings{Lipani2014TUWIMPAT, title = {TUW-IMP at the NTCIR-11 Math-2}, author = {Aldo Lipani and Linda Andersson and Florina Piroi and Mihai Lupu and Allan Hanbury}, doi = {10.13140/2.1.1127.8404}, year = {2014}, date = {2014-01-01}, booktitle = {Proc.~of NTCIR}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
Extracting Nanopublications from IR PapersInproceedingsLipani, Aldo; Piroi, Florina; Andersson, Linda; Hanbury, Allan Proc. of IRFC, 2014. @inproceedings{Lipani2014b, title = {Extracting Nanopublications from IR Papers}, author = {Aldo Lipani and Florina Piroi and Linda Andersson and Allan Hanbury}, doi = {10.1007/978-3-319-12979-2_5}, year = {2014}, date = {2014-01-01}, booktitle = {Proc.~of IRFC}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |
An Information Retrieval Ontology for Information Retrieval NanopublicationsInproceedingsLipani, Aldo; Piroi, Florina; Andersson, Linda; Hanbury, Allan Proc. of CLEF, 2014. @inproceedings{Lipani2014c, title = {An Information Retrieval Ontology for Information Retrieval Nanopublications}, author = {Aldo Lipani and Florina Piroi and Linda Andersson and Allan Hanbury}, doi = {10.1007/978-3-319-11382-1_5}, year = {2014}, date = {2014-01-01}, booktitle = {Proc.~of CLEF}, keywords = {}, pubstate = {published}, tppubtype = {inproceedings} } |