2024
SummEQuAL: Summarization Evaluation via Question Answering using Large Language Models Miscellaneous
Liu, Junyuan; Shi, Zhengyan; Lipani, Aldo
2024.
@inproceedings{liu2024summequal,
title = {SummEQuAL: Summarization Evaluation via Question Answering using Large Language Models},
author = {Junyuan Liu and Zhengyan Shi and Aldo Lipani},
editor = {Bhavana Dalvi Mishra and Greg Durrett and Peter Jansen and Ben Lipkin and Danilo Neves Ribeiro and Lionel Wong and Xi Ye and Wenting Zhao},
url = {https://aclanthology.org/2024.nlrse-1.5},
year = {2024},
date = {2024-08-01},
urldate = {2024-08-01},
booktitle = {Proceedings of the 2nd Workshop on Natural Language Reasoning and Structured Explanations (@ACL 2024)},
pages = {46--55},
publisher = {Association for Computational Linguistics},
address = {Bangkok, Thailand},
abstract = {Summarization is hard to evaluate due to its diverse and abstract nature. Although N-gram-based metrics like BLEU and ROUGE are prevalent, they often do not align well with human evaluations. While model-based alternatives such as BERTScore improve, they typically require extensive labelled data. The advent of Large Language Models (LLMs) presents a promising avenue for evaluation. To this end, we introduce SummEQuAL, a novel content-based framework using LLMs for unified, reproducible summarization evaluation. SummEQuAL evaluates summaries by comparing their content with the source document, employing a question-answering approach to gauge both recall and precision. To validate SummEQuAL's effectiveness, we develop a dataset based on MultiWOZ. We conduct experiments on SummEval and our MultiWOZ-based dataset, showing that SummEQuAL largely improves the quality of summarization evaluation. Notably, SummEQuAL demonstrates a 19.7\% improvement over QuestEval in terms of sample-level Pearson correlation with human assessments of consistency on the SummEval dataset. Furthermore, it exceeds the performance of the BERTScore baseline by achieving a 17.3\% increase in Spearman correlation on our MultiWOZ-based dataset. Our study illuminates the potential of LLMs for a unified evaluation framework, setting a new paradigm for future summarization evaluation.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Transparent and Scrutable Recommendations Using Natural Language User Profiles Proceedings Article
Ramos, Jerome; Rahmani, Hossein A.; Wang, Xi; Fu, Xiao; Lipani, Aldo
In: Ku, Lun-Wei; Martins, Andre; Srikumar, Vivek (Ed.): Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 13971–13984, Association for Computational Linguistics, Bangkok, Thailand, 2024.
@inproceedings{ramos2024transparent,
title = {Transparent and Scrutable Recommendations Using Natural Language User Profiles},
author = {Jerome Ramos and Hossein A. Rahmani and Xi Wang and Xiao Fu and Aldo Lipani},
editor = {Lun-Wei Ku and Andre Martins and Vivek Srikumar},
url = {https://aclanthology.org/2024.acl-long.753},
year = {2024},
date = {2024-08-01},
booktitle = {Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
pages = {13971--13984},
publisher = {Association for Computational Linguistics},
address = {Bangkok, Thailand},
abstract = {Recent state-of-the-art recommender systems predominantly rely on either implicit or explicit feedback from users to suggest new items. While effective in recommending novel options, many recommender systems often use uninterpretable embeddings to represent user preferences. This lack of transparency not only limits user understanding of why certain items are suggested but also reduces the user's ability to scrutinize and modify their preferences, thereby affecting their ability to receive a list of preferred recommendations. Given the recent advances in Large Language Models (LLMs), we investigate how a properly crafted prompt can be used to summarize a user's preferences from past reviews and recommend items based only on language-based preferences. In particular, we study how LLMs can be prompted to generate a natural language (NL) user profile that holistically describe a user's preferences. These NL profiles can then be leveraged to fine-tune a LLM using only NL profiles to make transparent and scrutable recommendations. Furthermore, we validate the scrutability of our user profile-based recommender by investigating the impact on recommendation changes after editing NL user profiles. According to our evaluations of the model's rating prediction performance on two benchmarking rating prediction datasets, we observe that this novel approach maintains a performance level on par with established recommender systems in a warm-start setting. With a systematic analysis into the effect of updating user profiles and system prompts, we show the advantage of our approach in easier adjustment of user preferences and a greater autonomy over users' received recommendations.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Report on the Collab-a-Thon at ECIR 2024 Miscellaneous
MacAvaney, Sean; Roegiest, Adam; Lipani, Aldo; Parry, Andrew; Engelmann, Björn; Kreutz, Christin Katharina; Meng, Chuan; Frayling, Erlend; Yang, Eugene; Schlatt, Ferdinand; Faggioli, Guglielmo; Scells, Harrisen; Atanassova, Iana; Friese, Jana; Bevendorff, Janek; Sanz-Cruzado, Javier; Trippas, Johanne; Pathak, Kanaad; Dhole, Kaustubh; Azzopardi, Leif; Fröbe, Maik; Bertin, Marc; Prasad, Nishchal; Zerhoudi, Saber; Wang, Shuai; Chatterjee, Shubham; Jaenich, Thomas; Kruschwitz, Udo; Wang, Xi; Long, Zijun
2024, ISSN: 0163-5840.
@article{macavaney2024report,
title = {Report on the Collab-a-Thon at ECIR 2024},
author = {Sean MacAvaney and Adam Roegiest and Aldo Lipani and Andrew Parry and Bj{\"o}rn Engelmann and Christin Katharina Kreutz and Chuan Meng and Erlend Frayling and Eugene Yang and Ferdinand Schlatt and Guglielmo Faggioli and Harrisen Scells and Iana Atanassova and Jana Friese and Janek Bevendorff and Javier Sanz-Cruzado and Johanne Trippas and Kanaad Pathak and Kaustubh Dhole and Leif Azzopardi and Maik Fr{\"o}be and Marc Bertin and Nishchal Prasad and Saber Zerhoudi and Shuai Wang and Shubham Chatterjee and Thomas Jaenich and Udo Kruschwitz and Xi Wang and Zijun Long},
url = {https://doi.org/10.1145/3687273.3687287},
doi = {10.1145/3687273.3687287},
issn = {0163-5840},
year = {2024},
date = {2024-08-01},
urldate = {2024-08-01},
journal = {SIGIR Forum},
volume = {58},
number = {1},
pages = {1--11},
publisher = {Association for Computing Machinery},
address = {New York, NY, USA},
abstract = {We present a report on the Collab-a-thon, a series of sessions at the European Conference on Information Retrieval (ECIR) 2024 designed to help foster new collaborations during a conference. This report presents the motivation and design of the Collab-a-thon, a summary of the discussions covered at each session, and a set of recommendations for conducting similar events in the future. The event is set to run again at ECIR 2025 and planning is underway to pilot the event in a different community at the IEEE International Conference on Distributed Computing Systems (ICDCS) 2025. Date: 25--27 March 2024. Website: https://www.ecir2024.org/collab-a-thon/.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Do Sentence Transformers Learn Quasi-Geospatial Concepts from General Text? Miscellaneous
Ilyankou, Ilya; Lipani, Aldo; Cavazzi, Stefano; Gao, Xiaowei; Haworth, James
2024.
@misc{ilyankou2024sentence,
title = {Do Sentence Transformers Learn Quasi-Geospatial Concepts from General Text?},
author = {Ilya Ilyankou and Aldo Lipani and Stefano Cavazzi and Xiaowei Gao and James Haworth},
url = {https://arxiv.org/abs/2404.04169},
eprint = {2404.04169},
eprinttype = {arXiv},
year = {2024},
date = {2024-01-01},
urldate = {2024-01-01},
keywords = {},
pubstate = {published},
tppubtype = {misc}
}
Building and Evaluating a WebApp for Effortless Deep Learning Model Deployment Proceedings Article
Wu, Ruikun; Han, Jiaxuan; Ramos, Jerome; Lipani, Aldo
In: Advances in Information Retrieval, pp. 246–250, Springer Nature Switzerland, Cham, 2024, ISBN: 978-3-031-56069-9.
@inproceedings{ruikun2024building,
title = {Building and Evaluating a WebApp for Effortless Deep Learning Model Deployment},
author = {Ruikun Wu and Jiaxuan Han and Jerome Ramos and Aldo Lipani},
isbn = {978-3-031-56069-9},
year = {2024},
date = {2024-01-01},
booktitle = {Advances in Information Retrieval},
pages = {246--250},
publisher = {Springer Nature Switzerland},
address = {Cham},
abstract = {In the field of deep learning, particularly Natural Language Processing (NLP), model deployment is a key process for public testing and analysis. However, developing a deployment pipeline is often difficult and time-consuming. To address this challenge, we developed SUD.DL, a web application to simplify the model deployment process for NLP researchers. Our application provides significant improvements in deployment efficiency, functionality discoverability, and deployment functionality, allowing NLP researchers to quickly deploy and test models on the web.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Do Sentence Transformers Learn Quasi-Geospatial Concepts from General Text? Proceedings Article
Ilyankou, Ilya; Lipani, Aldo; Cavazzi, Stefano; Gao, Xiaowei; Haworth, James
In: Second International Workshop on Geographic Information Extraction from Texts at ECIR 2024, 2024.
@inproceedings{ilyankou2024sentenceb,
  author     = {Ilyankou, Ilya and Lipani, Aldo and Cavazzi, Stefano and Gao, Xiaowei and Haworth, James},
  title      = {Do Sentence Transformers Learn Quasi-Geospatial Concepts from General Text?},
  booktitle  = {Second International Workshop on Geographic Information Extraction from Texts at ECIR 2024},
  year       = {2024},
  date       = {2024-01-01},
  url        = {https://arxiv.org/abs/2404.04169},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings},
}
Understanding Users' Confidence in Spoken Queries for Conversational Search Systems Proceedings Article
Yu, Youjing; Shi, Zhengxiang; Lipani, Aldo
In: Iliadis, Lazaros; Maglogiannis, Ilias; Papaleonidas, Antonios; Pimenidis, Elias; Jayne, Chrisina (Ed.): Engineering Applications of Neural Networks, pp. 405–418, Springer Nature Switzerland, Cham, 2024, ISBN: 978-3-031-62495-7.
@inproceedings{yu2024spoken,
title = {Understanding Users' Confidence in Spoken Queries for Conversational Search Systems},
author = {Youjing Yu and Zhengxiang Shi and Aldo Lipani},
editor = {Lazaros Iliadis and Ilias Maglogiannis and Antonios Papaleonidas and Elias Pimenidis and Chrisina Jayne},
isbn = {978-3-031-62495-7},
year = {2024},
date = {2024-01-01},
booktitle = {Engineering Applications of Neural Networks},
pages = {405--418},
publisher = {Springer Nature Switzerland},
address = {Cham},
abstract = {The confidence level in users' speech has long been recognised as an important signal in traditional dialogue systems. In this work, we highlight the importance of user confidence detection in queries in conversational search systems (CSSs). Accurately estimating a user's confidence level in CSSs is important because it enables the CSSs to infer the degree of competency of a user on the queried topic and subsequently tailor its responses appropriately. This is especially important in CSSs since their responses need to be concise and precise. However, few prior works have evaluated user confidence in CSSs due to a lack of available datasets. We present a novel speech-based dataset named UNderstanding Spoken qUeRiEs (UNSURE) (Code and instructions on how to obtain this dataset is available at https://github.com/YoujingYu99/confidence_css), which contains confidence grading annotations of user queries in natural language conversations. Based on this dataset, we propose a multimodal approach to infer users' confidence in spoken queries as a baseline model. Preliminary experimental results demonstrate that our proposed fusion model is capable of achieving near human-level performance.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
EXtrA-ShaRC: Explainable and Scrutable Reading Comprehension for Conversational Systems Proceedings Article
Ramos, Jerome; Lipani, Aldo
In: Proceedings of the 32nd ACM Conference on User Modeling, Adaptation and Personalization, pp. 47–56, Association for Computing Machinery, Cagliari, Italy, 2024, ISBN: 9798400704338.
@inproceedings{ramos2024extra,
title = {EXtrA-ShaRC: Explainable and Scrutable Reading Comprehension for Conversational Systems},
author = {Jerome Ramos and Aldo Lipani},
url = {https://doi.org/10.1145/3627043.3659546},
doi = {10.1145/3627043.3659546},
isbn = {9798400704338},
year = {2024},
date = {2024-01-01},
booktitle = {Proceedings of the 32nd ACM Conference on User Modeling, Adaptation and Personalization},
pages = {47--56},
publisher = {Association for Computing Machinery},
address = {Cagliari, Italy},
series = {UMAP '24},
abstract = {Conversational Machine Reading (CMR) systems answer high-level user questions by interpreting contextual information, asking clarification questions, and generating human-like responses. While effective, such systems often use knowledge about the task and the user in a non-transparent and non-scrutable way. For example, if a user wants to ask questions like ``Why are you asking this?'' or ``Why is this the correct answer?'', the system should be able to highlight and return the relevant information that led to the decision in an interpretable manner. Similarly, if a user scrutinizes and edits their user profile, the final output of the model should change accordingly. To test the transparency and scrutability of conversational machine reading systems, we formalize two new tasks by extending the ShARC dataset to create the EXtrA-ShARC dataset. For transparency, we propose a baseline model that can simultaneously extract explanations and answer the user's question. We will also publicly release counterfactual user profiles to test scrutability for all CMR models. Our dataset opens up a range of research directions for using natural language explanations and counterfactual profiles in conversational systems, both for evaluating the model and increasing transparency for end users.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Fallen apple detection as an auxiliary task: Boosting robotic apple detection performance through multi-task learning Journal Article
Zhao, Jiayi; Lipani, Aldo; Schillaci, Calogero
In: Smart Agricultural Technology, vol. 8, pp. 100436, 2024, ISSN: 2772-3755.
@article{zhao2024fallen,
title = {Fallen apple detection as an auxiliary task: Boosting robotic apple detection performance through multi-task learning},
author = {Jiayi Zhao and Aldo Lipani and Calogero Schillaci},
url = {https://www.sciencedirect.com/science/article/pii/S2772375524000418},
doi = {10.1016/j.atech.2024.100436},
issn = {2772-3755},
year = {2024},
date = {2024-01-01},
journal = {Smart Agricultural Technology},
volume = {8},
pages = {100436},
abstract = {In modern agricultural practices, advanced machine learning techniques play a pivotal role in optimizing yields and management. A significant challenge in orchard management is detecting apples on trees, which is essential for effective harvest planning and yield estimation. The YOLO series, especially the YOLOv8 model, stands out as a state-of-the-art solution for object detection, but its potential in orchards remains untapped. Addressing this, our study evaluates YOLOv8's capability in orchard apple detection, aiming to set a benchmark. By employing image augmentation techniques like exposure, rotation, mosaic, and cutout, we lifted the model's performance to a state-of-the-art level. We further integrated multi-task learning, enhancing tree apple detection by also identifying apples on the ground. This approach resulted in a model with robust accuracy across evaluation metrics. Our results underscore that the YOLOv8 model achieves a leading standard in orchard apple detection. When trained for both tree and fallen apple detection, it outperformed the one when trained exclusively for the former. Recognizing fallen apples not only reduces waste but could also indicate pest activity, influencing strategic orchard decisions and potentially boosting economic returns. Merging cutting-edge tech with agricultural needs, our research showcases the promise of multi-task learning in fruit detection with deep learning.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
An Analysis of Stopping Strategies in Conversational Search Systems Proceedings Article
Fu, Xiao; Perez-Ortiz, Maria; Lipani, Aldo
In: Proceedings of the 2024 ACM SIGIR International Conference on Theory of Information Retrieval, pp. 247–257, Association for Computing Machinery, Washington DC, USA, 2024, ISBN: 9798400706813.
@inproceedings{xiao2024stopping,
title = {An Analysis of Stopping Strategies in Conversational Search Systems},
author = {Xiao Fu and Maria Perez-Ortiz and Aldo Lipani},
url = {https://doi.org/10.1145/3664190.3672524},
doi = {10.1145/3664190.3672524},
isbn = {9798400706813},
year = {2024},
date = {2024-01-01},
booktitle = {Proceedings of the 2024 ACM SIGIR International Conference on Theory of Information Retrieval},
pages = {247--257},
publisher = {Association for Computing Machinery},
address = {Washington DC, USA},
series = {ICTIR '24},
abstract = {Stopping strategies are a crucial aspect of conversational systems and user simulations, as they provide insight into when users end their interactions, which is vital for creating realistic simulations. While the Information Retrieval (IR) community has studied this topic extensively, little research has been done on stopping strategies in Conversational Search Systems (CSSs). This is due to conversations' unique sequential and interactive nature, where traditional IR techniques struggle to accurately predict stopping points well, and require new methods to be adapted from traditional IR techniques. In this paper, we adapt Stopping Rules (SRs) from the IR community to the conversational setting, creating new SRs and identifying core features for each. We then analyze these features with several conversational datasets and aim to identify key features that predict stopping points in conversations between users and CSSs. We found that models based on these features performed well in predicting stopping points and that textual statistical features, i.e., numbers of words, nouns, noun phrases and sentences users received from systems or outputted by users, always play a significant role in determining stopping points, with the number of outputted unique nouns playing a particularly important role as an SR. Our results provide a foundation for developing more realistic user models and simulators and for guiding the design of more reliable evaluation measures for CSSs.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Normalised Precision at Fixed Recall for Evaluating TAR Proceedings Article
Kusa, Wojciech; Peikos, Georgios; Staudinger, Moritz; Lipani, Aldo; Hanbury, Allan
In: Proceedings of the 2024 ACM SIGIR International Conference on Theory of Information Retrieval, pp. 43–49, Association for Computing Machinery, Washington DC, USA, 2024, ISBN: 9798400706813.
@inproceedings{wojciech2024normalised,
title = {Normalised Precision at Fixed Recall for Evaluating TAR},
author = {Wojciech Kusa and Georgios Peikos and Moritz Staudinger and Aldo Lipani and Allan Hanbury},
url = {https://doi.org/10.1145/3664190.3672532},
doi = {10.1145/3664190.3672532},
isbn = {9798400706813},
year = {2024},
date = {2024-01-01},
booktitle = {Proceedings of the 2024 ACM SIGIR International Conference on Theory of Information Retrieval},
pages = {43--49},
publisher = {Association for Computing Machinery},
address = {Washington DC, USA},
series = {ICTIR '24},
abstract = {A popular approach to High-Recall Information Retrieval (HRIR) is Technology-Assisted Review (TAR), which uses information retrieval and machine learning techniques to aid the review of large document collections. TAR systems are commonly used in legal eDiscovery and medical systematic literature reviews. Successful TAR systems are able to find the majority of relevant documents using the least number of manual assessments. Previous work typically evaluated TAR models retrospectively, assuming that the system achieves a specific, fixed Recall level first and then measuring model quality (for instance, work saved at r\% Recall). This paper presents an analysis of one of such measures: Precision at r\% Recall (P@r\%). We show that minimum Precision at r\% scores depends on the dataset, and therefore, this measure should not be used for evaluation across topics or datasets. We propose its min-max normalised version (nP@r\%), and show that it is equal to a product of TNR and Precision scores. Our analysis shows that nP@r\% is least correlated with the percentage of relevant documents in the dataset and can be used to focus on additional aspects of the TAR tasks that are not captured with current measures. Finally, we introduce a variation of nP@r\%, that is a geometric mean of TNR and Precision, preserving the properties of nP@r\% and having a lower coefficient of variation.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
2023
``Dr LLM, what do I have?'': The Impact of User Beliefs and Prompt Formulation on Health Diagnoses Proceedings Article
Kusa, Wojciech; Mosca, Edoardo; Lipani, Aldo
In: Khosla, Sopan (Ed.): Proceedings of the Third Workshop on NLP for Medical Conversations, pp. 13–19, Association for Computational Linguistics, Bali, Indonesia, 2023.
@inproceedings{kusa-etal-2023-dr,
title = {``Dr LLM, what do I have?'': The Impact of User Beliefs and Prompt Formulation on Health Diagnoses},
author = {Wojciech Kusa and Edoardo Mosca and Aldo Lipani},
editor = {Sopan Khosla},
url = {https://aclanthology.org/2023.nlpmc-1.2},
year = {2023},
date = {2023-11-01},
booktitle = {Proceedings of the Third Workshop on NLP for Medical Conversations},
pages = {13--19},
publisher = {Association for Computational Linguistics},
address = {Bali, Indonesia},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
A Survey on Asking Clarification Questions Datasets in Conversational Systems Proceedings Article
Rahmani, Hossein A.; Wang, Xi; Feng, Yue; Zhang, Qiang; Yilmaz, Emine; Lipani, Aldo
In: Rogers, Anna; Boyd-Graber, Jordan; Okazaki, Naoaki (Ed.): Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), pp. 2698–2716, Association for Computational Linguistics, Toronto, Canada, 2023.
@inproceedings{rahmani-etal-2023-survey,
title = {A Survey on Asking Clarification Questions Datasets in Conversational Systems},
author = {Hossein A. Rahmani and Xi Wang and Yue Feng and Qiang Zhang and Emine Yilmaz and Aldo Lipani},
editor = {Anna Rogers and Jordan Boyd-Graber and Naoaki Okazaki},
url = {https://aclanthology.org/2023.acl-long.152},
doi = {10.18653/v1/2023.acl-long.152},
year = {2023},
date = {2023-07-01},
booktitle = {Proceedings of the 61st Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
pages = {2698--2716},
publisher = {Association for Computational Linguistics},
address = {Toronto, Canada},
abstract = {The ability to understand a user's underlying needs is critical for conversational systems, especially with limited input from users in a conversation. Thus, in such a domain, Asking Clarification Questions (ACQs) to reveal users' true intent from their queries or utterances arise as an essential task. However, it is noticeable that a key limitation of the existing ACQs studies is their incomparability, from inconsistent use of data, distinct experimental setups and evaluation strategies. Therefore, in this paper, to assist the development of ACQs techniques, we comprehensively analyse the current ACQs research status, which offers a detailed comparison of publicly available datasets, and discusses the applied evaluation metrics, joined with benchmarks for multiple ACQs-related tasks. In particular, given a thorough analysis of the ACQs task, we discuss a number of corresponding research directions for the investigation of ACQs as well as the development of conversational systems.},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Automatic blight disease detection in potato (Solanum tuberosum L.) and tomato (Solanum lycopersicum, L. 1753) plants using deep learning Journal Article
Anim-Ayeko, Alberta Odamea; Schillaci, Calogero; Lipani, Aldo
In: Smart Agricultural Technology, vol. 4, pp. 100178, 2023, ISSN: 2772-3755.
@article{ANIMAYEKO2023100178,
title = {Automatic blight disease detection in potato (Solanum tuberosum L.) and tomato (Solanum lycopersicum, L. 1753) plants using deep learning},
author = {Alberta Odamea Anim-Ayeko and Calogero Schillaci and Aldo Lipani},
url = {https://www.sciencedirect.com/science/article/pii/S2772375523000084},
doi = {10.1016/j.atech.2023.100178},
issn = {2772-3755},
year = {2023},
date = {2023-01-01},
journal = {Smart Agricultural Technology},
volume = {4},
pages = {100178},
abstract = {Early and late blight are two diseases which pose a huge risk to both potato (Solanum tuberosum L.) and tomato (Solanum lycopersicum, L. 1753) crops and make farmers run at a loss. The early and automatic detection of these diseases would save time as well as enable farmers to act quickly on crops which have been affected. Machine learning and deep learning technology provide many solutions for the detection of the blight diseases in affected crops, and are common in the literature. However, explanation methods for such solutions are not common, but are necessary, considering some machine learning models are seen as black boxes. This study proposes a ResNet-9 model which detects the blight disease state of potato and tomato leaf images, which farmers can leverage. With the data obtained from the popular ``Plant Village Dataset'', there were 3,990 initial training data samples. After augmenting the training set and a rigorous hyperparameter optimization procedure, the model was trained with these hyperparameter values, and examined on the test set, which contained 1,331 images. A test accuracy of 99.25\%, 99.67\% overall precision, 99.33\% overall recall and 99.33\% overall F1-score values were achieved. To fully understand the model, explanations for the proposed model were provided through saliency maps, which showed the reasoning behind the predictions of the model. It was observed that the ResNet-9 model considered the shape of the leaf, diseased areas present and general green areas of the leaf for its predictions and this makes us understand the model predictions better and see that the model behaves as expected. Our results could contribute to the testing and deployment of Convolutional Neural Network (CNN) models for classification of proximal sensing images of potato (Solanum tuberosum L.) and tomato (Solanum lycopersicum, L. 1753) plant leaves.
Further studies would benefit from this modeling framework and would have the chance to test several other variables to determine the leaf infections in an earlier stage for crop protection.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
An Analysis of Work Saved over Sampling in the Evaluation of Automated Citation Screening in Systematic Literature Reviews Journal Article
Kusa, Wojciech; Lipani, Aldo; Knoth, Petr; Hanbury, Allan
In: Intelligent Systems with Applications, pp. 200193, 2023, ISSN: 2667-3053.
@article{KUSA2023200193,
title = {An Analysis of Work Saved over Sampling in the Evaluation of Automated Citation Screening in Systematic Literature Reviews},
author = {Wojciech Kusa and Aldo Lipani and Petr Knoth and Allan Hanbury},
url = {https://www.sciencedirect.com/science/article/pii/S2667305323000182},
doi = {10.1016/j.iswa.2023.200193},
issn = {2667-3053},
year = {2023},
date = {2023-01-01},
journal = {Intelligent Systems with Applications},
pages = {200193},
abstract = {Citation screening is an essential and time-consuming step of the systematic literature review process in medicine. Multiple previous studies have proposed various automation techniques to assist manual annotators in this tedious task. The most widely used measure for the evaluation of automated citation screening techniques is Work Saved over Sampling (WSS). In this work, we analyse this measure and examine its drawbacks. We subsequently propose to normalise WSS which enables citation screening performance comparisons across different systematic reviews. We analytically show that normalised WSS is equivalent to the True Negative Rate (TNR). Finally, we provide benchmark scores for fifteen systematic review datasets with TNR@95\% recall measure and compare the measure with Precision and AUC.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Quantifying the Bias of Transformer-Based Language Models for African American English in Masked Language Modeling Proceedings Article
Salutari, Flavia; Ramos, Jerome; Rahmani, Hossein A.; Linguaglossa, Leonardo; Lipani, Aldo
In: Proceedings of the 27th Pacific-Asia Conference on Knowledge Discovery and Data Mining, 2023.
@inproceedings{salutari-etal-2023-quantifying,
  author     = {Salutari, Flavia and Ramos, Jerome and Rahmani, Hossein A. and Linguaglossa, Leonardo and Lipani, Aldo},
  title      = {Quantifying the Bias of Transformer-Based Language Models for African American English in Masked Language Modeling},
  booktitle  = {Proceedings of the 27th Pacific-Asia Conference on Knowledge Discovery and Data Mining},
  series     = {PAKDD '23},
  year       = {2023},
  date       = {2023-01-01},
  urldate    = {2023-01-01},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings},
}
When and What to Ask Through World States and Text Instructions: IGLU NLP Challenge Solution Miscellaneous
Shi, Zhengxiang; Ramos, Jerome; Kim, To Eun; Wang, Xi; Rahmani, Hossein A.; Lipani, Aldo
2023.
@misc{shi2023ask,
  author     = {Shi, Zhengxiang and Ramos, Jerome and Kim, To Eun and Wang, Xi and Rahmani, Hossein A. and Lipani, Aldo},
  title      = {When and What to Ask Through World States and Text Instructions: IGLU NLP Challenge Solution},
  year       = {2023},
  date       = {2023-01-01},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {misc},
}
Climate and environmental data contribute to the prediction of grain commodity prices using deep learning Journal Article
Wang, Zilin; French, Niamh; James, Thomas; Schillaci, Calogero; Chan, Faith; Feng, Meili; Lipani, Aldo
In: Journal of Sustainable Agriculture and Environment, 2023.
@article{zilin-et-al-2023-climate,
title = {Climate and environmental data contribute to the prediction of grain commodity prices using deep learning},
author = {Zilin Wang and Niamh French and Thomas James and Calogero Schillaci and Faith Chan and Meili Feng and Aldo Lipani},
url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/sae2.12041},
doi = {10.1002/sae2.12041},
year = {2023},
date = {2023-01-01},
journal = {Journal of Sustainable Agriculture and Environment},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Unlocking the Potential of User Feedback: Leveraging Large Language Model as User Simulators to Enhance Dialogue System Proceedings Article
Hu, Zhiyuan; Feng, Yue; Luu, Anh Tuan; Hooi, Bryan; Lipani, Aldo
In: Proceedings of the 32nd ACM International Conference on Information and Knowledge Management, pp. 3953–3957, Association for Computing Machinery, Birmingham, United Kingdom, 2023, ISBN: 9798400701245.
@inproceedings{10.1145/3583780.3615220,
  author    = {Hu, Zhiyuan and Feng, Yue and Luu, Anh Tuan and Hooi, Bryan and Lipani, Aldo},
  title     = {Unlocking the Potential of User Feedback: Leveraging Large Language Model as User Simulators to Enhance Dialogue System},
  booktitle = {Proceedings of the 32nd ACM International Conference on Information and Knowledge Management},
  series    = {CIKM '23},
  pages     = {3953--3957},
  publisher = {Association for Computing Machinery},
  address   = {Birmingham, United Kingdom},
  year      = {2023},
  date      = {2023-01-01},
  isbn      = {9798400701245},
  doi       = {10.1145/3583780.3615220},
  url       = {https://doi.org/10.1145/3583780.3615220},
  abstract  = {Dialogue systems and large language models (LLMs) have gained considerable attention. However, the direct utilization of LLMs as task-oriented dialogue (TOD) models has been found to underperform compared to smaller task-specific models. Nonetheless, it is crucial to acknowledge the significant potential of LLMs and explore improved approaches for leveraging their impressive abilities. Motivated by the goal of leveraging LLMs, we propose an alternative approach called User-Guided Response Optimization (UGRO) to combine it with a smaller TOD model. This approach uses LLM as an annotation-free user simulator to assess dialogue responses, combining them with smaller fine-tuned end-to-end TOD models. By utilizing the satisfaction feedback generated by LLMs, UGRO further optimizes the supervised fine-tuned TOD model. Specifically, the TOD model takes the dialogue history as input and, with the assistance of the user simulator's feedback, generates high-satisfaction responses that meet the user's requirements. Through empirical experiments on two TOD benchmarks, we validate the effectiveness of our method. The results demonstrate that our approach outperforms previous state-of-the-art (SOTA) results.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
VoMBaT: A Tool for Visualising Evaluation Measure Behaviour in High-Recall Search Tasks Proceedings Article
Kusa, Wojciech; Lipani, Aldo; Knoth, Petr; Hanbury, Allan
In: Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 3105–3109, Association for Computing Machinery, Taipei, Taiwan, 2023, ISBN: 9781450394086.
@inproceedings{10.1145/3539618.3591802,
  author    = {Kusa, Wojciech and Lipani, Aldo and Knoth, Petr and Hanbury, Allan},
  title     = {VoMBaT: A Tool for Visualising Evaluation Measure Behaviour in High-Recall Search Tasks},
  booktitle = {Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval},
  series    = {SIGIR '23},
  pages     = {3105--3109},
  publisher = {Association for Computing Machinery},
  address   = {Taipei, Taiwan},
  year      = {2023},
  date      = {2023-01-01},
  isbn      = {9781450394086},
  doi       = {10.1145/3539618.3591802},
  url       = {https://doi.org/10.1145/3539618.3591802},
  abstract  = {The objective of High-Recall Information Retrieval (HRIR) is to retrieve as many relevant documents as possible for a given search topic. One approach to HRIR is Technology-Assisted Review (TAR), which uses information retrieval and machine learning techniques to aid the review of large document collections. TAR systems are commonly used in legal eDiscovery and systematic literature reviews. Successful TAR systems are able to find the majority of relevant documents using the least number of assessments. Commonly used retrospective evaluation assumes that the system achieves a specific, fixed recall level first, and then measures the precision or work saved (e.g., precision at r\% recall). This approach can cause problems related to understanding the behaviour of evaluation measures in a fixed recall setting. It is also problematic when estimating time and money savings during technology-assisted reviews. This paper presents a new visual analytics tool to explore the dynamics of evaluation measures depending on recall level. We implemented 18 evaluation measures based on the confusion matrix terms, both from general IR tasks and specific to TAR. The tool allows for a comparison of the behaviour of these measures in a fixed recall evaluation setting. It can also simulate savings in time and money and a count of manual vs automatic assessments for different datasets depending on the model quality. The tool is open-source, and the demo is available under the following URL: https://vombat.streamlit.app.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Rethink the Effectiveness of Text Data Augmentation: An Empirical Analysis Proceedings Article
Shi, Zhengxiang; Lipani, Aldo
2023.
@inproceedings{shi2023rethink,
  author    = {Shi, Zhengxiang and Lipani, Aldo},
  title     = {Rethink the Effectiveness of Text Data Augmentation: An Empirical Analysis},
  series    = {ESANN},
  year      = {2023},
  date      = {2023-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Self Contrastive Learning for Session-based Recommendation Miscellaneous
Shi, Zhengxiang; Wang, Xi; Lipani, Aldo
2023.
@misc{shi2023self,
  author    = {Shi, Zhengxiang and Wang, Xi and Lipani, Aldo},
  title     = {Self Contrastive Learning for Session-based Recommendation},
  year      = {2023},
  date      = {2023-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {misc}
}
Priming and Actions: An Analysis in Conversational Search Systems Proceedings Article
Fu, Xiao; Lipani, Aldo
In: Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval, pp. 2277–2281, Association for Computing Machinery, Taipei, Taiwan, 2023, ISBN: 9781450394086.
@inproceedings{10.1145/3539618.3592041,
  author    = {Fu, Xiao and Lipani, Aldo},
  title     = {Priming and Actions: An Analysis in Conversational Search Systems},
  booktitle = {Proceedings of the 46th International ACM SIGIR Conference on Research and Development in Information Retrieval},
  series    = {SIGIR '23},
  pages     = {2277--2281},
  publisher = {Association for Computing Machinery},
  address   = {Taipei, Taiwan},
  year      = {2023},
  date      = {2023-01-01},
  isbn      = {9781450394086},
  doi       = {10.1145/3539618.3592041},
  url       = {https://doi.org/10.1145/3539618.3592041},
  abstract  = {In order to accurately simulate users in conversational systems, it is essential to comprehend the factors that influence their behaviour. This is a critical challenge for the Information Retrieval (IR) field, as conventional methods are not well-suited for the interactive and unique sequential structure of conversational contexts. In this study, we employed the concept of Priming effects from the Psychology literature to identify core stimuli for each abstracted effect. We then examined these stimuli on various datasets to investigate their correlations with users' actions. Finally, we trained Logistic Regression (LR) models based on these stimuli to anticipate users' actions. Our findings offer a basis for creating more realistic user models and simulators, as we identified the subset of stimuli with strong relationships with users' actions. Additionally, we built a model that can predict users' actions.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
2022
Tab this Folder of Documents: Page Stream Segmentation of Business Documents Proceedings Article 🏆 Best Paper
Mungmeeprued, Thisanaporn; Ma, Yuxin; Mehta, Nisarg; Lipani, Aldo
In: Proceedings of the ACM Symposium on Document Engineering, ACM, San Jose, CA, USA, 2022.
@inproceedings{mungmeeprued-etal-2022-tab-this-best,
  author    = {Mungmeeprued, Thisanaporn and Ma, Yuxin and Mehta, Nisarg and Lipani, Aldo},
  title     = {Tab this Folder of Documents: Page Stream Segmentation of Business Documents},
  booktitle = {Proceedings of the ACM Symposium on Document Engineering},
  series    = {DocEng '22},
  publisher = {ACM},
  address   = {San Jose, CA, USA},
  year      = {2022},
  date      = {2022-09-20},
  url       = {https://www.researchgate.net/publication/363113372_Tab_this_Folder_of_Documents_Page_Stream_Segmentation_of_Business_Documents},
  urldate   = {2022-09-20},
  abstract  = {In the midst of digital transformation, automatically understanding the structure and composition of scanned documents is important in order to allow correct indexing, archiving, and processing. In many organizations, different types of documents are usually scanned together in folders, so it is essential to automate the task of segmenting the folders into documents which then proceed to further analysis tailored to specific document types. This task is known as Page Stream Segmentation (PSS). In this paper, we propose a deep learning solution to solve the task of determining whether or not a page is a breaking-point given a sequence of scanned pages (a folder) as input. We also provide a dataset called TABME (TAB this folder of docuMEnts) generated specifically for this task. Our proposed architecture combines LayoutLM and ResNet to exploit both textual and visual features of the document pages and achieves an F1 score of 0.953. The dataset and code used to run the experiments in this paper are available at the following web link: https://github.com/aldolipani/TABME.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Evaluating the Cranfield Paradigm for Conversational Search Systems Proceedings Article 🏆 Best Paper
Fu, Xiao; Yilmaz, Emine; Lipani, Aldo
In: Proceedings of the 12th International Conference on The Theory of Information Retrieval, ACM, Madrid, Spain, 2022.
@inproceedings{fu-etal-2022-evaluating-best,
  author    = {Fu, Xiao and Yilmaz, Emine and Lipani, Aldo},
  title     = {Evaluating the Cranfield Paradigm for Conversational Search Systems},
  booktitle = {Proceedings of the 12th International Conference on The Theory of Information Retrieval},
  series    = {ICTIR '22},
  publisher = {ACM},
  address   = {Madrid, Spain},
  year      = {2022},
  date      = {2022-07-11},
  doi       = {10.1145/3539813.3545126},
  urldate   = {2022-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
A Multi-Task Based Neural Model to Simulate Users in Goal-Oriented Dialogue Systems Proceedings Article
Kim, To Eun; Lipani, Aldo
In: Proc. of SIGIR, 2022.
@inproceedings{kim-lipani-2022-multi,
  author    = {Kim, To Eun and Lipani, Aldo},
  title     = {A Multi-Task Based Neural Model to Simulate Users in Goal-Oriented Dialogue Systems},
  booktitle = {Proc.~of SIGIR},
  series    = {SIGIR '22},
  year      = {2022},
  date      = {2022-06-11},
  url       = {https://www.researchgate.net/publication/360276605_A_Multi-Task_Based_Neural_Model_to_Simulate_Users_in_Goal-Oriented_Dialogue_Systems},
  urldate   = {2022-06-11},
  abstract  = {A human-like user simulator that anticipates users' satisfaction scores, actions, and utterances can help goal-oriented dialogue systems in evaluating the conversation and refining their dialogue strategies. However, little work has experimented with user simulators which can generate users' utterances. In this paper, we propose a deep learning-based user simulator that predicts users' satisfaction scores and actions while also jointly generating users' utterances in a multi-task manner. In particular, we show that 1) the proposed deep text-to-text multi-task neural model achieves state-of-the-art performance in the users' satisfaction scores and actions prediction tasks, and 2) in an ablation analysis, user satisfaction score prediction, action prediction, and utterance generation tasks can boost the performance with each other via positive transfers across the tasks. The source code and model checkpoints used for the experiments run in this paper are available at the following weblink: https://github.com/kimdanny/user-simulation-t5.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
StepGame: A New Benchmark for Robust Multi-Hop Spatial Reasoning in Texts Proceedings Article
Shi, Zhengxiang; Zhang, Qiang; Lipani, Aldo
In: Proceedings of the Association for the Advancement of Artificial Intelligence, 2022.
@inproceedings{Shi2022,
  author    = {Shi, Zhengxiang and Zhang, Qiang and Lipani, Aldo},
  title     = {StepGame: A New Benchmark for Robust Multi-Hop Spatial Reasoning in Texts},
  booktitle = {Proceedings of the Association for the Advancement of Artificial Intelligence},
  series    = {AAAI '22},
  year      = {2022},
  date      = {2022-01-01},
  url       = {https://www.researchgate.net/publication/357159030_StepGame_A_New_Benchmark_for_Robust_Multi-Hop_Spatial_Reasoning_in_Texts},
  abstract  = {Inferring spatial relations in natural language is a crucial ability an intelligent system should possess. The bAbI dataset tries to capture tasks relevant to this domain (tasks 17 and 19). However, these tasks have several limitations. Most importantly, they are limited to fixed expressions, they are limited in the number of reasoning steps required to solve them, and they fail to test the robustness of models to input that contains irrelevant or redundant information. In this paper, we present a new Question-Answering dataset called StepGame for robust multi-hop spatial reasoning in texts. Our experiments demonstrate that state-of-the-art models on the bAbI dataset struggle on the StepGame dataset. Moreover, we propose a Tensor-Product based Memory-Augmented Neural Network (TP-MANN) specialized for spatial reasoning tasks. Experimental results on both datasets show that our model outperforms all the baselines with superior generalization and robustness performance.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Forecasting Solar Home System Customers' Electricity Usage with a 3D Convolutional Neural Network to Improve Energy Access Journal Article
Kizilcec, Vivien; Spataru, Catalina; Lipani, Aldo; Parikh, Priti
In: Energies, vol. 15, no. 3, 2022, ISSN: 1996-1073.
@article{en15030857,
  author    = {Kizilcec, Vivien and Spataru, Catalina and Lipani, Aldo and Parikh, Priti},
  title     = {Forecasting Solar Home System Customers' Electricity Usage with a 3D Convolutional Neural Network to Improve Energy Access},
  journal   = {Energies},
  volume    = {15},
  number    = {3},
  year      = {2022},
  date      = {2022-01-01},
  issn      = {1996-1073},
  doi       = {10.3390/en15030857},
  url       = {https://www.mdpi.com/1996-1073/15/3/857},
  urldate   = {2022-01-01},
  abstract  = {Off-grid technologies, such as solar home systems (SHS), offer the opportunity to alleviate global energy poverty, providing a cost-effective alternative to an electricity grid connection. However, there is a paucity of high-quality SHS electricity usage data and thus a limited understanding of consumers’ past and future usage patterns. This study addresses this gap by providing a rare large-scale analysis of real-time energy consumption data for SHS customers (n = 63,299) in Rwanda. Our results show that 70\% of SHS users’ electricity usage decreased a year after their SHS was installed. This paper is novel in its application of a three-dimensional convolutional neural network (CNN) architecture for electricity load forecasting using time series data. It also marks the first time a CNN was used to predict SHS customers’ electricity consumption. The model forecasts individual households’ usage 24 h and seven days ahead, as well as an average week across the next three months. The last scenario derived the best performance with a mean squared error of 0.369. SHS companies could use these predictions to offer a tailored service to customers, including providing feedback information on their likely future usage and expenditure. The CNN could also aid load balancing for SHS based microgrids.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Semantic segmentation of cracks: Data challenges and architecture Journal Article
Panella, Fabio; Lipani, Aldo; Boehm, Jan
In: Automation in Construction, vol. 135, pp. 104110, 2022, ISSN: 0926-5805.
@article{PANELLA2022104110,
  author    = {Panella, Fabio and Lipani, Aldo and Boehm, Jan},
  title     = {Semantic segmentation of cracks: Data challenges and architecture},
  journal   = {Automation in Construction},
  volume    = {135},
  pages     = {104110},
  year      = {2022},
  date      = {2022-01-01},
  issn      = {0926-5805},
  doi       = {10.1016/j.autcon.2021.104110},
  url       = {https://www.sciencedirect.com/science/article/pii/S0926580521005616},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Borough-level COVID-19 forecasting in London using deep learning techniques and a novel MSE-Moran’s I loss function Journal Article
Olsen, Frederik; Schillaci, Calogero; Ibrahim, Mohamed; Lipani, Aldo
In: Results in Physics, vol. 35, pp. 105374, 2022, ISSN: 2211-3797.
@article{OLSEN2022105374,
  author    = {Olsen, Frederik and Schillaci, Calogero and Ibrahim, Mohamed and Lipani, Aldo},
  title     = {Borough-level COVID-19 forecasting in London using deep learning techniques and a novel MSE-Moran’s I loss function},
  journal   = {Results in Physics},
  volume    = {35},
  pages     = {105374},
  year      = {2022},
  date      = {2022-01-01},
  issn      = {2211-3797},
  doi       = {10.1016/j.rinp.2022.105374},
  url       = {https://www.sciencedirect.com/science/article/pii/S2211379722001450},
  abstract  = {Following its identification in late 2019, COVID-19 has spread around the globe, and been declared a pandemic. With this in mind, modelling the spread of COVID-19 remains important for responding effectively. To date research has focused primarily on modelling the spread of COVID-19 on national and regional scales with just a few studies doing so on a city and sub-city scale. However, no attempts have yet been made to design and optimize a model explicitly for accurately forecasting the spread of COVID-19 at sub-city scale. This research aimed to address this research gap by developing an experimental LSTM-ANN deep learning model. The model is largely autoregressive in nature as it considers temporally lagged borough-level COVID-19 cases data from the last 9 days, but also considers temporally lagged (i) borough-level NO2 concentration data, (ii) government stringency data, and (iii) climatic data from the last 9 days, as well as non-temporally variable borough-level urban characteristics data when modelling and forecasting the spread of the disease. The model was also encouraged to learn the spatial relationships between boroughs with regards to the spread of COVID-19 by a novel MSE-Moran’s I loss function. Overall, the model’s performance appears promising and so the model represents a useful tool for assisting the decision making and interventions of governing bodies within cities. A sensitivity analysis also indicated that of the non COVID-19 variables, the government stringency is particularly important in the modelling process, with this being closely followed by the climatic variables, the NO2 concentration data, and finally the urban characteristics data. Additionally, the introduction of the novel MSE-Moran’s I loss function appeared to improve the model’s forecasting performance, and so this research has implications at the intersection of deep learning and disease modelling. It may also have implications within spatio-temporal forecasting more generally because such a feature may have the potential to improve forecasting in other spatio-temporal applications},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
On The Prediction of Landslide Occurrences and Sizes via Hierarchical Neural Networks Journal Article
Aguilera, Quinton; Lombardo, Luigi; Tanyas, Hakan; Lipani, Aldo
In: Stochastic Environmental Research and Risk Assessment, 2022, ISSN: 2693-5015.
@article{Aguilera2022,
  author    = {Aguilera, Quinton and Lombardo, Luigi and Tanyas, Hakan and Lipani, Aldo},
  title     = {On The Prediction of Landslide Occurrences and Sizes via Hierarchical Neural Networks},
  journal   = {Stochastic Environmental Research and Risk Assessment},
  year      = {2022},
  date      = {2022-01-01},
  issn      = {2693-5015},
  doi       = {10.21203/rs.3.rs-1260650/v1},
  url       = {https://doi.org/10.21203/rs.3.rs-1260650/v1},
  abstract  = {For more than three decades, the scientific community that studies landslides through data-driven models has focused on estimating where landslides occur across a given landscape. This concept is widely known as landslide susceptibility. And, it has seen a vast improvement from old bivariate statistical techniques to modern deep learning routines. Despite all these advancements, no spatially-explicit data-driven model is currently capable of also predicting how large landslides may be once they trigger in a specific study area. In this work, we exploit a model architecture that has already found a number of applications in landslide susceptibility. Specifically, we opt for the use of Neural Network (NN). But, instead of focusing exclusively on where landslides may occur, we extend this paradigm to also spatially predict classes of landslide sizes. As a result, we keep the traditional binary classification paradigm but we make use of it to complement the susceptibility estimates with a crucial information for landslide hazard assessment. We will refer to this model as Hierarchical Neural Network (HNN) throughout the manuscript. To test this analytical protocol, we use the Nepalese area where the Gorkha earthquake induced tens of thousands of landslides in 2014. The results we obtain are quite promising. The component of our HNN that estimates the susceptibility outperforms a binomial Generalized Linear Model (GLM) baseline we used as benchmark. We did this for a GLM represents the most common classifier in the landslide literature. Most importantly, our HNN also suitably performed across the entire procedure. As a result, the landslide-area-class prediction returned not just a single susceptibility map, as per tradition. But, it also produced several informative maps on the expected landslide size classes. Our vision is for administrations to consult these suite of model outputs and maps to better assess the risk to local communities and infrastructure. And, to promote the diffusion of our HNN, we are sharing the data and codes in the supplementary material in the hope that we would stimulate others to replicate similar analyses.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Dynamic Schema Graph Fusion Network for Multi-Domain Dialogue State Tracking Proceedings Article
Feng, Yue; Lipani, Aldo; Ye, Fanghua; Zhang, Qiang; Yilmaz, Emine
In: Association for Computational Linguistics: ACL 2022, Association for Computational Linguistics, 2022.
@inproceedings{feng-etal-2022-dynamic,
  author    = {Feng, Yue and Lipani, Aldo and Ye, Fanghua and Zhang, Qiang and Yilmaz, Emine},
  title     = {Dynamic Schema Graph Fusion Network for Multi-Domain Dialogue State Tracking},
  booktitle = {Association for Computational Linguistics: ACL 2022},
  publisher = {Association for Computational Linguistics},
  year      = {2022},
  date      = {2022-01-01},
  url       = {https://aclanthology.org/2022.acl-long.10.pdf},
  urldate   = {2022-01-01},
  abstract  = {Dialogue State Tracking (DST) aims to keep track of users' intentions during the course of a conversation. In DST, modelling the relations among domains and slots is still an under-studied problem. Existing approaches that have considered such relations generally fall short in: (1) fusing prior slot-domain membership relations and dialogue-aware dynamic slot relations explicitly, and (2) generalizing to unseen domains. To address these issues, we propose a novel Dynamic Schema Graph Fusion Network (DSGFNet), which generates a dynamic schema graph to explicitly fuse the prior slot-domain membership relations and dialogue-aware dynamic slot relations. It also uses the schemata to facilitate knowledge transfer to new domains. DSGFNet consists of a dialogue utterance encoder, a schema graph encoder, a dialogue-aware schema graph evolving network, and a schema graph enhanced dialogue state decoder. Empirical results on benchmark datasets (i.e., SGD, MultiWOZ2.1, and MultiWOZ2.2), show that DSGFNet outperforms existing methods.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Learning to Execute Actions or Ask Clarification Questions Proceedings Article
Shi, Zhengxiang; Feng, Yue; Lipani, Aldo
In: Findings of NAACL, 2022.
@inproceedings{shi-etal-2022-learning,
  author    = {Shi, Zhengxiang and Feng, Yue and Lipani, Aldo},
  title     = {Learning to Execute Actions or Ask Clarification Questions},
  booktitle = {Findings of NAACL},
  series    = {NAACL '22},
  year      = {2022},
  date      = {2022-01-01},
  url       = {https://www.researchgate.net/publication/360050130_Learning_to_Execute_Actions_or_Ask_Clarification_Questions},
  urldate   = {2022-01-01},
  abstract  = {Collaborative tasks are ubiquitous activities where a form of communication is required in order to reach a joint goal. Collaborative building is one of such tasks. We wish to develop an intelligent builder agent in a simulated building environment (Minecraft) that can build whatever users wish to build by just talking to the agent. In order to achieve this goal, such agents need to be able to take the initiative by asking clarification questions when further information is needed. Existing works on Minecraft Corpus Dataset only learn to execute instructions neglecting the importance of asking for clarifications. In this paper, we extend the Minecraft Corpus Dataset by annotating all builder utterances into eight types, including clarification questions, and propose a new builder agent model capable of determining when to ask or execute instructions. Experimental results show that our model achieves state-of-the-art performance on the collaborative building task with a substantial improvement. We also define two new tasks, the learning to ask task and the joint learning task. The latter consists of solving both collaborating building and learning to ask tasks jointly.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Attention-based Ingredient Phrase Parser Proceedings Article
Shi, Zhengxiang; Ni, Pin; Kim, To Eun; Wang, Mehui; Lipani, Aldo
In: 2022.
@inproceedings{Shi2022:Ingredient,
  author    = {Shi, Zhengxiang and Ni, Pin and Kim, To Eun and Wang, Mehui and Lipani, Aldo},
  title     = {Attention-based Ingredient Phrase Parser},
  series    = {ESANN},
  year      = {2022},
  date      = {2022-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Snow detection in alpine regions with Convolutional Neural Networks: discriminating snow from cold clouds and water body Journal Article
Lu, Yichen; James, Thomas; Schillaci, Calogero; Lipani, Aldo
In: GIScience & Remote Sensing, vol. 59, no. 1, pp. 1321–1343, 2022.
@article{Lu2022,
  author    = {Lu, Yichen and James, Thomas and Schillaci, Calogero and Lipani, Aldo},
  title     = {Snow detection in alpine regions with Convolutional Neural Networks: discriminating snow from cold clouds and water body},
  journal   = {GIScience \& Remote Sensing},
  volume    = {59},
  number    = {1},
  pages     = {1321--1343},
  publisher = {Taylor \& Francis},
  year      = {2022},
  date      = {2022-01-01},
  doi       = {10.1080/15481603.2022.2112391},
  url       = {https://www.tandfonline.com/doi/abs/10.1080/15481603.2022.2112391},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Eigenvector-based Graph Neural Network Embeddings and Trust Rating Prediction in Bitcoin Networks Proceedings Article
Ni, Pin; Yuan, Qiao; Khraishi, Raad; Okhrati, Ramin; Lipani, Aldo; Medda, Francesca
In: Proceedings of the 3rd ACM International Conference on AI in Finance, ACM, 2022.
@inproceedings{pin-eta-al-2022-eigenvector-based,
  author    = {Ni, Pin and Yuan, Qiao and Khraishi, Raad and Okhrati, Ramin and Lipani, Aldo and Medda, Francesca},
  title     = {Eigenvector-based Graph Neural Network Embeddings and Trust Rating Prediction in Bitcoin Networks},
  booktitle = {Proceedings of the 3rd ACM International Conference on AI in Finance},
  series    = {ICAIF '22},
  publisher = {ACM},
  year      = {2022},
  date      = {2022-01-01},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Deep Learning Based Burnt Area Mapping Using Sentinel 1 for the Santa Cruz Mountains Lightning Complex (CZU) and Creek Fires 2020 Journal Article
Luft, Harrison; Schillaci, Calogero; Ceccherini, Guido; Vieira, Diana; Lipani, Aldo
In: Fire, vol. 5, no. 5, 2022, ISSN: 2571-6255.
@article{luft-etal-2022-deep-learning-based,
  author    = {Luft, Harrison and Schillaci, Calogero and Ceccherini, Guido and Vieira, Diana and Lipani, Aldo},
  title     = {Deep Learning Based Burnt Area Mapping Using Sentinel 1 for the Santa Cruz Mountains Lightning Complex (CZU) and Creek Fires 2020},
  journal   = {Fire},
  volume    = {5},
  number    = {5},
  year      = {2022},
  date      = {2022-01-01},
  issn      = {2571-6255},
  doi       = {10.3390/fire5050163},
  url       = {https://www.mdpi.com/2571-6255/5/5/163},
  urldate   = {2022-01-01},
  abstract  = {The study presented here builds on previous synthetic aperture radar (SAR) burnt area estimation models and presents the first U-Net (a convolutional network architecture for fast and precise segmentation of images) combined with ResNet50 (Residual Networks used as a backbone for many computer vision tasks) encoder architecture used with SAR, Digital Elevation Model, and land cover data for burnt area mapping in near-real time. The Santa Cruz Mountains Lightning Complex (CZU) was one of the most destructive fires in state history. The results showed a maximum burnt area segmentation F1-Score of 0.671 in the CZU, which outperforms current models estimating burnt area with SAR data for the specific event studied models in the literature, with an F1-Score of 0.667. The framework presented here has the potential to be applied on a near real-time basis, which could allow land monitoring as the frequency of data capture improves.},
  keywords  = {},
  pubstate  = {published},
  tppubtype = {article}
}
Deep Learning Based Burnt Area Mapping Using Sentinel 1 for the Santa Cruz Mountains Lightning Complex (CZU) and Creek Fires 2020 Journal Article
Luft, Harrison; Schillaci, Calogero; Ceccherini, Guido; Vieira, Diana; Lipani, Aldo
In: Fire, vol. 5, no. 5, 2022, ISSN: 2571-6255.
@comment{
Disabled duplicate: this entry repeats the citation key and the work of the
preceding entry luft-etal-2022-deep-learning-based, which triggers a
repeated-entry error. The leading at-sign has been removed so parsers skip it.

article{luft-etal-2022-deep-learning-based,
title = {Deep Learning Based Burnt Area Mapping Using Sentinel 1 for the Santa Cruz Mountains Lightning Complex (CZU) and Creek Fires 2020},
author = {Harrison Luft and Calogero Schillaci and Guido Ceccherini and Diana Vieira and Aldo Lipani},
url = {https://www.mdpi.com/2571-6255/5/5/163},
doi = {10.3390/fire5050163},
issn = {2571-6255},
year = {2022},
date = {2022-01-01},
journal = {Fire},
volume = {5},
number = {5},
abstract = {The study presented here builds on previous synthetic aperture radar (SAR) burnt area estimation models and presents the first U-Net (a convolutional network architecture for fast and precise segmentation of images) combined with ResNet50 (Residual Networks used as a backbone for many computer vision tasks) encoder architecture used with SAR, Digital Elevation Model, and land cover data for burnt area mapping in near-real time. The Santa Cruz Mountains Lightning Complex (CZU) was one of the most destructive fires in state history. The results showed a maximum burnt area segmentation F1-Score of 0.671 in the CZU, which outperforms current models estimating burnt area with SAR data for the specific event studied models in the literature, with an F1-Score of 0.667. The framework presented here has the potential to be applied on a near real-time basis, which could allow land monitoring as the frequency of data capture improves.},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
}
2021
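Validating the regional estimates of changes in soil organic carbon by using the data from paired-sites: the case study of Mediterranean arable lands Journal Article
Schillaci, Calogero; Saia, Sergio; Lipani, Aldo; Perego, Alessia; Zaccone, Claudio; Acutis, Marco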
In: Carbon Balance and Management, vol. 16, no. 1, pp. 1–15, 2021, ISSN: 1750-0680.
@article{Schillaci2021,
  author     = {Schillaci, Calogero and Saia, Sergio and Lipani, Aldo and Perego, Alessia and Zaccone, Claudio and Acutis, Marco},
  title      = {Validating the regional estimates of changes in soil organic carbon by using the data from paired-sites: the case study of {Mediterranean} arable lands},
  journal    = {Carbon Balance and Management},
  volume     = {16},
  number     = {1},
  pages      = {1--15},
  year       = {2021},
  date       = {2021-06-01},
  publisher  = {BioMed Central},
  issn       = {1750-0680},
  doi        = {10.1186/s13021-021-00182-7},
  url        = {https://cbmjournal.biomedcentral.com/articles/10.1186/s13021-021-00182-7},
  abstract   = {Legacy data are unique occasions for estimating soil organic carbon (SOC) concentration changes and spatial variability, but their use showed limitations due to the sampling schemes adopted and improvements may be needed in the analysis methodologies. When SOC changes is estimated with legacy data, the use of soil samples collected in different plots (i.e., non-paired data) may lead to biased results. In the present work},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article}
}
Mitigating the Position Bias of Transformer Models in Passage Re-Ranking Proceedings Article
Hofstätter, Sebastian; Lipani, Aldo; Althammer, Sophia; Zlabinger, Markus; Hanbury, Allan
In: ECIR, 2021.
@inproceedings{hofstaetter21,
  author     = {Hofst{\"a}tter, Sebastian and Lipani, Aldo and Althammer, Sophia and Zlabinger, Markus and Hanbury, Allan},
  title      = {Mitigating the Position Bias of {Transformer} Models in Passage Re-Ranking},
  booktitle  = {Proceedings of the European Conference on Information Retrieval},
  series     = {ECIR '21},
  year       = {2021},
  date       = {2021-03-28},
  url        = {https://www.researchgate.net/publication/348589683_Mitigating_the_Position_Bias_of_Transformer_Models_in_Passage_Re-Ranking},
  abstract   = {Supervised machine learning models and their evaluation strongly depends on the quality of the underlying dataset. When we search for a relevant piece of information it may appear anywhere in a given passage. However, we observe a bias in the position of the correct answer in the text in two popular Question Answering datasets used for passage re-ranking. The excessive favoring of earlier positions inside passages is an unwanted artefact. This leads to three common Transformer-based re-ranking models to ignore relevant parts in unseen passages. More concerningly, as the evaluation set is taken from the same biased distribution, the models overfitting to that bias overestimate their true effectiveness. In this work we analyze position bias on datasets, the contextualized representations, and their effect on retrieval results. We propose a debiasing method for retrieval datasets. Our results show that a model trained on a position-biased dataset exhibits a significant decrease in re-ranking effectiveness when evaluated on a debiased dataset. We demonstrate that by mitigating the position bias, Transformer-based re-ranking models are equally effective on a biased and debiased dataset, as well as more effective in a transfer-learning setting between two differently biased datasets.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
Predicting non-residential building fire risk using geospatial information and convolutional neural networks Journal Article
Anderson-Bell, Jake; Schillaci, Calogero; Lipani, Aldo
In: Remote Sensing Applications: Society and Environment, vol. 21, pp. 100470, 2021, ISSN: 2352-9385.
@article{AndersonBell2021,
  author     = {Anderson-Bell, Jake and Schillaci, Calogero and Lipani, Aldo},
  title      = {Predicting non-residential building fire risk using geospatial information and convolutional neural networks},
  journal    = {Remote Sensing Applications: Society and Environment},
  volume     = {21},
  pages      = {100470},
  year       = {2021},
  date       = {2021-01-01},
  issn       = {2352-9385},
  doi        = {10.1016/j.rsase.2021.100470},
  url        = {https://www.researchgate.net/publication/348685058_Predicting_Non-Residential_Building_Fire_Risk_using_geospatial_information_and_Convolutional_Neural_Networks},
  abstract   = {Building fire risk prediction is crucial for allocation of building inspection resources and prevention of fire incidents. Existing research of building fire prediction makes use of data relating to local demography, crime, building use and physical building characteristics, yet few studies have analysed the relative importance of predictive features. Furthermore, image features relating to buildings, such as aerial imagery and digital surface models (DSM), have not been explored. This research presents a multi-modal hybrid neural network for the prediction of fire risk at the building level using the London Fire Brigade dataset. The inclusion of traditional and novel image features is assessed using Shapley values and an ablation study. The ablation study found that while building use is the most effective contributor of classification performance, demographic features, apart from social class, are detrimental. Moreover, while the DSM did not lead to any notable improvement in classification performance, the inclusion of the aerial imagery feature lead to a 4\% increase in median validation ROC AUC. The final model presented achieved an ROC AUC of 0.8195 on the test set.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article}
}
Evaluation metrics for measuring bias in search engine results Journal Article
Gezici, Gizem; Lipani, Aldo; Saygin, Yucel; Yilmaz, Emine
In: Information Retrieval Journal, pp. 1–29, 2021, ISSN: 1386-4564.
@article{Gezici2021,
  author     = {Gezici, Gizem and Lipani, Aldo and Saygin, Yucel and Yilmaz, Emine},
  title      = {Evaluation metrics for measuring bias in search engine results},
  journal    = {Information Retrieval Journal},
  pages      = {1--29},
  year       = 2021,
  date       = {2021-01-01},
  publisher  = {Springer},
  issn       = {1386-4564},
  doi        = {10.1007/s10791-020-09386-w},
  url        = {http://link.springer.com/10.1007/s10791-020-09386-w},
  abstract   = {Search engines decide what we see for a given search query. Since many people are exposed to information through search engines, it is fair to expect that search engines are neutral. However, search engine results do not necessarily cover all the viewpoints of a search query topic, and they can be biased towards a specific view since search engine results are returned based on relevance, which is calculated using many features and sophisticated algorithms where search neutrality is not necessarily the focal point. Therefore, it is important to evaluate the search engine results with respect to bias. In this work we propose novel web search bias evaluation measures which take into account the rank and relevance. We also propose a framework to evaluate web search bias using the proposed measures and test our framework on two popular search engines based on 57 controversial query topics such as abortion, medical marijuana, and gay marriage. We measure the stance bias (in support or against), as well as the ideological bias (conservative or liberal). We observe that the stance does not necessarily correlate with the ideological leaning, e.g. a positive stance on abortion indicates a liberal leaning but a positive stance on Cuba embargo indicates a conservative leaning. Our experiments show that neither of the search engines suffers from stance bias. However, both search engines suffer from ideological bias, both favouring one ideological leaning to the other, which is more significant from the perspective of polarisation in our society.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article}
}
Variational-LSTM autoencoder to forecast the spread of coronavirus across the globe Journal Article
Ibrahim, Mohamed R; Haworth, James; Lipani, Aldo; Aslam, Nilufer; Cheng, Tao; Christie, Nicola
In: PLOS ONE, vol. 16, no. 1, pp. 1–22, 2021.
@article{10.1371/journal.pone.0246120,
  author     = {Ibrahim, Mohamed R and Haworth, James and Lipani, Aldo and Aslam, Nilufer and Cheng, Tao and Christie, Nicola},
  title      = {{Variational-LSTM} autoencoder to forecast the spread of coronavirus across the globe},
  journal    = {PLOS ONE},
  volume     = {16},
  number     = {1},
  pages      = {1--22},
  year       = {2021},
  date       = {2021-01-01},
  publisher  = {Public Library of Science},
  doi        = {10.1371/journal.pone.0246120},
  url        = {https://www.researchgate.net/publication/348855260_Variational-LSTM_autoencoder_to_forecast_the_spread_of_coronavirus_across_the_globe},
  abstract   = {Modelling the spread of coronavirus globally while learning trends at global and country levels remains crucial for tackling the pandemic. We introduce a novel variational-LSTM Autoencoder model to predict the spread of coronavirus for each country across the globe. This deep Spatio-temporal model does not only rely on historical data of the virus spread but also includes factors related to urban characteristics represented in locational and demographic data (such as population density, urban population, and fertility rate), an index that represents the governmental measures and response amid toward mitigating the outbreak (includes 13 measures such as: 1) school closing, 2) workplace closing, 3) cancelling public events, 4) close public transport, 5) public information campaigns, 6) restrictions on internal movements, 7) international travel controls, 8) fiscal measures, 9) monetary measures, 10) emergency investment in health care, 11) investment in vaccines, 12) virus testing framework, and 13) contact tracing). In addition, the introduced method learns to generate a graph to adjust the spatial dependences among different countries while forecasting the spread. We trained two models for short and long-term forecasts. The first one is trained to output one step in future with three previous timestamps of all features across the globe, whereas the second model is trained to output 10 steps in future. Overall, the trained models show high validation for forecasting the spread for each country for short and long-term forecasts, which makes the introduce method a useful tool to assist decision and policymaking for the different corners of the globe.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article}
}
Learning Neural Point Processes with Latent Graphs Proceedings Article
Zhang, Qiang; Lipani, Aldo; Yilmaz, Emine
In: Proceedings of the World Wide Web Conference, 2021.
@inproceedings{Zhang2021,
  author     = {Zhang, Qiang and Lipani, Aldo and Yilmaz, Emine},
  title      = {Learning Neural Point Processes with Latent Graphs},
  booktitle  = {Proceedings of the World Wide Web Conference},
  series     = {WWW '21},
  year       = {2021},
  date       = {2021-01-01},
  doi        = {10.1145/3442381.3450135},
  url        = {https://www.researchgate.net/publication/349380768_Learning_Neural_Point_Processes_with_Latent_Graphs},
  abstract   = {Neural point processes (NPPs) employ neural networks to capture complicated dynamics of asynchronous event sequences. Existing NPPs feed all history events into neural networks, assuming that all event types contribute to the prediction of the target type. However, this assumption can be problematic because in reality some event types do not contribute to the predictions of another type. To correct this defect, we learn to omit those types of events that do not contribute to the prediction of one target type during the formulation of NPPs. Towards this end, we simultaneously consider the tasks of (1) finding event types that contribute to predictions of the target types and (2) learning a NPP model from event sequences. For the former, we formulate a latent graph, with event types being vertices and non-zero contributing relationships being directed edges; then we propose a probabilistic graph generator, from which we sample a latent graph. For the latter, the sampled graph can be readily used as a plug-in to modify an existing NPP model. Because these two tasks are nested, we propose to optimize the model parameters through bilevel programming, and develop an efficient solution based on truncated gradient back-propagation. Experimental results on both synthetic and real-world datasets show the improved performance against state-of-the-art baselines. This work removes disturbance of non-contributing event types with the aid of a validation procedure, similar to the practice to mitigate overfitting used when training machine learning models.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
New pedotransfer approaches to predict soil bulk density using WoSIS soil data and environmental covariates in Mediterranean agro-ecosystems Journal Article
Schillaci, Calogero; Perego, Alessia; Valkama, Elena; Märker, Michael; Saia, Sergio; Veronesi, Fabio; Lipani, Aldo; Lombardo, Luigi; Tadiello, Tommaso; Gamper, Hannes A; Tedone, Luigi; Moss, Cami; Pareja-Serrano, Elena; Amato, Gabriele; Kühl, Kersten; Dămătîrcă, Claudia; Cogato, Alessia; Mzid, Nada; Eeswaran, Rasu; Rabelo, Marya; Sperandio, Giorgio; Bosino, Alberto; Bufalini, Margherita; Tunçay, Tülay; Ding, Jianqi; Fiorentini, Marco; Tiscornia, Guadalupe; Conradt, Sarah; Botta, Marco; Acutis, Marco
In: Science of The Total Environment, vol. 780, pp. 146609, 2021, ISSN: 0048-9697.
@article{SCHILLACI2021146609,
  author     = {Schillaci, Calogero and Perego, Alessia and Valkama, Elena and M{\"a}rker, Michael and Saia, Sergio and Veronesi, Fabio and Lipani, Aldo and Lombardo, Luigi and Tadiello, Tommaso and Gamper, Hannes A and Tedone, Luigi and Moss, Cami and Pareja-Serrano, Elena and Amato, Gabriele and K{\"u}hl, Kersten and D{\u{a}}m{\u{a}}t{\^{\i}}rc{\u{a}}, Claudia and Cogato, Alessia and Mzid, Nada and Eeswaran, Rasu and Rabelo, Marya and Sperandio, Giorgio and Bosino, Alberto and Bufalini, Margherita and Tun{\c{c}}ay, T{\"u}lay and Ding, Jianqi and Fiorentini, Marco and Tiscornia, Guadalupe and Conradt, Sarah and Botta, Marco and Acutis, Marco},
  title      = {New pedotransfer approaches to predict soil bulk density using {WoSIS} soil data and environmental covariates in {Mediterranean} agro-ecosystems},
  journal    = {Science of The Total Environment},
  volume     = {780},
  pages      = {146609},
  year       = {2021},
  date       = {2021-01-01},
  issn       = {0048-9697},
  doi        = {10.1016/j.scitotenv.2021.146609},
  url        = {https://www.sciencedirect.com/science/article/pii/S0048969721016776},
  abstract   = {For the estimation of the soil organic carbon stocks, bulk density (BD) is a fundamental parameter but measured data are usually not available especially when dealing with legacy soil data. It is possible to estimate BD by applying pedotransfer function (PTF). We applied different estimation methods with the aim to define a suitable PTF for BD of arable land for the Mediterranean Basin, which has peculiar climate features that may influence the soil carbon sequestration. To improve the existing BD estimation methods, we used a set of public climatic and topographic data along with the soil texture and organic carbon data. The present work consisted of the following steps: i) development of three PTFs models separately for top (0\textendash0.4 m) and subsoil (0.4\textendash1.2 m), ii) a 10-fold cross-validation, iii) model transferability using an external dataset derived from published data. The development of the new PTFs was based on the training dataset consisting of World Soil Information Service (WoSIS) soil profile data, climatic data from WorldClim at 1 km spatial resolution and Shuttle Radar Topography Mission (SRTM) digital elevation model at 30 m spatial resolution. The three PTFs models were developed using: Multiple Linear Regression stepwise (MLR-S), Multiple Linear Regression backward stepwise (MLR-BS), and Artificial Neural Network (ANN). The predictions of the newly developed PTFs were compared with the BD calculated using the PTF proposed by Manrique and Jones (MJ) and the modelled BD derived from the global SoilGrids dataset. For the topsoil training dataset (N = 129), MLR-S, MLR-BS and ANN had a R2 0.35, 0.58 and 0.86, respectively. For the model transferability, the three PTFs applied to the external topsoil dataset (N = 59), achieved R2 values of 0.06, 0.03 and 0.41. For the subsoil training dataset (N = 180), MLR-S, MLR-BS and ANN the R2 values were 0.36, 0.46 and 0.83, respectively. When applied to the external subsoil dataset (N = 29), the R2 values were 0.05, 0.06 and 0.41. The cross-validation for both top and subsoil dataset, resulted in an intermediate performance compared to calibration and validation with the external dataset. The new ANN PTF outperformed MLR-S, MLR-BS, MJ and SoilGrids approaches for estimating BD. Further improvements may be achieved by additionally considering the time of sampling, agricultural soil management and cultivation practices in predictive models.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article}
}
How Am I Doing?: Evaluating Conversational Search Systems Offline Journal Article
Lipani, Aldo; Carterette, Ben; Yilmaz, Emine
In: ACM Transactions on Information Systems (TOIS), 2021.
@article{Lipani2021TOIS,
  author     = {Lipani, Aldo and Carterette, Ben and Yilmaz, Emine},
  title      = {How Am {I} Doing?: Evaluating Conversational Search Systems Offline},
  journal    = {ACM Transactions on Information Systems},
  year       = {2021},
  date       = {2021-01-01},
  url        = {https://www.researchgate.net/publication/350640565_How_Am_I_Doing_Evaluating_Conversational_Search_Systems_Offline
https://aldolipani.com/wp-content/uploads/2021/04/How_Am_I_Doing-Evaluating_Conversational_Search_Systems_Offline.pdf},
  abstract   = {As conversational agents like Siri and Alexa gain in popularity and use, conversation is becoming a more and more important mode of interaction for search. Conversational search shares some features with traditional search, but differs in some important respects: conversational search systems are less likely to return ranked lists of results (a SERP), more likely to involve iterated interactions, and more likely to feature longer, well-formed user queries in the form of natural language questions. Because of these differences, traditional methods for search evaluation (such as the Cranfield paradigm) do not translate easily to conversational search. In this work, we propose a framework for offline evaluation of conversational search, which includes a methodology for creating test collections with relevance judgments, an evaluation measure based on a user interaction model, and an approach to collecting user interaction data to train the model. The framework is based on the idea of ``subtopics'', often used to model novelty and diversity in search and recommendation, and the user model is similar to the geometric browsing model introduced by RBP and used in ERR. As far as we know, this is the first work to combine these ideas into a comprehensive framework for offline evaluation of conversational search.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article}
}
Convolutional neural networks for water segmentation using sentinel-2 red, green, blue (RGB) composites and derived spectral indices Journal Article
James, Thomas; Schillaci, Calogero; Lipani, Aldo
In: International Journal of Remote Sensing, vol. 42, no. 14, pp. 5342–5369, 2021.
@article{doi:10.1080/01431161.2021.1913298,
  author     = {James, Thomas and Schillaci, Calogero and Lipani, Aldo},
  title      = {Convolutional neural networks for water segmentation using sentinel-2 red, green, blue ({RGB}) composites and derived spectral indices},
  journal    = {International Journal of Remote Sensing},
  volume     = {42},
  number     = {14},
  pages      = {5342--5369},
  year       = {2021},
  date       = {2021-01-01},
  publisher  = {Taylor \& Francis},
  doi        = {10.1080/01431161.2021.1913298},
  url        = {https://www.researchgate.net/publication/351093583_Convolutional_neural_networks_for_water_segmentation_using_sentinel-2_red_green_blue_RGB_composites_and_derived_spectral_indices},
  abstract   = {Near-real time water segmentation with medium resolution satellite imagery plays a critical role in water management. Automated water segmentation of satellite imagery has traditionally been achieved using spectral indices. Spectral water segmentation is limited by environmental factors and requires human expertise to be applied effectively. In recent years, the use of convolutional neural networks (CNN's) for water segmentation has been successful when used on high-resolution satellite imagery, but to a lesser extent for medium resolution imagery. Existing studies have been limited to geographically localized datasets and reported metrics have been benchmarked against a limited range of spectral indices. This study seeks to determine if a single CNN based on Red, Green, Blue (RGB) image classification can effectively segment water on a global scale and outperform traditional spectral methods. Additionally, this study evaluates the extent to which smaller datasets (of very complex pattern, e.g harbour megacities) can be used to improve globally applicable CNNs within a specific region. Multispectral imagery from the European Space Agency, Sentinel-2 satellite (10 m spatial resolution) was sourced. Test sites were selected in Florida, New York, and Shanghai to represent a globally diverse range of waterbody typologies. Region-specific spectral water segmentation algorithms were developed on each test site, to represent benchmarks of spectral index performance. DeepLabV3-ResNet101 was trained on 33,311 semantically labelled true-colour samples. The resulting model was retrained on three smaller subsets of the data, specific to New York, Shanghai and Florida. CNN predictions reached a maximum mean intersection over union result of 0.986 and F1-Score of 0.983. At the Shanghai test site, the CNN's predictions outperformed the spectral benchmark, primarily due to the CNN's ability to process contextual features at multiple scales. In all test cases, retraining the networks to localized subsets of the dataset improved the localized region's segmentation predictions. The CNN's presented are suitable for cloud-based deployment and could contribute to the wider use of satellite imagery for water management.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article}
}
selected in Florida, New York, and Shanghai to represent a globally diverse range of waterbody typologies. Region-specific spectral water segmentation algorithms were developed on each test site, to represent benchmarks of spectral index performance. DeepLabV3-ResNet101 was trained on 33,311 semantically labelled true-colour samples. The resulting model was retrained on three smaller subsets of the data, specific to New York, Shanghai and Florida. CNN predictions reached a maximum mean intersection over union result of 0.986 and F1-Score of 0.983. At the Shanghai test site, the CNN’s predictions outperformed the spectral benchmark, primarily due to the CNN’s ability to process contextual features at multiple scales. In all test cases, retraining the networks to localized subsets of the dataset improved the localized region’s segmentation predictions. The CNN’s presented are suitable for cloud-based deployment and could contribute to the wider use of satellite imagery for water management.
Subsequence Based Deep Active Learning for Named Entity Recognition Proceedings Article
Radmard, Puria; Fathullah, Yassir; Lipani, Aldo
In: Proceedings of the Association for Computational Linguistics, 2021.
@inproceedings{Radmard2021,
  author     = {Radmard, Puria and Fathullah, Yassir and Lipani, Aldo},
  title      = {Subsequence Based Deep Active Learning for Named Entity Recognition},
  booktitle  = {Proceedings of the Association for Computational Linguistics},
  series     = {ACL '21},
  year       = 2021,
  date       = {2021-01-01},
  url        = {https://www.researchgate.net/publication/351885762_Subsequence_Based_Deep_Active_Learning_for_Named_Entity_Recognition},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
Inference of virtual network functions' state via analysis of the CPU behavior Proceedings Article
Shelbourne, Charles; Linguaglossa, Leonardo; Zhang, Tianzhu; Lipani, Aldo
In: the 33rd International Teletraffic Congress (ITC 33), 2021.
@inproceedings{Ling2108:Inference,
  author     = {Shelbourne, Charles and Linguaglossa, Leonardo and Zhang, Tianzhu and Lipani, Aldo},
  title      = {Inference of virtual network functions' state via analysis of the {CPU} behavior},
  booktitle  = {Proceedings of the 33rd International Teletraffic Congress ({ITC} 33)},
  year       = {2021},
  date       = {2021-01-01},
  url        = {https://www.researchgate.net/publication/352826750_Inference_of_virtual_network_functions'_state_via_analysis_of_the_CPU_behavior},
  abstract   = {The on-going process of softwarization of IT networks promises to reduce the operational and management costs of network infrastructures by replacing hardware middleboxes with equivalent pieces of code executed on general-purpose servers. Alongside the benefits from the operator's perspective, new strategies to provide the network's resources to users are arising. Following the principle of ``everything as a service'', multiple tenants can access the required resources \textendash typically CPUs, NICs, or RAM \textendash according to a Service-Level Agreement. However, tenants' applications may require a complex and expensive measurement infrastructure to continuously monitor the network function's state. Although the application's specific behavior is unknown (and often opaque to the infrastructure owner), the software nature of (virtual) network functions (VNFs) may be the key to infer the behavior of the high-level functions by accessing low-level information, which is still under the control of the operating system and therefore of the infrastructure owner. As such, in the scenario of software VNFs executed on COTS servers, the underlying CPU's behavior can be used as the sole predictor for the high-level VNF state without explicit in-network measurements: in this paper, we develop a novel methodology to infer high-level characteristics such as throughput or packet loss using CPU data instead of network measurements. Our methodology consists of (i) experimentally analyzing the behavior of a CPU that executes a VNF under different loads, (ii) extracting a correlation between the CPU footprint and the high-level application state, and (iii) use this knowledge to detect the previously mentioned network metrics. Our code and datasets are publicly available.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
A Baseline for Shapley Values in MLPs: from Missingness to Neutrality Proceedings Article
Izzo, Cosimo; Lipani, Aldo; Okhrati, Ramin; Medda, Francesca
In: ESANN, 2021, ISSN: 2331-8422.
@inproceedings{Izzo2021,
  author     = {Izzo, Cosimo and Lipani, Aldo and Okhrati, Ramin and Medda, Francesca},
  title      = {A Baseline for {Shapley} Values in {MLPs}: from Missingness to Neutrality},
  booktitle  = {ESANN},
  year       = {2021},
  date       = {2021-01-01},
  issn       = {2331-8422},
  eprint     = {2006.04896},
  eprinttype = {arXiv},
  url        = {https://arxiv.org/abs/2006.04896},
  abstract   = {Being able to explain a prediction as well as having a model that performs well are paramount in many machine learning applications. Deep neural networks have gained momentum recently on the basis of their accuracy, however these are often criticised to be black-boxes. Many authors have focused on proposing methods to explain their predictions. Among these explainability methods, feature attribution methods have been favoured for their strong theoretical foundation: the Shapley value. A limitation of Shapley value is the need to define a baseline (aka reference point) representing the missingness of a feature. In this paper, we present a method to choose a baseline based on a neutrality value: a parameter defined by decision makers at which their choices are determined by the returned value of the model being either below or above it. Based on this concept, we theoretically justify these neutral baselines and find a way to identify them for MLPs. Then, we experimentally demonstrate that for a binary classification task, using a synthetic dataset and a dataset coming from the financial domain, the proposed baselines outperform, in terms of local explanability power, standard ways of choosing them.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}