Prompt engineering holds the promise for computational literary studies (CLS) of obtaining high-quality markup for literary research questions by simply prompting large language models with natural-language strings. We test prompt engineering’s validity for two CLS sequence labeling tasks under the following aspects: (i) how generalizable are the results of identical prompts on different dataset splits?, (ii) how robust are performance results when re-formulating the prompts?, and (iii) how generalizable are certain fixed phrases added to the prompts that are generally considered to increase performance? We find that results are sensitive to data splits and prompt formulation, while the addition of fixed phrases does not change performance in most cases, depending on the chosen model.
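To make the evaluation setup concrete, the following is a minimal, illustrative Python sketch (not the authors' code) of how prompt robustness for a token-level labeling task could be probed: `query_llm`, the prompt templates, and the token-accuracy metric are hypothetical stand-ins for whichever model, prompt formulations, and evaluation measure are actually used in the paper.

```python
# Illustrative sketch: comparing prompt variants for a token-level sequence
# labeling task across dataset splits. All names here are placeholders.
from typing import Callable, List


def query_llm(prompt: str) -> str:
    """Hypothetical LLM call; replace with a real API client."""
    raise NotImplementedError


# Re-formulations of the same task instruction; the last one adds a fixed
# phrase of the kind often claimed to boost performance.
PROMPT_VARIANTS = [
    "Label each token in the sentence with its tag. Sentence: {sentence}",
    "You are an expert annotator. Tag every token in: {sentence}",
    "Let's think step by step. Assign one tag per token: {sentence}",
]


def token_accuracy(pred: List[str], gold: List[str]) -> float:
    """Share of tokens whose predicted tag matches the gold tag."""
    correct = sum(p == g for p, g in zip(pred, gold))
    return correct / max(len(gold), 1)


def evaluate(split: List[dict], template: str,
             llm: Callable[[str], str]) -> float:
    """Average token accuracy of one prompt template on one dataset split."""
    scores = []
    for example in split:  # example = {"sentence": str, "tags": [str, ...]}
        response = llm(template.format(sentence=example["sentence"]))
        predicted = response.strip().split()  # assumes one tag per token
        scores.append(token_accuracy(predicted, example["tags"]))
    return sum(scores) / max(len(scores), 1)


# Usage: run every prompt variant on every split and inspect how much the
# scores spread, which is the kind of sensitivity the paper measures.
# for name, split in {"split_a": split_a, "split_b": split_b}.items():
#     for template in PROMPT_VARIANTS:
#         print(name, template[:30], evaluate(split, template, query_llm))
```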
@inproceedings{Pichler2025aa,
  title     = {{Evaluating LLM-Prompting for Sequence Labeling Tasks in Computational Literary Studies}},
  author    = {Axel Pichler and Janis Pagel and Nils Reiter},
  booktitle = {{Proceedings of the 9th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature (LaTeCH-CLfL 2025)}},
  month     = {May},
  year      = {2025}
}
TY - CONF
TI - Evaluating LLM-Prompting for Sequence Labeling Tasks in Computational Literary Studies
AU - Axel Pichler
AU - Janis Pagel
AU - Nils Reiter
PY - 2025
J2 - Proceedings of the 9th Joint SIGHUM Workshop on Computational Linguistics for Cultural Heritage, Social Sciences, Humanities and Literature (LaTeCH-CLfL 2025)
ER -