Research Publications

2018

Berndt J, Fischer B, Britz K. Scaling the ConceptCloud browser to large semi-structured data sets. 14th African Conference on Research in Computer Science and Applied Mathematics, Stellenbosch, South Africa, Proceedings. 2018. https://hal.inria.fr/hal-01881376.

Semi-structured data sets such as product reviews or event log data are simultaneously becoming more widely used and growing ever larger. This paper describes ConceptCloud, a flexible interactive browser for semi-structured datasets, with a focus on the recent trend of implementing server-based architectures to accommodate ever growing datasets. ConceptCloud makes use of an intuitive tag cloud visualization viewer in combination with an underlying concept lattice to provide a formal structure for navigation through datasets without prior knowledge of the structure of the data or compromising scalability. This is achieved by implementing architectural changes to increase the system’s resource efficiency.

@inproceedings{185,
  author = {Berndt, Joshua and Fischer, Bernd and Britz, Katarina},
  title = {Scaling the {ConceptCloud} browser to large semi-structured data sets},
  abstract = {Semi-structured data sets such as product reviews or event log data are simultaneously becoming more widely used and growing ever larger. This paper describes ConceptCloud, a flexible interactive browser for semi-structured datasets, with a focus on the recent trend of implementing server-based architectures to accommodate ever growing datasets. ConceptCloud makes use of an intuitive tag cloud visualization viewer in combination with an underlying concept lattice to provide a formal structure for navigation through datasets without prior knowledge of the structure of the data or compromising scalability. This is achieved by implementing architectural changes to increase the system’s resource efficiency.},
  year = {2018},
  booktitle = {14th African Conference on Research in Computer Science and Applied Mathematics, Stellenbosch, South Africa, Proceedings},
  pages = {276--283},
  month = oct,
  eventdate = {2018-10-14/2018-10-16},
  publisher = {HAL archives-ouvertes},
  url = {https://hal.inria.fr/hal-01881376},
}
Britz K, Varzinczak I. Preferential accessibility and preferred worlds. Journal of Logic, Language and Information. 2018;27(2). https://doi.org/10.1007/s10849-017-9264-0.

Modal accounts of normality in non-monotonic reasoning traditionally have an underlying semantics based on a notion of preference amongst worlds. In this paper, we motivate and investigate an alternative semantics, based on ordered accessibility relations in Kripke frames. The underlying intuition is that some world tuples may be seen as more normal, while others may be seen as more exceptional. We show that this delivers an elegant and intuitive semantic construction, which gives a new perspective on defeasible necessity. Technically, the revisited logic does not change the expressive power of our previously defined preferential modalities. This conclusion follows from an analysis of both semantic constructions via a generalisation of bisimulations to the preferential case. Reasoners based on the previous semantics therefore also suffice for reasoning over the new semantics. We complete the picture by investigating different notions of defeasible conditionals in modal logic that can also be captured within our framework. A preliminary version of the work reported in this paper was presented at the Workshop on Nonmonotonic Reasoning.

@article{183,
  author = {Britz, Katarina and Varzinczak, Ivan},
  title = {Preferential accessibility and preferred worlds},
  abstract = {Modal accounts of normality in non-monotonic reasoning traditionally have an underlying semantics based on a notion of preference amongst worlds. In this paper, we motivate and investigate an alternative semantics, based on ordered accessibility relations in Kripke frames. The underlying intuition is that some world tuples may be seen as more normal, while others may be seen as more exceptional. We show that this delivers an elegant and intuitive semantic construction, which gives a new perspective on defeasible necessity. Technically, the revisited logic does not change the expressive power of our previously defined preferential modalities. This conclusion follows from an analysis of both semantic constructions via a generalisation of bisimulations to the preferential case. Reasoners based on the previous semantics therefore also suffice for reasoning over the new semantics. We complete the picture by investigating different notions of defeasible conditionals in modal logic that can also be captured within our framework. A preliminary version of the work reported in this paper was presented at the Workshop on Nonmonotonic Reasoning.},
  year = {2018},
  journal = {Journal of Logic, Language and Information},
  volume = {27},
  number = {2},
  pages = {133--155},
  publisher = {Springer},
  doi = {10.1007/s10849-017-9264-0},
  url = {https://doi.org/10.1007/s10849-017-9264-0},
}
Britz K, Varzinczak I. From KLM-Style Conditionals to Defeasible Modalities, and Back. Journal of Applied Non-Classical Logics. 2018;28(1). https://doi.org/10.1080/11663081.2017.1397325.

We investigate an aspect of defeasibility that has somewhat been overlooked by the non-monotonic reasoning community, namely that of defeasible modes of reasoning. These aim to formalise defeasibility of the traditional notion of necessity in modal logic, in particular of its different readings as action, knowledge and others in specific contexts, rather than defeasibility of conditional forms. Building on an extension of the preferential approach to modal logics, we introduce new modal operators with which to formalise the notion of defeasible necessity and distinct possibility, and that can be used to represent expected effects, refutable knowledge, and so on. We show how KLM-style conditionals can smoothly be integrated with our richer language. We also propose a tableau calculus which is sound and complete with respect to our modal preferential semantics, and of which the computational complexity remains in the same class as that of the underlying classical modal logic.

@article{182,
  author = {Britz, Katarina and Varzinczak, Ivan},
  title = {From {KLM}-Style Conditionals to Defeasible Modalities, and Back},
  abstract = {We investigate an aspect of defeasibility that has somewhat been overlooked by the non-monotonic reasoning community, namely that of defeasible modes of reasoning. These aim to formalise defeasibility of the traditional notion of necessity in modal logic, in particular of its different readings as action, knowledge and others in specific contexts, rather than defeasibility of conditional forms. Building on an extension of the preferential approach to modal logics, we introduce new modal operators with which to formalise the notion of defeasible necessity and distinct possibility, and that can be used to represent expected effects, refutable knowledge, and so on. We show how KLM-style conditionals can smoothly be integrated with our richer language. We also propose a tableau calculus which is sound and complete with respect to our modal preferential semantics, and of which the computational complexity remains in the same class as that of the underlying classical modal logic.},
  year = {2018},
  journal = {Journal of Applied Non-Classical Logics},
  volume = {28},
  number = {1},
  pages = {92--121},
  publisher = {Taylor \& Francis},
  doi = {10.1080/11663081.2017.1397325},
  url = {https://doi.org/10.1080/11663081.2017.1397325},
}
Price CS, Moodley D, Pillay A. Dynamic Bayesian decision network to represent growers’ adaptive pre-harvest burning decisions in a sugarcane supply chain. Proceedings of the Annual Conference of the South African Institute of Computer Scientists and Information Technologists (SAICSIT '18). 2018. https://dl.acm.org/citation.cfm?id=3278681.

Sugarcane growers usually burn their cane to facilitate its harvesting and transportation. Cane quality tends to deteriorate after burning, so it must be delivered as soon as possible to the mill for processing. This situation is dynamic and many factors, including weather conditions, delivery quotas and previous decisions taken, affect when and how much cane to burn. A dynamic Bayesian decision network (DBDN) was developed, using an iterative knowledge engineering approach, to represent sugarcane growers’ adaptive pre-harvest burning decisions. It was evaluated against five different scenarios which were crafted to represent the range of issues the grower faces when making these decisions. The DBDN was able to adapt reactively to delays in deliveries, although the model did not have enough states representing delayed delivery statuses. The model adapted proactively to rain forecasts, but only adapted reactively to high wind forecasts. The DBDN is a promising way of modelling such dynamic, adaptive operational decisions.

@inproceedings{181,
  author = {Price, C. Sue and Moodley, Deshen and Pillay, Anban},
  title = {Dynamic {Bayesian} decision network to represent growers' adaptive pre-harvest burning decisions in a sugarcane supply chain},
  abstract = {Sugarcane growers usually burn their cane to facilitate its harvesting and transportation. Cane quality tends to deteriorate after burning, so it must be delivered as soon as possible to the mill for processing. This situation is dynamic and many factors, including weather conditions, delivery quotas and previous decisions taken, affect when and how much cane to burn. A dynamic Bayesian decision network (DBDN) was developed, using an iterative knowledge engineering approach, to represent sugarcane growers’ adaptive pre-harvest burning decisions. It was evaluated against five different scenarios which were crafted to represent the range of issues the grower faces when making these decisions. The DBDN was able to adapt reactively to delays in deliveries, although the model did not have enough states representing delayed delivery statuses. The model adapted proactively to rain forecasts, but only adapted reactively to high wind forecasts. The DBDN is a promising way of modelling such dynamic, adaptive operational decisions.},
  year = {2018},
  booktitle = {Proceedings of the Annual Conference of the South African Institute of Computer Scientists and Information Technologists (SAICSIT '18)},
  pages = {89--98},
  month = sep,
  eventdate = {2018-09-26/2018-09-28},
  publisher = {ACM},
  address = {New York, NY},
  isbn = {978-1-4503-6647-2},
  url = {https://dl.acm.org/citation.cfm?id=3278681},
}
Britz K, Varzinczak I. From KLM-Style Conditionals to Defeasible Modalities, and Back. Journal of Applied Non-Classical Logics. 2018;28(1). https://doi.org/10.1080/11663081.2017.1397325.

We investigate an aspect of defeasibility that has somewhat been overlooked by the non-monotonic reasoning community, namely that of defeasible modes of reasoning. These aim to formalise defeasibility of the traditional notion of necessity in modal logic, in particular of its different readings as action, knowledge and others in specific contexts, rather than defeasibility of conditional forms. Building on an extension of the preferential approach to modal logics, we introduce new modal operators with which to formalise the notion of defeasible necessity and distinct possibility, and that can be used to represent expected effects, refutable knowledge, and so on. We show how KLM-style conditionals can smoothly be integrated with our richer language. We also propose a tableau calculus which is sound and complete with respect to our modal preferential semantics, and of which the computational complexity remains in the same class as that of the underlying classical modal logic.

@article{184,
  author = {Britz, Katarina and Varzinczak, Ivan},
  title = {From {KLM}-Style Conditionals to Defeasible Modalities, and Back},
  abstract = {We investigate an aspect of defeasibility that has somewhat been overlooked by the non-monotonic reasoning community, namely that of defeasible modes of reasoning. These aim to formalise defeasibility of the traditional notion of necessity in modal logic, in particular of its different readings as action, knowledge and others in specific contexts, rather than defeasibility of conditional forms. Building on an extension of the preferential approach to modal logics, we introduce new modal operators with which to formalise the notion of defeasible necessity and distinct possibility, and that can be used to represent expected effects, refutable knowledge, and so on. We show how KLM-style conditionals can smoothly be integrated with our richer language. We also propose a tableau calculus which is sound and complete with respect to our modal preferential semantics, and of which the computational complexity remains in the same class as that of the underlying classical modal logic.},
  year = {2018},
  journal = {Journal of Applied Non-Classical Logics},
  volume = {28},
  number = {1},
  pages = {92--121},
  publisher = {Taylor \& Francis},
  doi = {10.1080/11663081.2017.1397325},
  url = {https://doi.org/10.1080/11663081.2017.1397325},
  internal-note = {Duplicate of entry 182 (same article); kept so that existing citations of key 184 still resolve},
}

2017

Bell L, Meyer T, Mouton F. Mobile On-board Vehicle Event Recorder: MOVER. Information Communication Technology and Society Conference (ICTAS). 2017. doi:10.1109/ICTAS.2017.7920653.

The rapid development of smart-phone technology in recent years has lead to many smart-phone owners owning out-of-date devices, equipped with useful technologies, which are no longer in use. These devices are valuable resources that can be harnessed to improve users’ lives. This project aims at leveraging these older, unused devices to help improve road safety, specifically through the improved response time of emergency services to accident locations. An Android application — Mobile On-board Vehicle Event Recorder (MOVER) — was designed and built for the purpose of detecting car accidents through the use of acceleration thresholds. Driving data was gathered and crash simulations were run. With this data, testing and analysis were conducted in order to determine an acceleration threshold that separates normal driving from accident situations as accurately as possible. With this application, users can leverage their previous or current mobile devices to improve road safety - for themselves, and their area as a whole. A promising level of accuracy was achieved, but significant improvements can be made to the application. Large opportunity for future work exists in the field, and hopefully through the development of this application, other researchers may be more inclined to investigate and test such future work.

@inproceedings{358,
  author = {Bell, Luke and Meyer, Thomas and Mouton, Francois},
  title = {Mobile On-board Vehicle Event Recorder: {MOVER}},
  abstract = {The rapid development of smart-phone technology in recent years has lead to many smart-phone owners owning out-of-date devices, equipped with useful technologies, which are no longer in use. These devices are valuable resources that can be harnessed to improve users’ lives. This project aims at leveraging these older, unused devices to help improve road safety, specifically through the improved response time of emergency services to accident locations. An Android application — Mobile On-board Vehicle Event Recorder (MOVER) — was designed and built for the purpose of detecting car accidents through the use of acceleration thresholds. Driving data was gathered and crash simulations were run. With this data, testing and analysis were conducted in order to determine an acceleration threshold that separates normal driving from accident situations as accurately as possible. With this application, users can leverage their previous or current mobile devices to improve road safety - for themselves, and their area as a whole. A promising level of accuracy was achieved, but significant improvements can be made to the application. Large opportunity for future work exists in the field, and hopefully through the development of this application, other researchers may be more inclined to investigate and test such future work.},
  year = {2017},
  booktitle = {Information Communication Technology and Society Conference (ICTAS)},
  month = mar,
  eventdate = {2017-03-09/2017-03-10},
  url = {https://www.researchgate.net/publication/316239845_Mobile_on-board_vehicle_event_recorder_MOVER},
  doi = {10.1109/ICTAS.2017.7920653},
}
Gerber A, Morar N, Meyer T. Ontology-driven taxonomic workflows for Afrotropical Bees. TDWG Annual Conference. 2017. http://pubs.cs.uct.ac.za/id/eprint/1206.

This poster presents the results of an investigation into the use of ontology technologies to support taxonomy functions. Taxonomy is the science of naming and grouping biological organisms into a hierarchy. A core function of biological taxonomy is the classification and revised classification of biological organisms into an agreed upon taxonomic structure based on sets of shared characteristics. Recent developments in knowledge representation within Computer Science include the establishment of computational ontologies. Such ontologies are particularly well suited to support classification functions such as those used in biological taxonomy. Using a specific genus of Afrotropical bees, this research project captured and represented the taxonomic knowledge base into an OWL2 ontology. In addition, the project used and extended available reasoning algorithms over the ontology to draw inferences that support the necessary taxonomy functions, and developed an application, the web ontology classifier (WOC). The WOC uses the Afrotropical bee ontology and demonstrates the taxonomic functions namely: identification (keys) as well as the description and comparison of taxa (taxonomic revision).

@inproceedings{357,
  author = {Gerber, Aurona and Morar, Nishal and Meyer, Thomas},
  title = {Ontology-driven taxonomic workflows for {Afrotropical} Bees},
  abstract = {This poster presents the results of an investigation into the use of ontology technologies to support taxonomy functions. Taxonomy is the science of naming and grouping biological organisms into a hierarchy. A core function of biological taxonomy is the classification and revised classification of biological organisms into an agreed upon taxonomic structure based on sets of shared characteristics. Recent developments in knowledge representation within Computer Science include the establishment of computational ontologies. Such ontologies are particularly well suited to support classification functions such as those used in biological taxonomy. Using a specific genus of Afrotropical bees, this research project captured and represented the taxonomic knowledge base into an OWL2 ontology. In addition, the project used and extended available reasoning algorithms over the ontology to draw inferences that support the necessary taxonomy functions, and developed an application, the web ontology classifier (WOC). The WOC uses the Afrotropical bee ontology and demonstrates the taxonomic functions namely: identification (keys) as well as the description and comparison of taxa (taxonomic revision).},
  year = {2017},
  booktitle = {TDWG Annual Conference},
  month = oct,
  eventdate = {2017-10-02/2017-10-06},
  url = {http://pubs.cs.uct.ac.za/id/eprint/1206},
}
Van Niekerk DR, Van Heerden CJ, Davel MH, et al. Rapid development of TTS corpora for four South African languages. Interspeech. 2017. doi:10.21437/Interspeech.2017-1139.

This paper describes the development of text-to-speech corpora for four South African languages. The approach followed investigated the possibility of using low-cost methods including informal recording environments and untrained volunteer speakers. This objective and the additional future goal of expanding the corpus to increase coverage of South Africa’s 11 official languages necessitated experimenting with multi-speaker and code-switched data. The process and relevant observations are detailed throughout. The latest version of the corpora are available for download under an open-source licence and will likely see further development and refinement in future.

@inproceedings{278,
  author = {Van Niekerk, Daniel and Van Heerden, Charl and Davel, Marelie and Kleynhans, Neil and Kjartansson, Oddur and Jansche, Martin and Ha, Linne},
  title = {Rapid development of {TTS} corpora for four {South African} languages},
  abstract = {This paper describes the development of text-to-speech corpora for four South African languages. The approach followed investigated the possibility of using low-cost methods including informal recording environments and untrained volunteer speakers. This objective and the additional future goal of expanding the corpus to increase coverage of South Africa’s 11 official languages necessitated experimenting with multi-speaker and code-switched data. The process and relevant observations are detailed throughout. The latest version of the corpora are available for download under an open-source licence and will likely see further development and refinement in future.},
  year = {2017},
  booktitle = {Interspeech},
  pages = {2178--2182},
  address = {Stockholm, Sweden},
  doi = {10.21437/Interspeech.2017-1139},
}
Van Niekerk DR. Evaluating acoustic modelling of lexical stress for Afrikaans speech synthesis. Pattern Recognition Association of South Africa and Mechatronics International Conference (PRASA-RobMech). 2017. doi:10.1109/RoboMech.2017.8261128.

An explicit lexical stress feature is investigated for statistical parametric speech synthesis in Afrikaans: Firstly, objective measures are used to assess proposed annotation protocols and dictionaries compared to the baseline (implicit modelling) on the Lwazi 2 text-to-speech corpus. Secondly, the best candidates are evaluated on additional corpora. Finally, a comparative subjective evaluation is conducted to determine the perceptual impact on text-to-speech synthesis. The best candidate dictionary is associated with favourable objective results obtained on all corpora and was preferred in the subjective test. This suggests that it may form a basis for further refinement and work on improved prosodic models.

@inproceedings{277,
  author = {Van Niekerk, Daniel},
  title = {Evaluating acoustic modelling of lexical stress for {Afrikaans} speech synthesis},
  abstract = {An explicit lexical stress feature is investigated for statistical parametric speech synthesis in Afrikaans: Firstly, objective measures are used to assess proposed annotation protocols and dictionaries compared to the baseline (implicit modelling) on the Lwazi 2 text-to-speech corpus. Secondly, the best candidates are evaluated on additional corpora. Finally, a comparative subjective evaluation is conducted to determine the perceptual impact on text-to-speech synthesis. The best candidate dictionary is associated with favourable objective results obtained on all corpora and was preferred in the subjective test. This suggests that it may form a basis for further refinement and work on improved prosodic models.},
  year = {2017},
  booktitle = {Pattern Recognition Association of South Africa and Mechatronics International Conference (PRASA-RobMech)},
  pages = {86--91},
  address = {Bloemfontein, South Africa},
  isbn = {978-1-5386-2314-5, 978-1-5386-2313-8},
  doi = {10.1109/RoboMech.2017.8261128},
}
Van Heerden CJ, Karakos D, Narasimhan K, Davel MH, Schwartz R. Constructing Sub-Word Units for Spoken Term Detection. IEEE Int. Conf. on Acoustics, Speech and Signal Processing (ICASSP). 2017. doi:10.1109/ICASSP.2017.7953264.

Spoken term detection, especially of out-of-vocabulary (OOV) key-words, benefits from the use of sub-word systems. We experiment with different language-dependent approaches to sub-word unit generation, generating both syllable-like and morpheme-like units, and demonstrate how the performance of syllable-like units can be improved by artificially increasing the number of unique units. The effect of unit choice is empirically evaluated using the eight languages from the 2016 IARPA BABEL evaluation.

@inproceedings{276,
  author = {Van Heerden, Charl and Karakos, Damianos and Narasimhan, Karthik and Davel, Marelie and Schwartz, Richard},
  title = {Constructing Sub-Word Units for Spoken Term Detection},
  abstract = {Spoken term detection, especially of out-of-vocabulary (OOV) key-words, benefits from the use of sub-word systems. We experiment with different language-dependent approaches to sub-word unit generation, generating both syllable-like and morpheme-like units, and demonstrate how the performance of syllable-like units can be improved by artificially increasing the number of unique units. The effect of unit choice is empirically evaluated using the eight languages from the 2016 IARPA BABEL evaluation.},
  year = {2017},
  booktitle = {IEEE Int. Conf. on Acoustics, Speech and Signal Processing (ICASSP)},
  pages = {5780--5784},
  address = {New Orleans, Louisiana},
  isbn = {9781509041176},
  doi = {10.1109/ICASSP.2017.7953264},
}
Van der Walt C, Barnard E. Variable Kernel Density Estimation in High-dimensional Feature Spaces. AAAI Conf. on Artificial Intelligence (AAAI-17). 2017.

Estimating the joint probability density function of a dataset is a central task in many machine learning applications. In this work we address the fundamental problem of kernel bandwidth estimation for variable kernel density estimation in high-dimensional feature spaces. We derive a variable kernel bandwidth estimator by minimizing the leave-one-out entropy objective function and show that this estimator is capable of performing estimation in high-dimensional feature spaces with great success. We compare the performance of this estimator to state-of-the art maximum likelihood estimators on a number of representative high-dimensional machine learning tasks and show that the newly introduced minimum leave-one-out entropy estimator performs optimally on a number of high-dimensional datasets considered.

@inproceedings{275,
  author = {{Van der Walt}, Christiaan and Barnard, Etienne},
  title = {Variable Kernel Density Estimation in High-dimensional Feature Spaces},
  abstract = {Estimating the joint probability density function of a dataset is a central task in many machine learning applications. In this work we address the fundamental problem of kernel bandwidth estimation for variable kernel density estimation in high-dimensional feature spaces. We derive a variable kernel bandwidth estimator by minimizing the leave-one-out entropy objective function and show that this estimator is capable of performing estimation in high-dimensional feature spaces with great success. We compare the performance of this estimator to state-of-the art maximum likelihood estimators on a number of representative high-dimensional machine learning tasks and show that the newly introduced minimum leave-one-out entropy estimator performs optimally on a number of high-dimensional datasets considered.},
  year = {2017},
  booktitle = {AAAI Conf. on Artificial Intelligence (AAAI-17)},
  pages = {2674--2680},
  month = feb,
  eventdate = {2017-02-04/2017-02-09},
}
Giwa O, Davel MH. The Effect of Language Identification Accuracy on Speech Recognition Accuracy of Proper Names. Pattern Recognition Association of South Africa and Mechatronics International Conference (PRASA-RobMech). 2017. doi:10.1109/RoboMech.2017.8261145.

Utilizing the known language of origin of a name can be useful when predicting the pronunciation of the name. When this language is not known, automatic language identification (LID) can be used to influence which language-specific grapheme-to-phoneme (G2P) predictor is triggered to produce a pronunciation for the name. We investigate the implications when both the LID system and the G2P system generate errors: what influence does this have on a resulting speech recognition system? We experiment with different approaches to LID-based dictionary creation and report on results in four South African languages: Afrikaans, English, Sesotho and isiZulu.

@inproceedings{274,
  author = {Giwa, Oluwapelumi and Davel, Marelie},
  title = {The Effect of Language Identification Accuracy on Speech Recognition Accuracy of Proper Names},
  abstract = {Utilizing the known language of origin of a name can be useful when predicting the pronunciation of the name. When this language is not known, automatic language identification (LID) can be used to influence which language-specific grapheme-to-phoneme (G2P) predictor is triggered to produce a pronunciation for the name. We investigate the implications when both the LID system and the G2P system generate errors: what influence does this have on a resulting speech recognition system? We experiment with different approaches to LID-based dictionary creation and report on results in four South African languages: Afrikaans, English, Sesotho and isiZulu.},
  year = {2017},
  booktitle = {Pattern Recognition Association of South Africa and Mechatronics International Conference (PRASA-RobMech)},
  pages = {187--192},
  address = {Bloemfontein, South Africa},
  isbn = {978-1-5386-2314-5, 978-1-5386-2313-8},
  doi = {10.1109/RoboMech.2017.8261145},
}
Giwa O, Davel MH. Bilateral G2P Accuracy: Measuring the effect of variants. Pattern Recognition Association of South Africa and Mechatronics International Conference (PRASA-RobMech). 2017. doi:10.1109/RoboMech.2017.8261149.

Incorporating pronunciation variants in a dictionary is controversial, as this can be either advantageous or detrimental for a speech recognition system. Grapheme-to-phoneme (G2P) accuracy can help guide this decision, but calculating the G2P accuracy of variant-based dictionaries is not fully straightforward. We propose a variant matching technique to measure G2P accuracy in a principled way, when both the reference and hypothesized dictionaries may include variants. We use the new measure to evaluate G2P accuracy and speech recognition performance of systems developed with an existing set of dictionaries, and observe a better correlation between G2P accuracy and speech recognition performance, than when utilising alternative metrics.

@inproceedings{273,
  author = {Giwa, Oluwapelumi and Davel, Marelie},
  title = {Bilateral {G2P} Accuracy: Measuring the effect of variants},
  abstract = {Incorporating pronunciation variants in a dictionary is controversial, as this can be either advantageous or detrimental for a speech recognition system. Grapheme-to-phoneme (G2P) accuracy can help guide this decision, but calculating the G2P accuracy of variant-based dictionaries is not fully straightforward. We propose a variant matching technique to measure G2P accuracy in a principled way, when both the reference and hypothesized dictionaries may include variants. We use the new measure to evaluate G2P accuracy and speech recognition performance of systems developed with an existing set of dictionaries, and observe a better correlation between G2P accuracy and speech recognition performance, than when utilising alternative metrics.},
  year = {2017},
  booktitle = {Pattern Recognition Association of South Africa and Mechatronics International Conference (PRASA-RobMech)},
  pages = {208--213},
  address = {Bloemfontein, South Africa},
  isbn = {978-1-5386-2314-5, 978-1-5386-2313-8},
  doi = {10.1109/RoboMech.2017.8261149},
}
De Wet F, Kleynhans N, Van Compernolle D, Sahraeian R. Speech recognition for under-resourced languages: Data sharing in hidden Markov model systems. South African Journal of Science. 2017;113(1/2). doi:10.17159/sajs.2017/20160038.

For purposes of automated speech recognition in under-resourced environments, techniques used to share acoustic data between closely related or similar languages become important. Donor languages with abundant resources can potentially be used to increase the recognition accuracy of speech systems developed in the resource poor target language. The assumption is that adding more data will increase the robustness of the statistical estimations captured by the acoustic models. In this study we investigated data sharing between Afrikaans and Flemish – an under-resourced and well-resourced language, respectively. Our approach was focused on the exploration of model adaptation and refinement techniques associated with hidden Markov model based speech recognition systems to improve the benefit of sharing data. Specifically, we focused on the use of currently available techniques, some possible combinations and the exact utilisation of the techniques during the acoustic model development process. Our findings show that simply using normal approaches to adaptation and refinement does not result in any benefits when adding Flemish data to the Afrikaans training pool. The only observed improvement was achieved when developing acoustic models on all available data but estimating model refinements and adaptations on the target data only. Significance: • Acoustic modelling for under-resourced languages • Automatic speech recognition for Afrikaans • Data sharing between Flemish and Afrikaans to improve acoustic modelling for Afrikaans

% Journal article: De Wet et al., South African Journal of Science 113(1/2), 2017.
% Authors use "Last, First" form so the capitalised particles in "De Wet" and
% "Van Compernolle" stay part of the surname instead of being read as given names.
@article{272,
  author = {De Wet, Febe and Kleynhans, Neil and Van Compernolle, Dirk and Sahraeian, Reza},
  title = {Speech recognition for under-resourced languages: Data sharing in hidden {Markov} model systems},
  abstract = {For purposes of automated speech recognition in under-resourced environments, techniques used to
share acoustic data between closely related or similar languages become important. Donor languages
with abundant resources can potentially be used to increase the recognition accuracy of speech
systems developed in the resource poor target language. The assumption is that adding more data will
increase the robustness of the statistical estimations captured by the acoustic models. In this study
we investigated data sharing between Afrikaans and Flemish – an under-resourced and well-resourced
language, respectively. Our approach was focused on the exploration of model adaptation and refinement
techniques associated with hidden Markov model based speech recognition systems to improve the
benefit of sharing data. Specifically, we focused on the use of currently available techniques, some
possible combinations and the exact utilisation of the techniques during the acoustic model development
process. Our findings show that simply using normal approaches to adaptation and refinement does
not result in any benefits when adding Flemish data to the Afrikaans training pool. The only observed
improvement was achieved when developing acoustic models on all available data but estimating model
refinements and adaptations on the target data only.
Significance:
• Acoustic modelling for under-resourced languages
• Automatic speech recognition for Afrikaans
• Data sharing between Flemish and Afrikaans to improve acoustic modelling for Afrikaans},
  year = {2017},
  journal = {South African Journal of Science},
  volume = {113},
  number = {1/2},
  pages = {25--33},
  publisher = {Academy of Science for South Africa (ASSAf)},
  doi = {10.17159/sajs.2017/20160038},
}
Ogundele O, Moodley D, Seebregts C, Pillay A. Building Semantic Causal Models to Predict Treatment Adherence for Tuberculosis Patients in Sub-Saharan Africa. In: Software Engineering in Health Care, LNCS vol. 9062. Springer, Cham; 2017. doi:https://doi.org/10.1007/978-3-319-63194-3_6.

Poor adherence to prescribed treatment is a major factor contributing to tuberculosis patients developing drug resistance and failing treatment. Treatment adherence behaviour is influenced by diverse personal, cultural and socio-economic factors that vary between regions and communities. Decision network models can potentially be used to predict treatment adherence behaviour. However, determining the network structure (identifying the factors and their causal relations) and the conditional probabilities is a challenging task. To resolve the former we developed an ontology supported by current scientific literature to categorise and clarify the similarity and granularity of factors.

% Book chapter: Ogundele et al., in "Software Engineering in Health Care"
% (LNCS vol. 9062), Springer, 2017. The containing book goes in booktitle;
% series and volume are split out of the former combined venue string.
@incollection{250,
  author = {Ogundele, Olukunle and Moodley, Deshen and Seebregts, Chris and Pillay, Anban},
  title = {Building Semantic Causal Models to Predict Treatment Adherence for Tuberculosis Patients in {Sub-Saharan Africa}},
  abstract = {Poor adherence to prescribed treatment is a major factor contributing to tuberculosis patients developing drug resistance and failing treatment. Treatment adherence behaviour is influenced by diverse personal, cultural and socio-economic factors that vary between regions and communities. Decision network models can potentially be used to predict treatment adherence behaviour. However, determining the network structure (identifying the factors and their causal relations) and the conditional probabilities is a challenging task. To resolve the former we developed an ontology supported by current scientific literature to categorise and clarify the similarity and granularity of factors.},
  year = {2017},
  booktitle = {Software Engineering in Health Care},
  series = {Lecture Notes in Computer Science},
  volume = {9062},
  pages = {81--95},
  publisher = {Springer},
  address = {Cham},
  isbn = {978-3-319-63193-6},
  doi = {10.1007/978-3-319-63194-3_6},
}
Berglund M, van der Merwe B. On the semantics of regular expression parsing in the wild. Theoretical Computer Science. 2017;679. doi:http://dx.doi.org/10.1016/j.tcs.2016.09.006.

We introduce prioritized transducers to formalize capturing groups in regular expression matching in a way that permits straightforward modeling of capturing in Java’s regular expression library. The broader questions of parsing semantics and performance are also considered. In addition, the complexity of deciding equivalence of regular expressions with capturing groups is investigated.

% Journal article: Berglund and van der Merwe, Theoretical Computer Science 679, 2017.
% The serial identifier 0304-3975 is stored as an ISSN; the DOI is stored bare
% (no resolver prefix). A footnote marker that had leaked into the abstract
% after "Java’s" has been removed.
@article{218,
  author = {Berglund, Martin and van der Merwe, Brink},
  title = {On the semantics of regular expression parsing in the wild},
  abstract = {We introduce prioritized transducers to formalize capturing groups in regular expression
matching in a way that permits straightforward modeling of capturing in Java’s regular
expression library. The broader questions of parsing semantics and performance are also
considered. In addition, the complexity of deciding equivalence of regular expressions with
capturing groups is investigated.},
  year = {2017},
  journal = {Theoretical Computer Science},
  volume = {679},
  pages = {69--82},
  publisher = {Elsevier},
  issn = {0304-3975},
  url = {https://www.sciencedirect.com/science/article/pii/S0304397516304790?via%3Dihub},
  doi = {10.1016/j.tcs.2016.09.006},
}
Watson B, Runge T, Schaefer I, Cleophas LGWA. Many-MADFAct: Concurrently Constructing MADFAs. Prague Stringology Conference 2017. 2017. https://dblp.org/db/conf/stringology/stringology2017.

No Abstract

% Conference paper: Watson et al., Prague Stringology Conference 2017, pp. 127--142.
% A single paper is an inproceedings entry with the venue in booktitle.
% month uses the standard macro; the original day range (28/08-30/08) is
% preserved in eventdate, which classic BibTeX styles ignore.
@inproceedings{215,
  author = {Watson, Bruce and Runge, T. and Schaefer, I. and Cleophas, L. G. W. A.},
  title = {{Many-MADFAct}: Concurrently Constructing {MADFAs}},
  abstract = {No Abstract},
  year = {2017},
  month = aug,
  eventdate = {2017-08-28/2017-08-30},
  booktitle = {Prague Stringology Conference 2017},
  pages = {127--142},
  publisher = {Prague Stringology Club},
  isbn = {978-80-01-06193-0},
  url = {https://dblp.org/db/conf/stringology/stringology2017},
}
Watson B. Efficient pattern matching in degenerate strings with the Burrows-Wheeler transform. WCTA 2017 12th Workshop on Compression, Text and Algorithms. 2017. http://pages.di.unipi.it/spire2017/wcta.html.

No Abstract

% Workshop paper: Watson, WCTA 2017 (12th Workshop on Compression, Text and
% Algorithms), pp. 1--7. The original date 29/09 is kept in eventdate; the URL
% gains an explicit scheme so it is a valid absolute link.
% NOTE(review): http is assumed for the scheme; confirm whether the host serves https.
@inproceedings{214,
  author = {Watson, Bruce},
  title = {Efficient pattern matching in degenerate strings with the {Burrows-Wheeler} transform},
  abstract = {No Abstract},
  year = {2017},
  month = sep,
  eventdate = {2017-09-29},
  booktitle = {{WCTA} 2017 12th Workshop on Compression, Text and Algorithms},
  pages = {1--7},
  url = {http://pages.di.unipi.it/spire2017/wcta.html},
}
Watson B, Nxumalo M, Kourie DG, Cleophas LGWA. An Assessment of Algorithms for Deriving Failure Deterministic Finite Automata. South African Computer Journal. 2017;29(1). http://dx.doi.org/10.18489/sacj.v29i1.456.

No Abstract

% Journal article: Watson et al., South African Computer Journal 29(1), 2017.
% The issue goes in number, the serial identifier 2313-7835 is an ISSN, and the
% DOI embedded in the resolver URL is also exposed as a bare doi field.
@article{213,
  author = {Watson, Bruce and Nxumalo, M. and Kourie, D. G. and Cleophas, L. G. W. A.},
  title = {An Assessment of Algorithms for Deriving Failure Deterministic Finite Automata},
  abstract = {No Abstract},
  year = {2017},
  journal = {South African Computer Journal},
  volume = {29},
  number = {1},
  pages = {43--68},
  issn = {2313-7835},
  doi = {10.18489/sacj.v29i1.456},
  url = {http://dx.doi.org/10.18489/sacj.v29i1.456},
}
Watson B, Daykin JW. Indeterminate String Factorizations and Degenerate Text Transformations. Mathematics in Computer Science. 2017;11(2). https://core.ac.uk/download/pdf/81595959.pdf.

No Abstract

% Journal article: Watson and Daykin, Mathematics in Computer Science 11(2), 2017.
% The issue goes in number and the serial identifier 1661-8270 is stored as an ISSN.
@article{212,
  author = {Watson, Bruce and Daykin, J. W.},
  title = {Indeterminate String Factorizations and Degenerate Text Transformations},
  abstract = {No Abstract},
  year = {2017},
  journal = {Mathematics in Computer Science},
  volume = {11},
  number = {2},
  pages = {209--218},
  issn = {1661-8270},
  url = {https://core.ac.uk/download/pdf/81595959.pdf},
}
de Waal A, Koen H, de Villiers JP, Roodt H. An expert-driven causal model of the rhino poaching problem. Ecological Modelling. 2017;347. https://www.sciencedirect.com/science/article/pii/S0304380016307621.

A significant challenge in ecological modelling is the lack of complete sets of high-quality data. This is especially true in the rhino poaching problem where data is incomplete. Although there are many poaching attacks, they can be spread over a vast surface area such as in the case of the Kruger National Park in South Africa, which is roughly the size of Israel. Bayesian networks are useful reasoning tools and can utilise expert knowledge when data is insufficient or sparse. Bayesian networks allow the modeller to incorporate data, expert knowledge, or any combination of the two. This flexibility of Bayesian networks makes them ideal for modelling complex ecological problems. In this paper an expert-driven model of the rhino poaching problem is presented. The development as well as the evaluation of the model is performed from an expert perspective. Independent expert evaluation is performed in the form of queries that test different scenarios. Structuring the rhino poaching problem as a causal network yields a framework that can be used to reason about the problem, as well as inform the modeller of the type of data that has to be gathered.

% Journal article: de Waal et al., Ecological Modelling 347, 2017.
% Authors use "von Last, First" form; the serial identifier 0304-3800 is
% stored as an ISSN rather than an ISBN.
@article{191,
  author = {de Waal, Alta and Koen, Hildegarde and de Villiers, J. P. and Roodt, Henk},
  title = {An expert-driven causal model of the rhino poaching problem},
  abstract = {A significant challenge in ecological modelling is the lack of complete sets of high-quality data. This is especially true in the rhino poaching problem where data is incomplete. Although there are many poaching attacks, they can be spread over a vast surface area such as in the case of the Kruger National Park in South Africa, which is roughly the size of Israel. Bayesian networks are useful reasoning tools and can utilise expert knowledge when data is insufficient or sparse. Bayesian networks allow the modeller to incorporate data, expert knowledge, or any combination of the two. This flexibility of Bayesian networks makes them ideal for modelling complex ecological problems. In this paper an expert-driven model of the rhino poaching problem is presented. The development as well as the evaluation of the model is performed from an expert perspective. Independent expert evaluation is performed in the form of queries that test different scenarios. Structuring the rhino poaching problem as a causal network yields a framework that can be used to reason about the problem, as well as inform the modeller of the type of data that has to be gathered.},
  year = {2017},
  journal = {Ecological Modelling},
  volume = {347},
  pages = {29--39},
  publisher = {Elsevier},
  issn = {0304-3800},
  url = {https://www.sciencedirect.com/science/article/pii/S0304380016307621},
}
Gueorguiev V, Moodley D. Hyperparameter Optimization for Astronomy. Honours project, University of Cape Town; 2017. http://projects.cs.uct.ac.za/honsproj/cgi-bin/view/2017/gueorguiev_henhaeyono_stopforth.zip/#downloads.

The task of phenomenon classification in astronomy provides a novel and challenging setting for the application of state-of-the-art techniques addressing the problem of combined algorithm selection and hyperparameter optimization (CASH) of machine learning algorithms, which find local applications such as at the data-intensive Square Kilometre Array (SKA). This work will use various algorithms for CASH to explore the possibility and efficacy of hyperparameter optimization on improving performance of machine learning techniques for astronomy. Then, with focus on the Galaxy Zoo project, these algorithms will be used to conduct an in-depth comparison of state-of-the-art in hyperparameter optimization (HPO) along with techniques that aim to improve performance on large datasets and expensive function evaluations. Finally, the likelihood for an integration with a cognitive vision system for astronomy will be examined by conducting a brief exploration into different feature extraction and selection methods.

% Honours-level project report, University of Cape Town, 2017.
% The thesis entry type needs a school field, so the institution moves there
% from publisher, and the degree level moves from volume into type, which
% overrides the default "PhD thesis" label.
% NOTE(review): the second author may be the project supervisor; confirm before
% relying on the author list.
@phdthesis{180,
  author = {Gueorguiev, V. and Moodley, Deshen},
  title = {Hyperparameter Optimization for Astronomy},
  abstract = {The task of phenomenon classification in astronomy provides a novel and challenging setting for the application of state-of-the-art techniques addressing the problem of combined
algorithm selection and hyperparameter optimization (CASH) of machine learning algorithms, which find local applications such as at the data-intensive Square Kilometre Array
(SKA). This work will use various algorithms for CASH to explore the possibility and efficacy of hyperparameter optimization on improving performance of machine learning
techniques for astronomy. Then, with focus on the Galaxy Zoo project, these algorithms will be used to conduct an in-depth comparison of state-of-the-art in hyperparameter optimization
(HPO) along with techniques that aim to improve performance on large datasets and expensive function evaluations. Finally, the likelihood for an integration with a cognitive
vision system for astronomy will be examined by conducting a brief exploration into different feature extraction and selection methods.},
  year = {2017},
  type = {Honours Project},
  school = {University of Cape Town},
  url = {http://projects.cs.uct.ac.za/honsproj/cgi-bin/view/2017/gueorguiev_henhaeyono_stopforth.zip/#downloads},
}
Watson B, Strauss T, Kourie DG, Cleophas LGWA. CSP for Parallelising Brzozowski’s DFA Construction Algorithm. In: The Role of Theory in Computer Science. World Scientific Publishing Co. Pte. Ltd.; 2017. https://doi.org/10.1142/9789813148208_0010.

No Abstract

% Book chapter: Watson et al., in "The Role of Theory in Computer Science",
% World Scientific, 2017, pp. 217--243. The containing book goes in booktitle,
% and the DOI embedded in the resolver URL is also exposed as a bare doi field.
@incollection{179,
  author = {Watson, Bruce and Strauss, T. and Kourie, D. G. and Cleophas, L. G. W. A.},
  title = {{CSP} for Parallelising {Brzozowski’s} {DFA} Construction Algorithm},
  abstract = {No Abstract},
  year = {2017},
  booktitle = {The Role of Theory in Computer Science},
  pages = {217--243},
  publisher = {World Scientific Publishing Co. Pte. Ltd.},
  isbn = {978-981-3148-19-2},
  doi = {10.1142/9789813148208_0010},
  url = {https://doi.org/10.1142/9789813148208_0010},
}
van der Merwe B, Weideman N, Berglund M. Turning evil regexes harmless. Conference of South African Institute of Computer Scientists and Information Technologists (SAICSIT'17). 2017. https://dl.acm.org/citation.cfm?id=3129416.

No Abstract

% Conference paper: van der Merwe, Weideman and Berglund, SAICSIT'17, ACM, 2017.
% A single paper is an inproceedings entry with the venue in booktitle.
% month uses the standard macro; the original day range (26/09-28/09) is
% preserved in eventdate, which classic BibTeX styles ignore.
@inproceedings{178,
  author = {van der Merwe, Brink and Weideman, N. and Berglund, Martin},
  title = {Turning evil regexes harmless},
  abstract = {No Abstract},
  year = {2017},
  month = sep,
  eventdate = {2017-09-26/2017-09-28},
  booktitle = {Conference of South African Institute of Computer Scientists and Information Technologists ({SAICSIT}'17)},
  publisher = {ACM},
  url = {https://dl.acm.org/citation.cfm?id=3129416},
}
  • CSIR
  • DSI
  • Covid-19