Primary tabs
2024
Brooks, W., Davel, M. H., & Mouton, C. (2024). Does Simple Trump Complex? Comparing Strategies for Adversarial Robustness in DNNs. Artificial Intelligence Research. SACAIR 2024. Communications in Computer and Information Science, vol 2326. https://doi.org/10.1007/978-3-031-78255-8_15
@inproceedings{520,
  author    = {Brooks, William and Davel, Marelie H. and Mouton, Coenraad},
  title     = {Does Simple Trump Complex? Comparing Strategies for Adversarial Robustness in {DNNs}},
  booktitle = {Artificial Intelligence Research (SACAIR 2024)},
  series    = {Communications in Computer and Information Science},
  volume    = {2326},
  pages     = {253--269},
  year      = {2024},
  month     = dec,
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  doi       = {10.1007/978-3-031-78255-8_15},
}
Potgieter, H. L., Mouton, C., & Davel, M. H. (2024). Impact of Batch Normalization on Convolutional Network Representations. Artificial Intelligence Research (SACAIR 2024), vol 2326. https://doi.org/10.1007/978-3-031-78255-8_14
Batch normalization (BatchNorm) is a popular layer normalization technique used when training deep neural networks. It has been shown to enhance the training speed and accuracy of deep learning models. However, the mechanics by which BatchNorm achieves these benefits is an active area of research, and different perspectives have been proposed. In this paper, we investigate the effect of BatchNorm on the resulting hidden representations, that is, the vectors of activation values formed as samples are processed at each hidden layer. Specifically, we consider the sparsity of these representations, as well as their implicit clustering – the creation of groups of representations that are similar to some extent. We contrast image classification models trained with and without batch normalization and highlight consistent differences observed. These findings highlight that BatchNorm’s effect on representational sparsity is not a significant factor affecting generalization, while the representations of models trained with BatchNorm tend to show more advantageous clustering characteristics.
@inproceedings{518,
  author    = {Potgieter, Hermanus L. and Mouton, Coenraad and Davel, Marelie H.},
  title     = {Impact of Batch Normalization on Convolutional Network Representations},
  abstract  = {Batch normalization (BatchNorm) is a popular layer normalization technique used when training deep neural networks. It has been shown to enhance the training speed and accuracy of deep learning models. However, the mechanics by which BatchNorm achieves these benefits is an active area of research, and different perspectives have been proposed. In this paper, we investigate the effect of BatchNorm on the resulting hidden representations, that is, the vectors of activation values formed as samples are processed at each hidden layer. Specifically, we consider the sparsity of these representations, as well as their implicit clustering – the creation of groups of representations that are similar to some extent. We contrast image classification models trained with and without batch normalization and highlight consistent differences observed. These findings highlight that BatchNorm’s effect on representational sparsity is not a significant factor affecting generalization, while the representations of models trained with BatchNorm tend to show more advantageous clustering characteristics.},
  booktitle = {Artificial Intelligence Research (SACAIR 2024)},
  series    = {Communications in Computer and Information Science},
  volume    = {2326},
  pages     = {235--252},
  year      = {2024},
  month     = dec,
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  doi       = {10.1007/978-3-031-78255-8_14},
}
Ramalepe, S. P., Modipa, T. I., & Davel, M. H. (2024). Pre-training a Transformer-Based Generative Model Using a Small Sepedi Dataset. Artificial Intelligence Research. SACAIR 2024. Communications in Computer and Information Science, vol 2326. https://doi.org/10.1007/978-3-031-78255-8_19
Due to the scarcity of data in low-resourced languages, the development of language models for these languages has been very slow. Currently, pre-trained language models have gained popularity in natural language processing, especially, in developing domain-specific models for low-resourced languages. In this study, we experiment with the impact of using occlusion-based techniques when training a language model for a text generation task. We curate 2 new datasets, the Sepedi monolingual (SepMono) dataset from several South African resources and the Sepedi radio news (SepNews) dataset from the radio news domain. We use the SepMono dataset to pre-train transformer-based models using the occlusion and non-occlusion pre-training techniques and compare performance. The SepNews dataset is specifically used for fine-tuning. Our results show that the non-occlusion models perform better compared to the occlusion-based models when measuring validation loss and perplexity. However, analysis of the generated text using the BLEU score metric, which measures the quality of the generated text, shows a slightly higher BLEU score for the occlusion-based models compared to the non-occlusion models.
@inproceedings{517,
  author    = {Ramalepe, Simon P. and Modipa, Thipe I. and Davel, Marelie H.},
  title     = {Pre-training a Transformer-Based Generative Model Using a Small {Sepedi} Dataset},
  abstract  = {Due to the scarcity of data in low-resourced languages, the development of language models for these languages has been very slow. Currently, pre-trained language models have gained popularity in natural language processing, especially, in developing domain-specific models for low-resourced languages. In this study, we experiment with the impact of using occlusion-based techniques when training a language model for a text generation task. We curate 2 new datasets, the Sepedi monolingual (SepMono) dataset from several South African resources and the Sepedi radio news (SepNews) dataset from the radio news domain. We use the SepMono dataset to pre-train transformer-based models using the occlusion and non-occlusion pre-training techniques and compare performance. The SepNews dataset is specifically used for fine-tuning. Our results show that the non-occlusion models perform better compared to the occlusion-based models when measuring validation loss and perplexity. However, analysis of the generated text using the BLEU score metric, which measures the quality of the generated text, shows a slightly higher BLEU score for the occlusion-based models compared to the non-occlusion models.},
  booktitle = {Artificial Intelligence Research (SACAIR 2024)},
  series    = {Communications in Computer and Information Science},
  volume    = {2326},
  pages     = {319--333},
  year      = {2024},
  month     = dec,
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  doi       = {10.1007/978-3-031-78255-8_19},
}
Ngorima, S. A., Helberg, A. S. J., & Davel, M. H. (2024). Neural Network-Based Vehicular Channel Estimation Performance: Effect of Noise in the Training Set. Artificial Intelligence Research. SACAIR 2024. Communications in Computer and Information Science, vol 2326. https://doi.org/10.1007/978-3-031-78255-8_12
Vehicular communication systems face significant challenges due to high mobility and rapidly changing environments, which affect the channel over which the signals travel. To address these challenges, neural network (NN)-based channel estimation methods have been suggested. These methods are primarily trained on high signal-to-noise ratio (SNR) with the assumption that training a NN in less noisy conditions can result in good generalisation. This study examines the effectiveness of training NN-based channel estimators on mixed SNR datasets compared to training solely on high SNR datasets, as seen in several related works. Estimators evaluated in this work include an architecture that uses convolutional layers and self-attention mechanisms; a method that employs temporal convolutional networks and data pilot-aided estimation; two methods that combine classical methods with multilayer perceptrons; and the current state-of-the-art model that combines Long-Short-Term Memory networks with data pilot-aided and temporal averaging methods as post processing. Our results indicate that using only high SNR data for training is not always optimal, and the SNR range in the training dataset should be treated as a hyperparameter that can be adjusted for better performance. This is illustrated by the better performance of some models in low SNR conditions when trained on the mixed SNR dataset, as opposed to when trained exclusively on high SNR data.
@inproceedings{516,
  author    = {Ngorima, Simbarashe A. and Helberg, Albert S. J. and Davel, Marelie H.},
  title     = {Neural Network-Based Vehicular Channel Estimation Performance: Effect of Noise in the Training Set},
  abstract  = {Vehicular communication systems face significant challenges due to high mobility and rapidly changing environments, which affect the channel over which the signals travel. To address these challenges, neural network (NN)-based channel estimation methods have been suggested. These methods are primarily trained on high signal-to-noise ratio (SNR) with the assumption that training a NN in less noisy conditions can result in good generalisation. This study examines the effectiveness of training NN-based channel estimators on mixed SNR datasets compared to training solely on high SNR datasets, as seen in several related works. Estimators evaluated in this work include an architecture that uses convolutional layers and self-attention mechanisms; a method that employs temporal convolutional networks and data pilot-aided estimation; two methods that combine classical methods with multilayer perceptrons; and the current state-of-the-art model that combines Long-Short-Term Memory networks with data pilot-aided and temporal averaging methods as post processing. Our results indicate that using only high SNR data for training is not always optimal, and the SNR range in the training dataset should be treated as a hyperparameter that can be adjusted for better performance. This is illustrated by the better performance of some models in low SNR conditions when trained on the mixed SNR dataset, as opposed to when trained exclusively on high SNR data.},
  booktitle = {Artificial Intelligence Research (SACAIR 2024)},
  series    = {Communications in Computer and Information Science},
  volume    = {2326},
  pages     = {192--206},
  year      = {2024},
  month     = dec,
  publisher = {Springer Nature Switzerland},
  address   = {Cham},
  isbn      = {978-3-031-78255-8},
  doi       = {10.1007/978-3-031-78255-8_12},
}
Ngorima, S. A., Helberg, A. S. J., & Davel, M. H. (2024). A Data Pilot-Aided Temporal Convolutional Network for Channel Estimation in IEEE 802.11p Vehicle-to-Vehicle Communications. Southern Africa Telecommunication Networks and Applications Conference (SATNAC).
In modern communication systems, having an accurate channel estimator is crucial. However, when there is mobility, it becomes difficult to estimate the channel and the pilot signals, which are used for channel estimation, become insufficient. In this paper, we introduce the use of Temporal
Convolutional Networks (TCNs) with data pilot-aided (DPA) channel estimation and temporal averaging (TA) to estimate vehicle-to-vehicle same direction with Wall (VTV-SDWW) channels. The TCN-DPA-TA estimator showed an improvement in Bit Error Rate (BER) performance of up to 1 order of magnitude. Furthermore, the BER performance of the TCN-DPA without TA also improved by up to 0.7 magnitude compared to the best classical estimator.
@inproceedings{515,
  author    = {Ngorima, Simbarashe A. and Helberg, Albert S. J. and Davel, Marelie H.},
  title     = {A Data Pilot-Aided Temporal Convolutional Network for Channel Estimation in {IEEE} 802.11p Vehicle-to-Vehicle Communications},
  abstract  = {In modern communication systems, having an accurate channel estimator is crucial. However, when there is mobility, it becomes difficult to estimate the channel and the pilot signals, which are used for channel estimation, become insufficient. In this paper, we introduce the use of Temporal Convolutional Networks (TCNs) with data pilot-aided (DPA) channel estimation and temporal averaging (TA) to estimate vehicle-to-vehicle same direction with Wall (VTV-SDWW) channels. The TCN-DPA-TA estimator showed an improvement in Bit Error Rate (BER) performance of up to 1 order of magnitude. Furthermore, the BER performance of the TCN-DPA without TA also improved by up to 0.7 magnitude compared to the best classical estimator.},
  booktitle = {Southern Africa Telecommunication Networks and Applications Conference (SATNAC)},
  pages     = {356--361},
  year      = {2024},
}
Mouton, C., Rabe, R., Haasbroek, D. G., Theunissen, M. W., Potgieter, H. L., & Davel, M. H. (2024). Is network fragmentation a useful complexity measure? NeurIPS 2024 Workshop SciForDL.
It has been observed that the input space of deep neural network classifiers can exhibit ‘fragmentation’, where the model function rapidly changes class as the input space is traversed. The severity of this fragmentation tends to follow the double descent curve, achieving a maximum at the interpolation regime. We study this phenomenon in the context of image classification and ask whether fragmentation could be predictive of generalization performance. Using a fragmentation-based complexity measure, we show this to be possible by achieving good performance on the PGDL (Predicting Generalization in Deep Learning) benchmark. In addition, we report on new observations related to fragmentation, namely (i) fragmentation is not limited to the input space but occurs in the hidden representations as well, (ii) fragmentation follows the trends in the validation error throughout training, and (iii) fragmentation is not a direct result of increased weight norms. Together, this indicates that fragmentation is a phenomenon worth investigating further when studying the generalization ability of deep neural networks.
@inproceedings{514,
  author    = {Mouton, Coenraad and Rabe, Randle and Haasbroek, Daniël and Theunissen, Marthinus W. and Potgieter, Hermanus L. and Davel, Marelie H.},
  title     = {Is network fragmentation a useful complexity measure?},
  abstract  = {It has been observed that the input space of deep neural network classifiers can exhibit ‘fragmentation’, where the model function rapidly changes class as the input space is traversed. The severity of this fragmentation tends to follow the double descent curve, achieving a maximum at the interpolation regime. We study this phenomenon in the context of image classification and ask whether fragmentation could be predictive of generalization performance. Using a fragmentation-based complexity measure, we show this to be possible by achieving good performance on the PGDL (Predicting Generalization in Deep Learning) benchmark. In addition, we report on new observations related to fragmentation, namely (i) fragmentation is not limited to the input space but occurs in the hidden representations as well, (ii) fragmentation follows the trends in the validation error throughout training, and (iii) fragmentation is not a direct result of increased weight norms. Together, this indicates that fragmentation is a phenomenon worth investigating further when studying the generalization ability of deep neural networks.},
  booktitle = {NeurIPS 2024 Workshop SciForDL},
  year      = {2024},
  month     = dec,
}
le Roux, V., Davel, M. H., & Bosman, J. (2024). Parsimonious airfoil Parameterisation: A deep learning framework with Bidirectional LSTM and Gaussian Mixture models. Expert Systems With Applications, 255. https://doi.org/10.1016/j.eswa.2024.124726
The choice of airfoil parameterisation method significantly influences the overall wing optimisation performance by affecting the flexibility and computational efficiency of the process. Ideally, one should be able to intuitively constrain airfoil shape and structural characteristics as input to the optimisation process. Current parameterisation techniques lack the flexibility to generate airfoils efficiently by specifying parsimonious shape and structural features. To address this limitation, a deep learning framework is proposed, enabling conditional airfoil generation from an airfoil’s shape and structural feature definition. Specifically, we demonstrate the application of Bidirectional Long Short Term Memory models and Bayesian Gaussian Mixture models to derive airfoil coordinates from a compact set of shape and structural characteristics that we define. The proposed framework is shown to achieve favorable airfoil performance optimisation due to improved exploration and exploitation of the design space, compared to traditional approaches. Overall, the proposed optimisation framework is able to realise a 9.04% performance improvement over an airfoil design optimised with traditional parameterisation techniques.
@article{513,
  author  = {le Roux, Vincent and Davel, Marelie H. and Bosman, Johan},
  title   = {Parsimonious airfoil Parameterisation: A deep learning framework with Bidirectional {LSTM} and {Gaussian} Mixture models},
  abstract = {The choice of airfoil parameterisation method significantly influences the overall wing optimisation performance by affecting the flexibility and computational efficiency of the process. Ideally, one should be able to intuitively constrain airfoil shape and structural characteristics as input to the optimisation process. Current parameterisation techniques lack the flexibility to generate airfoils efficiently by specifying parsimonious shape and structural features. To address this limitation, a deep learning framework is proposed, enabling conditional airfoil generation from an airfoil’s shape and structural feature definition. Specifically, we demonstrate the application of Bidirectional Long Short Term Memory models and Bayesian Gaussian Mixture models to derive airfoil coordinates from a compact set of shape and structural characteristics that we define. The proposed framework is shown to achieve favorable airfoil performance optimisation due to improved exploration and exploitation of the design space, compared to traditional approaches. Overall, the proposed optimisation framework is able to realise a 9.04% performance improvement over an airfoil design optimised with traditional parameterisation techniques.},
  journal = {Expert Systems with Applications},
  volume  = {255},
  pages   = {124726},
  year    = {2024},
  month   = jul,
  doi     = {10.1016/j.eswa.2024.124726},
}
Mouton, C., Theunissen, M. W., & Davel, M. H. (2024). Input margins can predict generalization too. In Proceedings of the Thirty-Eighth AAAI Conference on Artificial Intelligence and Thirty-Sixth Conference on Innovative Applications of Artificial Intelligence and Fourteenth Symposium on Educational Advances in Artificial Intelligence (AAAI'24/IAAI'24/EAAI'24). AAAI Press.
Understanding generalization in deep neural networks is an active area of research. A promising avenue of exploration has been that of margin measurements: the shortest distance to the decision boundary for a given sample or its representation internal to the network. While margins have been shown to be correlated with the generalization ability of a model when measured at its hidden representations (hidden margins), no such link between large margins and generalization has been established for input margins. We show that while input margins are not generally predictive of generalization, they can be if the search space is appropriately constrained. We develop such a measure based on input margins, which we refer to as ‘constrained margins’. The predictive power of this new measure is demonstrated on the ‘Predicting Generalization in Deep Learning’ (PGDL) dataset and contrasted with hidden representation margins. We find that constrained margins achieve highly competitive scores and outperform other margin measurements in general. This provides a novel insight on the relationship between generalization and classification margins, and highlights the importance of considering the data manifold for investigations of generalization in DNNs
@inproceedings{512,
  author    = {Mouton, Coenraad and Theunissen, Marthinus W. and Davel, Marelie H.},
  title     = {Input margins can predict generalization too},
  abstract  = {Understanding generalization in deep neural networks is an active area of research. A promising avenue of exploration has been that of margin measurements: the shortest distance to the decision boundary for a given sample or its representation internal to the network. While margins have been shown to be correlated with the generalization ability of a model when measured at its hidden representations (hidden margins), no such link between large margins and generalization has been established for input margins. We show that while input margins are not generally predictive of generalization, they can be if the search space is appropriately constrained. We develop such a measure based on input margins, which we refer to as ‘constrained margins’. The predictive power of this new measure is demonstrated on the ‘Predicting Generalization in Deep Learning’ (PGDL) dataset and contrasted with hidden representation margins. We find that constrained margins achieve highly competitive scores and outperform other margin measurements in general. This provides a novel insight on the relationship between generalization and classification margins, and highlights the importance of considering the data manifold for investigations of generalization in DNNs},
  booktitle = {Proceedings of the Thirty-Eighth AAAI Conference on Artificial Intelligence and Thirty-Sixth Conference on Innovative Applications of Artificial Intelligence and Fourteenth Symposium on Educational Advances in Artificial Intelligence (AAAI'24/IAAI'24/EAAI'24)},
  pages     = {14379--14387},
  year      = {2024},
  month     = feb,
  publisher = {AAAI Press},
}
2023
Davel, M. H., Lotz, S., Theunissen, M. W., de Villiers, A., Grant, C., Rabe, R., Schoombie, S., & Conacher, C. (2023). Knowledge Discovery in Time Series Data. In Deep Learning Indaba 2023.
• Complex time series data often encountered in scientific and engineering domains. • Deep learning (DL) is particularly successful here: – large data sets, multivariate input and/or ouput, – highly complex sequences of interactions. • Model interpretability: – Ability to understand a model’s decisions in a given context [1]. – Techniques typically not originally developed for time series data. – Time series interpretations themselves become uninterpretable. • Knowledge Discovery: – DL has potential to reveal interesting patterns in large data sets. – Potential to produce novel insights about the task itself [2, 3]. • ‘know-it’: Collaborative project that studies knowledge discovery in time series data.
@misc{507,
  author       = {Davel, Marelie H. and Lotz, Stefan and Theunissen, Marthinus W. and de Villiers, Almaro and Grant, Chara and Rabe, Randle and Schoombie, Stefan and Conacher, Cleo},
  title        = {Knowledge Discovery in Time Series Data},
  abstract     = {• Complex time series data often encountered in scientific and engineering domains. • Deep learning (DL) is particularly successful here: – large data sets, multivariate input and/or ouput, – highly complex sequences of interactions. • Model interpretability: – Ability to understand a model’s decisions in a given context [1]. – Techniques typically not originally developed for time series data. – Time series interpretations themselves become uninterpretable. • Knowledge Discovery: – DL has potential to reveal interesting patterns in large data sets. – Potential to produce novel insights about the task itself [2, 3]. • ‘know-it’: Collaborative project that studies knowledge discovery in time series data.},
  howpublished = {Deep Learning Indaba 2023},
  year         = {2023},
  month        = sep,
}
Olivier, J. C., & Barnard, E. (2023). Minimum phase finite impulse response filter design. IET Signal Processing, 17(7). https://doi.org/10.1049/sil2.12166
The design of minimum phase finite impulse response (FIR) filters is considered. The study demonstrates that the residual errors achieved by current state-of-the-art design methods are nowhere near the smallest error possible on a finite resolution digital computer. This is shown to be due to conceptual errors in the literature pertaining to what constitutes a factorable linear phase filter. This study shows that factorisation is possible with a zero residual error (in the absence of machine finite resolution error) if the linear operator or matrix representing the linear phase filter is positive definite. Methodology is proposed able to design a minimum phase filter that is optimal—in the sense that the residual error is limited only by the finite precision of the digital computer, with no systematic error. The study presents practical application of the proposed methodology by designing two minimum phase Chebyshev FIR filters. Results are compared to state-of-the-art methods from the literature, and it is shown that the proposed methodology is able to reduce currently achievable residual errors by several orders of magnitude.
@article{506,
  author  = {Olivier, Jan C. and Barnard, Etienne},
  title   = {Minimum phase finite impulse response filter design},
  abstract = {The design of minimum phase finite impulse response (FIR) filters is considered. The study demonstrates that the residual errors achieved by current state-of-the-art design methods are nowhere near the smallest error possible on a finite resolution digital computer. This is shown to be due to conceptual errors in the literature pertaining to what constitutes a factorable linear phase filter. This study shows that factorisation is possible with a zero residual error (in the absence of machine finite resolution error) if the linear operator or matrix representing the linear phase filter is positive definite. Methodology is proposed able to design a minimum phase filter that is optimal—in the sense that the residual error is limited only by the finite precision of the digital computer, with no systematic error. The study presents practical application of the proposed methodology by designing two minimum phase Chebyshev FIR filters. Results are compared to state-of-the-art methods from the literature, and it is shown that the proposed methodology is able to reduce currently achievable residual errors by several orders of magnitude.},
  journal = {IET Signal Processing},
  volume  = {17},
  number  = {7},
  year    = {2023},
  month   = jul,
  doi     = {10.1049/sil2.12166},
}
Ngorima, S. A., Helberg, A. S. J., & Davel, M. H. (2023). Sequence Based Deep Neural Networks for Channel Estimation in Vehicular Communication Systems. In Artificial Intelligence Research. SACAIR 2023. Communications in Computer and Information Science (Vol. 1976). Springer, Cham. https://doi.org/10.1007/978-3-031-49002-6_12
Channel estimation is a critical component of vehicular communications systems, especially in high-mobility scenarios. The IEEE 802.11p standard uses preamble-based channel estimation, which is not sufficient in these situations. Recent work has proposed using deep neural networks for channel estimation in IEEE 802.11p. While these methods improved on earlier baselines they still can perform poorly, especially in very high mobility scenarios. This study proposes a novel approach that uses two independent LSTM cells in parallel and averages their outputs to update cell states. The proposed approach improves normalised mean square error, surpassing existing deep learning approaches in very high mobility scenarios.
@inproceedings{504,
  author    = {Ngorima, Simbarashe A. and Helberg, Albert S. J. and Davel, Marelie H.},
  title     = {Sequence Based Deep Neural Networks for Channel Estimation in Vehicular Communication Systems},
  abstract  = {Channel estimation is a critical component of vehicular communications systems, especially in high-mobility scenarios. The IEEE 802.11p standard uses preamble-based channel estimation, which is not sufficient in these situations. Recent work has proposed using deep neural networks for channel estimation in IEEE 802.11p. While these methods improved on earlier baselines they still can perform poorly, especially in very high mobility scenarios. This study proposes a novel approach that uses two independent LSTM cells in parallel and averages their outputs to update cell states. The proposed approach improves normalised mean square error, surpassing existing deep learning approaches in very high mobility scenarios.},
  booktitle = {Artificial Intelligence Research (SACAIR 2023)},
  series    = {Communications in Computer and Information Science},
  volume    = {1976},
  pages     = {176--186},
  year      = {2023},
  month     = nov,
  publisher = {Springer, Cham},
  isbn      = {978-3-031-49001-9},
  doi       = {10.1007/978-3-031-49002-6_12},
}
Lotz, S., Nel, A., Wicks, R., Roberts, O., Engelbrecht, N., Strauss, R., Botha, G., Kontar, E., Pitňa, A., & Bale, S. (2023). The Radial Variation of the Solar Wind Turbulence Spectra near the Kinetic Break Scale from Parker Solar Probe Measurements. The Astrophysical Journal, 942(2). The American Astronomical Society. https://doi.org/10.3847/1538-4357/aca903
In this study we examine the radial dependence of the inertial and dissipation range indices, as well as the spectral break separating the inertial and dissipation range in power density spectra of interplanetary magnetic field fluctuations using Parker Solar Probe data from the fifth solar encounter between ∼0.1 and ∼0.7 au. The derived break wavenumber compares reasonably well with previous estimates at larger radial distances and is consistent with gyro-resonant damping of Alfvénic fluctuations by thermal protons. We find that the inertial scale power-law index varies between approximately −1.65 and −1.45. This is consistent with either the Kolmogorov (−5/3) or Iroshnikov–Kraichnan (−3/2) values, and has a very weak radial dependence with a possible hint that the spectrum becomes steeper closer to the Sun. The dissipation range power-law index, however, has a clear dependence on radial distance (and turbulence age), decreasing from −3 near 0.7 au (4 days) to −4 [±0.3] at 0.1 au (0.75 days) closer to the Sun.
@article{503,
  author    = {Lotz, Stefan and Nel, Amore and Wicks, Robert and Roberts, Owen and Engelbrecht, Nicholas and Strauss, Roelf and Botha, Gert and Kontar, Eduard and Pitňa, Alexander and Bale, Stuart},
  title     = {The Radial Variation of the Solar Wind Turbulence Spectra near the Kinetic Break Scale from {Parker Solar Probe} Measurements},
  abstract  = {In this study we examine the radial dependence of the inertial and dissipation range indices, as well as the spectral break separating the inertial and dissipation range in power density spectra of interplanetary magnetic field fluctuations using Parker Solar Probe data from the fifth solar encounter between ∼0.1 and ∼0.7 au. The derived break wavenumber compares reasonably well with previous estimates at larger radial distances and is consistent with gyro-resonant damping of Alfvénic fluctuations by thermal protons. We find that the inertial scale power-law index varies between approximately −1.65 and −1.45. This is consistent with either the Kolmogorov (−5/3) or Iroshnikov–Kraichnan (−3/2) values, and has a very weak radial dependence with a possible hint that the spectrum becomes steeper closer to the Sun. The dissipation range power-law index, however, has a clear dependence on radial distance (and turbulence age), decreasing from −3 near 0.7 au (4 days) to −4 [±0.3] at 0.1 au (0.75 days) closer to the Sun.},
  journal   = {The Astrophysical Journal},
  volume    = {942},
  number    = {2},
  year      = {2023},
  month     = jan,
  publisher = {The American Astronomical Society},
  doi       = {10.3847/1538-4357/aca903},
}
Ramalepe, S., Modipa, T. I., & Davel, M. H. (2023). The Analysis of the Sepedi-English Code-switched Radio News Corpus. Journal of the Digital Humanities Association of Southern Africa, 4(1) (Proceedings of the 3rd Workshop on Resources for African Indigenous Languages (RAIL)). https://doi.org/10.55492/dhasa.v4i01.4444
Code-switching is a phenomenon that occurs mostly in multilingual countries where multilingual speakers often switch between languages in their conversations. The unavailability of large scale code-switched corpora hampers the development and training of language models for the generation of code-switched text. In this study, we explore the initial phase of collecting and creating Sepedi-English code-switched corpus for generating synthetic news. Radio news and the frequency of code-switching on read news were considered and analysed. We developed and trained a Transformer-based language model using the collected code-switched dataset. We observed that the frequency of code-switched data in the dataset was very low at 1.1%. We complemented our dataset with the news headlines dataset to create a new dataset. Although the frequency was still low, the model obtained the optimal loss rate of 2,361 with an accuracy of 66%.
@article{502,
  author  = {Ramalepe, Simon and Modipa, Thipe and Davel, Marelie},
  title   = {The Analysis of the {Sepedi-English} Code-switched Radio News Corpus},
  abstract = {Code-switching is a phenomenon that occurs mostly in multilingual countries where multilingual speakers often switch between languages in their conversations. The unavailability of large scale code-switched corpora hampers the development and training of language models for the generation of code-switched text. In this study, we explore the initial phase of collecting and creating Sepedi-English code-switched corpus for generating synthetic news. Radio news and the frequency of code-switching on read news were considered and analysed. We developed and trained a Transformer-based language model using the collected code-switched dataset. We observed that the frequency of code-switched data in the dataset was very low at 1.1%. We complemented our dataset with the news headlines dataset to create a new dataset. Although the frequency was still low, the model obtained the optimal loss rate of 2,361 with an accuracy of 66%.},
  journal = {Journal of the Digital Humanities Association of Southern Africa},
  volume  = {4},
  number  = {1},
  year    = {2023},
  month   = jan,
  note    = {Vol. 4 No. 01 (2022): Proceedings of the 3rd workshop on Resources for African Indigenous Languages (RAIL)},
  doi     = {10.55492/dhasa.v4i01.4444},
}
Ramalepe, S., Modipa, T. I., & Davel, M. H. (2023). Transformer-based text generation for code-switched Sepedi-English news. In Southern African Conference for Artificial Intelligence Research (SACAIR).
Code-switched data is rarely available in written form and this makes the development of large datasets required to train codeswitched language models difficult. Currently, available Sepedi-English code-switched corpora are not large enough to train a Transformer-based model for this language pair. In prior work, larger synthetic datasets have been constructed using a combination of a monolingual and a parallel corpus to approximate authentic code-switched text. In this study, we develop and analyse a new Sepedi-English news dataset (SepEnews). We collect and curate data from local radio news bulletins and use this to augment two existing sources collected from Sepedi newspapers and news headlines, respectively. We then develop and train a Transformer-based model for generating historic code-switched news, and demonstrate and analyse the system’s performance.
@inproceedings{501,
  author    = {Ramalepe, Simon and Modipa, Thipe and Davel, Marelie},
  title     = {{Transformer}-based text generation for code-switched {Sepedi-English} news},
  abstract  = {Code-switched data is rarely available in written form and this makes the development of large datasets required to train codeswitched language models difficult. Currently, available Sepedi-English code-switched corpora are not large enough to train a Transformer-based model for this language pair. In prior work, larger synthetic datasets have been constructed using a combination of a monolingual and a parallel corpus to approximate authentic code-switched text. In this study, we develop and analyse a new Sepedi-English news dataset (SepEnews). We collect and curate data from local radio news bulletins and use this to augment two existing sources collected from Sepedi newspapers and news headlines, respectively. We then develop and train a Transformer-based model for generating historic code-switched news, and demonstrate and analyse the system’s performance.},
  booktitle = {Southern African Conference for Artificial Intelligence Research (SACAIR)},
  pages     = {84--97},
  year      = {2023},
  month     = dec,
}
Middel, C., & Davel, M. H. (2023). Comparing Transformer-based and GBDT models on tabular data: A Rossmann Store Sales case study. In Southern African Conference for Artificial Intelligence Research (SACAIR).
Heterogeneous tabular data is a common and important data format. This empirical study investigates how the performance of deep transformer models compares against benchmark gradient boosting decision tree (GBDT) methods, the more typical modelling approach. All models are optimised using a Bayesian hyperparameter optimisation protocol, which provides a stronger comparison than the random grid search hyperparameter optimisation utilized in earlier work. Since feature skewness is typically handled differently for GBDT and transformer-based models, we investigate the effect of a pre-processing step that normalises feature distribution on the model comparison process. Our analysis is based on the Rossmann Store Sales dataset, a widely recognized benchmark for regression tasks.
@inproceedings{500,
  author    = {Middel, Coenraad and Davel, Marelie},
  title     = {Comparing {Transformer}-based and {GBDT} models on tabular data: A {Rossmann Store Sales} case study},
  abstract  = {Heterogeneous tabular data is a common and important data format. This empirical study investigates how the performance of deep transformer models compares against benchmark gradient boosting decision tree (GBDT) methods, the more typical modelling approach. All models are optimised using a Bayesian hyperparameter optimisation protocol, which provides a stronger comparison than the random grid search hyperparameter optimisation utilized in earlier work. Since feature skewness is typically handled differently for GBDT and transformer-based models, we investigate the effect of a pre-processing step that normalises feature distribution on the model comparison process. Our analysis is based on the Rossmann Store Sales dataset, a widely recognized benchmark for regression tasks.},
  booktitle = {Southern African Conference for Artificial Intelligence Research (SACAIR)},
  pages     = {115--129},
  year      = {2023},
  month     = dec,
}