the bee says you are to apply sound changes. submissions may be written in any language.
a sound change is a change in the pronunciation of a language over time. sometimes, linguistics people get bored, make languages, and simulate their evolution. it's like playing with toys for them, except instead of toys they are languages and the way they are playing is by evolving them. when they are slightly less bored, they get a program to do this for them. today you will make that program.
let me show you with some notation[1]. the easiest sound changes to apply just shift one sound to another:
f > h
but sound changes can also be conditioned by an environment (the sounds around it), so
ə > ∅ / __#
could mean the elision of word-final ə (# is a common symbol for a word boundary), or even
{p,t,k} > {b,d,g} / V__V
which seems to turn unvoiced plosives into their voiced counterparts when they are between vowels.
there is no fixed syntax for the sound changes; you may parse rules out of an input file, or even hardcode some pleasing ones into your program.
your challenge, given a lexicon, is to evolve the words in it according to a list of sound changes. as every language is allowed, there is no fixed API.
[1] haha, I did the bad-for-accessibility thing you're not supposed to do (the one they like on TVTropes) where you make each word a different link ↩
using System.Diagnostics.CodeAnalysis;
using ReactiveUI;
namespace Baum.AvaloniaApp.Models;
/// <summary>
/// Observable model of a language. <see cref="Name"/>, <see cref="ParentId"/> and
/// <see cref="SoundChange"/> go through <c>RaiseAndSetIfChanged</c>; <see cref="Id"/> is a
/// plain auto-property.
/// </summary>
public class LanguageModel : ReactiveObject
{
    string? _name;
    int? _parentId;
    string _soundChange;

    /// <summary>Creates a model with the given name, optional parent id and sound-change rule text.</summary>
    public LanguageModel(string? name, int? parentId = null, string soundChange = "")
    {
        _name = name;
        _parentId = parentId;
        _soundChange = soundChange;
    }

    public int Id { get; set; }

    public string? Name
    {
        get => _name;
        set => this.RaiseAndSetIfChanged(ref _name, value);
    }

    public int? ParentId
    {
        get => _parentId;
        set => this.RaiseAndSetIfChanged(ref _parentId, value);
    }

    public string SoundChange
    {
        get => _soundChange;
        set => this.RaiseAndSetIfChanged(ref _soundChange, value);
    }
}
WordModel.csASCII text, with CRLF line terminators
1 2 3 4 5 6 7 8 91011121314151617181920
using ReactiveUI;
namespace Baum.AvaloniaApp.Models;
/// <summary>
/// Observable model of a word. <see cref="Name"/> and <see cref="IPA"/> go through
/// <c>RaiseAndSetIfChanged</c>; the remaining members are plain auto-properties.
/// </summary>
public class WordModel : ReactiveObject
{
    string _name;
    string _ipa;

    /// <summary>Creates a word with the given display name and IPA transcription.</summary>
    public WordModel(string name, string ipa)
    {
        _name = name;
        _ipa = ipa;
    }

    // Must be supplied by every object initializer (required member).
    public required bool Transient { get; set; }

    public int Id { get; set; }

    public int? AncestorId { get; set; }

    // Must be supplied by every object initializer (required member).
    public required int LanguageId { get; set; }

    public string Name
    {
        get => _name;
        set => this.RaiseAndSetIfChanged(ref _name, value);
    }

    public string IPA
    {
        get => _ipa;
        set => this.RaiseAndSetIfChanged(ref _ipa, value);
    }
}
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Threading.Tasks;
using Microsoft.EntityFrameworkCore;
using Baum.Phonology;
using Baum.Data;
using Baum.AvaloniaApp.Models;
namespace Baum.AvaloniaApp.Services;
class ProjectDatabase : IProjectDatabase
{
FileInfo File { get; }
public ProjectDatabase(FileInfo fileInfo) => File = fileInfo;
/// <summary>
/// Inserts a new language row built from <paramref name="languageModel"/> and, once saved,
/// copies the stored entity's id back onto the model (mirroring what
/// <c>AddAsync(WordModel)</c> does for words).
/// </summary>
/// <param name="languageModel">Model supplying the name, parent id and sound-change text.</param>
public async Task AddAsync(LanguageModel languageModel)
{
    using var context = new ProjectContext(File);
    var entry = await context.Languages.AddAsync(new Language
    {
        Name = languageModel.Name,
        ParentId = languageModel.ParentId,
        SoundChange = languageModel.SoundChange,
    });
    await context.SaveChangesAsync();

    // Without this the caller's model keeps Id == 0 and a later UpdateAsync cannot find the row.
    languageModel.Id = entry.Entity.Id;
}
/// <summary>
/// Writes the model's name and sound-change text onto the stored language with the same id.
/// The parent id is not written by this method.
/// </summary>
/// <exception cref="InvalidOperationException">No language with the model's id exists.</exception>
public async Task UpdateAsync(LanguageModel languageModel)
{
    using var context = new ProjectContext(File);

    var stored = await context.Languages.FindAsync(languageModel.Id)
        ?? throw new InvalidOperationException("No language found in database");

    stored.Name = languageModel.Name;
    stored.SoundChange = languageModel.SoundChange;

    await context.SaveChangesAsync();
}
/// <summary>
/// Returns models for every language whose parent id equals <paramref name="languageId"/>.
/// </summary>
/// <exception cref="InvalidOperationException">The parent language itself does not exist.</exception>
public async Task<IEnumerable<LanguageModel>> GetChildrenAsync(int languageId)
{
    using var context = new ProjectContext(File);

    // The looked-up entity is only used as an existence check.
    var parent = await context.Languages.FindAsync(languageId);
    if (parent == null)
    {
        throw new InvalidOperationException();
    }

    return await context.Languages
        .Where(child => child.ParentId == languageId)
        .Select(child => new LanguageModel(child.Name, child.ParentId, child.SoundChange) { Id = child.Id })
        .ToArrayAsync();
}
/// <summary>Returns a model for every language stored in the project.</summary>
public async Task<IEnumerable<LanguageModel>> GetLanguagesAsync()
{
    using var context = new ProjectContext(File);

    var allLanguages =
        from stored in context.Languages
        select new LanguageModel(stored.Name, stored.ParentId, stored.SoundChange) { Id = stored.Id };

    return await allLanguages.ToArrayAsync();
}
/// <summary>Loads a stored word by id and maps it to a non-transient <see cref="WordModel"/>.</summary>
/// <exception cref="InvalidOperationException">No word with that id exists.</exception>
async Task<WordModel> GetWordAsync(int wordId)
{
    using var context = new ProjectContext(File);

    var stored = await context.Words.FindAsync(wordId)
        ?? throw new InvalidOperationException("Word doesn't exist");

    var model = new WordModel(stored.Name, stored.IPA)
    {
        Transient = false,
        Id = stored.Id,
        AncestorId = stored.AncestorId,
        LanguageId = stored.LanguageId,
    };
    return model;
}
/// <summary>
/// Lists the words of a language: first the words stored for it, then every word of its parent
/// language (obtained recursively) with this language's sound change applied, marked transient.
/// </summary>
/// <param name="languageId">Id of the language to list.</param>
/// <param name="data">Phonology data passed through to <c>SoundChange.TryApply</c>.</param>
/// <exception cref="InvalidOperationException">The language does not exist.</exception>
public async Task<IEnumerable<WordModel>> GetWordsAsync(int languageId, PhonologyData data)
{
    using var context = new ProjectContext(File);

    var language = await context.Languages.FindAsync(languageId)
        ?? throw new InvalidOperationException("No language found in database");

    var result = new List<WordModel>();

    var storedWords = context.Entry(language).Collection(l => l.Words).Query().AsAsyncEnumerable();
    await foreach (var stored in storedWords)
    {
        var model = new WordModel(stored.Name, stored.IPA)
        {
            Transient = false,
            Id = stored.Id,
            AncestorId = stored.AncestorId,
            LanguageId = stored.LanguageId,
        };
        result.Add(model);
    }

    if (language.ParentId == null)
    {
        return result;
    }

    foreach (var inherited in await GetWordsAsync((int)language.ParentId, data))
    {
        // The success flag is ignored; TryApply hands back the input unchanged when it fails.
        SoundChange.TryApply(inherited.IPA, language.SoundChange, data, out var evolvedIpa);

        // A transient parent word already points at the stored root; a stored one is the root.
        int? rootId = inherited.Transient ? inherited.AncestorId : inherited.Id;

        result.Add(new WordModel(inherited.Name, evolvedIpa)
        {
            Transient = true,
            LanguageId = languageId,
            AncestorId = rootId,
        });
    }

    return result;
}
/// <summary>
/// Reconstructs how <paramref name="word"/> evolved from its stored ancestor: the result starts
/// with the ancestor itself and adds one transient entry for every intermediate language whose
/// sound change actually altered the IPA.
/// </summary>
/// <param name="word">Word whose history is requested; yields an empty result if it has no ancestor id.</param>
/// <param name="data">Phonology data passed through to <c>SoundChange.TryApply</c>.</param>
/// <exception cref="InvalidOperationException">
/// The ancestor or the word's language is missing, or the ancestor's language is not reached by
/// walking up the parent chain.
/// </exception>
public async Task<IEnumerable<WordModel>> GetAncestryAsync(WordModel word, PhonologyData data)
{
    using var context = new ProjectContext(File);
    if (word.AncestorId == null)
        return Enumerable.Empty<WordModel>();

    var ancester = await context.Words.FindAsync(word.AncestorId);
    if (ancester == null)
        throw new InvalidOperationException("Ancestor does not exist");

    var wordLanguage = await context.Languages.FindAsync(word.LanguageId);
    if (wordLanguage == null)
        throw new InvalidOperationException("Language does not exist");

    // Walk from the word's language up to the ancestor's language, collecting every language
    // on the way that has a sound change. Parent is loaded right before it is followed, so no
    // separate up-front load is needed.
    List<Language> languageChain = new();
    while (wordLanguage.Id != ancester.LanguageId)
    {
        if (!string.IsNullOrEmpty(wordLanguage.SoundChange))
            languageChain.Add(wordLanguage);

        await context.Entry(wordLanguage)
            .Reference(l => l.Parent)
            .LoadAsync();
        wordLanguage = wordLanguage.Parent ?? throw new InvalidOperationException("Ancestor is not an ancestor");
    }

    List<WordModel> wordChain = new();
    wordChain.Add(new(ancester.Name, ancester.IPA)
    {
        Transient = false,
        // The stored ancestor must carry its id, otherwise callers see a persisted word with Id == 0.
        Id = ancester.Id,
        AncestorId = ancester.AncestorId,
        LanguageId = ancester.LanguageId,
    });

    // The chain was collected child-first; apply the changes oldest-first.
    foreach (Language intermediate in Enumerable.Reverse(languageChain))
    {
        var last = wordChain.Last();
        SoundChange.TryApply(last.IPA, intermediate.SoundChange, data, out var next);
        // TODO? Possibly do some error handling or notification here instead of just skipping
        if (last.IPA == next) continue;

        wordChain.Add(new WordModel(last.Name, next)
        {
            Transient = true,
            AncestorId = ancester.Id,
            LanguageId = intermediate.Id
        });
    }
    return wordChain;
}
/// <summary>
/// Stores <paramref name="word"/> under its language, then updates the same model instance
/// with the stored id, clears its transient flag and returns it.
/// </summary>
/// <exception cref="InvalidOperationException">The word's language does not exist.</exception>
public async Task<WordModel> AddAsync(WordModel word)
{
    using var context = new ProjectContext(File);

    var owner = await context.Languages.FindAsync(word.LanguageId)
        ?? throw new InvalidOperationException("Language doesn't exist");

    var entity = new Word
    {
        Language = owner,
        Name = word.Name,
        IPA = word.IPA,
        AncestorId = word.AncestorId,
    };
    await context.Words.AddAsync(entity);
    await context.SaveChangesAsync();

    word.Id = entity.Id;
    word.Transient = false;
    return word;
}
/// <summary>Copies name, IPA and ancestor id from the model onto the stored word with the same id.</summary>
/// <exception cref="InvalidOperationException">No word with the model's id exists.</exception>
public async Task UpdateAsync(WordModel wordModel)
{
    using var context = new ProjectContext(File);

    var stored = await context.Words.FindAsync(wordModel.Id)
        ?? throw new InvalidOperationException("Word doesn't exist");

    stored.Name = wordModel.Name;
    stored.IPA = wordModel.IPA;
    stored.AncestorId = wordModel.AncestorId;

    await context.SaveChangesAsync();
}
/// <summary>Reports whether <c>Database.GetMigrations()</c> of a fresh context yields any entry.</summary>
public bool HasMigrations()
{
    using (var context = new ProjectContext(File))
    {
        var migrations = context.Database.GetMigrations();
        return migrations.Any();
    }
}
/// <summary>Runs <c>Database.MigrateAsync()</c> against the project file.</summary>
public async Task MigrateAsync()
{
    using (var context = new ProjectContext(File))
    {
        await context.Database.MigrateAsync();
    }
}
/// <summary>
/// Copies the project file to <paramref name="fileInfo"/>, overwriting an existing file there.
/// Does nothing when the target is the project file itself.
/// </summary>
/// <param name="fileInfo">Destination of the copy.</param>
public void SaveToFile(FileInfo fileInfo)
{
    // FileInfo has no value equality, so comparing instances would treat two objects for the
    // same path as different and try to copy the file onto itself. Compare full paths instead,
    // ignoring case on Windows.
    var pathComparison = OperatingSystem.IsWindows()
        ? StringComparison.OrdinalIgnoreCase
        : StringComparison.Ordinal;

    if (string.Equals(fileInfo.FullName, File.FullName, pathComparison))
    {
        return;
    }

    File.CopyTo(fileInfo.FullName, true); // TODO: Prompt user to confirm overwrite
}
}
using System.IO;
namespace Baum.AvaloniaApp.Services;
/// <summary>Factory that wraps a project file in a <see cref="ProjectDatabase"/>.</summary>
class ProjectDatabaseFactory : IProjectDatabaseFactory
{
    /// <summary>Creates a database service bound to <paramref name="fileInfo"/>.</summary>
    public IProjectDatabase Create(FileInfo fileInfo)
    {
        return new ProjectDatabase(fileInfo);
    }
}
using Avalonia.ReactiveUI;
using ReactiveUI;
using Baum.AvaloniaApp.ViewModels;
namespace Baum.AvaloniaApp.Views;
/// <summary>Code-behind of the language forest view, typed to <see cref="LanguageForestViewModel"/>.</summary>
public partial class LanguageForestView : ReactiveUserControl<LanguageForestViewModel>
{
public LanguageForestView()
{
InitializeComponent();
// Calls the view model's LoadAsync from the WhenActivated callback.
// NOTE(review): the async lambda is presumably bound to a void-returning delegate, so an
// exception from LoadAsync would not be observed by WhenActivated — confirm.
this.WhenActivated(async d => await ViewModel!.LoadAsync());
}
}
using Avalonia.ReactiveUI;
using ReactiveUI;
using Baum.AvaloniaApp.ViewModels;
namespace Baum.AvaloniaApp.Views;
/// <summary>Code-behind of the language tree view, typed to <see cref="LanguageTreeViewModel"/>.</summary>
public partial class LanguageTreeView : ReactiveUserControl<LanguageTreeViewModel>
{
public LanguageTreeView()
{
InitializeComponent();
// Calls the view model's LoadAsync from the WhenActivated callback.
// NOTE(review): the async lambda is presumably bound to a void-returning delegate, so an
// exception from LoadAsync would not be observed by WhenActivated — confirm.
this.WhenActivated(async d => await ViewModel!.LoadAsync());
}
}
using Avalonia.ReactiveUI;
using ReactiveUI;
using Baum.AvaloniaApp.ViewModels;
namespace Baum.AvaloniaApp.Views;
/// <summary>Code-behind of the single-language view, typed to <see cref="LanguageViewModel"/>.</summary>
public partial class LanguageView : ReactiveUserControl<LanguageViewModel>
{
public LanguageView()
{
InitializeComponent();
// Calls the view model's LoadAsync from the WhenActivated callback.
// NOTE(review): the async lambda is presumably bound to a void-returning delegate, so an
// exception from LoadAsync would not be observed by WhenActivated — confirm.
this.WhenActivated(async d => await ViewModel!.LoadAsync());
}
}
using Avalonia.Controls;
namespace Baum.AvaloniaApp.Views;
/// <summary>Code-behind of the word entry view; it only initializes its component.</summary>
public partial class WordEntryView : UserControl
{
    public WordEntryView() => InitializeComponent();
}
using Avalonia.Controls;
namespace Baum.AvaloniaApp.Views;
/// <summary>Code-behind of the word view; it only initializes its component.</summary>
public partial class WordView : UserControl
{
    public WordView() => InitializeComponent();
}
using Avalonia.ReactiveUI;
using Baum.AvaloniaApp.ViewModels;
namespace Baum.AvaloniaApp.Views;
/// <summary>Code-behind of the home view, typed to <see cref="HomeViewModel"/>; it only initializes its component.</summary>
public partial class HomeView : ReactiveUserControl<HomeViewModel>
{
    public HomeView() => InitializeComponent();
}
using System;
using Avalonia;
using Avalonia.Controls;
using Avalonia.Markup.Xaml;
using Avalonia.ReactiveUI;
using ReactiveUI;
using Baum.AvaloniaApp.ViewModels;
namespace Baum.AvaloniaApp.Views;
/// <summary>
/// Dialog window that closes itself with the value produced by the view model's confirm or
/// reject command.
/// </summary>
public partial class MigrationConfirmationWindow : ReactiveWindow<MigrationConfirmationWindowViewModel>
{
    public MigrationConfirmationWindow()
    {
        InitializeComponent();

        this.WhenActivated(register =>
        {
            // Both subscriptions are handed to the activation callback for disposal.
            var confirmSubscription = ViewModel!.ConfirmCommand.Subscribe(result => Close(result));
            register(confirmSubscription);

            var rejectSubscription = ViewModel.RejectCommand.Subscribe(result => Close(result));
            register(rejectSubscription);
        });
    }
}
using Avalonia.ReactiveUI;
using Baum.AvaloniaApp.ViewModels;
namespace Baum.AvaloniaApp.Views;
/// <summary>Code-behind of the project view, typed to <see cref="ProjectViewModel"/>; it only initializes its component.</summary>
public partial class ProjectView : ReactiveUserControl<ProjectViewModel>
{
    public ProjectView() => InitializeComponent();
}
using Avalonia;
using Avalonia.Controls.ApplicationLifetimes;
using Avalonia.Markup.Xaml;
using Baum.AvaloniaApp.Services;
using Baum.AvaloniaApp.ViewModels;
using Baum.AvaloniaApp.Views;
namespace Baum.AvaloniaApp;
/// <summary>Application class: loads the XAML and creates the main window on desktop lifetimes.</summary>
public partial class App : Application
{
    public override void Initialize() => AvaloniaXamlLoader.Load(this);

    /// <summary>
    /// For a classic desktop lifetime, creates the main window with a view model that receives
    /// a <see cref="ProjectDatabaseFactory"/>; then defers to the base implementation.
    /// </summary>
    public override void OnFrameworkInitializationCompleted()
    {
        if (ApplicationLifetime is IClassicDesktopStyleApplicationLifetime desktopLifetime)
        {
            // Dependency stuff
            var factory = new ProjectDatabaseFactory();

            desktopLifetime.MainWindow = new MainWindow
            {
                DataContext = new MainWindowViewModel(factory),
            };
        }

        base.OnFrameworkInitializationCompleted();
    }
}
Program.csUnicode text, UTF-8 (with BOM) text, with CRLF line terminators
1 2 3 4 5 6 7 8 910111213141516171819202122
using Avalonia;
using Avalonia.ReactiveUI;
using System;
namespace Baum.AvaloniaApp;
/// <summary>Process entry point and Avalonia application builder.</summary>
class Program
{
    // Initialization code. Don't use any Avalonia, third-party APIs or any
    // SynchronizationContext-reliant code before AppMain is called: things aren't initialized
    // yet and stuff might break.
    [STAThread]
    public static void Main(string[] args)
    {
        var builder = BuildAvaloniaApp();
        builder.StartWithClassicDesktopLifetime(args);
    }

    // Avalonia configuration, don't remove; also used by visual designer.
    public static AppBuilder BuildAvaloniaApp()
    {
        return AppBuilder.Configure<App>()
            .UsePlatformDetect()
            .LogToTrace()
            .UseReactiveUI();
    }
}
using System;
using Avalonia.Controls;
using Avalonia.Controls.Templates;
using Baum.AvaloniaApp.ViewModels;
namespace Baum.AvaloniaApp;
/// <summary>
/// Data template that maps a view model to a view by replacing "ViewModel" with "View" in the
/// view model's full type name.
/// </summary>
public class ViewLocator : IDataTemplate
{
    /// <summary>
    /// Instantiates the view type derived from <paramref name="data"/>'s type name, or returns
    /// a text block reading "Not Found: ..." when no such type can be resolved.
    /// </summary>
    public IControl Build(object data)
    {
        var viewTypeName = data.GetType().FullName!.Replace("ViewModel", "View");
        var viewType = Type.GetType(viewTypeName);

        if (viewType == null)
        {
            return new TextBlock { Text = "Not Found: " + viewTypeName };
        }

        return (Control)Activator.CreateInstance(viewType)!;
    }

    /// <summary>Accepts any object that is a <see cref="ViewModelBase"/>.</summary>
    public bool Match(object data) => data is ViewModelBase;
}
app.manifestUnicode text, UTF-8 (with BOM) text, with CRLF line terminators
1 2 3 4 5 6 7 8 9101112131415161718
<?xml version="1.0" encoding="utf-8"?>
<assembly manifestVersion="1.0" xmlns="urn:schemas-microsoft-com:asm.v1">
<!-- This manifest is used on Windows only.
Don't remove it as it might cause problems with window transparency and embedded controls.
For more details visit https://learn.microsoft.com/en-us/windows/win32/sbscs/application-manifests -->
<assemblyIdentity version="1.0.0.0" name="AvaloniaTest.Desktop"/>
<compatibility xmlns="urn:schemas-microsoft-com:compatibility.v1">
<application>
<!-- A list of the Windows versions that this application has been tested on
and is designed to work with. Uncomment the appropriate elements
and Windows will automatically select the most compatible environment. -->
<!-- Windows 10 -->
<supportedOS Id="{8e0f7a12-bfb3-4fe8-b9a5-48fd50a15a9a}" />
</application>
</compatibility>
</assembly>
using System.Collections.Generic;
using System.ComponentModel.DataAnnotations.Schema;
namespace Baum.Data;
/// <summary>
/// Stored language: a node in a tree of languages that owns a list of words and the
/// sound-change rule text associated with it.
/// </summary>
public class Language
{
public int Id { get; set; }
public string? Name { get; set; }
// Words belonging to this language.
public List<Word> Words { get; set; } = new();
// Sound-change rule text; empty by default.
public string SoundChange { get; set; } = "";
// Null when the language has no parent.
public int? ParentId { get; set; }
public Language? Parent { get; set; }
// Declared as the inverse side of Parent.
[InverseProperty(nameof(Parent))]
public List<Language> Children { get; set; } = new();
}
using System.IO;
using Microsoft.EntityFrameworkCore;
using Microsoft.EntityFrameworkCore.Design;
using Microsoft.Data.Sqlite;
namespace Baum.Data;
/// <summary>EF Core context over a SQLite project file, exposing languages and words.</summary>
public class ProjectContext : DbContext
{
    string ConnectionString { get; init; }

    public DbSet<Language> Languages { get; set; } = default!;

    public DbSet<Word> Words { get; set; } = default!;

    /// <summary>Uses <paramref name="connectionString"/> as given.</summary>
    public ProjectContext(string connectionString) => ConnectionString = connectionString;

    /// <summary>Builds a read/write/create SQLite connection string for <paramref name="fileInfo"/>.</summary>
    public ProjectContext(FileInfo fileInfo)
    {
        var builder = new SqliteConnectionStringBuilder
        {
            DataSource = fileInfo.FullName,
            Mode = SqliteOpenMode.ReadWriteCreate,
        };
        ConnectionString = builder.ConnectionString;
    }

    protected override void OnConfiguring(DbContextOptionsBuilder options)
    {
        options.UseSqlite(ConnectionString);
    }
}
/// <summary>Design-time factory that creates a context with the placeholder connection string "dummy".</summary>
class ProjectContextFactory : IDesignTimeDbContextFactory<ProjectContext>
{
    public ProjectContext CreateDbContext(string[] opts) => new ProjectContext("dummy");
}
Word.csUnicode text, UTF-8 (with BOM) text, with CRLF line terminators
1 2 3 4 5 6 7 8 91011121314151617181920
using System.ComponentModel.DataAnnotations.Schema;
namespace Baum.Data;
/// <summary>
/// Stored word: a name and IPA transcription belonging to one language, optionally linked to
/// an ancestor word.
/// </summary>
public class Word
{
public int Id { get; set; }
// Null when the word has no ancestor.
public int? AncestorId { get; set; }
// Navigation for AncestorId (declared through the ForeignKey attribute).
[ForeignKey(nameof(AncestorId))]
public Word? Ancestor { get; set; }
public required string Name { get; set; }
public required string IPA { get; set; }
public int LanguageId { get; set; }
public required Language Language { get; set; }
}
dir Baum.Phonology
dir Notation
MatchNode.csASCII text, with CRLF line terminators
namespace Baum.Phonology.Notation;
/// <summary>Base of the parsed notation nodes; each node dispatches to a visitor.</summary>
public abstract record MatchNode
{
    /// <summary>Calls the visitor overload that matches the concrete node type.</summary>
    public abstract T Accept<T>(IMatchNodeVisitor<T> visitor);
}

/// <summary>Node carrying a set of included and a set of excluded features.</summary>
public record FeatureSetMatchNode(IReadOnlySet<Feature> Included, IReadOnlySet<Feature> Excluded) : MatchNode
{
    public override T Accept<T>(IMatchNodeVisitor<T> visitor)
    {
        return visitor.Visit(this);
    }
}

/// <summary>Node carrying the feature set of one sound.</summary>
public record SoundMatchNode(IReadOnlySet<Feature> Features) : MatchNode
{
    public override T Accept<T>(IMatchNodeVisitor<T> visitor)
    {
        return visitor.Visit(this);
    }
}

/// <summary>Node carrying an ordered list of alternative nodes.</summary>
// List is used for enumeration ordering guarantees
public record MatchListNode(List<MatchNode> Nodes) : MatchNode
{
    public override T Accept<T>(IMatchNodeVisitor<T> visitor)
    {
        return visitor.Visit(this);
    }
}

/// <summary>Node standing for "nothing" (the parser produces it for an empty list).</summary>
public record EmptyNode : MatchNode
{
    public override T Accept<T>(IMatchNodeVisitor<T> visitor)
    {
        return visitor.Visit(this);
    }
}

/// <summary>Node produced for the end token.</summary>
public record EndMatchNode : MatchNode
{
    public override T Accept<T>(IMatchNodeVisitor<T> visitor)
    {
        return visitor.Visit(this);
    }
}
namespace Baum.Phonology.Notation;
/// <summary>
/// Visitor over the notation node types; each node's <c>Accept</c> calls the overload for its
/// own concrete type.
/// </summary>
/// <typeparam name="T">Result type produced for a visited node.</typeparam>
public interface IMatchNodeVisitor<T>
{
T Visit(FeatureSetMatchNode node);
T Visit(SoundMatchNode node);
T Visit(MatchListNode node);
T Visit(EmptyNode node);
T Visit(EndMatchNode node);
}
using Baum.Rewrite;
namespace Baum.Phonology.Notation;
/// <summary>
/// Recursive-descent parser turning one sound-change rule (match, derivation symbol,
/// replacement, and optionally a slash followed by an environment containing an underscore)
/// into a rewriter over feature sets.
/// </summary>
public ref struct NotationParser
{
    /// <summary>Parses <paramref name="source"/> into a rewriter, tokenizing it with <paramref name="data"/>.</summary>
    public static IRewriter<IReadOnlySet<Feature>> Parse(string source, PhonologyData data)
        => new NotationParser(source, data).Parse();

    IEnumerator<Token> _tokens;
    // False once the token enumerator is exhausted.
    bool _isValid;
    PhonologyData _data;

    // The token under the cursor, or null past the end of input.
    Token? CurrentToken => _isValid ? _tokens.Current : null;

    public NotationParser(string source, PhonologyData data)
    {
        _data = data;
        _tokens = new Tokenization(source, data).GetEnumerator();
        Advance();
    }

    /// <summary>
    /// Parses the whole rule. Throws when a token is missing or unexpected, or when tokens
    /// remain after the rule.
    /// </summary>
    public IRewriter<IReadOnlySet<Feature>> Parse()
    {
        var match = NextMatchNode();
        Consume<DerivationSymbol>();
        var replace = NextMatchNode();

        // Double dispatch: the match node picks a visitor, which the replacement node then drives.
        var rewriter = replace.Accept(match.Accept(new SoundChangeRewriteParser()));

        if (_isValid)
        {
            Consume<Slash>();
            rewriter = ParseCondition(rewriter);
        }

        if (_isValid)
            throw new Exception("Incomplete parse: There are still tokens left, but it couln't be parsed");

        return rewriter;
    }

    // Reads match nodes for as long as the current token can start one.
    List<MatchNode> NextMatchSequence()
    {
        List<MatchNode> matchNodes = new();
        while (CurrentToken is SoundToken or OpenBracket or OpenBrace or EndToken)
        {
            matchNodes.Add(NextMatchNode());
        }
        return matchNodes;
    }

    // MatchNode : IPASymbol
    //           | FeatureSet
    //           | List
    //           | End
    MatchNode NextMatchNode()
    {
        switch (CurrentToken)
        {
            case SoundToken { Features: var features }:
                Advance();
                return new SoundMatchNode(features);
            case EndToken:
                Advance();
                return new EndMatchNode();
            case OpenBracket:
                return NextFeatureSet();
            case OpenBrace:
                return NextMatchList();
            default:
                throw new NotImplementedException();
        }
    }

    // Parses a bracketed feature set; the caller has already seen the opening bracket.
    FeatureSetMatchNode NextFeatureSet()
    {
        Advance();
        HashSet<Feature> included = new(), excluded = new();
        while (true)
        {
            if (CurrentToken is PositiveFeature { Feature: var positive })
            {
                Advance();
                included.Add(positive);
            }
            else if (CurrentToken is NegativeFeature { Feature: var negative })
            {
                Advance();
                excluded.Add(negative);
            }
            else
            {
                break;
            }

            // Separating commas are optional.
            if (CurrentToken is Comma)
                Advance();
        }
        Consume<CloseBracket>();
        return new FeatureSetMatchNode(included, excluded);
    }

    // Parses a braced list of nodes; an empty pair of braces yields an EmptyNode.
    MatchNode NextMatchList()
    {
        Consume<OpenBrace>();
        List<MatchNode> nodes = new();
        while (true)
        {
            if (CurrentToken is SoundToken or OpenBracket or OpenBrace or EndToken)
            {
                nodes.Add(NextMatchNode());
            }
            else
            {
                break;
            }

            // Separating commas are optional.
            if (CurrentToken is Comma)
                Advance();
        }
        Consume<CloseBrace>();

        if (nodes.Any())
            return new MatchListNode(nodes);
        else
            return new EmptyNode();
    }

    // Parses "<before> _ <after>" and wraps the change between the match-only context rewriters.
    IRewriter<IReadOnlySet<Feature>> ParseCondition(IRewriter<IReadOnlySet<Feature>> changeRewriter)
    {
        var rewriter = new SequenceRewriter<IReadOnlySet<Feature>>();
        foreach (var match in NextMatchSequence())
            rewriter.Add(match.Accept(new SoundMatchRewriteMatchParser()));

        Consume<Underscore>();
        rewriter.Add(changeRewriter);

        foreach (var match in NextMatchSequence())
            rewriter.Add(match.Accept(new SoundMatchRewriteMatchParser()));
        return rewriter;
    }

    // Requires the current token to be a T and steps past it.
    void Consume<T>() where T : Token
    {
        // Go through CurrentToken rather than the raw enumerator: after the input is exhausted
        // the enumerator may still expose the last token, which would let e.g. a missing
        // closing brace be "found" again in an unbalanced list such as "{{a}".
        if (CurrentToken is T)
        {
            Advance();
        }
        else
        {
            throw new InvalidOperationException($"Expected {typeof(T).Name}");
        }
    }

    void Advance() => _isValid = _tokens.MoveNext();
}
using Baum.Rewrite;
using Baum.Phonology.Notation;
namespace Baum.Phonology.Notation;
/// <summary>
/// First stage of the double dispatch: visits the match side of a rule and returns the visitor
/// that will turn the replacement side into a rewriter.
/// </summary>
class SoundChangeRewriteParser : IMatchNodeVisitor<IMatchNodeVisitor<IRewriter<IReadOnlySet<Feature>>>>
{
    public IMatchNodeVisitor<IRewriter<IReadOnlySet<Feature>>> Visit(FeatureSetMatchNode matchNode)
    {
        // A sound matches when it has every included feature and none of the excluded ones.
        Predicate<IReadOnlySet<Feature>> hasRequestedFeatures = sound =>
            sound.IsSupersetOf(matchNode.Included)
            && !sound.Intersect(matchNode.Excluded).Any();
        return new SoundMatchRewriteParser(hasRequestedFeatures);
    }

    public IMatchNodeVisitor<IRewriter<IReadOnlySet<Feature>>> Visit(SoundMatchNode matchNode)
    {
        Predicate<IReadOnlySet<Feature>> isSameSound = sound => sound.SetEquals(matchNode.Features);
        return new SoundMatchRewriteParser(isSameSound);
    }

    public IMatchNodeVisitor<IRewriter<IReadOnlySet<Feature>>> Visit(EmptyNode matchNode)
    {
        return new EmptyMatchRewriteParser();
    }

    public IMatchNodeVisitor<IRewriter<IReadOnlySet<Feature>>> Visit(MatchListNode matchNode)
    {
        return new MatchListRewriteParser(matchNode.Nodes);
    }

    public IMatchNodeVisitor<IRewriter<IReadOnlySet<Feature>>> Visit(EndMatchNode node)
    {
        return new EndMatchRewriteParser();
    }
}
/// <summary>
/// Second-stage visitor for rules whose match side is a single sound or feature set: combines
/// the stored predicate with the visited replacement node.
/// </summary>
class SoundMatchRewriteParser : IMatchNodeVisitor<IRewriter<IReadOnlySet<Feature>>>
{
    Predicate<IReadOnlySet<Feature>> Match;

    public SoundMatchRewriteParser(Predicate<IReadOnlySet<Feature>> match)
    {
        Match = match;
    }

    // Replacement is the matched sound minus the excluded features plus the included ones.
    public IRewriter<IReadOnlySet<Feature>> Visit(FeatureSetMatchNode replaceNode)
    {
        return new MatchRewriter<IReadOnlySet<Feature>>(
            Match,
            matched => new[] { new HashSet<Feature>(matched.Except(replaceNode.Excluded).Union(replaceNode.Included)) });
    }

    // Replacement is the given sound.
    public IRewriter<IReadOnlySet<Feature>> Visit(SoundMatchNode replaceNode)
    {
        return new MatchRewriter<IReadOnlySet<Feature>>(Match, new[] { replaceNode.Features });
    }

    // Replacement is nothing, i.e. the matched sound is dropped.
    public IRewriter<IReadOnlySet<Feature>> Visit(EmptyNode node)
    {
        return new MatchRewriter<IReadOnlySet<Feature>>(Match, Enumerable.Empty<IReadOnlySet<Feature>>());
    }

    public IRewriter<IReadOnlySet<Feature>> Visit(MatchListNode replaceNode)
    {
        // a > {b,c} makes no sense
        throw new Exception("Cannot decide between replacements in list");
    }

    public IRewriter<IReadOnlySet<Feature>> Visit(EndMatchNode node)
    {
        throw new Exception("Word terminator is not a valid replacement");
    }
}
/// <summary>
/// Second-stage visitor for rules whose match side is a list of alternatives: builds one
/// rewriter per alternative and combines them in an <c>AlternativeRewriter</c>.
/// </summary>
class MatchListRewriteParser : IMatchNodeVisitor<IRewriter<IReadOnlySet<Feature>>>
{
    List<MatchNode> MatchNodes { get; set; }

    public MatchListRewriteParser(List<MatchNode> matchNodes) => MatchNodes = matchNodes;

    // Every alternative receives the same feature-set replacement.
    public IRewriter<IReadOnlySet<Feature>> Visit(FeatureSetMatchNode replaceNode)
        => new AlternativeRewriter<IReadOnlySet<Feature>>(
            MatchNodes.Select(matchNode => matchNode.Accept(new SoundChangeRewriteParser()).Visit(replaceNode)));

    // Every alternative receives the same sound replacement.
    public IRewriter<IReadOnlySet<Feature>> Visit(SoundMatchNode replaceNode)
        => new AlternativeRewriter<IReadOnlySet<Feature>>(
            MatchNodes.Select(matchNode => matchNode.Accept(new SoundChangeRewriteParser()).Visit(replaceNode)));

    /// <summary>
    /// Pairs the i-th match alternative with the i-th replacement, as in {p,t,k} > {b,d,g}.
    /// </summary>
    /// <exception cref="InvalidOperationException">The two lists differ in length.</exception>
    public IRewriter<IReadOnlySet<Feature>> Visit(MatchListNode replaceNode)
    {
        // Zip stops at the shorter list, which would silently drop the unpaired alternatives
        // and apply only part of the rule; reject the rule instead.
        if (MatchNodes.Count != replaceNode.Nodes.Count)
        {
            throw new InvalidOperationException(
                $"Cannot pair {MatchNodes.Count} match alternatives with {replaceNode.Nodes.Count} replacements");
        }

        return new AlternativeRewriter<IReadOnlySet<Feature>>(
            Enumerable.Zip(MatchNodes, replaceNode.Nodes)
                .Select(pair => pair.Second.Accept(pair.First.Accept(new SoundChangeRewriteParser()))));
    }

    // Every alternative is replaced by nothing.
    public IRewriter<IReadOnlySet<Feature>> Visit(EmptyNode replaceNode)
        => new AlternativeRewriter<IReadOnlySet<Feature>>(
            MatchNodes.Select(matchNode => matchNode.Accept(new SoundChangeRewriteParser()).Visit(replaceNode)));

    public IRewriter<IReadOnlySet<Feature>> Visit(EndMatchNode node)
        => throw new Exception("Word terminator is not a valid replacement");
}
/// <summary>
/// Second-stage visitor for rules whose match side is the word terminator; only a single sound
/// is accepted as the replacement.
/// </summary>
class EndMatchRewriteParser : IMatchNodeVisitor<IRewriter<IReadOnlySet<Feature>>>
{
    public IRewriter<IReadOnlySet<Feature>> Visit(FeatureSetMatchNode replaceNode)
    {
        // # > [+voiced] makes little sense
        throw new Exception("Cannot add or subtract features from the word terminator");
    }

    public IRewriter<IReadOnlySet<Feature>> Visit(SoundMatchNode replaceNode)
    {
        var insertion = new[] { replaceNode.Features };
        return new EndRewriter<IReadOnlySet<Feature>>(insertion);
    }

    public IRewriter<IReadOnlySet<Feature>> Visit(EmptyNode replaceNode)
    {
        // # > {} also makes no sense
        throw new Exception("Cannot replace nothing with nothing");
    }

    public IRewriter<IReadOnlySet<Feature>> Visit(MatchListNode node)
    {
        throw new Exception("Cannot decide between replacements in list");
    }

    public IRewriter<IReadOnlySet<Feature>> Visit(EndMatchNode node)
    {
        throw new Exception("Word terminator is not a valid replacement");
    }
}
/// <summary>
/// Second-stage visitor for rules whose match side is the empty symbol; only a single sound is
/// accepted as the replacement.
/// </summary>
class EmptyMatchRewriteParser : IMatchNodeVisitor<IRewriter<IReadOnlySet<Feature>>>
{
    public IRewriter<IReadOnlySet<Feature>> Visit(FeatureSetMatchNode replaceNode)
    {
        // {} > [+voiced] makes little sense
        throw new Exception("Cannot add or subtract features from the empty symbol");
    }

    public IRewriter<IReadOnlySet<Feature>> Visit(SoundMatchNode replaceNode)
    {
        var insertion = new[] { replaceNode.Features };
        return new EmptyRewriter<IReadOnlySet<Feature>>(insertion);
    }

    public IRewriter<IReadOnlySet<Feature>> Visit(EmptyNode replaceNode)
    {
        // {} > {} also makes no sense
        throw new Exception("Cannot replace nothing with nothing");
    }

    public IRewriter<IReadOnlySet<Feature>> Visit(MatchListNode node)
    {
        throw new Exception("Cannot decide between replacements in list");
    }

    public IRewriter<IReadOnlySet<Feature>> Visit(EndMatchNode node)
    {
        throw new Exception("Word terminator is not a valid replacement");
    }
}
using Baum.Rewrite;
using Baum.Phonology.Notation;
namespace Baum.Phonology.Notation;
/// <summary>
/// Builds rewriters for the environment part of a rule.
/// Only does matching, not replacing.
/// </summary>
class SoundMatchRewriteMatchParser : IMatchNodeVisitor<IRewriter<IReadOnlySet<Feature>>>
{
    // Matches a sound that has every included feature and none of the excluded ones.
    public IRewriter<IReadOnlySet<Feature>> Visit(FeatureSetMatchNode matchNode)
    {
        return new MatchRewriter<IReadOnlySet<Feature>>(sound
            => sound.IsSupersetOf(matchNode.Included)
                && !sound.Intersect(matchNode.Excluded).Any());
    }

    // Matches exactly the given sound.
    public IRewriter<IReadOnlySet<Feature>> Visit(SoundMatchNode matchNode)
    {
        return new MatchRewriter<IReadOnlySet<Feature>>(sound => sound.SetEquals(matchNode.Features));
    }

    public IRewriter<IReadOnlySet<Feature>> Visit(EmptyNode node)
    {
        return new EmptyRewriter<IReadOnlySet<Feature>>(Enumerable.Empty<IReadOnlySet<Feature>>());
    }

    // Each list entry becomes one alternative, built by this same visitor.
    public IRewriter<IReadOnlySet<Feature>> Visit(MatchListNode node)
    {
        return new AlternativeRewriter<IReadOnlySet<Feature>>(node.Nodes.Select(child => child.Accept(this)));
    }

    public IRewriter<IReadOnlySet<Feature>> Visit(EndMatchNode node)
    {
        return new EndRewriter<IReadOnlySet<Feature>>(Enumerable.Empty<IReadOnlySet<Feature>>());
    }
}
Tokenizer.csASCII text, with CRLF line terminators
namespace Baum.Phonology.Utils;
/// <summary>Loads sound definitions from comma-separated text.</summary>
public static class CsvLoader
{
    /// <summary>
    /// Reads a header line (currently ignored) followed by one sound per line: the first field
    /// is the symbol and every further field becomes a feature. Fields are trimmed and blank
    /// lines are skipped.
    /// </summary>
    /// <param name="stream">Reader positioned at the header line.</param>
    /// <returns>The sounds in file order.</returns>
    /// <exception cref="InvalidDataException">The input has no header line.</exception>
    public static async Task<IEnumerable<Sound>> LoadAsync(TextReader stream)
    {
        // TODO: Use header row for feature categories
        var headerLine = await stream.ReadLineAsync();
        if (headerLine == null)
            throw new InvalidDataException("No header row found");

        List<Sound> sounds = new();

        // Read asynchronously here too; a blocking ReadLine would defeat the async signature.
        while (await stream.ReadLineAsync() is string line)
        {
            // A blank line (e.g. a trailing newline) would otherwise become a sound with an empty symbol.
            if (string.IsNullOrWhiteSpace(line))
                continue;

            // Materialize once so the split/trim work is not repeated per access.
            var fields = line.Split(',').Select(s => s.Trim()).ToArray();

            var sound = new Sound(
                fields[0],
                new HashSet<Feature>(fields.Skip(1).Select(field => new Feature(field))));
            sounds.Add(sound);
        }
        return sounds;
    }
}
namespace Baum.Phonology;
/// <summary>Lookup over a collection of sounds, by symbol prefix or by feature set.</summary>
public class PhonologyData
{
    IEnumerable<Sound> _sounds;

    public PhonologyData(IEnumerable<Sound> sounds) => _sounds = sounds;

    /// <summary>
    /// Returns the sound with the longest symbol that <paramref name="symbol"/> starts with.
    /// </summary>
    /// <exception cref="Exception">No sound's symbol is a prefix of <paramref name="symbol"/>.</exception>
    // TODO? Should this just return null?
    public Sound GetStartSound(string symbol)
        // Ordinal comparison: IPA symbols are compared code unit by code unit, so the result
        // does not depend on the current culture or on linguistic handling of combining marks.
        => _sounds.Where(sound => symbol.StartsWith(sound.Symbol, StringComparison.Ordinal))
            .MaxBy(sound => sound.Symbol.Length) ?? throw new Exception($"There is no sound for {symbol}");

    /// <summary>
    /// Returns the single sound whose feature set equals <paramref name="features"/>;
    /// <c>Enumerable.Single</c> throws when there is none or more than one.
    /// </summary>
    public Sound GetSound(IEnumerable<Feature> features)
        => Enumerable.Single(_sounds.Where(sound => sound.Features.SetEquals(features)));

    /// <summary>
    /// Returns every sound that has all of <paramref name="includedFeatures"/> and none of
    /// <paramref name="excludedFeatures"/>.
    /// </summary>
    public IReadOnlySet<Sound> GetSounds(IReadOnlySet<Feature> includedFeatures, IReadOnlySet<Feature> excludedFeatures)
        => new HashSet<Sound>(_sounds.Where(sound =>
            includedFeatures.IsSubsetOf(sound.Features) &&
            !excludedFeatures.Intersect(sound.Features).Any()));
}
namespace Baum.Phonology;
/// <summary>A named phonological feature.</summary>
public sealed record Feature(string Name);

/// <summary>
/// A sound: its symbol plus its set of features. Two sounds are equal when the symbols match
/// and the feature sets contain the same features.
/// </summary>
public sealed record Sound(string Symbol, IReadOnlySet<Feature> Features) : IEquatable<Sound>
{
    /// <summary>
    /// Combines the symbol's hash with the XOR of all feature hashes, so the result does not
    /// depend on the order in which the set enumerates its features.
    /// </summary>
    public override int GetHashCode()
        // Seed with 0: the seedless Aggregate overload throws on an empty feature set.
        => Symbol.GetHashCode() ^ Features.Select(f => f.GetHashCode())
            .Aggregate(0, (a, b) => a ^ b);

    public bool Equals(Sound? other)
        => other is not null
            && Symbol == other.Symbol
            && Features.SetEquals(other.Features);
}
SoundChange.csUnicode text, UTF-8 (with BOM) text, with CRLF line terminators
using Baum.Rewrite;
using Baum.Phonology.Notation;
namespace Baum.Phonology;
/// <summary>Applies one parsed sound-change rule to words written as sound symbols.</summary>
public class SoundChange
{
    /// <summary>
    /// Parses <paramref name="rule"/> and applies it to <paramref name="initial"/>.
    /// </summary>
    /// <param name="initial">Word to transform.</param>
    /// <param name="rule">Rule text handed to <c>NotationParser.Parse</c>.</param>
    /// <param name="data">Phonology data used for tokenizing and for mapping features back to symbols.</param>
    /// <param name="after">The transformed word, or <paramref name="initial"/> unchanged on failure.</param>
    /// <returns><c>true</c> on success; <c>false</c> when parsing or applying threw.</returns>
    public static bool TryApply(string initial, string rule, PhonologyData data, out string after)
    {
        try
        {
            var rewriter = NotationParser.Parse(rule, data);
            var change = new SoundChange
            {
                PhonologyData = data,
                Rewriter = rewriter
            };
            after = change.Apply(initial);
            return true;
        }
        catch (Exception) // TODO: Specialize exception
        {
            // Deliberate best effort: any failure leaves the word as it was.
            after = initial;
            return false;
        }
    }

    public required IRewriter<IReadOnlySet<Feature>> Rewriter { get; set; }

    public required PhonologyData PhonologyData { get; set; }

    // Tokenizes the word, tries the rewriter at every position from left to right, and maps
    // the resulting feature sets back to symbols.
    string Apply(string str)
    {
        // Materialized once; a deferred sequence would be re-tokenized by every Count() call
        // and would grow an ever deeper Take/Concat/Skip chain with each replacement.
        List<IReadOnlySet<Feature>> featureString = new Tokenization(str, PhonologyData)
            .Select(sound => ((SoundToken)sound).Features)
            .ToList();

        // TODO: Not quite sure if this algorithm is actually the best way to do this
        // Replaces every match in the string
        int maxLength = featureString.Count * 3;
        for (int pos = 0; pos < featureString.Count; ++pos)
        {
            // Prevents infinite insertions like {} > p / p_ where the Count keeps growing
            if (pos > maxLength)
                throw new Exception("Word tripled in length, triggering infinite loop protection");

            var rewrites = Rewriter.Rewrite(featureString, pos).ToList();
            if (rewrites.Count == 0)
                continue;

            // Prefer the rewrite that consumed the most input.
            var replacement = rewrites.MaxBy(pair => pair.RewritePosition);

            // Build a fresh list instead of mutating in place: the replacement may still be a
            // deferred sequence that reads from the list it was produced from.
            var rewritten = new List<IReadOnlySet<Feature>>(featureString.Count);
            rewritten.AddRange(featureString.Take(pos));
            rewritten.AddRange(replacement.Rewrite);
            rewritten.AddRange(featureString.Skip(replacement.RewritePosition));
            featureString = rewritten;
        }

        return string.Concat(featureString.Select(f => PhonologyData.GetSound(f).Symbol));
    }
}
namespace Baum.Phonology.Tests;
/// <summary>Holds a five-sound stub inventory; the cases listed below are not implemented yet.</summary>
public class FeatureSetTest
{
    PhonologyData stubData = new PhonologyData(new Sound[]
    {
        new Sound("a", new HashSet<Feature> { new Feature("vowel"), new Feature("open") }),
        new Sound("e", new HashSet<Feature> { new Feature("vowel"), new Feature("close-mid") }),
        new Sound("m", new HashSet<Feature> { new Feature("consonant"), new Feature("nasal") }),
        new Sound("p", new HashSet<Feature> { new Feature("consonant"), new Feature("plosive") }),
        new Sound("b", new HashSet<Feature> { new Feature("consonant"), new Feature("plosive"), new Feature("voiced") }),
    });

    // Planned cases:
    // p > [+voice]
    // pat > bat
    // p > [+voice] / _a
    // pat > bat, put > put
    // Or conditions
    // Syllable/word boundary conditions
}
NotationTest.csUnicode text, UTF-8 text, with CRLF line terminators
namespace Baum.Rewrite;
/// <summary>
/// Rewriter that succeeds only at the end of the sequence, where it yields the configured
/// insertion without consuming any input.
/// </summary>
public class EndRewriter<T> : IRewriter<T>
{
    IEnumerable<T> Insertion;

    public EndRewriter(IEnumerable<T> insertion)
    {
        Insertion = insertion;
    }

    /// <summary>
    /// Returns one pair (the insertion, position unchanged) when <paramref name="startPosition"/>
    /// equals the sequence length, and no pairs otherwise.
    /// </summary>
    public IEnumerable<RewritePair<T>> Rewrite(IEnumerable<T> sequence, int startPosition)
    {
        if (startPosition != sequence.Count())
        {
            return Enumerable.Empty<RewritePair<T>>();
        }

        var atEnd = new RewritePair<T>
        {
            Rewrite = Insertion,
            RewritePosition = startPosition
        };
        return new RewritePair<T>[] { atEnd };
    }
}
IRewriter.csUnicode text, UTF-8 (with BOM) text, with CRLF line terminators
1 2 3 4 5 6 7 8 91011121314
using System.Diagnostics.CodeAnalysis;
namespace Baum.Rewrite;
/// <summary>One result of a rewriter: the produced output and the input position reached.</summary>
public struct RewritePair<T>
{
// Output produced by the rewrite.
public required IEnumerable<T> Rewrite;
// Input position from which a following rewriter continues.
public required int RewritePosition;
}
/// <summary>Rewrites part of a sequence starting at a given position.</summary>
public interface IRewriter<T>
{
// Returns the possible rewrites from startPosition; implementations in this file return an
// empty result when nothing matches.
IEnumerable<RewritePair<T>> Rewrite(IEnumerable<T> sequence, int startPosition);
}
using System.Collections;
using System.Diagnostics.CodeAnalysis;
using System.Linq;
namespace Baum.Rewrite;
/// <summary>
/// Runs its rewriters one after another: each continues at the position the previous one
/// reached, and their outputs are concatenated. Implements <see cref="IEnumerable{T}"/> so it
/// can be populated with a collection initializer.
/// </summary>
public class SequenceRewriter<T> : IRewriter<T>, IEnumerable<IRewriter<T>>
{
    List<IRewriter<T>> Rewriters = new();

    #region IRewriter<T>
    /// <summary>
    /// Returns every way of applying all rewriters in order from <paramref name="startPosition"/>;
    /// the result is empty as soon as one step has no match.
    /// </summary>
    /// <exception cref="InvalidOperationException">No rewriter has been added.</exception>
    // TODO: Consider: If Rewriters is empty, should it throw, return an empty result, or return an unchanged result
    public IEnumerable<RewritePair<T>> Rewrite(IEnumerable<T> sequence, int startPosition)
    {
        var rewritePairs = Rewriters.First().Rewrite(sequence, startPosition);
        foreach (var rewriter in Rewriters.Skip(1))
        {
            // Extend every partial result with every continuation the next rewriter offers.
            rewritePairs = rewritePairs.SelectMany(pair
                => rewriter.Rewrite(sequence, pair.RewritePosition)
                    .Select(nextPair => new RewritePair<T>
                    {
                        Rewrite = pair.Rewrite.Concat(nextPair.Rewrite),
                        RewritePosition = nextPair.RewritePosition
                    }));
        }
        return rewritePairs;
    }
    #endregion

    #region IEnumerable<IRewriter<T>>
    /// <summary>Enumerates the rewriters in the order they were added.</summary>
    public IEnumerator<IRewriter<T>> GetEnumerator() => Rewriters.GetEnumerator();

    IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
    #endregion

    /// <summary>Appends a rewriter to the end of the sequence.</summary>
    public void Add(IRewriter<T> rewriter) => Rewriters.Add(rewriter);
}
namespace Baum.Rewrite.Tests;
/// <summary>Tests for <see cref="SequenceRewriter{T}"/> over character sequences.</summary>
public class SequenceRewriterTest
{
    // A replacement in the middle of a three-step sequence shows up in the output.
    [Fact]
    public void Test1()
    {
        var sequence = new SequenceRewriter<char>
        {
            new MatchRewriter<char>('a'),
            new MatchRewriter<char>('b', "XY"),
            new MatchRewriter<char>('c'),
        };

        var firstRewrite = sequence.Rewrite("abc", 0).First();

        Assert.Equal("aXYc", string.Concat(firstRewrite.Rewrite));
    }

    [Fact]
    public void InsertionAtEndWorks()
    {
        var sequence = new SequenceRewriter<char>
        {
            new MatchRewriter<char>('a'),
            new MatchRewriter<char>('b'),
            new EndRewriter<char>("c"),
        };

        var firstRewrite = sequence.Rewrite("ab", 0).First();

        Assert.Equal("abc", string.Concat(firstRewrite.Rewrite));
    }

    [Fact]
    public void EndDoesntMatchNotAtEnd()
    {
        var sequence = new SequenceRewriter<char>
        {
            new MatchRewriter<char>('a'),
            new EndRewriter<char>(""),
        };

        var rewrites = sequence.Rewrite("ab", 0);

        Assert.Empty(rewrites);
    }

    [Fact]
    public void InsertionInBetweenMatchesWorks()
    {
        var sequence = new SequenceRewriter<char>
        {
            new MatchRewriter<char>('a'),
            new EmptyRewriter<char>("b"),
            new MatchRewriter<char>('c'),
        };

        var firstRewrite = sequence.Rewrite("ac", 0).First();

        Assert.Equal("abc", string.Concat(firstRewrite.Rewrite));
    }
}
post a comment