diff options
Diffstat (limited to 'utils/genwords.raku')
-rw-r--r-- | utils/genwords.raku | 90 |
1 files changed, 90 insertions, 0 deletions
diff --git a/utils/genwords.raku b/utils/genwords.raku new file mode 100644 index 0000000..1ba9df8 --- /dev/null +++ b/utils/genwords.raku @@ -0,0 +1,90 @@ +# Vocabulary generation utilities +# +# Copyright (C) 2021 Ngô Ngọc Đức Huy +# +# This file is a part of CreLang-corelibs and was cherry-picked for this document. +# +# CreLang-corelibs is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# CreLang-corelibs is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CreLang-corelibs. If not, see <https://www.gnu.org/licenses/>. +use v6; + +enum ParseCategoryError <LongLHS EmptyLHS EmptyRHS>; + +sub get-categories ($raw-input) is export { + my %categories; + for $raw-input.lines -> $line { + next unless $line; + my ($left, $right) = $line.split("="); + my $key = $left.trim; + my @symbols = $right.words; + die LongLHS if $key.chars > 1; + die EmptyLHS if $key.chars == 0; + die EmptyRHS if @symbols.elems == 0; + %categories{$left.trim} = $right.words; + } + return %categories; +}; + +sub generate-words ($pattern, %categories) is export { + my @generated-words; + sub support ($to-do, $so-far) { + if $to-do.chars == 0 { + @generated-words.push: $so-far; + return; + } + my $current = $to-do.substr(0, 1); + my $remainder = $to-do.substr(1); + + die "Invalid category: $current" unless + $current ∈ %categories.keys; + for @(%categories{$current}) -> $char { + support($remainder, $so-far ~ $char) + } + } + support($pattern, ""); + return @generated-words; +} + +sub rewrite ($rules, $word) { + my $ret = $word; + for $rules.lines -> $rule { + my ($left, $right) = $rule.split(","); + if $word.match($left) { + $ret = $ret.subst($left, $right) + } + } + return $ret +} + +my $category-input = slurp "categories"; +my $rewrite-rules = slurp "rewrite"; +my $categories = get-categories($category-input); +my @possible-syllables; + +for "patterns".IO.lines -> $pattern { + @possible-syllables.append: generate-words($pattern, $categories); +} + +sub MAIN($max-syllables, $n-outputs) { + my $output = open "output.txt", :w; + + for (1..$n-outputs) { + my $word = ''; + my $n-syllables = $max-syllables.rand.ceiling; + for (1..$n-syllables) { + $word ~= @possible-syllables[@possible-syllables.elems.rand.floor]; + } + $output.say(rewrite($rewrite-rules, $word)) + } + $output.close +} |