about summary refs log tree commit diff
path: root/utils/genwords.raku
diff options
context:
space:
mode:
Diffstat (limited to 'utils/genwords.raku')
-rw-r--r--utils/genwords.raku90
1 files changed, 90 insertions, 0 deletions
diff --git a/utils/genwords.raku b/utils/genwords.raku
new file mode 100644
index 0000000..1ba9df8
--- /dev/null
+++ b/utils/genwords.raku
@@ -0,0 +1,90 @@
+# Vocabulary generation utilities
+#
+# Copyright (C) 2021 Ngô Ngọc Đức Huy
+#
+# This file is a part of CreLang-corelibs and was cherry-picked for this document.
+#
+# CreLang-corelibs is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# CreLang-corelibs is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with CreLang-corelibs.  If not, see <https://www.gnu.org/licenses/>.
+use v6;
+
+enum ParseCategoryError <LongLHS EmptyLHS EmptyRHS>;
+
+sub get-categories ($raw-input) is export {
+	my %categories;
+	for $raw-input.lines -> $line {
+		next unless $line;
+		my ($left, $right) = $line.split("=");
+		my $key = $left.trim;
+		my @symbols = $right.words;
+		die LongLHS if $key.chars > 1;
+		die EmptyLHS if $key.chars == 0;
+		die EmptyRHS if @symbols.elems == 0;
+		%categories{$left.trim} = $right.words;
+	}
+	return %categories;
+};
+
+sub generate-words ($pattern, %categories) is export {
+	my @generated-words;
+	sub support ($to-do, $so-far) {
+		if $to-do.chars == 0 {
+			@generated-words.push: $so-far;
+			return;
+		}
+		my $current = $to-do.substr(0, 1);
+		my $remainder = $to-do.substr(1);
+
+		die "Invalid category: $current" unless
+			$current ∈ %categories.keys;
+		for @(%categories{$current}) -> $char {
+			support($remainder, $so-far ~ $char)
+		}
+	}
+	support($pattern, "");
+	return @generated-words;
+}
+
+sub rewrite ($rules, $word) {
+	my $ret = $word;
+	for $rules.lines -> $rule {
+		my ($left, $right) = $rule.split(",");
+		if $word.match($left) {
+			$ret = $ret.subst($left, $right)
+		}
+	}
+	return $ret
+}
+
+my $category-input = slurp "categories";
+my $rewrite-rules = slurp "rewrite";
+my $categories = get-categories($category-input);
+my @possible-syllables;
+
+for "patterns".IO.lines -> $pattern {
+	@possible-syllables.append: generate-words($pattern, $categories);
+}
+
+sub MAIN($max-syllables, $n-outputs) {
+	my $output = open "output.txt", :w;
+
+	for (1..$n-outputs) {
+		my $word = '';
+		my $n-syllables = $max-syllables.rand.ceiling;
+		for (1..$n-syllables) {
+			$word ~= @possible-syllables[@possible-syllables.elems.rand.floor];
+		}
+		$output.say(rewrite($rewrite-rules, $word))
+	}
+	$output.close
+}