# encoding: UTF-8
# frozen_string_literal: true

# Reconstructed from the patch "Parse lookup table"
# (commit 7ec29364432b993336caceb155c2605890e91bd7, Yohei YASUKAWA,
# Sun, 21 Oct 2012 15:42:36 +0900), which created four files under
# yohei_tangstad/: a_to_map.rb, data.txt, dict-creator.rb and reader.rb.
# The three Ruby files are merged below; their specs are kept at the bottom.
#
# data.txt (the table that the dump script reads, one row per line):
#
#   0 | まる、ま | れい、れ | おう、ぜろ、ぜ
#   1 | ひとつ、ひと、ひ | いち、い | わん
#   2 | ふたつ、ふた、ふ | に | つ
#   3 | みつ、み | さん、さ | すりー
#   4 | よん、よ、よつ | し | ふぉー
#   5 | いつつ、いつ | ご、こ | ふぁいぶ、ふぁいぶ
#   6 | むつ、む | ろく、ろ | しっくす
#   7 | ななつ、なな、な | しち | せぶん、せゔん
#   8 | やつ、や | はち、は、ば | えーと
#   9 | ここのつ、こ | きゅう、く | ないん
#   10 | とお | じゅう、じ | てん

begin
  require 'rspec'
rescue LoadError
  # RSpec is optional: the definitions below load without it and the specs
  # at the bottom of the file are simply skipped.
end

# --- a_to_map.rb -------------------------------------------------------------

# Builds a reverse lookup from an array whose first element is the target and
# whose remaining elements are the keys that should map to it.
#
# @param a [Array] target followed by zero or more keys
# @return [Hash] every key (elements 1..) mapped to the first element;
#   empty when +a+ has fewer than two elements
def a_to_map(a)
  target = a.first
  # Array#drop returns [] for an empty array, whereas slice(1..-1) returns nil
  # there and made the original raise NoMethodError.
  a.drop(1).each_with_object({}) { |value, map| map[value] = target }
end

# --- dict-creator.rb ---------------------------------------------------------

class String
  # Splits the receiver on every character of +chars+, one delimiter after
  # another, and returns the stripped, non-empty fields in order.
  #
  # Splitting is done with String#split per delimiter (not one combined
  # regexp) so that a " " delimiter keeps split's whitespace-run behaviour.
  #
  # @param chars [String] each character is used as a delimiter
  # @return [Array<String>] stripped fields; blank fields are dropped
  def split_characters(chars)
    fields = chars.each_char.reduce([self]) do |parts, delimiter|
      parts.flat_map { |part| part.split(delimiter) }
    end
    # Strip first, then drop blanks: fields made only of whitespace would
    # otherwise survive as "" entries.
    fields.map(&:strip).reject(&:empty?)
  end
end

# Prints, for every line of the table file at +path+, the array of fields
# obtained by splitting the line on "|" and "、".
#
# @param path [String] path of the lookup-table file
# @return [void]
def print_lookup_table(path)
  # Block form closes the file; the explicit encoding keeps the "、" delimiter
  # compatible with the data regardless of the process locale.
  File.foreach(path, encoding: 'UTF-8') do |row|
    puts row.split_characters('|、').to_s
  end
end

# Dump the table only when this file is executed directly, so that loading it
# (for example from the spec runner) has no output side effect. The default
# path is resolved next to this file instead of the current directory.
if __FILE__ == $PROGRAM_NAME
  table_path = ARGV.first || File.join(File.dirname(__FILE__), 'data.txt')
  print_lookup_table(table_path) if File.file?(table_path)
end

# --- reader.rb ---------------------------------------------------------------

class TableReader
  # Wraps the given line in a single-element array.
  #
  # @param line [String] one line of input
  # @return [Array<String>] array containing +line+ unchanged
  def parseline(line)
    [line]
  end
end

# --- specs -------------------------------------------------------------------

if defined?(RSpec)
  describe "a_to_map" do
    it "should convert array with number to map to number" do
      a_to_map(["0", "a", "b"]).should == {"a" => "0", "b" => "0"}
    end
    it "should return an empty map for an empty array" do
      a_to_map([]).should == {}
    end
  end

  describe "split_characters" do
    it "should split on character" do
      "one two".split_characters(" ").should == ["one", "two"]
    end
    it "should split on multiple characters" do
      "one two,three, four".split_characters(" ,").should == ["one", "two", "three", "four"]
    end
    it "should split our input properly" do
      "0 | まる、ま | れい、れ | おう 、ぜろ、ぜ ".split_characters("| 、").should == ["0", "まる", "ま", "れい", "れ", "おう", "ぜろ", "ぜ"]
    end
    it "should drop fields that contain only whitespace" do
      "a | | b".split_characters("|").should == ["a", "b"]
    end
  end

  describe TableReader do
    describe "#parseline" do
      it "should return all hiragana words in a line" do
        tr = TableReader.new
        tr.parseline("ひらがな").should == ["ひらがな"]
      end
    end
  end
end