From 2432a8d3b1f219f55b9a915329b2480adb87ac52 Mon Sep 17 00:00:00 2001
From: BingLingFanSub <binglingfansub@gmail.com>
Date: Thu, 14 Feb 2019 16:26:26 +0800
Subject: [PATCH 1/5] Add cloud speech-to-text and translation language codes
 to constants.py

---
 .gitignore           |   7 +-
 autosub/__init__.py  |  10 +--
 autosub/constants.py | 154 ++++++++++++++++++++++++++++++++++++++++---
 3 files changed, 157 insertions(+), 14 deletions(-)

diff --git a/.gitignore b/.gitignore
index 10e73277d..491cdf911 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,4 +6,9 @@ dist/
 .DS_Store
 MANIFEST
 *#*
-.vscode
\ No newline at end of file
+.vscode
+/.vs/autosub/v15/.suo
+/.vs
+/autosub.sln
+/autosub.pyproj
+/.idea
diff --git a/autosub/__init__.py b/autosub/__init__.py
index d85e4db90..86c92cc83 100644
--- a/autosub/__init__.py
+++ b/autosub/__init__.py
@@ -26,7 +26,7 @@
 from progressbar import ProgressBar, Percentage, Bar, ETA
 
 from autosub.constants import (
-    LANGUAGE_CODES, GOOGLE_SPEECH_API_KEY, GOOGLE_SPEECH_API_URL,
+    SPEECH_TO_TEXT_LANGUAGE_CODES, GOOGLE_SPEECH_API_KEY, GOOGLE_SPEECH_API_URL,
 )
 from autosub.formatters import FORMATTERS
 
@@ -116,7 +116,7 @@ def __call__(self, data):
             return None
 
 
-class Translator(object): # pylint: disable=too-few-public-methods
+class Translator(object):  # pylint: disable=too-few-public-methods
     """
     Class for translating a sentence from a one language to another.
     """
@@ -329,14 +329,14 @@ def validate(args):
         )
         return False
 
-    if args.src_language not in LANGUAGE_CODES.keys():
+    if args.src_language not in SPEECH_TO_TEXT_LANGUAGE_CODES.keys():
         print(
             "Source language not supported. "
             "Run with --list-languages to see all supported languages."
         )
         return False
 
-    if args.dst_language not in LANGUAGE_CODES.keys():
+    if args.dst_language not in SPEECH_TO_TEXT_LANGUAGE_CODES.keys():
         print(
             "Destination language not supported. "
             "Run with --list-languages to see all supported languages."
@@ -386,7 +386,7 @@ def main():
 
     if args.list_languages:
         print("List of all languages:")
-        for code, language in sorted(LANGUAGE_CODES.items()):
+        for code, language in sorted(SPEECH_TO_TEXT_LANGUAGE_CODES.items()):
             print("{code}\t{language}".format(code=code, language=language))
         return 0
 
diff --git a/autosub/constants.py b/autosub/constants.py
index c776c1125..3391c9927 100644
--- a/autosub/constants.py
+++ b/autosub/constants.py
@@ -7,8 +7,132 @@
 GOOGLE_SPEECH_API_KEY = "AIzaSyBOti4mM-6x9WDnZIjIeyEU21OpBXqWBgw"
 GOOGLE_SPEECH_API_URL = "http://www.google.com/speech-api/v2/recognize?client=chromium&lang={lang}&key={key}" # pylint: disable=line-too-long
 
-LANGUAGE_CODES = {
+SPEECH_TO_TEXT_LANGUAGE_CODES = {
+    'af-ZA': 'Afrikaans (South Africa)',
+    'am-ET': 'Amharic (Ethiopia)',
+    'ar-AE': 'Arabic (United Arab Emirates)',
+    'ar-BH': 'Arabic (Bahrain)',
+    'ar-DZ': 'Arabic (Algeria)',
+    'ar-EG': 'Arabic (Egypt)',
+    'ar-IL': 'Arabic (Israel)',
+    'ar-IQ': 'Arabic (Iraq)',
+    'ar-JO': 'Arabic (Jordan)',
+    'ar-KW': 'Arabic (Kuwait)',
+    'ar-LB': 'Arabic (Lebanon)',
+    'ar-MA': 'Arabic (Morocco)',
+    'ar-OM': 'Arabic (Oman)',
+    'ar-PS': 'Arabic (State of Palestine)',
+    'ar-QA': 'Arabic (Qatar)',
+    'ar-SA': 'Arabic (Saudi Arabia)',
+    'ar-TN': 'Arabic (Tunisia)',
+    'az-AZ': 'Azerbaijani (Azerbaijan)',
+    'bg-BG': 'Bulgarian (Bulgaria)',
+    'bn-BD': 'Bengali (Bangladesh)',
+    'bn-IN': 'Bengali (India)',
+    'ca-ES': 'Catalan (Spain)',
+    'cmn-Hans-CN': 'Chinese, Mandarin (Simplified, China)',
+    'cmn-Hans-HK': 'Chinese, Mandarin (Simplified, Hong Kong)',
+    'cmn-Hant-TW': 'Chinese, Mandarin (Traditional, Taiwan)',
+    'cs-CZ': 'Czech (Czech Republic)',
+    'da-DK': 'Danish (Denmark)',
+    'de-DE': 'German (Germany)',
+    'el-GR': 'Greek (Greece)',
+    'en-AU': 'English (Australia)',
+    'en-CA': 'English (Canada)',
+    'en-GB': 'English (United Kingdom)',
+    'en-GH': 'English (Ghana)',
+    'en-IE': 'English (Ireland)',
+    'en-IN': 'English (India)',
+    'en-KE': 'English (Kenya)',
+    'en-NG': 'English (Nigeria)',
+    'en-NZ': 'English (New Zealand)',
+    'en-PH': 'English (Philippines)',
+    'en-SG': 'English (Singapore)',
+    'en-TZ': 'English (Tanzania)',
+    'en-US': 'English (United States)',
+    'en-ZA': 'English (South Africa)',
+    'es-AR': 'Spanish (Argentina)',
+    'es-BO': 'Spanish (Bolivia)',
+    'es-CL': 'Spanish (Chile)',
+    'es-CO': 'Spanish (Colombia)',
+    'es-CR': 'Spanish (Costa Rica)',
+    'es-DO': 'Spanish (Dominican Republic)',
+    'es-EC': 'Spanish (Ecuador)',
+    'es-ES': 'Spanish (Spain)',
+    'es-GT': 'Spanish (Guatemala)',
+    'es-HN': 'Spanish (Honduras)',
+    'es-MX': 'Spanish (Mexico)',
+    'es-NI': 'Spanish (Nicaragua)',
+    'es-PA': 'Spanish (Panama)',
+    'es-PE': 'Spanish (Peru)',
+    'es-PR': 'Spanish (Puerto Rico)',
+    'es-PY': 'Spanish (Paraguay)',
+    'es-SV': 'Spanish (El Salvador)',
+    'es-US': 'Spanish (United States)',
+    'es-UY': 'Spanish (Uruguay)',
+    'es-VE': 'Spanish (Venezuela)',
+    'eu-ES': 'Basque (Spain)',
+    'fa-IR': 'Persian (Iran)',
+    'fi-FI': 'Finnish (Finland)',
+    'fil-PH': 'Filipino (Philippines)',
+    'fr-CA': 'French (Canada)',
+    'fr-FR': 'French (France)',
+    'gl-ES': 'Galician (Spain)',
+    'gu-IN': 'Gujarati (India)',
+    'he-IL': 'Hebrew (Israel)',
+    'hi-IN': 'Hindi (India)',
+    'hr-HR': 'Croatian (Croatia)',
+    'hu-HU': 'Hungarian (Hungary)',
+    'hy-AM': 'Armenian (Armenia)',
+    'id-ID': 'Indonesian (Indonesia)',
+    'is-IS': 'Icelandic (Iceland)',
+    'it-IT': 'Italian (Italy)',
+    'ja-JP': 'Japanese (Japan)',
+    'jv-ID': 'Javanese (Indonesia)',
+    'ka-GE': 'Georgian (Georgia)',
+    'km-KH': 'Khmer (Cambodia)',
+    'kn-IN': 'Kannada (India)',
+    'ko-KR': 'Korean (South Korea)',
+    'lo-LA': 'Lao (Laos)',
+    'lt-LT': 'Lithuanian (Lithuania)',
+    'lv-LV': 'Latvian (Latvia)',
+    'ml-IN': 'Malayalam (India)',
+    'mr-IN': 'Marathi (India)',
+    'ms-MY': 'Malay (Malaysia)',
+    'nb-NO': 'Norwegian Bokmal (Norway)',
+    'ne-NP': 'Nepali (Nepal)',
+    'nl-NL': 'Dutch (Netherlands)',
+    'pl-PL': 'Polish (Poland)',
+    'pt-BR': 'Portuguese (Brazil)',
+    'pt-PT': 'Portuguese (Portugal)',
+    'ro-RO': 'Romanian (Romania)',
+    'ru-RU': 'Russian (Russia)',
+    'si-LK': 'Sinhala (Sri Lanka)',
+    'sk-SK': 'Slovak (Slovakia)',
+    'sl-SI': 'Slovenian (Slovenia)',
+    'sr-RS': 'Serbian (Serbia)',
+    'su-ID': 'Sundanese (Indonesia)',
+    'sv-SE': 'Swedish (Sweden)',
+    'sw-KE': 'Swahili (Kenya)',
+    'sw-TZ': 'Swahili (Tanzania)',
+    'ta-IN': 'Tamil (India)',
+    'ta-LK': 'Tamil (Sri Lanka)',
+    'ta-MY': 'Tamil (Malaysia)',
+    'ta-SG': 'Tamil (Singapore)',
+    'te-IN': 'Telugu (India)',
+    'th-TH': 'Thai (Thailand)',
+    'tr-TR': 'Turkish (Turkey)',
+    'uk-UA': 'Ukrainian (Ukraine)',
+    'ur-IN': 'Urdu (India)',
+    'ur-PK': 'Urdu (Pakistan)',
+    'vi-VN': 'Vietnamese (Vietnam)',
+    'yue-Hant-HK': 'Chinese, Cantonese (Traditional, Hong Kong)',
+    'zu-ZA': 'Zulu (South Africa)'
+}
+
+TRANSLATION_LANGUAGE_CODES = {
     'af': 'Afrikaans',
+    'am': 'Amharic',
     'ar': 'Arabic',
     'az': 'Azerbaijani',
     'be': 'Belarusian',
@@ -17,6 +141,7 @@
     'bs': 'Bosnian',
     'ca': 'Catalan',
     'ceb': 'Cebuano',
+    'co': 'Corsican',
     'cs': 'Czech',
     'cy': 'Welsh',
     'da': 'Danish',
@@ -30,10 +155,14 @@
     'fa': 'Persian',
     'fi': 'Finnish',
     'fr': 'French',
+    'fy': 'Frisian',
     'ga': 'Irish',
+    'gd': 'Scots Gaelic',
     'gl': 'Galician',
     'gu': 'Gujarati',
     'ha': 'Hausa',
+    'haw': 'Hawaiian',
+    'he': 'Hebrew',
     'hi': 'Hindi',
     'hmn': 'Hmong',
     'hr': 'Croatian',
@@ -52,7 +181,10 @@
     'km': 'Khmer',
     'kn': 'Kannada',
     'ko': 'Korean',
+    'ku': 'Kurdish',
+    'ky': 'Kyrgyz',
     'la': 'Latin',
+    'lb': 'Luxembourgish',
     'lo': 'Lao',
     'lt': 'Lithuanian',
     'lv': 'Latvian',
@@ -64,39 +196,45 @@
     'mr': 'Marathi',
     'ms': 'Malay',
     'mt': 'Maltese',
-    'my': 'Myanmar (Burmese)',
+    'my': 'Myanmar(Burmese)',
     'ne': 'Nepali',
     'nl': 'Dutch',
     'no': 'Norwegian',
-    'ny': 'Chichewa',
+    'ny': 'Nyanja(Chichewa)',
     'pa': 'Punjabi',
     'pl': 'Polish',
-    'pt': 'Portuguese',
+    'ps': 'Pashto',
+    'pt': 'Portuguese(Portugal,Brazil)',
     'ro': 'Romanian',
     'ru': 'Russian',
-    'si': 'Sinhala',
+    'sd': 'Sindhi',
+    'si': 'Sinhala(Sinhalese)',
     'sk': 'Slovak',
     'sl': 'Slovenian',
+    'sm': 'Samoan',
+    'sn': 'Shona',
     'so': 'Somali',
     'sq': 'Albanian',
     'sr': 'Serbian',
     'st': 'Sesotho',
-    'su': 'Sudanese',
+    'su': 'Sundanese',
     'sv': 'Swedish',
     'sw': 'Swahili',
     'ta': 'Tamil',
     'te': 'Telugu',
     'tg': 'Tajik',
     'th': 'Thai',
-    'tl': 'Filipino',
+    'tl': 'Tagalog(Filipino)',
     'tr': 'Turkish',
     'uk': 'Ukrainian',
     'ur': 'Urdu',
     'uz': 'Uzbek',
     'vi': 'Vietnamese',
+    'xh': 'Xhosa',
     'yi': 'Yiddish',
     'yo': 'Yoruba',
+    'zh': 'Chinese (Simplified)',
     'zh-CN': 'Chinese (Simplified)',
     'zh-TW': 'Chinese (Traditional)',
-    'zu': 'Zulu',
+    'zu': 'Zulu'
 }

From 1290ce6db59eaaf0b033347cdc5cd25bd475c8f5 Mon Sep 17 00:00:00 2001
From: BingLingFanSub <binglingfansub@gmail.com>
Date: Thu, 14 Feb 2019 18:08:35 +0800
Subject: [PATCH 2/5] Add cloud speech-to-text and translation language codes
 to __init__.py

---
 autosub/__init__.py | 60 ++++++++++++++++++++++++++++++++++++---------
 1 file changed, 48 insertions(+), 12 deletions(-)

diff --git a/autosub/__init__.py b/autosub/__init__.py
index 86c92cc83..35871d95f 100644
--- a/autosub/__init__.py
+++ b/autosub/__init__.py
@@ -26,7 +26,7 @@
 from progressbar import ProgressBar, Percentage, Bar, ETA
 
 from autosub.constants import (
-    SPEECH_TO_TEXT_LANGUAGE_CODES, GOOGLE_SPEECH_API_KEY, GOOGLE_SPEECH_API_URL,
+    SPEECH_TO_TEXT_LANGUAGE_CODES, TRANSLATION_LANGUAGE_CODES, GOOGLE_SPEECH_API_KEY, GOOGLE_SPEECH_API_URL,
 )
 from autosub.formatters import FORMATTERS
 
@@ -180,7 +180,7 @@ def extract_audio(filename, channels=1, rate=16000):
     if not os.path.isfile(filename):
         print("The given file does not exist: {}".format(filename))
         raise Exception("Invalid filepath: {}".format(filename))
-    if not which("ffmpeg"):
+    if not which("ffmpeg.exe"):
         print("ffmpeg: Executable not found on machine.")
         raise Exception("Dependency not found: ffmpeg")
     command = ["ffmpeg", "-y", "-i", filename,
@@ -332,14 +332,29 @@ def validate(args):
     if args.src_language not in SPEECH_TO_TEXT_LANGUAGE_CODES.keys():
         print(
             "Source language not supported. "
-            "Run with --list-languages to see all supported languages."
+            "Run with -lsc or --list-speech-to-text-codes "
+            "to see all supported languages."
         )
         return False
 
-    if args.dst_language not in SPEECH_TO_TEXT_LANGUAGE_CODES.keys():
+    if args.dst_language is None:
+        print(
+            "Destination language not provided. "
+            "Only performing speech recognition."
+        )
+        args.dst_language = args.src_language
+
+    elif args.dst_language == args.src_language:
+        print(
+            "Source language is the same as the Destination language. "
+            "Only performing speech recognition."
+        )
+
+    elif args.dst_language not in TRANSLATION_LANGUAGE_CODES.keys():
         print(
             "Destination language not supported. "
-            "Run with --list-languages to see all supported languages."
+            "Run with -ltc or --list-translation-codes "
+            "to see all supported languages."
         )
         return False
 
@@ -366,14 +381,29 @@ def main():
                         default=DEFAULT_SUBTITLE_FORMAT)
     parser.add_argument('-S', '--src-language', help="Language spoken in source file",
                         default=DEFAULT_SRC_LANGUAGE)
-    parser.add_argument('-D', '--dst-language', help="Desired language for the subtitles",
-                        default=DEFAULT_DST_LANGUAGE)
+    parser.add_argument('-D', '--dst-language', help="Desired language for the subtitles")
     parser.add_argument('-K', '--api-key',
-                        help="The Google Translate API key to be used. \
+                        help="The Google Translation API key to be used. \
                         (Required for subtitle translation)")
-    parser.add_argument('--list-formats', help="List all available subtitle formats",
+    parser.add_argument('-lf', '--list-formats', help="List all available subtitle formats",
+                        action='store_true')
+    parser.add_argument('-lsc', '--list-speech-to-text-codes',
+                        help="""List all available source language codes, which mean the speech-to-text
+                              available language codes.
+                              [WARNING]: Its name format is different from 
+                                         the destination language codes.
+                                         And it's Google who make that difference
+                                         not the developers of the autosub.
+                              Reference: https://cloud.google.com/speech-to-text/docs/languages""",
                         action='store_true')
-    parser.add_argument('--list-languages', help="List all available source/destination languages",
+    parser.add_argument('-ltc', '--list-translation-codes',
+                        help="""List all available destination language codes, which mean the translation
+                             language codes.
+                             [WARNING]: Its name format is different from 
+                                        the source language codes.
+                                        And it's Google who make that difference
+                                        not the developers of the autosub.
+                             Reference: https://cloud.google.com/translate/docs/languages""",
                         action='store_true')
 
     args = parser.parse_args()
@@ -384,12 +414,18 @@ def main():
             print("{format}".format(format=subtitle_format))
         return 0
 
-    if args.list_languages:
-        print("List of all languages:")
+    if args.list_speech_to_text_codes:
+        print("List of all source language codes:")
         for code, language in sorted(SPEECH_TO_TEXT_LANGUAGE_CODES.items()):
             print("{code}\t{language}".format(code=code, language=language))
         return 0
 
+    if args.list_translation_codes:
+        print("List of all destination language codes:")
+        for code, language in sorted(TRANSLATION_LANGUAGE_CODES.items()):
+            print("{code}\t{language}".format(code=code, language=language))
+        return 0
+
     if not validate(args):
         return 1
 

From 41303bb1590a930973f5f893623b3f89d9585143 Mon Sep 17 00:00:00 2001
From: BingLingFanSub <binglingfansub@gmail.com>
Date: Thu, 14 Feb 2019 18:14:38 +0800
Subject: [PATCH 3/5] Fix pylint code format issues

---
 autosub/__init__.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/autosub/__init__.py b/autosub/__init__.py
index 35871d95f..b826ca14f 100644
--- a/autosub/__init__.py
+++ b/autosub/__init__.py
@@ -26,7 +26,8 @@
 from progressbar import ProgressBar, Percentage, Bar, ETA
 
 from autosub.constants import (
-    SPEECH_TO_TEXT_LANGUAGE_CODES, TRANSLATION_LANGUAGE_CODES, GOOGLE_SPEECH_API_KEY, GOOGLE_SPEECH_API_URL,
+    SPEECH_TO_TEXT_LANGUAGE_CODES, TRANSLATION_LANGUAGE_CODES,
+    GOOGLE_SPEECH_API_KEY, GOOGLE_SPEECH_API_URL,
 )
 from autosub.formatters import FORMATTERS
 
@@ -388,7 +389,8 @@ def main():
     parser.add_argument('-lf', '--list-formats', help="List all available subtitle formats",
                         action='store_true')
     parser.add_argument('-lsc', '--list-speech-to-text-codes',
-                        help="""List all available source language codes, which mean the speech-to-text
+                        help="""List all available source language codes,
+                              which mean the speech-to-text
                               available language codes.
                               [WARNING]: Its name format is different from 
                                          the destination language codes.
@@ -397,7 +399,8 @@ def main():
                               Reference: https://cloud.google.com/speech-to-text/docs/languages""",
                         action='store_true')
     parser.add_argument('-ltc', '--list-translation-codes',
-                        help="""List all available destination language codes, which mean the translation
+                        help="""List all available destination language codes,
+                             which mean the translation
                              language codes.
                              [WARNING]: Its name format is different from 
                                         the source language codes.

From 77144567b77814aeb86834c276959fd44d7f9669 Mon Sep 17 00:00:00 2001
From: BingLingFanSub <binglingfansub@gmail.com>
Date: Thu, 14 Feb 2019 20:05:00 +0800
Subject: [PATCH 4/5] Revert ffmpeg dependency check change

---
 autosub/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/autosub/__init__.py b/autosub/__init__.py
index b826ca14f..888f7ebb9 100644
--- a/autosub/__init__.py
+++ b/autosub/__init__.py
@@ -181,7 +181,7 @@ def extract_audio(filename, channels=1, rate=16000):
     if not os.path.isfile(filename):
         print("The given file does not exist: {}".format(filename))
         raise Exception("Invalid filepath: {}".format(filename))
-    if not which("ffmpeg.exe"):
+    if not which("ffmpeg"):
         print("ffmpeg: Executable not found on machine.")
         raise Exception("Dependency not found: ffmpeg")
     command = ["ffmpeg", "-y", "-i", filename,

From c3643a2f6d22222a64e818549cadb58da60b83e4 Mon Sep 17 00:00:00 2001
From: BingLingFanSub <binglingfansub@gmail.com>
Date: Thu, 14 Feb 2019 20:16:29 +0800
Subject: [PATCH 5/5] Update help message in README.md

---
 README.md | 25 +++++++++++++++++++------
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index b89840c51..a6e7f2a8a 100644
--- a/README.md
+++ b/README.md
@@ -14,8 +14,7 @@ Autosub is a utility for automatic speech recognition and subtitle generation. I
 ```
 $ autosub -h
 usage: autosub [-h] [-C CONCURRENCY] [-o OUTPUT] [-F FORMAT] [-S SRC_LANGUAGE]
-               [-D DST_LANGUAGE] [-K API_KEY] [--list-formats]
-               [--list-languages]
+               [-D DST_LANGUAGE] [-K API_KEY] [-lf] [-lsc] [-ltc]
                [source_path]
 
 positional arguments:
@@ -36,10 +35,24 @@ optional arguments:
   -D DST_LANGUAGE, --dst-language DST_LANGUAGE
                         Desired language for the subtitles
   -K API_KEY, --api-key API_KEY
-                        The Google Translate API key to be used. (Required for
-                        subtitle translation)
-  --list-formats        List all available subtitle formats
-  --list-languages      List all available source/destination languages
+                        The Google Translation API key to be used. (Required
+                        for subtitle translation)
+  -lf, --list-formats   List all available subtitle formats
+  -lsc, --list-speech-to-text-codes
+                        List all available source language codes, which mean
+                        the speech-to-text available language codes.
+                        [WARNING]: Its name format is different from the
+                        destination language codes. And it's Google who make
+                        that difference not the developers of the autosub.
+                        Reference: https://cloud.google.com/speech-to-
+                        text/docs/languages
+  -ltc, --list-translation-codes
+                        List all available destination language codes, which
+                        mean the translation language codes. [WARNING]: Its
+                        name format is different from the source language
+                        codes. And it's Google who make that difference not
+                        the developers of the autosub. Reference:
+                        https://cloud.google.com/translate/docs/languages
 ```
 
 ### License