Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added .DS_Store
Binary file not shown.
46 changes: 46 additions & 0 deletions .github/workflows/gcmd.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
# Regenerates the GCMD keyword statistics on every push and commits the
# refreshed output files back to the repository when they changed.
name: Get GCMD Stats

on: [push]

jobs:
  run:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v2
      - name: Set up Python
        uses: actions/setup-python@v1
        with:
          python-version: '3.x'
      - name: Install dependencies
        run: |
          git config --local user.email "action@github.com"
          git config --local user.name "github-actions"
          git pull
          python -m pip install --upgrade pip
          pip install pandas
          pip install matplotlib
          pip install xmltodict
          echo "done"
      - name: Run GCMD Count script
        run: |
          cd RDA-Datasets
          python3 gcmd.py
      - name: Commit files
        id: commit
        # Exposes a `push` output ('true'/'false') consumed by the next step.
        # The output is written through $GITHUB_OUTPUT because the legacy
        # `::set-output` workflow command is deprecated.
        run: |
          git config --local user.email "action@github.com"
          git config --local user.name "github-actions"
          git add --all
          if [ -z "$(git status --porcelain)" ]; then
            echo "push=false" >> "$GITHUB_OUTPUT"
          else
            git commit -m "Add changes" -a
            echo "push=true" >> "$GITHUB_OUTPUT"
          fi
        shell: bash
      - name: Push changes
        # Only push when the previous step actually created a commit.
        if: steps.commit.outputs.push == 'true'
        uses: ad-m/github-push-action@master
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
Binary file added RDA-Datasets/.DS_Store
Binary file not shown.
Binary file added RDA-Datasets/__pycache__/gcmdsupport.cpython-39.pyc
Binary file not shown.
80 changes: 80 additions & 0 deletions RDA-Datasets/gcmd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
import glob
import sys
import xml.etree.ElementTree as ET
import os
from xml.etree.ElementTree import parse
import time
import xmltodict
import pprint
from gcmdsupport import getGCMDfromXML
import os
from os import path
from collections import Counter


# Directory scanned for ISO metadata XML files.
XML_DIR = '.'
# One GCMD keyword per line, as extracted from every XML file.
KEYWORDS_FILE = "rda_gcmd.txt"
# Per-keyword duplicate counts followed by per-level totals.
REPORT_FILE = "rda_gcmd_counts_levels.txt"
# Highest hierarchy level that gets its own total line in the report.
MAX_LEVEL = 6


def collect_keywords(xml_dir, out_path):
    """Write the GCMD keywords of every ``*.xml`` file in *xml_dir* to *out_path*.

    Each keyword is written on its own line.  The output file is truncated
    first, so no separate delete step is needed.

    Args:
        xml_dir: Directory whose ``.xml`` files are scanned (not recursive).
        out_path: Path of the text file to (re)create.
    """
    with open(out_path, "w", encoding="utf-8") as out:
        # Sort the listing: os.listdir() order is arbitrary, and the output
        # is committed to the repository, so it must be reproducible.
        for filename in sorted(os.listdir(xml_dir)):
            if not filename.endswith('.xml'):
                continue
            fullname = os.path.join(xml_dir, filename)
            for keyword in getGCMDfromXML(fullname):
                # Empty XML elements come back as None (not str); skip them
                # instead of crashing the whole run.
                if not isinstance(keyword, str):
                    continue
                out.write(keyword + "\n")


def build_report(lines):
    """Return the duplicate-keyword report for an iterable of keyword lines.

    Lines are compared case-insensitively after stripping whitespace; blank
    lines are ignored.  Only keywords that occur more than once are listed.
    A keyword's level is the number of ``>`` separators it contains plus one.

    Args:
        lines: Iterable of strings, one keyword per item.

    Returns:
        The report text: one ``"<keyword> Count: <n> Level <l>"`` entry per
        repeated keyword (each followed by a blank line), then one
        ``"Level <l>: <total>"`` line for levels 1 through ``MAX_LEVEL``.
    """
    counts = Counter(text.strip().lower() for text in lines if text.strip())
    level_totals = Counter()
    parts = []
    for keyword, count in counts.items():
        if count <= 1:
            continue
        level = keyword.count('>') + 1
        level_totals[level] += 1
        parts.append(f"{keyword} Count: {count} Level {level}\n\n")
    # Levels above MAX_LEVEL are listed per keyword but get no total line.
    for level in range(1, MAX_LEVEL + 1):
        parts.append(f"Level {level}: {level_totals[level]}\n")
    return "".join(parts)


def main():
    """Regenerate the keyword list and the duplicate/level report."""
    collect_keywords(XML_DIR, KEYWORDS_FILE)
    with open(KEYWORDS_FILE, encoding="utf-8") as keyword_file:
        report = build_report(keyword_file)
    # Write the report directly instead of rebinding sys.stdout, so console
    # output keeps working and the file is closed even on error.
    with open(REPORT_FILE, "w", encoding="utf-8") as report_file:
        report_file.write(report)


if __name__ == "__main__":
    main()
36 changes: 36 additions & 0 deletions RDA-Datasets/gcmdsupport.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import xmltodict
import pprint

# Placeholder emitted whenever a record or keyword carries no usable text.
NO_KEYWORDS = "no keywords"


def extract_gcmd_keywords(doc, block_index=2):
    """Return the GCMD keyword strings from a parsed ISO 19139 document.

    Args:
        doc: Mapping produced by ``xmltodict.parse`` for one metadata file.
        block_index: Position of the GCMD block inside
            ``gmd:descriptiveKeywords``.  Defaults to 2 (the third block),
            which is where this code has always looked.

    Returns:
        A non-empty list of strings.  Keywords without text are reported as
        ``"no keywords"``; a document without the expected keyword block
        yields ``["no keywords"]``.
    """
    try:
        identification = (
            doc['gmd:MD_Metadata']['gmd:identificationInfo']
            ['gmd:MD_DataIdentification']
        )
        block = identification['gmd:descriptiveKeywords'][block_index]
        gcmd_ref = block['gmd:MD_Keywords']['gmd:keyword']
    except (KeyError, IndexError, TypeError):
        # Missing element, too few keyword blocks, or a single block parsed
        # as a mapping rather than a list: no GCMD block to read.
        return [NO_KEYWORDS]

    # xmltodict yields a mapping for a single child and a list for several.
    if isinstance(gcmd_ref, dict):
        gcmd_ref = [gcmd_ref]
    if not isinstance(gcmd_ref, list) or not gcmd_ref:
        return [NO_KEYWORDS]

    keywords = []
    for elem in gcmd_ref:
        text = elem.get('gco:CharacterString') if isinstance(elem, dict) else None
        if isinstance(text, dict):
            # Element with XML attributes: its text lives under '#text'.
            text = text.get('#text')
        keywords.append(text if isinstance(text, str) and text else NO_KEYWORDS)
    return keywords


def getGCMDfromXML(filename):
    """Parse the XML metadata file *filename* and return its GCMD keywords.

    Args:
        filename: Path to an ISO 19139 (``gmd:MD_Metadata``) XML file.

    Returns:
        A list of keyword strings; ``["no keywords"]`` when none are found.
    """
    # Binary mode lets the XML parser honour the encoding declared in the
    # file instead of relying on the platform's default text encoding.
    with open(filename, "rb") as fd:
        doc = xmltodict.parse(fd)
    return extract_gcmd_keywords(doc)
Loading