I am using Anaconda and installed pdfminer3k from the conda-forge channel.
Error:
runfile('C:/Phoenix/Python/listpdfsandcountwords.py', wdir='C:/Phoenix/Python')
Traceback (most recent call last):
File "", line 1, in
runfile('C:/Phoenix/Python/listpdfsandcountwords.py', wdir='C:/Phoenix/Python')
File "C:\Work\lib\site-packages\spyder\utils\site\sitecustomize.py", line 710, in runfile
execfile(filename, namespace)
File "C:\Work\lib\site-packages\spyder\utils\site\sitecustomize.py", line 101, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Phoenix/Python/listpdfsandcountwords.py", line 14, in
from pdfminer.pdfpage import PDFPage
ModuleNotFoundError: No module named 'pdfminer.pdfpage'
Conda Environment:
(C:\Work) C:\Users\dparamanand>conda info
Current conda install:
platform : win-64
conda version : 4.3.29
conda is private : False
conda-env version : 4.3.29
conda-build version : 3.0.27
python version : 3.6.3.final.0
requests version : 2.18.4
root environment : C:\Work (writable)
default environment : C:\Work
envs directories : C:\Work\envs
C:\Users\dparamanand\AppData\Local\conda\conda\envs
C:\Users\dparamanand\.conda\envs
package cache : C:\Work\pkgs
C:\Users\dparamanand\AppData\Local\conda\conda\pkgs
channel URLs : https://repo.continuum.io/pkgs/main/win-64
https://repo.continuum.io/pkgs/main/noarch
https://repo.continuum.io/pkgs/free/win-64
https://repo.continuum.io/pkgs/free/noarch
https://repo.continuum.io/pkgs/r/win-64
https://repo.continuum.io/pkgs/r/noarch
https://repo.continuum.io/pkgs/pro/win-64
https://repo.continuum.io/pkgs/pro/noarch
https://repo.continuum.io/pkgs/msys2/win-64
https://repo.continuum.io/pkgs/msys2/noarch
config file : C:\Users\dparamanand\.condarc
netrc file : None
offline mode : False
user-agent : conda/4.3.29 requests/2.18.4 CPython/3.6.3 Windows/10 Windows/10.0.16299
administrator : False
Code:
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 29 10:43:29 2017
@author: dpar0004
"""
import os
#for reading the pdf
from io import StringIO
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfpage import PDFPage
from nltk.corpus import stopwords
from nltk.collocations import TrigramCollocationFinder
from nltk.collocations import QuadgramCollocationFinder
#for counting the sentences and words
import nltk
import collections
from nltk import word_tokenize
from collections import Counter
#for counting most frequent words
import re
def convert(filename, pages=None):
    """Extract the text content of a PDF file.

    Parameters
    ----------
    filename : str
        Path to the PDF file to read.
    pages : iterable of int, optional
        Zero-based page numbers to extract. When omitted (or empty),
        every page is processed (PDFPage.get_pages treats an empty set
        as "all pages").

    Returns
    -------
    str
        The text extracted from the selected pages.
    """
    pagenums = set(pages) if pages else set()
    output = StringIO()
    manager = PDFResourceManager()
    converter = TextConverter(manager, output, laparams=LAParams())
    interpreter = PDFPageInterpreter(manager, converter)
    try:
        # 'with' guarantees the PDF handle is closed even if a page fails
        # to parse — the original leaked the file object on error.
        with open(filename, 'rb') as infile:
            for page in PDFPage.get_pages(infile, pagenums):
                interpreter.process_page(page)
    finally:
        converter.close()
    text = output.getvalue()
    # Bug fix: original wrote 'output.close' without parentheses, which
    # merely references the bound method and never closes the buffer.
    output.close()
    return text
# Walk the document directory, extract text from every PDF, and report
# word/sentence counts plus the most frequent words, trigrams and quadgrams.
pdfFiles = []
# Raw string: '\P' and '\D' are invalid escape sequences in a normal
# string literal (DeprecationWarning / future SyntaxError).
dir_name = r'C:\Phoenix\Documents from Bryan'

# Hoist the stopword list once and store it as a set: the original
# re-fetched stopwords.words('english') for every single word, and a
# set gives O(1) membership tests.
english_stopwords = set(stopwords.words('english'))

for filename in os.listdir(dir_name):
    # Case-insensitive extension test. The original or-chain enumerated
    # eight spellings but still missed combinations such as '.PDf' and
    # '.PdF' (and listed '.pDf' and '.PDF' twice).
    if filename.lower().endswith('.pdf'):
        pdfFiles.append(filename)
        text = convert(os.path.join(dir_name, filename))
        sentence_count = len(nltk.tokenize.sent_tokenize(text))
        word_count = len(nltk.tokenize.word_tokenize(text))
        print('\nThe file ',filename,' has ',word_count, 'words and ', sentence_count,' sentences in it.\n')
        # use findall for counting most common words, quadgrams, trigrams
        all_text = re.findall(r'\w+', text)
        all_text = map(lambda x: x.lower(), all_text)
        filtered_words = list(filter(lambda word: word not in english_stopwords and word.isalpha(), all_text))
        word_counts = Counter(filtered_words).most_common(20)
        print('The 20 most commonly occuring words in this file are : \n\n', word_counts)
        print('\nThe 10 most common 3 word combinations appearing in this file are: \n')
        trigram = TrigramCollocationFinder.from_words(filtered_words)
        # Sort by descending frequency, then alphabetically for ties.
        print(sorted(trigram.ngram_fd.items(), key=lambda t: (-t[1], t[0]))[:10])
        fourgrams = QuadgramCollocationFinder.from_words(filtered_words)
        print('\nThe 10 most common 4 word combinations appearing in this file are: \n')
        print(sorted(fourgrams.ngram_fd.items(), key=lambda t: (-t[1], t[0]))[:10])
        print('----------------------------------------------------------------------------------------------------')
I am using Anaconda and installed pdfminer3k from the conda-forge channel.
Error:
runfile('C:/Phoenix/Python/listpdfsandcountwords.py', wdir='C:/Phoenix/Python')
Traceback (most recent call last):
File "", line 1, in
runfile('C:/Phoenix/Python/listpdfsandcountwords.py', wdir='C:/Phoenix/Python')
File "C:\Work\lib\site-packages\spyder\utils\site\sitecustomize.py", line 710, in runfile
execfile(filename, namespace)
File "C:\Work\lib\site-packages\spyder\utils\site\sitecustomize.py", line 101, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Phoenix/Python/listpdfsandcountwords.py", line 14, in
from pdfminer.pdfpage import PDFPage
ModuleNotFoundError: No module named 'pdfminer.pdfpage'
Conda Environment:
(C:\Work) C:\Users\dparamanand>conda info
Current conda install:
Code:
# -*- coding: utf-8 -*-
"""
Created on Fri Sep 29 10:43:29 2017
@author: dpar0004
"""
import os
#for reading the pdf
from io import StringIO
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfpage import PDFPage
from nltk.corpus import stopwords
from nltk.collocations import TrigramCollocationFinder
from nltk.collocations import QuadgramCollocationFinder
#for counting the sentences and words
import nltk
import collections
from nltk import word_tokenize
from collections import Counter
#for counting most frequent words
import re
def convert(filename, pages=None):
    """Normalise *pages* into a set of page numbers.

    (This is a truncated duplicate of the full convert() above — the
    paste cut off after the page-number normalisation.)
    """
    pagenums = set(pages) if pages else set()
# Truncated duplicate of the processing loop: list PDFs, extract their
# text, and print word and sentence counts per file.
pdfFiles = []
# Raw string: '\P' and '\D' are invalid escape sequences in a normal
# string literal (DeprecationWarning / future SyntaxError).
dir_name = r'C:\Phoenix\Documents from Bryan'

for filename in os.listdir(dir_name):
    # Case-insensitive extension test replaces the or-chain, which
    # missed combinations such as '.PDf' and '.PdF'.
    if filename.lower().endswith('.pdf'):
        pdfFiles.append(filename)
        text = convert(os.path.join(dir_name, filename))
        sentence_count = len(nltk.tokenize.sent_tokenize(text))
        word_count = len(nltk.tokenize.word_tokenize(text))
        print('\nThe file ',filename,' has ',word_count, 'words and ', sentence_count,' sentences in it.\n')