diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..f49b547 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,9 @@ +[run] +source = . +omit = .tox/* + setup.py + example.py + plats.py + main.py + marmiton/__init__.py + tests/__init__.py \ No newline at end of file diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..16854a1 --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,38 @@ +name: CI + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + lint-format-test: + name: Format & Coverage + runs-on: ubuntu-latest + + steps: + - name: đŸ§Ÿ Cloner le dĂ©pĂŽt + uses: actions/checkout@v4 + + - name: 🐍 Installer Python + uses: actions/setup-python@v5 + with: + python-version: '3.13' + + - name: 📩 Installer tox + run: | + python -m pip install --upgrade pip + pip install tox + + - name: 🎹 VĂ©rification du format + run: tox -e check_format + + - name: đŸ§Ș ExĂ©cution des tests + couverture + run: tox -e test + + - name: đŸ“€ Publication du rapport de couverture HTML + uses: actions/upload-artifact@v4 + with: + name: htmlcov + path: htmlcov/ diff --git a/.gitignore b/.gitignore index 964bbf9..336a4ad 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,95 @@ +# Created by https://www.toptal.com/developers/gitignore/api/python,visualstudiocode,visualstudio,jetbrains+all +# Edit at https://www.toptal.com/developers/gitignore?templates=python,visualstudiocode,visualstudio,jetbrains+all + +### JetBrains+all ### +# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 + +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# AWS User-specific +.idea/**/aws.xml + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files 
+.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. +# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# SonarLint plugin +.idea/sonarlint/ + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based Rest Client +.idea/httpRequests + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +### JetBrains+all Patch ### +# Ignore everything but code style settings and run configurations +# that are supposed to be shared within teams. 
+ +.idea/* + +!.idea/codeStyles +!.idea/runConfigurations + +### Python ### # Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] @@ -8,7 +100,6 @@ __pycache__/ # Distribution / packaging .Python -env/ build/ develop-eggs/ dist/ @@ -21,9 +112,11 @@ parts/ sdist/ var/ wheels/ +share/python-wheels/ *.egg-info/ .installed.cfg *.egg +MANIFEST # PyInstaller # Usually these files are written by a python script from a template @@ -38,13 +131,17 @@ pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ +.nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover +*.py,cover .hypothesis/ +.pytest_cache/ +cover/ # Translations *.mo @@ -53,6 +150,8 @@ coverage.xml # Django stuff: *.log local_settings.py +db.sqlite3 +db.sqlite3-journal # Flask stuff: instance/ @@ -65,27 +164,61 @@ instance/ docs/_build/ # PyBuilder +.pybuilder/ target/ # Jupyter Notebook .ipynb_checkpoints +# IPython +profile_default/ +ipython_config.py + # pyenv -.python-version +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version -# celery beat schedule file +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
+#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff celerybeat-schedule +celerybeat.pid # SageMath parsed files *.sage.py -# dotenv +# Environments .env - -# virtualenv .venv +env/ venv/ ENV/ +env.bak/ +venv.bak/ # Spyder project settings .spyderproject @@ -99,5 +232,448 @@ ENV/ # mypy .mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +### Python Patch ### +# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration +poetry.toml + +# ruff +.ruff_cache/ + +# LSP config files +pyrightconfig.json + +### VisualStudioCode ### +.vscode/ +.vscode/* +!.vscode/settings.json +!.vscode/tasks.json +!.vscode/launch.json +!.vscode/extensions.json +!.vscode/*.code-snippets + +# Local History for Visual Studio Code +.history/ + +# Built Visual Studio Code Extensions +*.vsix + +### VisualStudioCode Patch ### +# Ignore all local history of files +.history +.ionide + +### VisualStudio ### +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. 
+## +## Get latest from https://github.com/github/gitignore/blob/main/VisualStudio.gitignore + +# User-specific files +*.rsuser +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Mono auto generated files +mono_crash.* + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +[Ww][Ii][Nn]32/ +[Aa][Rr][Mm]/ +[Aa][Rr][Mm]64/ +bld/ +[Bb]in/ +[Oo]bj/ +[Ll]og/ +[Ll]ogs/ + +# Visual Studio 2015/2017 cache/options directory +.vs/ +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# Visual Studio 2017 auto generated files +Generated\ Files/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUnit +*.VisualState.xml +TestResult.xml +nunit-*.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# Benchmark Results +BenchmarkDotNet.Artifacts/ + +# .NET Core +project.lock.json +project.fragment.lock.json +artifacts/ + +# ASP.NET Scaffolding +ScaffoldingReadMe.txt + +# StyleCop +StyleCopReport.xml + +# Files built by Visual Studio +*_i.c +*_p.c +*_h.h +*.ilk +*.meta +*.obj +*.iobj +*.pch +*.pdb +*.ipdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*_wpftmp.csproj +*.tlog +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opendb +*.opensdf +*.sdf +*.cachefile +*.VC.db +*.VC.VC.opendb + +# Visual Studio profiler +*.psess +*.vsp +*.vspx +*.sap + +# Visual Studio Trace Files +*.e2e + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# AxoCover is a Code Coverage Tool +.axoCover/* +!.axoCover/settings.json + +# Coverlet is a free, cross platform Code Coverage Tool 
+coverage*.json +coverage*.xml +coverage*.info + +# Visual Studio code coverage results +*.coverage +*.coveragexml + +# NCrunch +_NCrunch_* +.*crunch*.local.xml +nCrunchTemp_* + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +# Note: Comment the next line if you want to checkin your web deploy settings, +# but database connection strings (with potential passwords) will be unencrypted +*.pubxml +*.publishproj + +# Microsoft Azure Web App publish settings. Comment the next line if you want to +# checkin your Azure Web App publish settings, but sensitive information contained +# in these scripts will be unencrypted +PublishScripts/ + +# NuGet Packages +*.nupkg +# NuGet Symbol Packages +*.snupkg +# The packages folder can be ignored because of Package Restore +**/[Pp]ackages/* +# except build/, which is used as an MSBuild target. 
+!**/[Pp]ackages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/[Pp]ackages/repositories.config +# NuGet v3's project.json files produces more ignorable files +*.nuget.props +*.nuget.targets + +# Microsoft Azure Build Output +csx/ +*.build.csdef + +# Microsoft Azure Emulator +ecf/ +rcf/ + +# Windows Store app package directories and files +AppPackages/ +BundleArtifacts/ +Package.StoreAssociation.xml +_pkginfo.txt +*.appx +*.appxbundle +*.appxupload + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!?*.[Cc]ache/ + +# Others +ClientBin/ +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.jfm +*.pfx +*.publishsettings +orleans.codegen.cs + +# Including strong name files can present a security risk +# (https://github.com/github/gitignore/pull/2483#issue-259490424) +#*.snk + +# Since there are multiple workflows, uncomment next line to ignore bower_components +# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) +#bower_components/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm +ServiceFabricBackup/ +*.rptproj.bak + +# SQL Server files +*.mdf +*.ldf +*.ndf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings +*.rptproj.rsuser +*- [Bb]ackup.rdl +*- [Bb]ackup ([0-9]).rdl +*- [Bb]ackup ([0-9][0-9]).rdl + +# Microsoft Fakes +FakesAssemblies/ + +# GhostDoc plugin setting file +*.GhostDoc.xml + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat +node_modules/ + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
+*.vbw + +# Visual Studio 6 auto-generated project file (contains which files were open etc.) +*.vbp + +# Visual Studio 6 workspace and project file (working project files containing files to include in project) +*.dsw +*.dsp + +# Visual Studio 6 technical files + +# Visual Studio LightSwitch build output +**/*.HTMLClient/GeneratedArtifacts +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +_Pvt_Extensions + +# Paket dependency manager +.paket/paket.exe +paket-files/ + +# FAKE - F# Make +.fake/ + +# CodeRush personal settings +.cr/personal + +# Python Tools for Visual Studio (PTVS) +*.pyc + +# Cake - Uncomment if you are using it +# tools/** +# !tools/packages.config + +# Tabs Studio +*.tss + +# Telerik's JustMock configuration file +*.jmconfig + +# BizTalk build output +*.btp.cs +*.btm.cs +*.odx.cs +*.xsd.cs + +# OpenCover UI analysis results +OpenCover/ + +# Azure Stream Analytics local run output +ASALocalRun/ + +# MSBuild Binary and Structured Log +*.binlog + +# NVidia Nsight GPU debugger configuration file +*.nvuser + +# MFractors (Xamarin productivity tool) working folder +.mfractor/ + +# Local History for Visual Studio +.localhistory/ + +# Visual Studio History (VSHistory) files +.vshistory/ + +# BeatPulse healthcheck temp database +healthchecksdb + +# Backup folder for Package Reference Convert tool in Visual Studio 2017 +MigrationBackup/ + +# Ionide (cross platform F# VS Code tools) working folder +.ionide/ + +# Fody - auto-generated XML schema +FodyWeavers.xsd + +# VS Code files for those working on multiple tools +*.code-workspace + +# Local History for Visual Studio Code + +# Windows Installer files from build outputs +*.cab +*.msi +*.msix +*.msm +*.msp + +# JetBrains Rider +*.sln.iml + +### VisualStudio Patch ### +# Additional files built by Visual Studio + +# End of https://www.toptal.com/developers/gitignore/api/python,visualstudiocode,visualstudio,jetbrains+all 
-.idea/ +recipes.json diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..62dd1f4 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,19 @@ +repos: + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.12.0 # Ruff version + hooks: + # Run the linter + - id: ruff-check + args: [ --fix ] + # Run the formatter + - id: ruff-format + + - repo: local + hooks: + - id: docformatter + name: docformatter + entry: docformatter + language: python + additional_dependencies: [docformatter] + types: [python] + args: ["--in-place", "--wrap-summaries", "88", "--wrap-descriptions", "88"] \ No newline at end of file diff --git a/README.md b/README.md index 8ac8b06..53546df 100644 --- a/README.md +++ b/README.md @@ -1,109 +1,75 @@ -# python-marmiton -##### v0.4.2 - -Python API to search & get recipes from the 'marmiton.com' website (web crawler, unofficial) -Useful, efficient and super simple to use. - -###### News (0.4.2) : Minor fix for images list -###### News (0.4.1) : Major update due to changes in the Marmiton website structure - everything working again -###### News (0.3.3) : Bug fixes & code improvements -###### News (0.3.2) : Quick bug fix when h1 class name differ in some pages -###### News (0.3.1) : New fields returned by the 'get' API (see the full list bellow) -###### News (0.2.3) : Package fully up to date on 2018-12-21 on version 0.2.3 according to marmiton website html recent changes - -### Installation : -`pip install python-marmiton==0.4.2` - -### Requirements : -`python >= 3.4` -`beautifulsoup4 >= 4.6` - -### API References - -##### Marmiton.search returns a list of dictionary like: -- name: name of the recipe. -- url: url of the detailed recipe on 'marmiton.com'. -- image: if exists, image of the recipe (url). -- rate: recipe rate bewteen 0 and 5. - -Note that in version 0.4 the "description" attribute has been removed as it is not available on Marmiton anymore (in search results). 
- -##### Marmiton.get returns a dictionary like: -- name: name of the recipe -- ingredients: string list of the recipe ingredients (including quantities) -- steps: string list of each step of the recipe -- images: list of string, images of the recipe (url). -- cook_time: string, cooking time of the recipe -- prep_time: string, estimated preparation time of the recipe -- total_time: string, estimated total time of the recipe (cooking + preparation time) -- author: string, name of the author of the recipe -- nb_comments: string, number of comments/rates left by users -- recipe_quantity: string, quantity indicator the recipie is made for -- budget: string, indicate the category of budget according to the website -- difficulty: string, indicate the category of difficulty according to the website -- rate, string: rate of the recipe out of 5 -- author_tip: string, note or tip left by the author - -Notes for version 0.4: -- the "tag" attribute has been removed as it doesn't exists in Marmiton anymore -- the "image" attribute has been replaced by a list of urls in the "images" attribute -- the "people_quantity" has been replace by the "recipe_quantity" attribute, which will include the quantity and the unit (not always number of people now) - -### Usage / Example : - -```python -from marmiton import Marmiton, RecipeNotFound - -# Search : -query_options = { - "aqt": "boeuf bourguignon", # Query keywords - separated by a white space - "dt": "platprincipal", # Plate type : "entree", "platprincipal", "accompagnement", "amusegueule", "sauce" (optional) - "exp": 2, # Plate price : 1 -> Cheap, 2 -> Medium, 3 -> Kind of expensive (optional) - "dif": 2, # Recipe difficulty : 1 -> Very easy, 2 -> Easy, 3 -> Medium, 4 -> Advanced (optional) - "veg": 0, # Vegetarien only : 0 -> False, 1 -> True (optional) -} -query_result = Marmiton.search(query_options) - -# Get : -recipe = query_result[0] -main_recipe_url = recipe['url'] - -try: - detailed_recipe = Marmiton.get(main_recipe_url) # 
Get the details of the first returned recipe (most relevant in our case) -except RecipeNotFound as e: - print(f"No recipe found for '{query_options['aqt']}'") - import sys - sys.exit(0) - -# Display result : -print("## %s\n" % detailed_recipe['name']) # Name of the recipe -print("Recette par '%s'" % (detailed_recipe['author'])) -print("NotĂ© %s/5 par %s personnes." % (detailed_recipe['rate'], detailed_recipe['nb_comments'])) -print("Temps de cuisson : %s / Temps de prĂ©paration : %s / Temps total : %s." % (detailed_recipe['cook_time'] if detailed_recipe['cook_time'] else 'N/A',detailed_recipe['prep_time'], detailed_recipe['total_time'])) -print("DifficultĂ© : '%s'" % detailed_recipe['difficulty']) -print("Budget : '%s'" % detailed_recipe['budget']) - -print("\nRecette pour %s :\n" % detailed_recipe['recipe_quantity']) -for ingredient in detailed_recipe['ingredients']: # List of ingredients - print("- %s" % ingredient) - -print("") - -for step in detailed_recipe['steps']: # List of cooking steps - print("# %s" % step) - -if detailed_recipe['author_tip']: - print("\nNote de l'auteur :\n%s" % detailed_recipe['author_tip']) -``` +# API Marmiton (Python) + +[![CI](https://github.com/COP-AMACO/scraping-marmiton/actions/workflows/ci.yml/badge.svg)](https://github.com/COP-AMACO/scraping-marmiton/actions/workflows/ci.yml) + +Ce projet permet de rechercher et d'obtenir des recettes du site [marmiton.org](https://www.marmiton.org/) via une API Python non officielle (web scraper). -### Ongoing features: -- Preparation time, Cooking time, Total time, etc (available on v0.2.2) -- Multiple images returned for the search / get requests -- Limit the number of returned query on search -- More returned data & query options +C'est un *fork* d'un projet existant [python-marmiton](https://github.com/remaudcorentin-dev/python-marmiton) dĂ©veloppĂ© par [Corentin Remaud](https://github.com/remaudcorentin-dev). -###### Limitation: This module is provided as it. 
As Marmiton makes regular updates to their website, this library might stop working temporarily at any time, the time that the code is updated to match the new Marmiton website structure. -##### Important: Please note that the owner of this project does not own any of the returned data, all data are property of MARMITON. This library is shared for free for educational purposes only. The owner declines any responsability of the usage made by the users, please refer to https://www.marmiton.org/sp/aide/conditions-generales-utilisation.html. If you own the website 'www.marmiton.org' and you do not agree with any of the content of this package please send an email to the address bellow. +## Installation : + +```PowerShell +python -m venv venv +venv\Scripts\Activate.ps1 +pip install -r requirements.txt +``` + -###### Support / Contact : remaudcorentin.dev@gmail.com +## Référence de l'API + +- Fonction `Marmiton.search` : + +Cette fonction permet de simuler une recherche sur le site. + +#### Paramètres de recherche : + +| Paramètre | Type | Obligatoire | Description | Valeurs possibles | +|-----------|----------|-------------|-----------------------------------------------|---------------------------------------------------------------------------------------------------------------------| +| `aqt` | `string` | ✅ | Mots-clés de recherche (séparés par un espace) | | +| `dt` | `string` | ❌ | Type de plat | `accompagnement`, `amusegueule`, `boisson`, `confiserie`, `conseil`, `dessert`, `entree`, `platprincipal`, `sauce` | +| `exp` | `int` | ❌ | Prix du plat | `1` (pas cher), `2` (moyen), `3` (cher) | +| `dif` | `int` | ❌ | Difficulté de la recette | `1` (très facile), `2` (facile), `3` (moyenne), `4` (avancée) | +| `prt` | `int` | ❌ | Particularité de la recette | `1` (végétarien), `2` (sans gluten), `3` (vegan), `4` (sans lactose), `5` (équilibrée) | +| `rct` | `int` | ❌ | Type de cuisson | `1` (Four), `2` (Plaque), `3` (Sans cuisson), `4` (Micro-ondes), `5` 
(Barbecue/Plancha) | +| `ttlt` | `int` | ❌ | Temps total en minutes | `15` (≤ 15 minutes), `30` (≤ 30 minutes), `45` (≤ 45 minutes) | + +#### Réponse : + +Elle retourne la liste de recettes (dictionnaires) correspondant aux critères de recherche. + +| Champ | Type | Description | +|---------------|----------|------------- | +| `name` | `string` | Nom de la recette | +| `url` | `string` | URL du détail de la recette | +| `image` | `string` | Image de la recette (si elle existe) | +| `rate` | `float` | Note de la recette entre 0 et 5 (par défaut 0.0) | +| `nb_comments` | `int` | Nombre de commentaires de la recette (par défaut 0) | + + +- Fonction `Marmiton.get` : + +Cette fonction permet d'obtenir les détails d'une recette à partir de son URL. Elle retourne un dictionnaire avec les informations détaillées de la recette. + +| Champ | Type | Description | +|-------------------|----------------|---------------------------------------------------------------| +| `url` | `string` | URL de la recette détaillée | +| `name` | `string` | Nom de la recette | +| `plate_type` | `string` | Type de plat (ex : "platprincipal", "entree", etc.) 
| +| `is_vegetarian` | `bool` | Recette végétarienne ou non | +| `is_gluten_free` | `bool` | Recette sans gluten ou non | +| `is_vegan` | `bool` | Recette vegan ou non | +| `ingredients` | `list[dict]` | Liste des ingrédients avec nom, quantité, unité et image | +| `author` | `string` | Nom de l'auteur de la recette | +| `author_tip` | `string` | Astuce ou note laissée par l'auteur | +| `steps` | `list[string]` | Liste des étapes de la recette | +| `image_recipe` | `string` | Image principale de la recette (URL) | +| `images` | `list[string]` | Liste d'images de la recette ou des ingrédients (URL) | +| `rate` | `float` | Note de la recette sur 5 | +| `difficulty` | `string` | Catégorie de difficulté | +| `budget` | `string` | Catégorie de budget | +| `cook_time_min` | `int` | Temps de cuisson de la recette en minutes | +| `prep_time_min` | `int` | Temps de préparation estimé en minutes | +| `total_time_min` | `int` | Temps total estimé (cuisson + préparation) en minutes | +| `recipe_quantity` | `string` | Indication de la quantité pour laquelle la recette est prévue | +| `nb_comments` | `string` | Nombre de commentaires ou avis laissés par les utilisateurs | diff --git a/example.py b/example.py index 5952845..3922a01 100644 --- a/example.py +++ b/example.py @@ -1,37 +1,73 @@ -from marmiton import Marmiton +import sys + +from marmiton import Marmiton, RecipeNotFound # Search : query_options = { - "aqt": "Fondue savoyarde", # Query keywords - separated by a white space - "dt": "platprincipal", # Plate type : "entree", "platprincipal", "accompagnement", "amusegueule", "sauce" (optional) - "exp": 2, # Plate price : 1 -> Cheap, 2 -> Medium, 3 -> Kind of expensive (optional) - "dif": 2, # Recipe difficulty : 1 -> Very easy, 2 -> Easy, 3 -> Medium, 4 -> Advanced (optional) - "veg": 0, # Vegetarien only : 0 -> False, 1 -> True (optional) + # Query keywords - separated by a white space + "aqt": "Pizza", + # Plate type: (optional) + # "accompagnement", 
"amusegueule", "boisson", "confiserie", "conseil", + # "dessert", "entree", "platprincipal", "sauce" + "dt": "platprincipal", + # Plate price: (optional) + # 1 -> Cheap, 2 -> Medium, 3 -> Kind of expensive + "exp": 1, + # Recipe difficulty: (optional) + # 1 -> Very easy, 2 -> Easy, 3 -> Medium, 4 -> Advanced + "dif": 1, + # Recipe particularity: (optional) + # 1 -> Vegetarian, 2 -> Gluten-free, 3 -> Vegan, + # 4 -> Lactose-free, 5 -> Balanced + "prt": 1, + # Cooking type: (optional) + # 1 -> Oven, 2 -> Stovetop, 3 -> No-cook, 4 -> Microwave, + # 5 -> Barbecue/Plancha + "rct": 1, + # Total time in minutes: (optional) + # 15, 30, or 45 + "ttlt": 45, } -query_result = Marmiton.search(query_options) + +try: + # Search for recipes with the given options + query_result = Marmiton.search(query_options) +except RecipeNotFound as e: + print(f"No recipe found for '{query_options['aqt']}'") + print(e) + sys.exit(0) # Get : recipe = query_result[0] -main_recipe_url = recipe['url'] - -detailed_recipe = Marmiton.get(main_recipe_url) # Get the details of the first returned recipe (most relevant in our case) - -# Display result : -print("## %s\n" % detailed_recipe['name']) # Name of the recipe -print("Recette par '%s'" % (detailed_recipe['author'])) -print("NotĂ© %s/5 par %s personnes." % (detailed_recipe['rate'], detailed_recipe['nb_comments'])) -print("Temps de cuisson : %s / Temps de prĂ©paration : %s / Temps total : %s." 
% (detailed_recipe['cook_time'] if detailed_recipe['cook_time'] else 'N/A',detailed_recipe['prep_time'], detailed_recipe['total_time'])) -print("DifficultĂ© : '%s'" % detailed_recipe['difficulty']) -print("Budget : '%s'" % detailed_recipe['budget']) +main_recipe_url = recipe["url"] +try: + # Get the details of the first returned recipe + detailed_recipe = Marmiton.get(main_recipe_url) +except RecipeNotFound: + print(f"No recipe found for '{query_options['aqt']}'") + sys.exit(0) -print("\nRecette pour %s :\n" % detailed_recipe['recipe_quantity']) -for ingredient in detailed_recipe['ingredients']: # List of ingredients - print("- %s" % ingredient) +# Print the result +print(f"# {detailed_recipe['name']}\n") +print(f"Recette par '{detailed_recipe['author']}'") +print( + f"NotĂ© {detailed_recipe['rate']}/5 par {detailed_recipe['nb_comments']} personnes" +) +print(f""" + Temps de cuisson : {detailed_recipe["cook_time_min"]} min. | + Temps de prĂ©paration : {detailed_recipe["prep_time_min"]} min. | + Temps total : {detailed_recipe["total_time_min"]} min. 
+ """) +print(f"DifficultĂ© : '{detailed_recipe['difficulty']}'") +print(f"Budget : '{detailed_recipe['budget']}'") -print("") +print(f"\nIngrĂ©dient(s) pour {detailed_recipe['recipe_quantity']} :") +for ingredient in detailed_recipe["ingredients"]: + print(f"- {ingredient['name']} ({ingredient['quantity']} {ingredient['unit']})") -for step in detailed_recipe['steps']: # List of cooking steps - print("# %s" % step) +print("\nÉtapes :") +for step in detailed_recipe["steps"]: + print(f"# {step}") -if detailed_recipe['author_tip']: - print("\nNote de l'auteur :\n%s" % detailed_recipe['author_tip']) +if detailed_recipe["author_tip"]: + print(f"\nNote de l'auteur :\n{detailed_recipe['author_tip']}") diff --git a/main.py b/main.py new file mode 100644 index 0000000..753348b --- /dev/null +++ b/main.py @@ -0,0 +1,39 @@ +import json + +from marmiton import Marmiton, RecipeNotFound +from plats import PLATS + +if __name__ == "__main__": + recipes = [] + for index, plat in enumerate(PLATS): + print(f"[{index + 1}/{len(PLATS)}] Recherche de la recette pour '{plat}'...") + query_options = { + "aqt": plat, # Query keywords - separated by a white space + } + try: + query_result = Marmiton.search(query_options) + except RecipeNotFound: + print(f"No recipe found for '{plat}'") + continue + + if not query_result: + print(f"No recipe found for '{plat}'") + continue + + main_recipe_url = query_result[0]["url"] + + try: + # Get the details of the first returned recipe + detailed_recipe = Marmiton.get(main_recipe_url) + # Skip this recipe if one with the same name was already collected + if any(recipe["name"] == detailed_recipe["name"] for recipe in recipes): + continue + recipes.append(detailed_recipe) + except RecipeNotFound: + print(f"No recipe found for '{plat}'") + continue + # NOTE: request throttling is disabled; re-enabling time.sleep(1 + (index % 3) * 0.5) here also requires "import time" + + # Save the collected recipes to recipes.json (UTF-8, non-ASCII characters written as-is) + with open("recipes.json", "w", encoding="utf-8") as f: + json.dump(recipes, f, ensure_ascii=False, indent=4) diff --git 
a/marmiton/__init__.py b/marmiton/__init__.py index 4e6b904..94e24fa 100644 --- a/marmiton/__init__.py +++ b/marmiton/__init__.py @@ -1,180 +1,423 @@ # -*- coding: utf-8 -*- -from bs4 import BeautifulSoup - +import re +import ssl import urllib.parse import urllib.request -import re -import ssl +from bs4 import BeautifulSoup + +from marmiton.parse_duration import parse_duration_to_minutes class RecipeNotFound(Exception): - pass + pass class Marmiton(object): - - @staticmethod - def search(query_dict): - """ - Search recipes parsing the returned html data. - Options: - 'aqt': string of keywords separated by a white space (query search) - Optional options : - 'dt': "entree" | "platprincipal" | "accompagnement" | "amusegueule" | "sauce" (plate type) - 'exp': 1 | 2 | 3 (plate expense 1: cheap, 3: expensive) - 'dif': 1 | 2 | 3 | 4 (recipe difficultie 1: easy, 4: advanced) - 'veg': 0 | 1 (vegetarien only: 1) - 'rct': 0 | 1 (without cook: 1) - 'sort': "markdesc" (rate) | "popularitydesc" (popularity) | "" (empty for relevance) - """ - base_url = "http://www.marmiton.org/recettes/recherche.aspx?" 
- query_url = urllib.parse.urlencode(query_dict) - - url = base_url + query_url - - handler = urllib.request.HTTPSHandler(context=ssl._create_unverified_context()) - opener = urllib.request.build_opener(handler) - response = opener.open(url) - html_content = response.read() - - soup = BeautifulSoup(html_content, 'html.parser') - - search_data = [] - - articles = soup.findAll("a", href=True) - articles = [a for a in articles if a["href"].startswith("/recettes/recette_")] - - iterarticles = iter(articles) - for article in iterarticles: - data = {} - try: - data["name"] = article.find("h4").get_text().strip(' \t\n\r') - data["url"] = article['href'] - try: - data["rate"] = article.find("span").get_text().split("/")[0] - except Exception as e0: - pass - try: - data["image"] = article.find('img')['data-src'] - except Exception as e1: - try: - data["image"] = article.find('img')['src'] - except Exception as e1: - pass - pass - except Exception as e2: - pass - if data: - search_data.append(data) - - return search_data - - @staticmethod - def _get_name(soup): - return soup.find("h1").get_text().strip(' \t\n\r') - - @staticmethod - def _get_ingredients(soup): - return [item.get_text().strip(' \t\n\r').replace("\xa0", " ") for item in soup.findAll("div", {"class": "MuiGrid-item"})] - - @staticmethod - def _get_author(soup): - return soup.find("div", text="Note de l'auteur :").parent.parent.findAll("div")[0].findAll("div")[1].get_text() - - @staticmethod - def _get_author_tip(soup): - return soup.find("div", text="Note de l'auteur :").parent.parent.findAll("div")[3].find_all("div")[1].get_text().replace("\xa0", " ").replace("\r\n", " ").replace(" ", " ").replace("« ", "").replace(" »", "") - - @staticmethod - def _get_steps(soup): - return [step.parent.parent.find("p").get_text().strip(' \t\n\r') for step in soup.find_all("h3", text=re.compile("^Étape"))] - - @staticmethod - def _get_images(soup): - return [img.get("data-src") for img in soup.find_all("img", {"height": 150}) 
if img.get("data-src")] - - @staticmethod - def _get_rate(soup): - return soup.find("h1").parent.next_sibling.find_all("span")[0].get_text().split("/")[0] - - @staticmethod - def _get_nb_comments(soup): - return soup.find("h1").parent.next_sibling.find_all("span")[1].get_text().split(" ")[0] - - @staticmethod - def _get_total_time__difficulty__budget(soup): - svg_data = "M13.207 22.759a2.151 2.151 0 1 0 0 4.302 2.151 2.151 0 0 0 0-4.302z" - return soup.find("path", {"d": svg_data}).parent.parent.parent.get_text().split("‱") - - @classmethod - def _get_total_time(cls, soup): - return cls._get_total_time__difficulty__budget(soup)[0].replace("\xa0", " ") - - @classmethod - def _get_difficulty(cls, soup): - return cls._get_total_time__difficulty__budget(soup)[1] - - @classmethod - def _get_budget(cls, soup): - return cls._get_total_time__difficulty__budget(soup)[2] - - @staticmethod - def _get_cook_time(soup): - return soup.find_all(text=re.compile("Cuisson"))[0].parent.next_sibling.next_sibling.get_text() - - @staticmethod - def _get_prep_time(soup): - return soup.find_all(text=re.compile("PrĂ©paration"))[1].parent.next_sibling.next_sibling.get_text().replace("\xa0", " ") - - @staticmethod - def _get_recipe_quantity(soup): - return " ".join([span.get_text() for span in soup.find("button", {"class": "MuiIconButton-root"}).parent.find_all("span") if span.get_text()]) - - @classmethod - def get(cls, uri): - """ - 'url' from 'search' method. - ex. 
"/recettes/recette_wraps-de-poulet-et-sauce-au-curry_337319.aspx" - """ - - base_url = "http://www.marmiton.org" - url = base_url + ("" if uri.startswith("/") else "/") + uri - - try: - handler = urllib.request.HTTPSHandler(context=ssl._create_unverified_context()) - opener = urllib.request.build_opener(handler) - response = opener.open(url) - html_content = response.read() - except urllib.error.HTTPError as e: - raise RecipeNotFound if e.code == 404 else e - - soup = BeautifulSoup(html_content, 'html.parser') - - elements = [ - {"name": "name", "default_value": ""}, - {"name": "ingredients", "default_value": []}, - {"name": "author", "default_value": "Anonyme"}, - {"name": "author_tip", "default_value": ""}, - {"name": "steps", "default_value": []}, - {"name": "images", "default_value": []}, - {"name": "rate", "default_value": ""}, - {"name": "difficulty", "default_value": ""}, - {"name": "budget", "default_value": ""}, - {"name": "cook_time", "default_value": ""}, - {"name": "prep_time", "default_value": ""}, - {"name": "total_time", "default_value": ""}, - {"name": "recipe_quantity", "default_value": ""}, - {"name": "nb_comments", "default_value": 0}, - ] - - data = {"url": url} - for element in elements: - try: - data[element["name"]] = getattr(cls, "_get_" + element["name"])(soup) - except: - data[element["name"]] = element["default_value"] - - return data - + @staticmethod + def search(query_dict): + """Search recipes by parsing the returned HTML data. 
+ + Options: + 'aqt': string of keywords separated by a white space (query search) + + Optional options: + 'dt': "accompagnement" | "amusegueule" | "boisson" | "confiserie" | + "conseil" | "dessert" | "entree" | "platprincipal" | "sauce" + (plate type) + + 'exp': 1 | 2 | 3 + (plate expense: 1 = cheap, 2 = medium, 3 = expensive) + + 'dif': 1 | 2 | 3 | 4 + (recipe difficulty: 1 = very easy, ..., 4 = advanced) + + 'prt': 1 | 2 | 3 | 4 | 5 + (recipe particularity: 1 = vegetarian, 2 = gluten-free, + 3 = vegan, 4 = lactose-free, 5 = balanced) + + 'rct': 1 | 2 | 3 | 4 | 5 + (cooking type: 1 = Oven, 2 = Stovetop, 3 = No-cook, + 4 = Microwave, 5 = Barbecue/Plancha) + + 'ttlt': 15 | 30 | 45 + (total time in minutes: <= 15, 30, or 45) + """ + + base_url = "http://www.marmiton.org/recettes/recherche.aspx?" + query_url = urllib.parse.urlencode(query_dict) + + url = base_url + query_url + + try: + handler = urllib.request.HTTPSHandler( + context=ssl._create_unverified_context() + ) + opener = urllib.request.build_opener(handler) + response = opener.open(url) + html_content = response.read() + except Exception as e: + raise RecipeNotFound("Error: " + str(e)) + + soup = BeautifulSoup(html_content, "html.parser") + + search_data = [] + + articles = soup.find_all("a", href=True) + articles = [ + a + for a in articles + if a["href"].startswith("https://www.marmiton.org/recettes/recette") + ] + + iterarticles = iter(articles) + for article in iterarticles: + data = {} + try: + data["name"] = article.find("h4").get_text().strip(" \t\n\r") + data["url"] = article["href"] + # Image + try: + data["image"] = article.find("img")["data-src"] + except Exception: + try: + data["image"] = article.find("img")["src"] + except Exception: + pass + pass + # Rate + try: + data["rate"] = float( + article.find("div", {"class": "mrtn-home-rating__rating"}) + .get_text() + .strip(" \t\n\r") + .split("/")[0] + ) + except Exception: + data["rate"] = 0.0 + pass + # Number of comments + try: + 
data["nb_comments"] = int( + article.find("div", {"class": "mrtn-home-rating__nbreviews"}) + .get_text() + .strip(" \t\n\r") + .split(" ")[0] + ) + except Exception: + data["nb_comments"] = 0 + pass + + except Exception: + pass + if data: + search_data.append(data) + + return search_data + + @staticmethod + def _get_name(soup): + """Returns the name of the recipe.""" + return soup.find("h1").get_text().strip(" \t\n\r") + + @staticmethod + def _get_plate_type(soup): + """Returns the type of the recipe. + + Plate types are: "accompagnement", "amusegueule", "boisson", "confiserie", + "dessert", "entree", "platprincipal", "sauce" or "" + """ + tagsList = soup.find_all(True, {"class": "modal__tag"}) + for tag in tagsList: + tagText = tag.get_text().strip(" \t\n\r").lower() + if tagText == "accompagnement": + return "accompagnement" + elif tagText == "amuse-gueule": + return "amusegueule" + elif tagText == "boisson": + return "boisson" + elif tagText == "confiserie": + return "confiserie" + elif tagText == "dessert": + return "dessert" + elif tagText == "entrée": + return "entree" + elif tagText == "plat principal": + return "platprincipal" + elif tagText == "sauce": + return "sauce" + return "" + + @staticmethod + def _get_is_vegetarian(soup): + """Returns True if the recipe is vegetarian, False otherwise.""" + tagsList = soup.find_all(True, {"class": "modal__tag"}) + for tag in tagsList: + tagText = tag.get_text().strip(" \t\n\r").lower() + if tagText == "vegetarian": + return True + return False + + @staticmethod + def _get_is_gluten_free(soup): + """Returns True if the recipe is gluten-free, False otherwise.""" + tagsList = soup.find_all(True, {"class": "modal__tag"}) + for tag in tagsList: + tagText = tag.get_text().strip(" \t\n\r").lower() + if tagText == "gluten free": + return True + return False + + @staticmethod + def _get_is_vegan(soup): + """Returns True if the recipe is vegan, False otherwise.""" + tagsList = soup.find_all(True, {"class": "modal__tag"}) + 
for tag in tagsList: + tagText = tag.get_text().strip(" \t\n\r").lower() + if tagText == "recettes vegan": + return True + return False + + @staticmethod + def _get_ingredients(soup): + """Returns a list of ingredients for the recipe. Each item is a dictionary with + keys: + + - 'name': the name of the ingredient + - 'quantity': the quantity of the ingredient + - 'unit': the unit of measurement for the ingredient + - 'image': the image URL of the ingredient + """ + ingredients = [] + for element in soup.find_all("div", {"class": "card-ingredient"}): + ingredient_name = element.find("span", {"class": "ingredient-name"}) + ingredient_quantity = element.find("span", {"class": "count"}) + ingredient_unit = element.find("span", {"class": "unit"}) + ingredient_img = element.find("img") + ingredients.append( + { + "name": ingredient_name.get_text().strip(" \t\n\r") + if ingredient_name + else "", + "quantity": ingredient_quantity.get_text().strip(" \t\n\r") + if ingredient_quantity + else "", + "unit": ingredient_unit.get_text().strip(" \t\n\r") + if ingredient_unit + else "", + "image": ingredient_img.get("data-srcset") + .split(",")[-1] + .strip() + .split(" ")[0] + if ingredient_img and ingredient_img.get("data-srcset") + else "", + } + ) + return ingredients + + @staticmethod + def _get_author(soup): + """Returns the name of the author of the recipe.""" + return ( + soup.find("span", {"class": "recipe-author-note__author-name"}) + .get_text() + .strip(" \t\n\r") + ) + + @staticmethod + def _get_author_tip(soup): + """Returns the author's tip for the recipe.""" + return ( + soup.find("div", {"class": "mrtn-hide-on-print recipe-author-note"}) + .find("i") + .get_text() + .replace("\xa0", " ") + .replace("\r\n", " ") + .replace(" ", " ") + .replace("« ", "") + .replace(" »", "") + ) + + @staticmethod + def _get_steps(soup): + """Returns a list of preparation steps for the recipe.""" + return [ + step.parent.parent.find("p").get_text().strip(" \t\n\r") + for step in 
soup.find_all("span", text=re.compile("^Étape")) + ] + + @staticmethod + def _get_image_recipe(soup): + """Returns the main image URL of the recipe.""" + # Main picture of the recipe (some recipes do not have a main picture) + imgComponent = soup.find("img", {"id": "recipe-media-viewer-main-picture"}) + if imgComponent is not None: + return imgComponent.get("data-src") + # Return the first thumbnail of the recipe. There are multiple pictures + # resolution, so we take the last one (the biggest one) + return ( + soup.find("img", {"id": "recipe-media-viewer-thumbnail-0"}) + .get("data-srcset") + .split(",")[-1] + .strip() + .split(" ")[0] + ) + + @staticmethod + def _get_images(soup): + """Returns a list of image URLs associated with the recipe (not only the main + image of the recipe).""" + return [ + img.get("data-src") + for img in soup.find_all("img", {"height": 150}) + if img.get("data-src") + ] + + @staticmethod + def _get_rate(soup): + """Returns the recipe rate as a float.""" + return float( + soup.find("span", {"class": "recipe-header__rating-text"}) + .get_text() + .split("/")[0] + ) + + @classmethod + def _get_difficulty(cls, soup): + """Returns the difficulty level of the recipe.""" + difficulty_text = ( + soup.find_all("div", {"class": "recipe-primary__item"})[1] + .find("span") + .get_text() + .strip(" \t\n\r") + ) + if difficulty_text == "très facile": + return "very_easy" + elif difficulty_text == "facile": + return "easy" + elif difficulty_text == "moyenne": + return "medium" + elif difficulty_text == "difficile": + return "advanced" + else: + return "" + + @classmethod + def _get_budget(cls, soup): + """Returns the budget level of the recipe.""" + budget_text = ( + soup.find_all("div", {"class": "recipe-primary__item"})[2] + .find("span") + .get_text() + .strip(" \t\n\r") + ) + if budget_text == "bon marché": + return "cheap" + elif budget_text == "moyen": + return "medium" + elif budget_text == "assez cher": + return "expensive" + else: + return 
"" + + @staticmethod + def _get_cook_time_min(soup): + """Returns the cooking time for the recipe (in minutes).""" + cook_time = soup.find_all(text=re.compile("Cuisson"))[ + 0 + ].parent.next_sibling.next_sibling.get_text() + return parse_duration_to_minutes(cook_time) + + @staticmethod + def _get_prep_time_min(soup): + """Returns the preparation time for the recipe (in minutes).""" + preparation_time = ( + soup.find_all(text=re.compile("Préparation"))[1] + .parent.next_sibling.next_sibling.get_text() + .replace("\xa0", " ") + ) + return parse_duration_to_minutes(preparation_time) + + @classmethod + def _get_total_time_min(cls, soup): + """Returns the total time for the recipe (in minutes).""" + total_time = ( + soup.find_all("div", {"class": "recipe-primary__item"})[0] + .find("span") + .get_text() + .strip(" \t\n\r") + ) + return parse_duration_to_minutes(total_time) + + @staticmethod + def _get_recipe_quantity(soup): + """Returns the recipe quantity or number of servings.""" + divRecipeQuantity = soup.find( + "div", {"class": "mrtn-recette_ingredients-counter"} + ) + return ( + divRecipeQuantity["data-servingsnb"] + + " " + + divRecipeQuantity["data-servingsunit"] + ) + + @staticmethod + def _get_nb_comments(soup): + """Returns the number of comments on the recipe.""" + return int( + soup.find("div", {"class": "recipe-header__comment"}) + .find("a") + .get_text() + .strip(" \t\n\r") + .split(" ")[0] + ) + + @classmethod + def get(cls, url): + """'url' from 'search' method. + + ex. 
"https://www.marmiton.org/recettes/recette_boeuf-bourguignon_18889.aspx" + """ + + try: + handler = urllib.request.HTTPSHandler( + context=ssl._create_unverified_context() + ) + opener = urllib.request.build_opener(handler) + response = opener.open(url) + html_content = response.read() + except urllib.error.HTTPError as e: + raise RecipeNotFound if e.code == 404 else e + + soup = BeautifulSoup(html_content, "html.parser") + + elements = [ + {"name": "name", "default_value": ""}, + {"name": "plate_type", "default_value": ""}, + {"name": "is_vegetarian", "default_value": False}, + {"name": "is_gluten_free", "default_value": False}, + {"name": "is_vegan", "default_value": False}, + {"name": "ingredients", "default_value": []}, + {"name": "author", "default_value": "Anonyme"}, + {"name": "author_tip", "default_value": ""}, + {"name": "steps", "default_value": []}, + {"name": "image_recipe", "default_value": ""}, + {"name": "images", "default_value": []}, + {"name": "rate", "default_value": 0.0}, + {"name": "difficulty", "default_value": ""}, + {"name": "budget", "default_value": ""}, + {"name": "cook_time_min", "default_value": 0}, + {"name": "prep_time_min", "default_value": 0}, + {"name": "total_time_min", "default_value": 0}, + {"name": "recipe_quantity", "default_value": ""}, + {"name": "nb_comments", "default_value": 0}, + ] + + data = {"url": url} + for element in elements: + try: + data[element["name"]] = getattr(cls, "_get_" + element["name"])(soup) + except Exception: + data[element["name"]] = element["default_value"] + return data diff --git a/marmiton/parse_duration.py b/marmiton/parse_duration.py new file mode 100644 index 0000000..bbc5bab --- /dev/null +++ b/marmiton/parse_duration.py @@ -0,0 +1,26 @@ +import re + + +def parse_duration_to_minutes(duration: str) -> int: + """Converts a string (e.g., "1h10", "12 min", "1 h") to minutes. 
+ + Input: + duration (str): The duration to convert formatted like "1h10", "12 min", "1 h" + Output: + int: The duration in minutes. + """ + duration = duration.lower().replace(" ", "") + hours = 0 + minutes = 0 + + # Search for hours (e.g., 1h, 2h, 1h30) + match_hours = re.search(r"(\d+)h", duration) + if match_hours: + hours = int(match_hours.group(1)) + + # Search for minutes (e.g., 10min, 45) + match_minutes = re.search(r"(\d+)(?:min)?$", duration) + if match_minutes: + minutes = int(match_minutes.group(1)) + + return hours * 60 + minutes diff --git a/plats.py b/plats.py new file mode 100644 index 0000000..cc1412c --- /dev/null +++ b/plats.py @@ -0,0 +1,334 @@ +PLATS = [ + "Spaghetti bolognaise", + "Ratatouille", + "Tacos al pastor", + "Sushi", + "Couscous", + "Pad thaĂŻ", + "Poulet tikka masala", + "Lasagnes", + "Boeuf bourguignon", + "Falafels", + "Chili con carne", + "Paella", + "Ramen", + "Burger maison", + "Pizza margherita", + "Gnocchis Ă  la crĂšme", + "Soupe pho", + "Bibimbap", + "Tajine de poulet", + "Curry vert thaĂŻ", + "Biryani", + "Gratin dauphinois", + "Croque-monsieur", + "Quiche lorraine", + "Tartiflette", + "Fondue savoyarde", + "Bouillabaisse", + "Poke bowl", + "Moussaka", + "Ceviche", + "Carbonade flamande", + "Cassoulet", + "Empanadas", + "Kefta", + "Feijoada", + "Poutine", + "RĂŽti de porc", + "PĂątes carbonara", + "Soupe miso", + "Okonomiyaki", + "Kebab", + "Salade niçoise", + "TaboulĂ©", + "Galettes bretonnes", + "CrĂȘpes salĂ©es", + "Banh mi", + "Polenta au fromage", + "Sarma (chou farci)", + "Rouleaux de printemps", + "Nasi goreng", + "Laksa", + "Katsu curry", + "Bulgogi", + "Risotto aux champignons", + "Sarma turque", + "Tortilla espagnole", + "Fajitas", + "Soupe minestrone", + "Yakitori", + "Boeuf Stroganoff", + "Choucroute", + "Clafoutis salĂ©", + "Tandoori", + "Mac & cheese", + "Wellington vĂ©gĂ©tarien", + "Lentilles aux saucisses", + "Poulet basquaise", + "Gaspacho", + "Salade de pĂątes", + "Salade grecque", + "Riz cantonais", + 
"Tofu sautĂ©", + "Gyoza", + "Onigiri", + "Boulettes suĂ©doises", + "Hachis parmentier", + "Tartare de thon", + "Osso buco", + "MafĂ©", + "Brochettes de lĂ©gumes", + "Tteokbokki", + "Calamars frits", + "Soupe de potiron", + "Salade d’endives", + "Galettes de lĂ©gumes", + "Chakchouka", + "Pain pita garni", + "Gratin de courgettes", + "Soupe Ă  l’oignon", + "Oeufs cocotte", + "Fish and chips", + "Katsu sando", + "Riz pilaf", + "Poulet au citron", + "Harira", + "Farfalle au pesto", + "Bruschetta", + "Tortellini ricotta Ă©pinards", + "Khao pad", + "Pizza napolitaine", + "Kitchari", + "Tamagoyaki", + "Soupe de lentilles", + "PoĂȘlĂ©e de lĂ©gumes", + "Croquettes de poisson", + "Olives marinĂ©es", + "Tapenade", + "Houmous", + "Guacamole", + "Tzatziki", + "Nems", + "Samoussas", + "Mini quiches", + "FeuilletĂ©s au fromage", + "Bruschetta", + "Rillettes de thon", + "Gressins au jambon cru", + "Blinis au saumon", + "Mini brochettes tomates mozzarella", + "Falafels", + "Empanadas", + "Mini croque-monsieur", + "Mini burgers", + "Oignons frits", + "Tortilla roulĂ©e au fromage", + "Beignets de crevettes", + "Toasts tapenade et chĂšvre", + "Saucisson sec", + "Fromage en cubes", + "Chips de lĂ©gumes", + "Nuggets de poulet", + "Accras de morue", + "Gyozas", + "Pakoras", + "Mini cakes salĂ©s", + "Pois chiches grillĂ©s", + "Popcorn salĂ©", + "Amandes grillĂ©es", + "Mini rouleaux de printemps", + "Mini samoussas lĂ©gumes", + "Quesadillas coupĂ©es", + "Sardines grillĂ©es sur toast", + "Pain pita et dips", + "Mini tartines nordiques", + "Crostinis variĂ©s", + "Crackers et fromage", + "Mini tacos", + "Patatas bravas", + "FeuilletĂ©s aux Ă©pinards", + "Tartinade de betterave", + "Petits roulĂ©s jambon fromage", + "Tempura de lĂ©gumes", + "Muffins salĂ©s", + "Toast d’avocat", + "Pommes de terre grenaille", + "Riz sautĂ© aux lĂ©gumes", + "Chili sin carne", + "Curry de pois chiches", + "Poulet au citron et quinoa", + "Ratatouille", + "Tajine de lĂ©gumes", + "Bibimbap sans sauce soja (ou avec 
tamari)", + "Salade de lentilles", + "Soupe de potiron au lait de coco", + "Galettes de sarrasin aux champignons", + "Poisson grillĂ© et patates douces", + "Boeuf sautĂ© aux lĂ©gumes", + "Salade de quinoa et feta", + "Omelette aux herbes", + "Soupe miso sans miso d’orge", + "Homard grillĂ© au beurre citronnĂ©", + "Risotto aux truffes", + "Filet de bƓuf Rossini", + "Paella aux fruits de mer", + "Tournedos de bƓuf aux morilles", + "Sushi au thon rouge et anguille", + "Foie gras poĂȘlĂ© sur pain briochĂ©", + "CĂŽte de veau aux girolles", + "Chateaubriand sauce bĂ©arnaise", + "CarrĂ© d’agneau en croĂ»te d’herbes", + "Noix de Saint-Jacques poĂȘlĂ©es au safran", + "Canard laquĂ© Ă  la pĂ©kinoise", + "Coquilles Saint-Jacques gratinĂ©es", + "Wellington de bƓuf", + "Caviar et blinis de sarrasin", + "Tartare de bƓuf Ă  l’italienne", + "Salade caprese", + "Soupe froide de concombre", + "VeloutĂ© de potimarron", + "Carpaccio de bƓuf", + "Carpaccio de saumon", + "Gaspacho andalou", + "TaboulĂ© libanais", + "Ceviche de poisson", + "Salade de quinoa", + "ƒufs mimosa", + "Tartare de thon", + "Soupe miso", + "Salade de lentilles", + "Avocat crevettes", + "Salade grecque", + "VeloutĂ© d’asperges", + "Soupe de lĂ©gumes", + "Rouleaux de printemps", + "Tartine avocat Ɠuf pochĂ©", + "Salade de cruditĂ©s", + "Tzatziki avec pain pita", + "Houmous et cruditĂ©s", + "Salade de carottes rĂąpĂ©es", + "Rillettes de saumon", + "Blinis au saumon fumĂ©", + "Salade de tomates anciennes", + "Bruschetta aux lĂ©gumes", + "Mini brochettes de crevettes", + "VeloutĂ© de courgettes", + "Soupe de champignons", + "Avocat farci au thon", + "Tartare de lĂ©gumes", + "Mini tarte fine aux oignons", + "Terrine de lĂ©gumes", + "Panna cotta salĂ©e au parmesan", + "Salade de betteraves et chĂšvre", + "ƒuf cocotte aux Ă©pinards", + "Petite quiche aux poireaux", + "CrĂšme de petits pois", + "VeloutĂ© de patates douces", + "Cappuccino de champignons", + "Mini flans de lĂ©gumes", + "Tartelette tomates moutarde", + 
"Caviar d’aubergine", + "Salade d’endives aux noix", + "Betteraves marinĂ©es", + "Toasts tapenade", + "Salade d’artichauts", + "Soupe thaĂŻ citronnelle", + "Mini clafoutis salĂ©s", + "PurĂ©e de pommes de terre", + "Riz pilaf", + "Gratin dauphinois", + "Frites maison", + "Ratatouille", + "LĂ©gumes rĂŽtis au four", + "Polenta crĂ©meuse", + "PĂątes Ă  l’huile d’olive et herbes", + "Semoule de couscous", + "Quinoa aux lĂ©gumes", + "Haricots verts Ă  l’ail", + "Pommes de terre grenaille", + "PurĂ©e de patates douces", + "Chou sautĂ© Ă  l’asiatique", + "Épinards Ă  la crĂšme", + "Galettes de lĂ©gumes", + "Salade verte croquante", + "Boulgour aux herbes", + "MaĂŻs grillĂ©", + "Champignons sautĂ©s", + "Pesto", + "Salsa verde", + "Sauce soja sucrĂ©e", + "Tzatziki", + "Chimichurri", + "Raita", + "Mayonnaise au citron", + "Sauce barbecue", + "Romesco", + "Gremolata", + "Sauce au yaourt", + "AĂŻoli", + "Sauce moutarde miel", + "Sauce tomate Ă©picĂ©e", + "Pesto rosso", + "Sauce aux champignons", + "Sauce au fromage bleu", + "Sauce aux poivrons rouges", + "Sauce aux herbes", + "Sauce au curry", + "Sauce Ă  l’ail rĂŽti", + "Sauce au poivre vert", + "Sauce aux cĂąpres", + "Sauce Ă  la moutarde ancienne", + "Sauce au fromage blanc", + "Ketchup", + "Tiramisu", + "CrĂšme brĂ»lĂ©e", + "Baklava", + "Mochi", + "Pavlova", + "Tarte Tatin", + "Pudding au caramel", + "Cheesecake", + "Gulab jamun", + "Sorbet aux fruits", + "Churros", + "CannelĂ©s", + "Brownie", + "Panna cotta", + "Clafoutis", + "Knafeh", + "Riz au lait", + "Mille-feuille", + "Pastel de nata", + "Apple pie", + "Beignets", + "Tarte au citron meringuĂ©e", + "Semifreddo", + "Éclair au chocolat", + "Profiteroles", + "Lamington", + "Tarte au chocolat", + "Banoffee pie", + "Glace vanille", + "Kouglof", + "Trifle", + "Flan pĂątissier", + "Baba au rhum", + "SoufflĂ© au chocolat", + "Poire Belle-HĂ©lĂšne", + "CrĂȘpes Suzette", + "Nougat glacĂ©", + "Cassata sicilienne", + "Strudel aux pommes", + "Tapioca au lait de coco", + 
"Mille-crĂȘpes", + "Halva", + "Charlotte aux fraises", + "Kheer", + "GĂąteau basque", + "Mont-Blanc", + "Carrot cake", + "Tarte Ă  la rhubarbe", + "Madeleines", + "Brigadeiro", +] diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..1a644f7 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,12 @@ +[tool.ruff] +line-length = 88 +target-version = "py313" +exclude = [".venv", ".tox", "__pypackages__"] + +[tool.ruff.lint] +select = ["E", "W", "F", "I"] # E = PEP8, W = warnings, F = erreurs pyflakes, I = tri des imports +fixable = ["E", "W", "F", "I"] # Autoriser les corrections automatiques + +[tool.docformatter] +wrap-summaries = 88 +wrap-descriptions = 88 diff --git a/requirements.txt b/requirements.txt index cd91942..a0e9b46 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,6 @@ -beautifulsoup4==4.6.0 +beautifulsoup4==4.13.4 +bs4==0.0.2 +colorama==0.4.6 +python-marmiton==0.4.2 +soupsieve==2.7 +typing_extensions==4.14.0 diff --git a/setup.py b/setup.py index 188f952..b0e44ce 100644 --- a/setup.py +++ b/setup.py @@ -1,15 +1,14 @@ - from setuptools import setup setup( - name='python-marmiton', - version='0.4.2', - description='Easy-to-use Python API for the marmiton.com cooking website.', - packages=['marmiton'], - url='https://github.com/remaudcorentin-dev/python-marmiton', - author='Corentin Remaud', - author_email='remaudcorentin.dev@gmail.com', - license='MIT', - zip_safe=False, - install_requires=['bs4'], - ) + name="scraping-marmiton", + version="0.4.2", + description="Script permettant de rĂ©cupĂ©rer des recettes du site Marmiton.org", + packages=["marmiton"], + url="https://github.com/remaudcorentin-dev/python-marmiton", + author="Corentin Remaud", + author_email="remaudcorentin.dev@gmail.com", + license="MIT", + zip_safe=False, + install_requires=["bs4"], +) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/test_parse_duration.py b/tests/test_parse_duration.py 
new file mode 100644 index 0000000..65548e6 --- /dev/null +++ b/tests/test_parse_duration.py @@ -0,0 +1,98 @@ +import unittest + +from marmiton.parse_duration import parse_duration_to_minutes + + +class TestParseDurationToMinutes(unittest.TestCase): + """Unit tests for the parse_duration_to_minutes function.""" + + def test_hours_and_minutes(self): + """Test with durations containing hours and minutes.""" + self.assertEqual(parse_duration_to_minutes("1h10"), 70) + self.assertEqual(parse_duration_to_minutes("2h30"), 150) + self.assertEqual(parse_duration_to_minutes("3h45"), 225) + self.assertEqual(parse_duration_to_minutes("1h05"), 65) + + def test_hours_and_minutes_with_spaces(self): + """Test with durations containing spaces.""" + self.assertEqual(parse_duration_to_minutes("1 h 10"), 70) + self.assertEqual(parse_duration_to_minutes("2 h 30"), 150) + self.assertEqual(parse_duration_to_minutes("1 h 5"), 65) + + def test_only_minutes(self): + """Test with durations in minutes only.""" + self.assertEqual(parse_duration_to_minutes("12 min"), 12) + self.assertEqual(parse_duration_to_minutes("45 min"), 45) + self.assertEqual(parse_duration_to_minutes("30min"), 30) + self.assertEqual(parse_duration_to_minutes("5min"), 5) + + def test_only_minutes_without_unit(self): + """Test with durations in minutes without unit.""" + self.assertEqual(parse_duration_to_minutes("45"), 45) + self.assertEqual(parse_duration_to_minutes("30"), 30) + self.assertEqual(parse_duration_to_minutes("15"), 15) + self.assertEqual(parse_duration_to_minutes("5"), 5) + + def test_only_hours(self): + """Test with durations in hours only.""" + self.assertEqual(parse_duration_to_minutes("1 h"), 60) + self.assertEqual(parse_duration_to_minutes("2h"), 120) + self.assertEqual(parse_duration_to_minutes("3 h"), 180) + self.assertEqual(parse_duration_to_minutes("4h"), 240) + + def test_zero_values(self): + """Test with zero values.""" + self.assertEqual(parse_duration_to_minutes("0h"), 0) + 
self.assertEqual(parse_duration_to_minutes("0 min"), 0) + self.assertEqual(parse_duration_to_minutes("0"), 0) + self.assertEqual(parse_duration_to_minutes("0h0"), 0) + + def test_large_values(self): + """Test with large values.""" + self.assertEqual(parse_duration_to_minutes("10h30"), 630) + self.assertEqual(parse_duration_to_minutes("24h"), 1440) + self.assertEqual(parse_duration_to_minutes("120 min"), 120) + self.assertEqual(parse_duration_to_minutes("500"), 500) + + def test_case_insensitive(self): + """Test that the function is case insensitive.""" + self.assertEqual(parse_duration_to_minutes("1H30"), 90) + self.assertEqual(parse_duration_to_minutes("2H"), 120) + self.assertEqual(parse_duration_to_minutes("45 MIN"), 45) + self.assertEqual(parse_duration_to_minutes("1h30MIN"), 90) + + def test_mixed_formats(self): + """Test with mixed formats.""" + self.assertEqual(parse_duration_to_minutes("2h 30"), 150) + self.assertEqual(parse_duration_to_minutes("1h 0"), 60) + self.assertEqual(parse_duration_to_minutes("0h 45"), 45) + + def test_edge_cases(self): + """Test edge cases.""" + # Test with single digit numbers + self.assertEqual(parse_duration_to_minutes("1h1"), 61) + self.assertEqual(parse_duration_to_minutes("9h9"), 549) + + # Test with multi-digit numbers + self.assertEqual(parse_duration_to_minutes("12h15"), 735) + self.assertEqual(parse_duration_to_minutes("100h59"), 6059) + + def test_string_with_extra_whitespace(self): + """Test with extra whitespace.""" + self.assertEqual(parse_duration_to_minutes(" 1h30 "), 90) + self.assertEqual(parse_duration_to_minutes(" 45 min "), 45) + self.assertEqual(parse_duration_to_minutes(" 2 h "), 120) + + def test_various_minute_formats(self): + """Test with various minute formats.""" + # Minutes with explicit 'min' + self.assertEqual(parse_duration_to_minutes("30min"), 30) + self.assertEqual(parse_duration_to_minutes("15 min"), 15) + + # Minutes without 'min' (number only) + self.assertEqual(parse_duration_to_minutes("25"), 
25) + self.assertEqual(parse_duration_to_minutes("60"), 60) + + +if __name__ == "__main__": # pragma: no cover + unittest.main() diff --git a/tox.ini b/tox.ini new file mode 100644 index 0000000..d61dba0 --- /dev/null +++ b/tox.ini @@ -0,0 +1,41 @@ +[tox] +envlist = py313, test, check_format, lint + +[testenv] +description = Tester le code et le coverage de nos tests +deps = + pytest + coverage +commands = + coverage erase + coverage run -m pytest + coverage report -m --fail-under=100 + coverage html + +[testenv:test] +description = Alias explicite pour lancer les tests +deps = + {[testenv]deps} +commands = + {[testenv]commands} + +[testenv:check_format] +description = Vérification stricte du format (sans correction) +skip_install = true +deps = + ruff + docformatter +commands = + ruff format --check . + docformatter --check --recursive marmiton/ + +[testenv:lint] +description = Lint, tri des imports, suppression des inutiles (avec correction) +skip_install = true +deps = + ruff + docformatter +commands = + docformatter --in-place --recursive marmiton/ + ruff format . + ruff check . --fix