Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
language: java
jdk:
  - oraclejdk8
# Each entry runs as its own command so that a static-analysis or test failure
# fails the build by itself. Joining the two invocations with ';' on one shell
# line would report only the exit status of the last command.
script:
  - ./gradlew checkstyleMain checkstyleTest findbugsMain findbugsTest pmdMain pmdTest test
  - ./gradlew build
# Keep downloaded dependencies between builds.
cache:
  directories:
    - $HOME/.m2
    - $HOME/.gradle

12 changes: 7 additions & 5 deletions src/main/java/com/pnikosis/html2markdown/HTML2Md.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package com.pnikosis.html2markdown;

import com.pnikosis.html2markdown.MDLine.MDLineType;
import com.pnikosis.html2markdown.md.MDLine;
import com.pnikosis.html2markdown.md.MDLine.MDLineType;
import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
Expand Down Expand Up @@ -136,19 +137,20 @@ private static String parseDocument(Document dirtyDoc) {
Document doc = cleaner.clean(dirtyDoc);
doc.outputSettings().escapeMode(EscapeMode.xhtml);

if (!title.trim().equals("")) {
return "# " + title + "\n\n" + getTextContent(doc);
String trimmedTitle = title.trim();
if (!"".equals(trimmedTitle)) {
return "# " + trimmedTitle + "\n\n" + getTextContent(doc);
} else {
return getTextContent(doc);
}
}

private static String getTextContent(Element element) {
ArrayList<MDLine> lines = new ArrayList<MDLine>();
ArrayList<MDLine> lines = new ArrayList<>();

List<Node> children = element.childNodes();
for (Node child : children) {
if (child instanceof TextNode) {
if (TextNode.class.isInstance(child)) {
TextNode textNode = (TextNode) child;
MDLine line = getLastLine(lines);
if (line.getContent().equals("")) {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package com.pnikosis.html2markdown.converters;

import com.pnikosis.html2markdown.md.MDLine;
import com.pnikosis.html2markdown.md.MDLines;

/**
 * Builds an inline Markdown link of the form {@code [content](url "title")}.
 * The quoted title part is emitted only when a non-empty title was supplied.
 */
/* package */ class ALineConverter extends LineConverter {
    private final String url;
    private final String title;

    /**
     * @param url   link target written between the parentheses
     * @param title optional link title; {@code null} or empty means "no title"
     */
    /* package */ ALineConverter(String url, String title) {
        this.url = url;
        this.title = title;
    }

    /**
     * Wraps {@code content} as the link text and returns it as a single
     * untyped line at level 0.
     *
     * @param content text shown as the link label
     * @return the lines holding the rendered link
     */
    @Override
    public MDLines convert(String content) {
        StringBuilder link = new StringBuilder();
        link.append("[").append(content).append("](").append(url);

        boolean hasTitle = title != null && !title.isEmpty();
        if (hasTitle) {
            link.append(" \"").append(title).append("\"");
        }

        link.append(")");
        return new MDLines().addLine(MDLine.MDLineType.None, 0, link.toString());
    }
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package com.pnikosis.html2markdown.converters;

import com.pnikosis.html2markdown.md.MDLine;
import com.pnikosis.html2markdown.md.MDLines;

/**
 * Converter used for line breaks: the incoming content is ignored and a
 * single untyped line with empty content is produced.
 */
/* package */ class BrLineConverter extends LineConverter {
    /**
     * @param content ignored
     * @return lines containing exactly one empty, level-0 line
     */
    @Override
    public MDLines convert(String content) {
        MDLines result = new MDLines();
        MDLine emptyLine = new MDLine(MDLine.MDLineType.None, 0, "");
        result.add(emptyLine);
        return result;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package com.pnikosis.html2markdown.converters;

import com.pnikosis.html2markdown.md.MDLine;
import com.pnikosis.html2markdown.md.MDLines;

/**
 * Splits code content into its individual lines and emits each of them as an
 * untyped line at level 1, framed by one empty level-0 line before and after.
 */
/* package */ class CodeLineConverter extends LineConverter {
    /**
     * @param content code text; split on {@code \n} or {@code \r\n}
     * @return an empty line, one level-1 line per source line, an empty line
     */
    @Override
    public MDLines convert(String content) {
        final String[] codeLines = content.split("\\r?\\n", 0);
        final MDLines result = new MDLines();

        result.add(emptyLine());
        for (int i = 0; i < codeLines.length; i++) {
            result.add(new MDLine(MDLine.MDLineType.None, 1, codeLines[i]));
        }
        result.add(emptyLine());

        return result;
    }

    /** Creates the empty, level-0 line used to frame the code lines. */
    private static MDLine emptyLine() {
        return new MDLine(MDLine.MDLineType.None, 0, "");
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package com.pnikosis.html2markdown.converters;

import com.pnikosis.html2markdown.md.MDLine;
import com.pnikosis.html2markdown.md.MDLines;

/**
 * Emits the content as one untyped level-0 line, preceded and followed by an
 * empty level-0 line.
 */
/* package */ class DivLineConverter extends LineConverter {
    /**
     * @param content text placed on the middle line
     * @return three lines: empty, {@code content}, empty
     */
    @Override
    public MDLines convert(String content) {
        final MDLines result = new MDLines();
        final String[] parts = {"", content, ""};
        for (String part : parts) {
            result.add(new MDLine(MDLine.MDLineType.None, 0, part));
        }
        return result;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package com.pnikosis.html2markdown.converters;

import com.pnikosis.html2markdown.md.MDLine;
import com.pnikosis.html2markdown.md.MDLines;

/**
 * Surrounds the content with single asterisks (Markdown emphasis).
 */
/* package */ class EmphasisLineConverter extends LineConverter {

    /**
     * @param content text to emphasise
     * @return a single untyped level-0 line holding {@code *content*}
     */
    @Override
    public MDLines convert(String content) {
        String emphasized = "*" + content + "*";
        MDLines result = new MDLines();
        return result.addLine(MDLine.MDLineType.None, 0, emphasized);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
package com.pnikosis.html2markdown.converters;

import com.pnikosis.html2markdown.md.MDLine;
import com.pnikosis.html2markdown.md.MDLines;

/**
 * Emits the content as a heading line. Levels 1 and 2 map to {@code Head1}
 * and {@code Head2}; every other level is emitted as {@code Head3}.
 */
/* package */ class HLineConverter extends LineConverter {
    private final int level;

    /**
     * @param level heading level the converter was created for
     */
    /* package */ HLineConverter(int level) {
        this.level = level;
    }

    /**
     * @param content heading text
     * @return a single level-0 line typed according to the heading level
     */
    @Override
    public MDLines convert(String content) {
        final MDLine.MDLineType headType;
        if (level == 1) {
            headType = MDLine.MDLineType.Head1;
        } else if (level == 2) {
            headType = MDLine.MDLineType.Head2;
        } else {
            // Level 3 and anything outside 1..2 share the same line type.
            headType = MDLine.MDLineType.Head3;
        }
        return new MDLines().addLine(headType, 0, content);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package com.pnikosis.html2markdown.converters;

import com.pnikosis.html2markdown.md.MDLine;
import com.pnikosis.html2markdown.md.MDLines;

/**
 * Produces a single {@code HR}-typed line; the incoming content is ignored.
 */
/* package */ class HorizontalRuleLineConverter extends LineConverter {
    /**
     * @param content ignored
     * @return lines containing one level-0 {@code HR} line with empty content
     */
    @Override
    public MDLines convert(String content) {
        MDLines result = new MDLines();
        return result.addLine(MDLine.MDLineType.HR, 0, "");
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package com.pnikosis.html2markdown.converters;

import com.pnikosis.html2markdown.md.MDLine;
import com.pnikosis.html2markdown.md.MDLines;

/**
 * Builds an inline Markdown image of the form {@code ![alt](src "title")}.
 * The quoted title part is emitted only when a non-empty title was supplied.
 */
/* package */ class ImgLineConverter extends LineConverter {
    private final String src;
    private final String alt;
    private final String title;

    /**
     * @param src   image location written between the parentheses
     * @param alt   alternative text written between the square brackets
     * @param title optional image title; {@code null} or empty means "no title"
     */
    /* package */ ImgLineConverter(String src, String alt, String title) {
        this.src = src;
        this.alt = alt;
        this.title = title;
    }

    /**
     * @param content ignored; the image is described entirely by the
     *                constructor arguments
     * @return a single untyped level-0 line holding the rendered image
     */
    @Override
    public MDLines convert(String content) {
        StringBuilder image = new StringBuilder();
        image.append("![").append(alt).append("](").append(src);

        boolean hasTitle = title != null && !title.isEmpty();
        if (hasTitle) {
            image.append(" \"").append(title).append("\"");
        }

        image.append(")");
        return new MDLines().addLine(MDLine.MDLineType.None, 0, image.toString());
    }
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package com.pnikosis.html2markdown.converters;

import com.pnikosis.html2markdown.extractor.TextExtractor;
import com.pnikosis.html2markdown.md.MDLines;
import org.jsoup.nodes.Element;

/**
 * Base class of the tag-specific converters. The static entry point picks a
 * converter from the element's tag name and feeds it the text extracted from
 * the element's child nodes.
 */
public abstract class LineConverter {
    /**
     * Converts one element into Markdown lines.
     *
     * @param element       element whose tag name selects the converter and
     *                      whose child nodes supply the content
     * @param textExtractor turns the element's child nodes into the content
     *                      string handed to the converter
     * @return the lines produced by the selected converter
     */
    public static MDLines convert(Element element, TextExtractor textExtractor) {
        LineConverter lineConverter = getConverter(element);
        return lineConverter.convert(textExtractor.extract(element.childNodes()));
    }

    /**
     * Selects the converter for {@code element} by tag name. Tags without a
     * dedicated converter fall back to {@link TextConverter}.
     */
    private static LineConverter getConverter(Element element) {
        String tag = element.tagName();

        switch (tag) {
            case "div":
                return new DivLineConverter();
            case "p":
                return new PLineConverter();
            case "br":
                return new BrLineConverter();
            case "strong":
            case "b":
                return new StrongLineConverter();
            case "em":
                return new EmphasisLineConverter();
            case "hr":
                return new HorizontalRuleLineConverter();
            case "a":
                return new ALineConverter(element.attr("href"), element.attr("title"));
            case "img":
                return new ImgLineConverter(element.attr("src"), element.attr("alt"), element.attr("title"));
            case "code":
                return new CodeLineConverter();
            default:
                break;
        }

        // Heading tags: "h" followed by digits. The digit run is capped at nine
        // characters so Integer.parseInt can never overflow; an unbounded run
        // (e.g. <h99999999999>) used to throw NumberFormatException. matches()
        // already anchors the whole string, so no ^...$ is needed.
        if (tag.matches("h[0-9]{1,9}")) {
            int level = Integer.parseInt(tag.substring(1));
            return new HLineConverter(level);
        }

        return new TextConverter();
    }

    /**
     * Renders already-extracted text content as Markdown lines.
     *
     * @param content text extracted from the element's child nodes
     * @return the lines representing the element
     */
    protected abstract MDLines convert(String content);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
package com.pnikosis.html2markdown.converters;

import com.pnikosis.html2markdown.md.MDLine;
import com.pnikosis.html2markdown.md.MDLines;

/**
 * Emits the paragraph content as one untyped level-0 line, preceded and
 * followed by an empty level-0 line.
 */
/* package */ class PLineConverter extends LineConverter {
    /**
     * @param content paragraph text placed on the middle line
     * @return three lines: empty, {@code content}, empty
     */
    @Override
    public MDLines convert(String content) {
        final MDLines result = new MDLines();
        final String[] parts = {"", content, ""};
        for (String part : parts) {
            result.add(new MDLine(MDLine.MDLineType.None, 0, part));
        }
        return result;
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package com.pnikosis.html2markdown.converters;

import com.pnikosis.html2markdown.md.MDLine;
import com.pnikosis.html2markdown.md.MDLines;

/**
 * Surrounds the content with double asterisks (Markdown strong emphasis).
 */
/* package */ class StrongLineConverter extends LineConverter {

    /**
     * @param content text to render in bold
     * @return a single untyped level-0 line holding {@code **content**}
     */
    @Override
    public MDLines convert(String content) {
        String strong = "**" + content + "**";
        MDLines result = new MDLines();
        return result.addLine(MDLine.MDLineType.None, 0, strong);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package com.pnikosis.html2markdown.converters;

import com.pnikosis.html2markdown.md.MDLine;
import com.pnikosis.html2markdown.md.MDLines;

/**
 * Pass-through converter: the content is emitted unchanged as a single
 * untyped level-0 line.
 */
/*package*/ class TextConverter extends LineConverter {
    /**
     * @param content text to emit as-is
     * @return lines containing exactly one line with {@code content}
     */
    @Override
    protected MDLines convert(String content) {
        MDLines result = new MDLines();
        return result.addLine(MDLine.MDLineType.None, 0, content);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
package com.pnikosis.html2markdown.extractor;

import java.util.List;
import org.jsoup.nodes.Node;

/**
 * Strategy for turning a list of jsoup nodes into a single string.
 */
public interface TextExtractor {
/**
 * Produces the text for the given nodes.
 *
 * @param nodes nodes to extract text from
 * @return the extracted text
 */
String extract(List<Node> nodes);
}
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
package com.pnikosis.html2markdown;
package com.pnikosis.html2markdown.md;

public class MDLine {
private int level = 0;
private MDLineType type;
private StringBuilder content;
private String content;

public MDLine(MDLineType type, int level, String content) {
this.type = type;
this.level = level;
this.content = new StringBuilder(content);
this.content = content.trim();
}

public MDLine create(String line) {
public static MDLine create(String line) {
int spaces = 0;
while ((spaces < line.length()) && (line.charAt(spaces) == ' ')) {
spaces++;
Expand Down Expand Up @@ -60,7 +60,7 @@ public MDLine create(String line) {
return new MDLine(MDLineType.None, newLevel, content);
}

public MDLineType getListTypeName() {
public MDLineType getLineType() {
return type;
}

Expand Down Expand Up @@ -98,21 +98,21 @@ public String toString() {
}

public String getContent() {
return content.toString();
return content;
}

public void append(String appendContent) {
if (content.length() == 0) {
int i = 0;
while (i < appendContent.length() && Character.isWhitespace(appendContent.charAt(i))) {
i++;
}
content.append(appendContent.substring(i));
content = appendContent.trim();
} else {
content.append(appendContent);
content = content + appendContent;
}
}

/**
 * Reports whether this line carries nothing: it is at level 0, its content
 * is the empty string, and its type is {@code MDLineType.None}.
 *
 * @return {@code true} only when all three conditions hold
 */
public boolean isEmpty() {
return level == 0 && content.isEmpty() && type.equals(MDLineType.None);
}

@Override
public boolean equals(Object o) {
return o instanceof MDLine && ((MDLine) o).type.equals(this.type);
Expand Down
Loading