-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathcontent.go
99 lines (85 loc) · 2.35 KB
/
content.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
/*
Terminews is a terminal based (TUI) RSS feed manager.
Copyright (C) 2017 Alexandros Ntavelos, a[dot]ntavelos[at]gmail[dot]com
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package main
import (
// "fmt"
"github.com/advancedlogic/GoOse"
// "golang.org/x/net/html"
// "net/http"
"strings"
)
// func ParseUrl(url string, ch chan string, done chan bool) {
// resp, err := http.Get(url)
// defer func() {
// // Notify that we're done after this function
// done <- true
// }()
// if err != nil {
// return
// }
// b := resp.Body
// defer b.Close() // close Body when the function returns
// z := html.NewTokenizer(b)
// inParagraph := false
// for {
// tt := z.Next()
// switch tt {
// case html.ErrorToken:
// // End of the document, we're done
// return
// case html.StartTagToken:
// t := z.Token()
// // Check if the token is an <p> tag
// isParagraph := t.Data == "p"
// if isParagraph {
// inParagraph = true
// }
// case html.EndTagToken:
// t := z.Token()
// isParagraph := t.Data == "p"
// if isParagraph {
// inParagraph = false
// }
// case html.TextToken:
// if inParagraph {
// t := fmt.Sprint(z.Token())
// ch <- strings.TrimSpace(html.UnescapeString(t))
// }
// }
// }
// }
// func GetContent(url string) []string {
// ch := make(chan string)
// done := make(chan bool)
// go ParseUrl(url, ch, done)
// content := []string{}
// for {
// select {
// case text := <-ch:
// content = append(content, text)
// case <-done:
// return content
// }
// }
// }
// GetContent runs GoOse's ExtractFromURL on url and returns the article's
// CleanedText split on blank lines (the "\n\n" separator), one element per
// resulting chunk. If extraction fails, it returns a nil slice together with
// the error reported by the extractor, unchanged.
func GetContent(url string) ([]string, error) {
	extractor := goose.New()

	extracted, err := extractor.ExtractFromURL(url)
	if err != nil {
		return nil, err
	}

	// Elements are returned exactly as split: no trimming or filtering.
	return strings.Split(extracted.CleanedText, "\n\n"), nil
}