1// This program is free software: you can redistribute it and/or modify2// it under the terms of the GNU Affero General Public License as3// published by the Free Software Foundation, either version 3 of the4// License, or (at your option) any later version.5//6// This program is distributed in the hope that it will be useful, but7// WITHOUT ANY WARRANTY; without even the implied warranty of8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU9// Affero General Public License for more details.10//11// You should have received a copy of the GNU Affero General Public12// License along with this program. If not, see <http://www.gnu.org/licenses/>.1314package url1516import (17 "errors"18 "fmt"19 "github.com/nmeum/marvin/irc"20 "github.com/nmeum/marvin/modules"21 "golang.org/x/net/html"22 "mime"23 "net/http"24 "regexp"25 "strings"26 "compress/zlib"27)2829type Module struct {30 regex *regexp.Regexp31 RegexStr string `json:"regex"`32}3334func Init(moduleSet *modules.ModuleSet) {35 moduleSet.Register(new(Module))36}3738func (m *Module) Name() string {39 return "url"40}4142func (m *Module) Help() string {43 return "Displays information about posted URLs."44}4546func (m *Module) Defaults() {47 m.RegexStr = `(?i)\b((http|https)\://(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s` + "`" + `!()\[\]{};:'".,<>?«»“”‘’]))`48}4950func (m *Module) Load(client *irc.Client) error {51 regex, err := regexp.Compile(m.RegexStr)52 if err != nil {53 return err54 }5556 m.regex = regex57 client.CmdHook("privmsg", m.urlCmd)5859 return nil60}6162func (m *Module) urlCmd(client *irc.Client, msg irc.Message) error {63 url := m.regex.FindString(msg.Data)64 if len(url) <= 0 {65 return nil66 }6768 resp, err := http.Head(url)69 if err != nil {70 return err71 }72 resp.Body.Close() // HEAD response doesn't have a body7374 info := m.infoString(resp)75 if len(info) <= 0 {76 return nil77 }7879 return client.Write("NOTICE %s :%s", msg.Receiver, info)80}8182func (m *Module) infoString(resp *http.Response) string {83 var mtype string84 var infos []string8586 ctype := resp.Header.Get("Content-Type")87 if len(ctype) > 0 {88 m, _, err := mime.ParseMediaType(ctype)89 if err == nil {90 mtype = m91 infos = append(infos, fmt.Sprintf("Type: %s", mtype))92 }93 }9495 csize := resp.ContentLength96 if csize >= 0 {97 infos = append(infos, fmt.Sprintf("Size: %s", m.humanize(csize)))98 }99100 if mtype == "text/html" {101 title, err := m.extractTitle(resp.Request.URL.String())102 if err == nil {103 infos = append(infos, fmt.Sprintf("Title: %s", title))104 }105 }106107 info := strings.Join(infos, " | ")108 if len(info) > 0 {109 info = fmt.Sprintf("%s -- %s", strings.ToUpper(m.Name()), info)110 }111112 return info113}114115func (m *Module) extractTitle(url string) (title string, err error) {116 resp, err := http.Get(url)117 if err != nil {118 return119 }120 defer resp.Body.Close()121122 var reader = resp.Body123 if resp.Header.Get("Content-Encoding") == "deflate" {124 readerZ, errZ := zlib.NewReader(resp.Body)125 defer readerZ.Close()126 if errZ == nil {127 reader = readerZ128 }129 }130131 doc, err := html.Parse(reader)132 if err != nil {133 return134 }135136 var parseFunc func(n *html.Node)137 parseFunc = func(n *html.Node) {138 if n.Type == html.ElementNode && n.Data == "title" {139 child := n.FirstChild140 if child != nil {141 title = child.Data142 } else {143 return144 }145 }146147 for c := n.FirstChild; c != nil; c = c.NextSibling {148 parseFunc(c)149 }150 }151152 parseFunc(doc)153 if len(title) <= 0 {154 err = errors.New("couldn't extract title")155 return156 }157158 return159}160161func (m *Module) humanize(count int64) string {162 switch {163 case count > (1 << 40):164 return fmt.Sprintf("%v TiB", count/(1<<40))165 case count > (1 << 30):166 return fmt.Sprintf("%v GiB", count/(1<<30))167 case count > (1 << 20):168 return fmt.Sprintf("%v MiB", count/(1<<20))169 case count > (1 << 10):170 return fmt.Sprintf("%v KiB", count/(1<<10))171 default:172 return fmt.Sprintf("%v B", count)173 }174}