all repos — honk @ 0aea6ded1cbefd6a585c05ba3571a698026a2a48

my fork of honk

import.go (view raw)

  1//
  2// Copyright (c) 2019 Ted Unangst <tedu@tedunangst.com>
  3//
  4// Permission to use, copy, modify, and distribute this software for any
  5// purpose with or without fee is hereby granted, provided that the above
  6// copyright notice and this permission notice appear in all copies.
  7//
  8// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
  9// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 10// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 11// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 12// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 13// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 14// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 15
 16package main
 17
 18import (
 19	"encoding/json"
 20	"fmt"
 21	"html"
 22	"io/ioutil"
 23	"log"
 24	"os"
 25	"regexp"
 26	"sort"
 27	"strings"
 28	"time"
 29
 30	"humungus.tedunangst.com/r/webs/htfilter"
 31)
 32
 33func importMain(username, flavor, source string) {
 34	switch flavor {
 35	case "mastodon":
 36		importMastodon(username, source)
 37	case "twitter":
 38		importTwitter(username, source)
 39	default:
 40		log.Fatal("unknown source flavor")
 41	}
 42}
 43
 44func importMastodon(username, source string) {
 45	user, err := butwhatabout(username)
 46	if err != nil {
 47		log.Fatal(err)
 48	}
 49	type Toot struct {
 50		Id           string
 51		Type         string
 52		To           []string
 53		Cc           []string
 54		Summary      string
 55		Content      string
 56		InReplyTo    string
 57		Conversation string
 58		Published    time.Time
 59		Tag          []struct {
 60			Type string
 61			Name string
 62		}
 63		Attachment []struct {
 64			Type      string
 65			MediaType string
 66			Url       string
 67			Name      string
 68		}
 69	}
 70	var outbox struct {
 71		OrderedItems []struct {
 72			Object Toot
 73		}
 74	}
 75	fd, err := os.Open(source + "/outbox.json")
 76	if err != nil {
 77		log.Fatal(err)
 78	}
 79	dec := json.NewDecoder(fd)
 80	err = dec.Decode(&outbox)
 81	if err != nil {
 82		log.Fatalf("error parsing json: %s", err)
 83	}
 84	fd.Close()
 85
 86	havetoot := func(xid string) bool {
 87		var id int64
 88		row := stmtFindXonk.QueryRow(user.ID, xid)
 89		err := row.Scan(&id)
 90		if err == nil {
 91			return true
 92		}
 93		return false
 94	}
 95
 96	re_tootid := regexp.MustCompile("[^/]+$")
 97	for _, item := range outbox.OrderedItems {
 98		toot := item.Object
 99		tootid := re_tootid.FindString(toot.Id)
100		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, tootid)
101		if havetoot(xid) {
102			continue
103		}
104		honk := Honk{
105			UserID:   user.ID,
106			What:     "honk",
107			Honker:   user.URL,
108			XID:      xid,
109			RID:      toot.InReplyTo,
110			Date:     toot.Published,
111			URL:      xid,
112			Audience: append(toot.To, toot.Cc...),
113			Noise:    toot.Content,
114			Convoy:   toot.Conversation,
115			Whofore:  2,
116			Format:   "html",
117			Precis:   toot.Summary,
118		}
119		if honk.RID != "" {
120			honk.What = "tonk"
121		}
122		if !loudandproud(honk.Audience) {
123			honk.Whofore = 3
124		}
125		for _, att := range toot.Attachment {
126			switch att.Type {
127			case "Document":
128				fname := fmt.Sprintf("%s/%s", source, att.Url)
129				data, err := ioutil.ReadFile(fname)
130				if err != nil {
131					log.Printf("error reading media: %s", fname)
132					continue
133				}
134				u := xfiltrate()
135				name := att.Name
136				desc := name
137				newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
138				fileid, err := savefile(u, name, desc, newurl, att.MediaType, true, data)
139				if err != nil {
140					log.Printf("error saving media: %s", fname)
141					continue
142				}
143				donk := &Donk{
144					FileID: fileid,
145				}
146				honk.Donks = append(honk.Donks, donk)
147			}
148		}
149		for _, t := range toot.Tag {
150			switch t.Type {
151			case "Hashtag":
152				honk.Onts = append(honk.Onts, t.Name)
153			}
154		}
155		savehonk(&honk)
156	}
157}
158
159func importTwitter(username, source string) {
160	user, err := butwhatabout(username)
161	if err != nil {
162		log.Fatal(err)
163	}
164
165	type Tweet struct {
166		ID_str                  string
167		Created_at              string
168		Full_text               string
169		In_reply_to_screen_name string
170		In_reply_to_status_id   string
171		Entities                struct {
172			Hashtags []struct {
173				Text string
174			}
175			Media []struct {
176				Url       string
177				Media_url string
178			}
179			Urls []struct {
180				Url          string
181				Expanded_url string
182			}
183		}
184		date   time.Time
185		convoy string
186	}
187
188	var tweets []*Tweet
189	fd, err := os.Open(source + "/tweet.js")
190	if err != nil {
191		log.Fatal(err)
192	}
193	// skip past window.YTD.tweet.part0 =
194	fd.Seek(25, 0)
195	dec := json.NewDecoder(fd)
196	err = dec.Decode(&tweets)
197	if err != nil {
198		log.Fatalf("error parsing json: %s", err)
199	}
200	fd.Close()
201	tweetmap := make(map[string]*Tweet)
202	for _, t := range tweets {
203		t.date, _ = time.Parse("Mon Jan 02 15:04:05 -0700 2006", t.Created_at)
204		tweetmap[t.ID_str] = t
205	}
206	sort.Slice(tweets, func(i, j int) bool {
207		return tweets[i].date.Before(tweets[j].date)
208	})
209	havetwid := func(xid string) bool {
210		var id int64
211		row := stmtFindXonk.QueryRow(user.ID, xid)
212		err := row.Scan(&id)
213		if err == nil {
214			return true
215		}
216		return false
217	}
218
219	for _, t := range tweets {
220		xid := fmt.Sprintf("%s/%s/%s", user.URL, honkSep, t.ID_str)
221		if havetwid(xid) {
222			continue
223		}
224		what := "honk"
225		noise := ""
226		if parent := tweetmap[t.In_reply_to_status_id]; parent != nil {
227			t.convoy = parent.convoy
228			what = "tonk"
229		} else {
230			t.convoy = "data:,acoustichonkytonk-" + t.ID_str
231			if t.In_reply_to_screen_name != "" {
232				noise = fmt.Sprintf("re: https://twitter.com/%s/status/%s\n\n",
233					t.In_reply_to_screen_name, t.In_reply_to_status_id)
234				what = "tonk"
235			}
236		}
237		audience := []string{thewholeworld}
238		honk := Honk{
239			UserID:   user.ID,
240			Username: user.Name,
241			What:     what,
242			Honker:   user.URL,
243			XID:      xid,
244			Date:     t.date,
245			Format:   "markdown",
246			Audience: audience,
247			Convoy:   t.convoy,
248			Public:   true,
249			Whofore:  2,
250		}
251		noise += t.Full_text
252		// unbelievable
253		noise = html.UnescapeString(noise)
254		for _, r := range t.Entities.Urls {
255			noise = strings.Replace(noise, r.Url, r.Expanded_url, -1)
256		}
257		for _, m := range t.Entities.Media {
258			u := m.Media_url
259			idx := strings.LastIndexByte(u, '/')
260			u = u[idx+1:]
261			fname := fmt.Sprintf("%s/tweet_media/%s-%s", source, t.ID_str, u)
262			data, err := ioutil.ReadFile(fname)
263			if err != nil {
264				log.Printf("error reading media: %s", fname)
265				continue
266			}
267			newurl := fmt.Sprintf("https://%s/d/%s", serverName, u)
268
269			fileid, err := savefile(u, u, u, newurl, "image/jpg", true, data)
270			if err != nil {
271				log.Printf("error saving media: %s", fname)
272				continue
273			}
274			donk := &Donk{
275				FileID: fileid,
276			}
277			honk.Donks = append(honk.Donks, donk)
278			noise = strings.Replace(noise, m.Url, "", -1)
279		}
280		for _, ht := range t.Entities.Hashtags {
281			honk.Onts = append(honk.Onts, "#"+ht.Text)
282		}
283		honk.Noise = noise
284		savehonk(&honk)
285	}
286}