Решение на Concurrent Crawling от Йордан Пулов

Обратно към всички решения

Към профила на Йордан Пулов

Резултати

  • 7 точки от тестове
  • 0 бонус точки
  • 7 точки общо
  • 8 успешни тест(а)
  • 3 неуспешни тест(а)

Код

package main
import (
"errors"
"fmt"
"io/ioutil"
"net/http"
"time"
)
func SeekAndDestroy(callback func(string) bool, chunkedUrlsToCheck <-chan []string, workersCount int) (string, error) {
// wrong numbers of workers
if workersCount <= 0 {
return "", errors.New("Workers are less than 1")
}
var urlsQueue []string
currentUrl := make(chan string)
gotIt := make(chan string)
closedChan := make(chan struct{})
// buffering the chans , cannot make it without another goroute
go func() {
for {
if len(urlsQueue) > 0 {
select {
case currentUrl <- urlsQueue[0]: // reading from the buffer
urlsQueue = urlsQueue[1:]
case urls, ok := <-chunkedUrlsToCheck: // writing in the buffer
if ok == false {
closedChan <- struct{}{}
return
}
for _, url := range urls {
urlsQueue = append(urlsQueue, url)
}
}
} else {
// insertion of the first array with urls
urls, ok := <-chunkedUrlsToCheck
if ok == false {
closedChan <- struct{}{}
return
}
for _, url := range urls {
urlsQueue = append(urlsQueue, url)
}
}
}
}()
// spawn all workers at once .... not good I know :(
for i := 0; i < workersCount; i++ {
go func() {
for {
// get the next url
url := <-currentUrl
resp, urlError := http.Get(url)
// check for url or status code
if urlError != nil || resp.StatusCode/100 != 2 {
break
}
// read the HTML
html, err := ioutil.ReadAll(resp.Body)
resp.Body.Close()
// problems in parsing the HTML
if err != nil {
break
}
//check for the callback
if callback(fmt.Sprintf("%s", html)) {
gotIt <- url
return
}
}
}()
}
// waiting for the result
select {
case url := <-gotIt:
// when the parent dies ... all children die as well
return url, nil
case <-time.After(15 * time.Second):
return "", errors.New("Time expired")
case <-closedChan:
return "", errors.New("Closed Chan")
}
}

Лог от изпълнението

[/tmp/go-build058675880/_/tmp/d20150111-16649-1h9gz9s/_test/d20150111-16649-1h9gz9s.test -test.run=TestWithNegativeWorkersCount -test.timeout=120s]
PASS
ok  	_/tmp/d20150111-16649-1h9gz9s	0.005s
[/tmp/go-build851282701/_/tmp/d20150111-16649-1h9gz9s/_test/d20150111-16649-1h9gz9s.test -test.run=TestWithZeroWorkersCount -test.timeout=120s]
PASS
ok  	_/tmp/d20150111-16649-1h9gz9s	0.005s
[/tmp/go-build525791759/_/tmp/d20150111-16649-1h9gz9s/_test/d20150111-16649-1h9gz9s.test -test.run=TestWithInvalidCallback -test.timeout=120s]
--- FAIL: TestWithInvalidCallback-2 (1.00 seconds)
	solution_test.go:43: Test exceeded allowed time of 1 seconds: parameter errors should be immediately returned (callback is nil)
FAIL
exit status 1
FAIL	_/tmp/d20150111-16649-1h9gz9s	1.006s
[/tmp/go-build242740108/_/tmp/d20150111-16649-1h9gz9s/_test/d20150111-16649-1h9gz9s.test -test.run=TestWithNilChannel -test.timeout=120s]
--- FAIL: TestWithNilChannel-2 (1.00 seconds)
	solution_test.go:43: Test exceeded allowed time of 1 seconds: parameter errors should be immediately returned (channel is uninitialized)
FAIL
exit status 1
FAIL	_/tmp/d20150111-16649-1h9gz9s	1.007s
[/tmp/go-build501991323/_/tmp/d20150111-16649-1h9gz9s/_test/d20150111-16649-1h9gz9s.test -test.run=TestWithClosedChannelWhenStarting -test.timeout=120s]
PASS
ok  	_/tmp/d20150111-16649-1h9gz9s	0.007s
[/tmp/go-build124363101/_/tmp/d20150111-16649-1h9gz9s/_test/d20150111-16649-1h9gz9s.test -test.run=TestWithClosedChannelMidway -test.timeout=120s]
PASS
ok  	_/tmp/d20150111-16649-1h9gz9s	5.005s
[/tmp/go-build887753376/_/tmp/d20150111-16649-1h9gz9s/_test/d20150111-16649-1h9gz9s.test -test.run=TestWhetherGlobalTimeoutIsHandled -test.timeout=120s]
PASS
ok  	_/tmp/d20150111-16649-1h9gz9s	15.006s
[/tmp/go-build633489714/_/tmp/d20150111-16649-1h9gz9s/_test/d20150111-16649-1h9gz9s.test -test.run=TestWithLoremIpsum -test.timeout=120s]
PASS
ok  	_/tmp/d20150111-16649-1h9gz9s	2.006s
[/tmp/go-build220103003/_/tmp/d20150111-16649-1h9gz9s/_test/d20150111-16649-1h9gz9s.test -test.run=TestIfTimeoutAndErrorCodesAreHonoured -test.timeout=120s]
--- FAIL: TestIfTimeoutAndErrorCodesAreHonoured-2 (5.00 seconds)
	solution_test.go:267: Function returned 'http://127.0.0.2:58595/page_over_3_seconds' when it should have returned 'http://127.0.0.2:58595/correct_page'
FAIL
exit status 1
FAIL	_/tmp/d20150111-16649-1h9gz9s	5.009s
[/tmp/go-build102349078/_/tmp/d20150111-16649-1h9gz9s/_test/d20150111-16649-1h9gz9s.test -test.run=TestRaceCondition -test.timeout=120s]
PASS
ok  	_/tmp/d20150111-16649-1h9gz9s	1.007s
[/tmp/go-build014744696/_/tmp/d20150111-16649-1h9gz9s/_test/d20150111-16649-1h9gz9s.test -test.run=TestCloseChannelBeforeFinish -test.timeout=120s]
PASS
ok  	_/tmp/d20150111-16649-1h9gz9s	1.007s

История (1 версия и 1 коментар)

Йордан обнови решението на 10.12.2014 22:48 (преди над 3 години)

+package main
+
+import (
+ "errors"
+ "fmt"
+ "io/ioutil"
+ "net/http"
+ "time"
+)
+
+func SeekAndDestroy(callback func(string) bool, chunkedUrlsToCheck <-chan []string, workersCount int) (string, error) {
+ // wrong numbers of workers
+ if workersCount <= 0 {
+ return "", errors.New("Workers are less than 1")
+ }
+
+ var urlsQueue []string
+ currentUrl := make(chan string)
+ gotIt := make(chan string)
+ closedChan := make(chan struct{})
+
+ // buffering the chans , cannot make it without another goroute
+ go func() {
+ for {
+ if len(urlsQueue) > 0 {
+ select {
+ case currentUrl <- urlsQueue[0]: // reading from the buffer
+ urlsQueue = urlsQueue[1:]
+ case urls, ok := <-chunkedUrlsToCheck: // writing in the buffer
+ if ok == false {
+ closedChan <- struct{}{}
+ return
+ }
+ for _, url := range urls {
+ urlsQueue = append(urlsQueue, url)
+ }
+ }
+ } else {
+ // insertion of the first array with urls
+ urls, ok := <-chunkedUrlsToCheck
+ if ok == false {
+ closedChan <- struct{}{}
+ return
+ }
+ for _, url := range urls {
+ urlsQueue = append(urlsQueue, url)
+ }
+
+ }
+ }
+ }()
+
+ // spawn all workers at once .... not good I know :(
+ for i := 0; i < workersCount; i++ {
+ go func() {
+ for {
+ // get the next url
+ url := <-currentUrl
+ resp, urlError := http.Get(url)
+
+ // check for url or status code
+ if urlError != nil || resp.StatusCode/100 != 2 {
+ break
+ }
+
+ // read the HTML
+ html, err := ioutil.ReadAll(resp.Body)
+ resp.Body.Close()
+
+ // problems in parsing the HTML
+ if err != nil {
+ break
+ }
+
+ //check for the callback
+ if callback(fmt.Sprintf("%s", html)) {
+ gotIt <- url
+ return
+ }
+ }
+
+ }()
+ }
+
+ // waiting for the result
+ select {
+ case url := <-gotIt:
+ // when the parent dies ... all children die as well
+ return url, nil
+ case <-time.After(15 * time.Second):
+ return "", errors.New("Time expired")
+ case <-closedChan:
+ return "", errors.New("Closed Chan")
+ }
+}