Решение на Concurrent Crawling от Цветелина Борисова

Обратно към всички решения

Към профила на Цветелина Борисова

Резултати

  • 5 точки от тестове
  • 0 бонус точки
  • 5 точки общо
  • 6 успешни тест(а)
  • 5 неуспешни тест(а)

Код

package main
import (
"io/ioutil"
"net/http"
"time"
)
type TimeoutError struct {
Message string
}
func (e *TimeoutError) Error() string {
return e.Message
}
func checkUrl(callback func(string) bool, url string, c chan<- string) {
timeout := time.Duration(3 * time.Second)
client := http.Client{
Timeout: timeout,
}
resp, err := client.Get(url)
if err != nil {
c <- ""
return
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
c <- ""
return
}
if callback(string(body)) {
c <- url
} else {
c <- ""
}
return
}
func SeekAndDestroy(callback func(string) bool, chunkedUrlsToCheck <-chan []string, workersCount int) (string, error) {
if workersCount < 0 || chunkedUrlsToCheck == nil {
return "", &TimeoutError{Message: "wow such much"}
}
resultChan := make(chan string, 20)
urlChan := make(chan string, 20)
start_time := time.Now().UTC()
avaliableWorkers := 0
for {
select {
case urls, ok := <-chunkedUrlsToCheck:
if ok {
for _, urll := range urls {
urlChan <- urll
}
} else {
return "", &TimeoutError{Message: "wow such much"}
}
case url := <-urlChan:
if avaliableWorkers < workersCount {
avaliableWorkers++
go checkUrl(callback, url, resultChan)
}
case v := <-resultChan:
avaliableWorkers--
if v != "" {
return v, nil
}
default:
timeout := time.Duration(15 * time.Second)
elapsed_time := time.Now().UTC()
if elapsed_time.After(start_time.Add(timeout)) {
return "", &TimeoutError{Message: "wow such much"}
} else {
time.Sleep(1 * time.Second)
}
}
}
return "", nil
}

Лог от изпълнението

[/tmp/go-build749706574/_/tmp/d20150111-16649-bchjfl/_test/d20150111-16649-bchjfl.test -test.run=TestWithNegativeWorkersCount -test.timeout=120s]
PASS
ok  	_/tmp/d20150111-16649-bchjfl	0.005s
[/tmp/go-build194081525/_/tmp/d20150111-16649-bchjfl/_test/d20150111-16649-bchjfl.test -test.run=TestWithZeroWorkersCount -test.timeout=120s]
--- FAIL: TestWithZeroWorkersCount-2 (1.00 seconds)
	solution_test.go:43: Test exceeded allowed time of 1 seconds: parameter errors should be immediately returned (workersCount is zero)
FAIL
exit status 1
FAIL	_/tmp/d20150111-16649-bchjfl	1.006s
[/tmp/go-build053640326/_/tmp/d20150111-16649-bchjfl/_test/d20150111-16649-bchjfl.test -test.run=TestWithInvalidCallback -test.timeout=120s]
--- FAIL: TestWithInvalidCallback-2 (1.00 seconds)
	solution_test.go:43: Test exceeded allowed time of 1 seconds: parameter errors should be immediately returned (callback is nil)
FAIL
exit status 1
FAIL	_/tmp/d20150111-16649-bchjfl	1.005s
[/tmp/go-build280440548/_/tmp/d20150111-16649-bchjfl/_test/d20150111-16649-bchjfl.test -test.run=TestWithNilChannel -test.timeout=120s]
PASS
ok  	_/tmp/d20150111-16649-bchjfl	0.008s
[/tmp/go-build190914167/_/tmp/d20150111-16649-bchjfl/_test/d20150111-16649-bchjfl.test -test.run=TestWithClosedChannelWhenStarting -test.timeout=120s]
PASS
ok  	_/tmp/d20150111-16649-bchjfl	0.005s
[/tmp/go-build284599876/_/tmp/d20150111-16649-bchjfl/_test/d20150111-16649-bchjfl.test -test.run=TestWithClosedChannelMidway -test.timeout=120s]
PASS
ok  	_/tmp/d20150111-16649-bchjfl	5.005s
[/tmp/go-build489213549/_/tmp/d20150111-16649-bchjfl/_test/d20150111-16649-bchjfl.test -test.run=TestWhetherGlobalTimeoutIsHandled -test.timeout=120s]
PASS
ok  	_/tmp/d20150111-16649-bchjfl	15.007s
[/tmp/go-build503654518/_/tmp/d20150111-16649-bchjfl/_test/d20150111-16649-bchjfl.test -test.run=TestWithLoremIpsum -test.timeout=120s]
--- FAIL: TestWithLoremIpsum-2 (4.00 seconds)
	solution_test.go:43: Test exceeded allowed time of 1 seconds: sending new urls to the channel should not be blocked
	solution_test.go:43: Test exceeded allowed time of 4 seconds: Connecting to localhost should be pretty fast...
FAIL
exit status 1
FAIL	_/tmp/d20150111-16649-bchjfl	4.008s
[/tmp/go-build898731509/_/tmp/d20150111-16649-bchjfl/_test/d20150111-16649-bchjfl.test -test.run=TestIfTimeoutAndErrorCodesAreHonoured -test.timeout=120s]
--- FAIL: TestIfTimeoutAndErrorCodesAreHonoured-2 (2.00 seconds)
	solution_test.go:43: Test exceeded allowed time of 1 seconds: sending new urls to the channel should not be blocked
	solution_test.go:267: Function returned 'http://127.0.0.2:39959/page_with_error_code' when it should have returned 'http://127.0.0.2:39959/correct_page'
FAIL
exit status 1
FAIL	_/tmp/d20150111-16649-bchjfl	2.006s
[/tmp/go-build468673427/_/tmp/d20150111-16649-bchjfl/_test/d20150111-16649-bchjfl.test -test.run=TestRaceCondition -test.timeout=120s]
--- FAIL: TestRaceCondition-2 (4.00 seconds)
	solution_test.go:293: Function returned 'http://127.0.0.2:45693/slow_success' when it should have returned 'http://127.0.0.2:45693/fast_success'
FAIL
exit status 1
FAIL	_/tmp/d20150111-16649-bchjfl	4.006s
[/tmp/go-build769075678/_/tmp/d20150111-16649-bchjfl/_test/d20150111-16649-bchjfl.test -test.run=TestCloseChannelBeforeFinish -test.timeout=120s]
PASS
ok  	_/tmp/d20150111-16649-bchjfl	2.006s

История (5 версии и 7 коментара)

Цветелина обнови решението на 08.12.2014 11:19 (преди над 3 години)

+package main
+
+import (
+ "io/ioutil"
+ "net/http"
+ "time"
+)
+
+func check(callback func(string) bool, url string, c chan string) error {
+ timeout := time.Duration(3 * time.Second)
+ client := http.Client{
+ Timeout: timeout,
+ }
+
+ resp, err := client.Get(url)
+
+ if err != nil {
+ return err
+ }
+ defer resp.Body.Close()
+ body, err := ioutil.ReadAll(resp.Body)
+ if err != nil {
+ return err
+ }
+ if callback(string(body)) {
+ c <- url
+ }
+
+ return nil
+}
+
+type MyTimeoutError struct {
+ Message string
+}
+
+func (e *MyTimeoutError) Error() string {
+ return e.Message
+}
+
+func SeekAndDestroy(callback func(string) bool, chunkedUrlsToCheck <-chan []string, workersCount int) (string, error) {
+ someChan := make(chan string, 1)
+ start_time := time.Now().UTC()
+ for {
+ select {
+ case urls := <-chunkedUrlsToCheck:
+ for i := 0; i <= workersCount; i++ {
+ go func(c chan string) {
+ for _, url := range urls {
+ check(callback, url, c)
+ }
+ }(someChan)
+ }
+ case v := <-someChan:
+ return v, nil
+ default:
+ timeout := time.Duration(15 * time.Second)
+ elapsed_time := time.Now().UTC()
+ if elapsed_time.After(start_time.Add(timeout)) {
+ return "", &MyTimeoutError{Message: "timeout"}
+ }
+ }
+ }
+ return "", nil
+}

Цветелина обнови решението на 08.12.2014 11:54 (преди над 3 години)

package main
import (
"io/ioutil"
"net/http"
"time"
)
-func check(callback func(string) bool, url string, c chan string) error {
+func check(callback func(string) bool, url string, c chan<- string) error {
timeout := time.Duration(3 * time.Second)
client := http.Client{
Timeout: timeout,
}
resp, err := client.Get(url)
if err != nil {
return err
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return err
}
if callback(string(body)) {
c <- url
}
return nil
}
type MyTimeoutError struct {
Message string
}
func (e *MyTimeoutError) Error() string {
return e.Message
}
func SeekAndDestroy(callback func(string) bool, chunkedUrlsToCheck <-chan []string, workersCount int) (string, error) {
- someChan := make(chan string, 1)
+ if workersCount < 0 || chunkedUrlsToCheck == nil {
+ return "", &MyTimeoutError{Message: "wow such much"}
+ }
+
+ resultChan := make(chan string, 1)
start_time := time.Now().UTC()
for {
select {
case urls := <-chunkedUrlsToCheck:
for i := 0; i <= workersCount; i++ {
- go func(c chan string) {
+ go func(c chan<- string) {
for _, url := range urls {
check(callback, url, c)
}
- }(someChan)
+ }(resultChan)
}
- case v := <-someChan:
+ case v := <-resultChan:
return v, nil
default:
timeout := time.Duration(15 * time.Second)
elapsed_time := time.Now().UTC()
if elapsed_time.After(start_time.Add(timeout)) {
- return "", &MyTimeoutError{Message: "timeout"}
+ return "", &MyTimeoutError{Message: "wow such much"}
}
}
}
return "", nil
}

Цветелина обнови решението на 08.12.2014 14:52 (преди над 3 години)

package main
import (
"io/ioutil"
"net/http"
"time"
)
func check(callback func(string) bool, url string, c chan<- string) error {
timeout := time.Duration(3 * time.Second)
client := http.Client{
Timeout: timeout,
}
resp, err := client.Get(url)
if err != nil {
return err
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return err
}
if callback(string(body)) {
c <- url
}
return nil
}
type MyTimeoutError struct {
Message string
}
func (e *MyTimeoutError) Error() string {
return e.Message
}
func SeekAndDestroy(callback func(string) bool, chunkedUrlsToCheck <-chan []string, workersCount int) (string, error) {
if workersCount < 0 || chunkedUrlsToCheck == nil {
return "", &MyTimeoutError{Message: "wow such much"}
}
resultChan := make(chan string, 1)
start_time := time.Now().UTC()
for {
select {
- case urls := <-chunkedUrlsToCheck:
- for i := 0; i <= workersCount; i++ {
- go func(c chan<- string) {
- for _, url := range urls {
- check(callback, url, c)
- }
- }(resultChan)
+ case urls, ok := <-chunkedUrlsToCheck:
+ if ok {
+ for i := 0; i <= workersCount; i++ {
+ go func(c chan<- string) {
+ for _, url := range urls {
+ check(callback, url, c)
+ }
+ }(resultChan)
+ }
+ } else {
+ return "", &MyTimeoutError{Message: "wow such much"}
}
case v := <-resultChan:
return v, nil
default:
timeout := time.Duration(15 * time.Second)
elapsed_time := time.Now().UTC()
if elapsed_time.After(start_time.Add(timeout)) {
return "", &MyTimeoutError{Message: "wow such much"}
}
}
}
return "", nil
}

Изглежда ми, че ти проверяваш всеки подаден url със всеки worker. Идеята на задачата е, че всеки си "хваща" по един url и го проверява. Примерно ако имаш лимит от 2 worker-а и получиш 5 url-а по канала, първия worker проверява първия url, втория worker съответно паралелно проверява втория url, който приключи първи проверява третия и т.н. Не е нужно 1 worker да е постоянен, може да го създаваш наново за всеки нов url, просто е важно url-ите да се проверяват конкурентно и да нямаш пуснати повече от workersCount проверки едновременно.

Също така, хвърли едно око и на функцията time.After. Тя ти връща канал, по който след определено от теб време получаваш нещо. Съответно може да замениш default case-а с нея.

Цветелина обнови решението на 10.12.2014 15:57 (преди над 3 години)

package main
import (
"io/ioutil"
"net/http"
"time"
)
-func check(callback func(string) bool, url string, c chan<- string) error {
+type TimeoutError struct {
+ Message string
+}
+
+func (e *TimeoutError) Error() string {
+ return e.Message
+}
+
+func checkUrl(callback func(string) bool, url string, c chan<- string) error {
timeout := time.Duration(3 * time.Second)
client := http.Client{
Timeout: timeout,
}
resp, err := client.Get(url)
if err != nil {
return err
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return err
}
if callback(string(body)) {
c <- url
}
return nil
}
-type MyTimeoutError struct {
- Message string
-}
-
-func (e *MyTimeoutError) Error() string {
- return e.Message
-}
-
func SeekAndDestroy(callback func(string) bool, chunkedUrlsToCheck <-chan []string, workersCount int) (string, error) {
if workersCount < 0 || chunkedUrlsToCheck == nil {
- return "", &MyTimeoutError{Message: "wow such much"}
+ return "", &TimeoutError{Message: "wow such much"}
}
resultChan := make(chan string, 1)
start_time := time.Now().UTC()
- for {
- select {
- case urls, ok := <-chunkedUrlsToCheck:
- if ok {
- for i := 0; i <= workersCount; i++ {
+ for i := 0; i <= workersCount; i++ {
+ for {
+ select {
+ case urls, ok := <-chunkedUrlsToCheck:
+ if ok {
go func(c chan<- string) {
for _, url := range urls {
- check(callback, url, c)
+ checkUrl(callback, url, c)
}
}(resultChan)
+ } else {
+ return "", &TimeoutError{Message: "wow such much"}
}
- } else {
- return "", &MyTimeoutError{Message: "wow such much"}
- }
- case v := <-resultChan:
- return v, nil
- default:
- timeout := time.Duration(15 * time.Second)
- elapsed_time := time.Now().UTC()
- if elapsed_time.After(start_time.Add(timeout)) {
- return "", &MyTimeoutError{Message: "wow such much"}
+ case v := <-resultChan:
+ return v, nil
+ default:
+ timeout := time.Duration(15 * time.Second)
+ elapsed_time := time.Now().UTC()
+ if elapsed_time.After(start_time.Add(timeout)) {
+ return "", &TimeoutError{Message: "wow such much"}
+ }
}
}
}
return "", nil
}

Странно, виж самия пример в go документацията на time.After, нещо подобно трябва да е.

Също така, това нещо е доста странно:

       for i := 0; i <= workersCount; i++ {
            for {
                    select {

Помисли имаш ли нужда въобще от двата цикъла и дали там им е мястото.

Цветелина обнови решението на 10.12.2014 23:22 (преди над 3 години)

package main
import (
"io/ioutil"
"net/http"
"time"
)
type TimeoutError struct {
Message string
}
func (e *TimeoutError) Error() string {
return e.Message
}
-func checkUrl(callback func(string) bool, url string, c chan<- string) error {
+func checkUrl(callback func(string) bool, url string, c chan<- string) {
timeout := time.Duration(3 * time.Second)
client := http.Client{
Timeout: timeout,
}
resp, err := client.Get(url)
if err != nil {
- return err
+ c <- ""
+ return
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
- return err
+ c <- ""
+ return
}
if callback(string(body)) {
c <- url
+ } else {
+ c <- ""
}
- return nil
+ return
}
func SeekAndDestroy(callback func(string) bool, chunkedUrlsToCheck <-chan []string, workersCount int) (string, error) {
if workersCount < 0 || chunkedUrlsToCheck == nil {
return "", &TimeoutError{Message: "wow such much"}
}
- resultChan := make(chan string, 1)
+ resultChan := make(chan string, 20)
+ urlChan := make(chan string, 20)
start_time := time.Now().UTC()
- for i := 0; i <= workersCount; i++ {
- for {
- select {
- case urls, ok := <-chunkedUrlsToCheck:
- if ok {
- go func(c chan<- string) {
- for _, url := range urls {
- checkUrl(callback, url, c)
- }
- }(resultChan)
- } else {
- return "", &TimeoutError{Message: "wow such much"}
+ avaliableWorkers := 0
+
+ for {
+ select {
+ case urls, ok := <-chunkedUrlsToCheck:
+ if ok {
+ for _, urll := range urls {
+ urlChan <- urll
}
- case v := <-resultChan:
+ } else {
+ return "", &TimeoutError{Message: "wow such much"}
+ }
+ case url := <-urlChan:
+ if avaliableWorkers < workersCount {
+ avaliableWorkers++
+ go checkUrl(callback, url, resultChan)
+ }
+ case v := <-resultChan:
+ avaliableWorkers--
+ if v != "" {
return v, nil
- default:
- timeout := time.Duration(15 * time.Second)
- elapsed_time := time.Now().UTC()
- if elapsed_time.After(start_time.Add(timeout)) {
- return "", &TimeoutError{Message: "wow such much"}
- }
+ }
+ default:
+ timeout := time.Duration(15 * time.Second)
+ elapsed_time := time.Now().UTC()
+ if elapsed_time.After(start_time.Add(timeout)) {
+ return "", &TimeoutError{Message: "wow such much"}
+ } else {
+ time.Sleep(1 * time.Second)
}
}
}
return "", nil
}

Благодаря за коментара. Надявам се сега да е малко по-добре. За time.After пробвах точно примера в документацията, но не знам защо не стана - пада теста за timeout. Чудя се дали така както съм го направила с каналите е ок и имам въпрос: като пусна една горутина: go func_name() - като функцията стигне до return, горутината умира нали?

Да, така определено изглежда по-добре логиката.

Относно time.After: май леко те обърках като посочих примера от документацията, извинявай. Там select-а не е във for цикъл, помисли защо това променя нещата в твоя случай.

И да, горутината умира след return, но ако имаш неща с defer те също се изпълняват и чак след това умира.